In [31]:
import pandas as pd
import numpy as np
import sklearn
import matplotlib as mpl
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
import sklearn.metrics as metrics
from sklearn.model_selection import GroupShuffleSplit

In [46]:
from sklearn import svm
from sklearn import multiclass 
from scipy import stats

In [33]:
from sklearn import preprocessing
from sklearn import utils

In [34]:
from sklearn.model_selection import train_test_split
from sklearn import datasets
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import GridSearchCV

In [35]:
dataset_socres = pd.read_pickle('labels.pkl')
features = pd.read_pickle('features.pkl')
nr_old=pd.read_pickle('nr_metrics.pkl')

In [36]:
def CV_Generator(features, labels, group_label, n=8, test_ratio=0.2):
    CV_Group = GroupShuffleSplit(n_splits=n, test_size=test_ratio, random_state=8)
    for train, test in CV_Group.split(features, labels, groups=group_label):
        yield train, test

In [61]:
def accuracy(y_true, y_pred):
    ratio = abs(y_true - y_pred) / y_true
    return 1 - np.mean(ratio)

In [37]:
features.head()

Unnamed: 0,msssim,psnr,psnrhvs,psnrhvsm,ssim,vifp,Blockiness,Blockloss,Blur,Contrast,Exposure(bri),Flickering,Interlace,Noise,SA,TA
G10BoatInPark_ERP_4096x2048_fps30_qp27_14547k.mp4,0.993424,41.829451,39.401728,43.855814,0.989905,0.818723,0.918001,4.662472,6.318822,56.274408,122.973648,0.973981,0.004543,0.385048,48.017774,12.572215
G10BoatInPark_ERP_4096x2048_fps30_qp37_3270k.mp4,0.978172,37.045777,34.074029,36.540714,0.975696,0.721947,0.923023,5.707591,6.802874,56.220687,122.235861,0.974016,0.001439,0.250219,45.613703,12.228131
G10BoatInPark_ERP_4096x2048_fps30_qp42_1507k.mp4,0.959965,34.534499,31.430402,33.216521,0.962685,0.672978,0.92307,6.641128,7.194109,56.157094,121.932088,0.973082,0.001407,0.243489,42.835611,11.782598
G10BodybuildingWorkout_ERP_7680x3840_fps29.97_qp27_6105k.mp4,0.997337,46.713699,44.069635,48.525252,0.99713,0.915188,0.941557,1.083126,10.663758,107.170623,125.0,0.975835,0.000408,0.691092,55.521111,3.173898
G10BodybuildingWorkout_ERP_7680x3840_fps29.97_qp37_913k.mp4,0.993475,42.391911,39.490941,42.097111,0.994158,0.873338,0.916841,0.990191,10.873596,107.176925,124.268885,0.97029,0.001945,0.350776,55.119636,3.099582


In [38]:
group_label = np.arange(len(features.index) / 3)
group_label = np.matlib.repmat(group_label,3,1)
group_label = group_label.reshape(-1,1, order='F')

In [39]:
def rmse(y_true, y_pred):
    rmse = np.sqrt(metrics.mean_squared_error(y_true, y_pred))
    return rmse

def lcc(y_true, y_pred):
    lcc, _ = stats.pearsonr(y_true, y_pred)
    return lcc

def srocc(y_true, y_pred):
    srocc, _ = stats.spearmanr(y_true, y_pred)
    return srocc

In [40]:
scorer = {}
scorer['rmse'] = metrics.make_scorer(rmse, greater_is_better=False)
scorer['lcc'] = metrics.make_scorer(lcc, greater_is_better=True)
scorer['srocc'] = metrics.make_scorer(srocc, greater_is_better=True)
scorer

{'rmse': make_scorer(rmse, greater_is_better=False),
 'lcc': make_scorer(lcc),
 'srocc': make_scorer(srocc)}

In [41]:
model_svr = svm.SVR(max_iter=3000)

In [81]:
#grid search for the best parameters for SVR
ParameterGrid={}
ParameterGrid['C']=[27] 
ParameterGrid['gamma']= [0.01, 0, 'auto', 'scale']
ParameterGrid['verbose'] = [0, 1]
ParameterGrid['degree'] = [2, 3]
ParameterGrid['kernel'] = ['rbf', 'poly']
ParameterGrid['tol'] = [0.1, 0.01, 0.001]

In [82]:
grid_search_svr= GridSearchCV(estimator = model_svr, param_grid = ParameterGrid, 
                          cv = CV_Generator(features, dataset_socres, group_label), n_jobs = -1, verbose = 1, return_train_score=True, 
                                      error_score = np.nan,scoring = scorer, refit = 'rmse', iid=False)

In [83]:
grid_search_svr.fit(features, dataset_socres)

Fitting 8 folds for each of 96 candidates, totalling 768 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  48 tasks      | elapsed:    0.3s
[Parallel(n_jobs=-1)]: Done 768 out of 768 | elapsed:    1.3s finished


GridSearchCV(cv=<generator object CV_Generator at 0x1a0ce0e308>,
       error_score=nan,
       estimator=SVR(C=1.0, cache_size=200, coef0=0.0, degree=3, epsilon=0.1,
  gamma='auto_deprecated', kernel='rbf', max_iter=3000, shrinking=True,
  tol=0.001, verbose=False),
       fit_params=None, iid=False, n_jobs=-1,
       param_grid={'C': [27], 'gamma': [0.01, 0, 'auto', 'scale'], 'verbose': [0, 1], 'degree': [2, 3], 'kernel': ['rbf', 'poly'], 'tol': [0.1, 0.01, 0.001]},
       pre_dispatch='2*n_jobs', refit='rmse', return_train_score=True,
       scoring={'rmse': make_scorer(rmse, greater_is_better=False), 'lcc': make_scorer(lcc), 'srocc': make_scorer(srocc)},
       verbose=1)

In [84]:
grid_search_svr.best_params_

{'C': 27,
 'degree': 2,
 'gamma': 'scale',
 'kernel': 'rbf',
 'tol': 0.01,
 'verbose': 0}

In [85]:
idx = 0
for train_idx, test_idx in CV_Generator(features, dataset_socres, group_label, 16, test_ratio=0.2):
    train_features = features.iloc[train_idx]
    train_labels = dataset_socres.iloc[train_idx]
    test_features = features.iloc[test_idx]
    test_labels = dataset_socres.iloc[test_idx]
    best_model = grid_search_svr.best_estimator_.fit(train_features, train_labels)
    train_pred = best_model.predict(train_features)
    test_pred = best_model.predict(test_features)
    if idx == 5:
        final_model = best_model
        print('Model saved')
    print('RMSE:', rmse(train_labels, train_pred), rmse(test_labels, test_pred))
    print('LCC:', lcc(train_labels, train_pred), lcc(test_labels, test_pred))
    print('SROCC:', srocc(train_labels, train_pred), srocc(test_labels, test_pred))
    #print('Accuracy:', accuracy(train_labels, train_pred), accuracy(test_labels, test_pred))
    print('\n')
    idx += 1

RMSE: 3.6764030482080408 8.452125416951658
LCC: 0.9558798593713201 0.7009726928323456
SROCC: 0.9666894986942933 0.7111969111969112


RMSE: 3.859803069944364 10.357298388865042
LCC: 0.9427640048579708 0.7620758174375366
SROCC: 0.9553191489361702 0.7848133848133849


RMSE: 3.863953556146061 7.316810128544472
LCC: 0.9528513010449268 0.6821535641638705
SROCC: 0.9583715056295219 0.6501930501930502


RMSE: 3.609176860364414 9.643204046975637
LCC: 0.9585977110515748 0.5595056288617928
SROCC: 0.964763046363286 0.5935649935649936


RMSE: 3.094085229737944 8.093377419573724
LCC: 0.9693810709385112 0.689307777695586
SROCC: 0.9687015711289011 0.7317889317889319


Model saved
RMSE: 3.766662652593603 5.932914242022221
LCC: 0.9550228671780459 0.8360403339111226
SROCC: 0.9638640352754826 0.8870012870012871


RMSE: 3.9952769220025712 7.7913933321472895
LCC: 0.9441189440160276 0.8087969190896183
SROCC: 0.952943191061261 0.8383526383526384


RMSE: 3.9561406981788347 7.538167551843059
LCC: 0.9504845499922

(177, 16)