In [1]:
import pandas as pd
import numpy as np
import sklearn
import matplotlib as mpl
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
import sklearn.metrics as metrics
from sklearn.model_selection import GroupShuffleSplit

In [2]:
from sklearn import svm
from sklearn import multiclass 
from scipy import stats

In [3]:
from sklearn import preprocessing
from sklearn import utils

In [4]:
from sklearn.model_selection import train_test_split
from sklearn import datasets
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import GridSearchCV

In [5]:
dataset_socres = pd.read_pickle('labels.pkl')
features = pd.read_pickle('features.pkl')
nr_old=pd.read_pickle('nr_metrics.pkl')

In [6]:
def CV_Generator(features, labels, group_label, n=8, test_ratio=0.2):
    CV_Group = GroupShuffleSplit(n_splits=n, test_size=test_ratio, random_state=8)
    for train, test in CV_Group.split(features, labels, groups=group_label):
        yield train, test

In [7]:
def accuracy(y_true, y_pred):
    ratio = abs(y_true - y_pred) / y_true
    return 1 - np.mean(ratio)

In [8]:
features.head()

Unnamed: 0,msssim,psnr,psnrhvs,psnrhvsm,ssim,vifp,Blockiness,Blockloss,Blur,Contrast,Exposure(bri),Flickering,Interlace,Noise,SA,TA
G10BoatInPark_ERP_4096x2048_fps30_qp27_14547k.mp4,0.993424,41.829451,39.401728,43.855814,0.989905,0.818723,0.918001,4.662472,6.318822,56.274408,122.973648,0.973981,0.004543,0.385048,48.017774,12.572215
G10BoatInPark_ERP_4096x2048_fps30_qp37_3270k.mp4,0.978172,37.045777,34.074029,36.540714,0.975696,0.721947,0.923023,5.707591,6.802874,56.220687,122.235861,0.974016,0.001439,0.250219,45.613703,12.228131
G10BoatInPark_ERP_4096x2048_fps30_qp42_1507k.mp4,0.959965,34.534499,31.430402,33.216521,0.962685,0.672978,0.92307,6.641128,7.194109,56.157094,121.932088,0.973082,0.001407,0.243489,42.835611,11.782598
G10BodybuildingWorkout_ERP_7680x3840_fps29.97_qp27_6105k.mp4,0.997337,46.713699,44.069635,48.525252,0.99713,0.915188,0.941557,1.083126,10.663758,107.170623,125.0,0.975835,0.000408,0.691092,55.521111,3.173898
G10BodybuildingWorkout_ERP_7680x3840_fps29.97_qp37_913k.mp4,0.993475,42.391911,39.490941,42.097111,0.994158,0.873338,0.916841,0.990191,10.873596,107.176925,124.268885,0.97029,0.001945,0.350776,55.119636,3.099582


In [9]:
group_label = np.arange(len(features.index) / 3)
group_label = np.matlib.repmat(group_label,3,1)
group_label = group_label.reshape(-1,1, order='F')

In [10]:
def rmse(y_true, y_pred):
    rmse = np.sqrt(metrics.mean_squared_error(y_true, y_pred))
    return rmse

def lcc(y_true, y_pred):
    lcc, _ = stats.pearsonr(y_true, y_pred)
    return lcc

def srocc(y_true, y_pred):
    srocc, _ = stats.spearmanr(y_true, y_pred)
    return srocc

In [11]:
scorer = {}
scorer['rmse'] = metrics.make_scorer(rmse, greater_is_better=False)
scorer['lcc'] = metrics.make_scorer(lcc, greater_is_better=True)
scorer['srocc'] = metrics.make_scorer(srocc, greater_is_better=True)
scorer

{'rmse': make_scorer(rmse, greater_is_better=False),
 'lcc': make_scorer(lcc),
 'srocc': make_scorer(srocc)}

In [12]:
model_svr = svm.SVR(max_iter=3000)

In [28]:
#grid search for the best parameters for SVR
ParameterGrid={}
ParameterGrid['C']=[27] 
ParameterGrid['gamma']= [0.01, 0, 'auto', 'scale']
ParameterGrid['verbose'] = [0, 1]
ParameterGrid['degree'] = [2, 3]
ParameterGrid['kernel'] = ['rbf', 'poly']
ParameterGrid['tol'] = [0.1, 0.01, 0.001]
ParameterGrid['epsilon'] = np.arange(0.05,0.15,0.01)

In [29]:
ParameterGrid

{'C': [27],
 'gamma': [0.01, 0, 'auto', 'scale'],
 'verbose': [0, 1],
 'degree': [2, 3],
 'kernel': ['rbf', 'poly'],
 'tol': [0.1, 0.01, 0.001],
 'epsilon': array([0.05, 0.06, 0.07, 0.08, 0.09, 0.1 , 0.11, 0.12, 0.13, 0.14])}

In [30]:
grid_search_svr= GridSearchCV(estimator = model_svr, param_grid = ParameterGrid, 
                          cv = CV_Generator(features, dataset_socres, group_label), n_jobs = -1, verbose = 1, return_train_score=True, 
                                      error_score = np.nan,scoring = scorer, refit = 'rmse', iid=False)

In [31]:
grid_search_svr.fit(features, dataset_socres)

Fitting 8 folds for each of 960 candidates, totalling 7680 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  44 tasks      | elapsed:    0.2s
[Parallel(n_jobs=-1)]: Done 3080 tasks      | elapsed:    4.9s
[Parallel(n_jobs=-1)]: Done 7680 out of 7680 | elapsed:   13.7s finished


GridSearchCV(cv=<generator object CV_Generator at 0x1a0d656990>,
       error_score=nan,
       estimator=SVR(C=1.0, cache_size=200, coef0=0.0, degree=3, epsilon=0.1,
  gamma='auto_deprecated', kernel='rbf', max_iter=3000, shrinking=True,
  tol=0.001, verbose=False),
       fit_params=None, iid=False, n_jobs=-1,
       param_grid={'C': [27], 'gamma': [0.01, 0, 'auto', 'scale'], 'verbose': [0, 1], 'degree': [2, 3], 'kernel': ['rbf', 'poly'], 'tol': [0.1, 0.01, 0.001], 'epsilon': array([0.05, 0.06, 0.07, 0.08, 0.09, 0.1 , 0.11, 0.12, 0.13, 0.14])},
       pre_dispatch='2*n_jobs', refit='rmse', return_train_score=True,
       scoring={'rmse': make_scorer(rmse, greater_is_better=False), 'lcc': make_scorer(lcc), 'srocc': make_scorer(srocc)},
       verbose=1)

In [32]:
grid_search_svr.best_params_

{'C': 27,
 'degree': 2,
 'epsilon': 0.08000000000000002,
 'gamma': 'scale',
 'kernel': 'rbf',
 'tol': 0.1,
 'verbose': 0}

In [33]:
idx = 0
for train_idx, test_idx in CV_Generator(features, dataset_socres, group_label, 16, test_ratio=0.2):
    train_features = features.iloc[train_idx]
    train_labels = dataset_socres.iloc[train_idx]
    test_features = features.iloc[test_idx]
    test_labels = dataset_socres.iloc[test_idx]
    best_model = grid_search_svr.best_estimator_.fit(train_features, train_labels)
    train_pred = best_model.predict(train_features)
    test_pred = best_model.predict(test_features)
    if idx == 5:
        final_model = best_model
        print('Model saved')
    print('RMSE:', rmse(train_labels, train_pred), rmse(test_labels, test_pred))
    print('LCC:', lcc(train_labels, train_pred), lcc(test_labels, test_pred))
    print('SROCC:', srocc(train_labels, train_pred), srocc(test_labels, test_pred))
    #print('Accuracy:', accuracy(train_labels, train_pred), accuracy(test_labels, test_pred))
    print('\n')
    idx += 1

RMSE: 3.679305983102991 8.447124634637527
LCC: 0.9557976219408639 0.7014188937172912
SROCC: 0.9665439445181727 0.7114543114543116


RMSE: 3.865605663882623 10.360236665696121
LCC: 0.9425969213777985 0.7622921107780034
SROCC: 0.9554090500449505 0.7830115830115831


RMSE: 3.8669991529318186 7.3125797693877175
LCC: 0.9527338343657263 0.6827283745966812
SROCC: 0.9580889592876406 0.6501930501930502


RMSE: 3.6078090738936512 9.638508168914402
LCC: 0.9585994139979798 0.5600113880330159
SROCC: 0.9647116743011258 0.5935649935649936


RMSE: 3.0953422606034646 8.090850882375712
LCC: 0.9693102514237563 0.6894950271167003
SROCC: 0.9686202320304805 0.7307593307593309


Model saved
RMSE: 3.769362328014192 5.923657798219429
LCC: 0.9548769409670231 0.8363670355518922
SROCC: 0.9641893916691638 0.8870012870012871


RMSE: 3.999601846833558 7.791944246368636
LCC: 0.9439344920820284 0.8089203223191738
SROCC: 0.952420908429299 0.8383526383526384


RMSE: 3.959852497807788 7.541371274194503
LCC: 0.95033834591

(177, 16)