In [16]:
import os

import joblib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import sklearn.metrics as metrics
from scipy import stats
from sklearn.base import clone
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import GroupShuffleSplit
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import PolynomialFeatures

In [17]:
%matplotlib inline

In [18]:
def rmse(y_true, y_pred):
    """Return the root-mean-square error between targets and predictions."""
    mean_sq_err = metrics.mean_squared_error(y_true, y_pred)
    return np.sqrt(mean_sq_err)

def lcc(y_true, y_pred):
    """Return the Pearson linear correlation coefficient of the two inputs.

    The p-value that ``stats.pearsonr`` also reports is discarded.
    """
    return stats.pearsonr(y_true, y_pred)[0]

def srocc(y_true, y_pred):
    """Return the Spearman rank-order correlation coefficient of the two inputs.

    The p-value that ``stats.spearmanr`` also reports is discarded.
    """
    return stats.spearmanr(y_true, y_pred)[0]

In [19]:
def accuracy(y_true, y_pred):
    """Return one minus the mean absolute relative error of the predictions.

    Parameters
    ----------
    y_true : array-like
        Reference values; their magnitude is the denominator of the relative error.
    y_pred : array-like
        Predicted values, with as many elements as ``y_true``.

    Returns
    -------
    float
        ``1 - mean(|y_true - y_pred| / |y_true|)``.

    Notes
    -----
    Zeros in ``y_true`` are not special-cased; the division then follows
    NumPy semantics (inf / nan).
    """
    # Flatten to float arrays so that lists, Series and (n, 1) columns are all
    # compared element by element instead of failing or broadcasting.
    true_arr = np.asarray(y_true, dtype=float).ravel()
    pred_arr = np.asarray(y_pred, dtype=float).ravel()
    # Divide by the magnitude of the target: a signed denominator would turn
    # errors on negative targets into negative ratios and inflate the score.
    ratio = np.abs(true_arr - pred_arr) / np.abs(true_arr)
    return 1 - np.mean(ratio)

In [20]:
# Scorers handed to the grid search. RMSE is an error measure, so it is
# registered with greater_is_better=False; both correlations use True.
scorer = {
    'rmse': metrics.make_scorer(rmse, greater_is_better=False),
    'lcc': metrics.make_scorer(lcc, greater_is_better=True),
    'srocc': metrics.make_scorer(srocc, greater_is_better=True),
}
scorer

{'rmse': make_scorer(rmse, greater_is_better=False),
 'lcc': make_scorer(lcc),
 'srocc': make_scorer(srocc)}

In [21]:
# Load the pickled feature table and its labels from the working directory.
# NOTE(review): unpickling can execute arbitrary code — only load trusted files.
features, labels = (pd.read_pickle(name) for name in ('features.pkl', 'labels.pkl'))

In [22]:
# Keep only the three metric columns that are fed to the model.
features_selected = features[['msssim', 'TA', 'psnrhvsm']]

In [72]:
# Degree-3 polynomial feature expander, applied to the selected columns.
Aug_tool = PolynomialFeatures(degree=3)

In [73]:
# Fit the expander on the selected columns and build the augmented feature matrix.
features_aug = Aug_tool.fit_transform(features_selected)

In [23]:
# One group id per run of three consecutive rows (0, 0, 0, 1, 1, 1, ...), stored
# as an (n_rows, 1) float column. It is passed as `groups` to the grouped
# splitter so that rows sharing an id stay on the same side of a split.
# NOTE(review): assumes the rows are stored as consecutive triples — confirm
# against how features.pkl was built.
# Integer division on the row index is used instead of np.matlib.repmat:
# np.matlib is only reachable as an attribute when something has imported
# numpy.matlib, and this form always yields exactly one label per row, even
# when the row count is not a multiple of 3.
group_label = (np.arange(len(features.index)) // 3).astype(float).reshape(-1, 1)

In [24]:
# Base random-forest regressor used as the grid-search estimator; the seed is fixed.
# NOTE(review): n_jobs=-1 is set here and again on the GridSearchCV that wraps
# this estimator — check that the nested parallelism does not oversubscribe the CPU.
Reg_video = RandomForestRegressor(random_state=8, n_jobs=-1)

In [25]:
def CV_Generator(features, labels, group_label, n=8, test_ratio=0.2):
    """Yield ``(train, test)`` index pairs from a seeded, group-aware shuffle split.

    Parameters
    ----------
    features, labels
        Data forwarded to ``GroupShuffleSplit.split``.
    group_label
        Group id of every sample, forwarded as ``groups``.
    n : int
        Number of splits to generate.
    test_ratio : float
        Forwarded as ``test_size``.
    """
    splitter = GroupShuffleSplit(n_splits=n, test_size=test_ratio, random_state=8)
    yield from splitter.split(features, labels, groups=group_label)

In [74]:
# Empty container for the hyper-parameter grid of the three-metric model.
parameters_grid_GCV_3MET = dict()

In [75]:
# Candidate values for every random-forest hyper-parameter explored by the
# search; single-element lists pin a setting to one value.
parameters_grid_GCV_3MET.update({
    'n_estimators': [181],
    'criterion': ['mae', 'mse'],
    'max_depth': [3, 4, 5],
    'min_samples_split': [2, 3, 4],
    'min_samples_leaf': [3, 4, 5],
    'max_features': ['sqrt', 'auto'],
    'bootstrap': [True],
    'verbose': [0],
    'oob_score': [True],
})

In [76]:
# Display the assembled grid for inspection.
parameters_grid_GCV_3MET

{'n_estimators': [181],
 'criterion': ['mae', 'mse'],
 'max_depth': [3, 4, 5],
 'min_samples_split': [2, 3, 4],
 'min_samples_leaf': [3, 4, 5],
 'max_features': ['sqrt', 'auto'],
 'bootstrap': [True],
 'verbose': [0],
 'oob_score': [True]}

In [77]:
# Exhaustive search over the random-forest grid, scored with RMSE, LCC and
# SROCC and refit on the candidate that ranks best on the 'rmse' scorer.
# The group-aware splits are materialised into a list: a bare generator is
# exhausted the first time it is iterated, so the search object could not be
# fitted a second time without being re-created.
# NOTE(review): `iid` is only accepted by older scikit-learn releases — confirm
# the pinned version before upgrading.
parameters_grid_search_GCV_3MET = GridSearchCV(
    estimator=Reg_video,
    param_grid=parameters_grid_GCV_3MET,
    cv=list(CV_Generator(features_aug, labels, group_label)),
    n_jobs=-1,
    verbose=1,
    return_train_score=True,
    error_score=np.nan,
    scoring=scorer,
    refit='rmse',
    iid=False,
)

In [78]:
# Run the grid search on the polynomial-augmented features.
# NOTE(review): the recorded run of this cell ended in KeyboardInterrupt; the
# fitted attributes used further down only exist after an uninterrupted run.
parameters_grid_search_GCV_3MET.fit(features_aug, labels)

Fitting 8 folds for each of 108 candidates, totalling 864 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  26 tasks      | elapsed:    3.1s
[Parallel(n_jobs=-1)]: Done 176 tasks      | elapsed:   18.1s


KeyboardInterrupt: 

In [None]:
# Show the hyper-parameter combination chosen by the fitted search.
parameters_grid_search_GCV_3MET.best_params_

In [71]:
# Re-evaluate the tuned configuration on 16 group-aware splits, printing the
# train and test value of RMSE, LCC and SROCC for every split.
# NOTE(review): the folds are fitted on features_selected, whereas the search
# is fitted on features_aug — confirm which feature set is intended here.
for idx, (train_idx, test_idx) in enumerate(
        CV_Generator(features_selected, labels, group_label, 16, test_ratio=0.2)):
    train_features = features_selected.iloc[train_idx]
    train_labels = labels.iloc[train_idx]
    test_features = features_selected.iloc[test_idx]
    test_labels = labels.iloc[test_idx]

    # Fit an unfitted copy of the tuned estimator for every split. Calling
    # .fit() on best_estimator_ itself returns that same object each time, so
    # a model kept from one split would be overwritten by every later fit
    # (and the search's own best_estimator_ would be modified as well).
    best_model = clone(parameters_grid_search_GCV_3MET.best_estimator_)
    best_model.fit(train_features, train_labels)

    train_pred = best_model.predict(train_features)
    test_pred = best_model.predict(test_features)

    if idx == 5:
        # Keep the model trained on the split with index 5.
        final_model = best_model

    print('RMSE:', rmse(train_labels, train_pred), rmse(test_labels, test_pred))
    print('LCC:', lcc(train_labels, train_pred), lcc(test_labels, test_pred))
    print('SROCC:', srocc(train_labels, train_pred), srocc(test_labels, test_pred))
    print('\n')

RMSE: 6.364140215989266 7.517605994055263
LCC: 0.8478240559448553 0.7900208528773105
SROCC: 0.8592105826448049 0.7844777672464265


RMSE: 6.1131274580896555 8.840568312716524
LCC: 0.8371481310926322 0.8308902581965701
SROCC: 0.8498822723575495 0.8411840411840412


RMSE: 5.982007995348969 8.412565051454418
LCC: 0.8774053165977336 0.5907495443850971
SROCC: 0.8940065927479771 0.6047888974913621


RMSE: 6.2313942990825915 7.555863392904341
LCC: 0.8592652436636998 0.7493140646837659
SROCC: 0.8722034333661542 0.7624195624195624


RMSE: 6.126414604791773 8.118181221499444
LCC: 0.8662935322280394 0.6870930202043839
SROCC: 0.876634273727471 0.6836550836550838


RMSE: 6.648859335823215 4.9234188236563945
LCC: 0.8411988728067153 0.8875813382749393
SROCC: 0.8515347403570357 0.8813384813384814


RMSE: 6.22935876099841 7.429483876391015
LCC: 0.8497583663921751 0.8330031077734422
SROCC: 0.86844899182328 0.8746460746460748


RMSE: 6.582688024424035 6.294719584950135
LCC: 0.8407839155056009 0.843838816