In [44]:
import os

import joblib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import sklearn.metrics as metrics
from scipy import stats
from sklearn.base import clone
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import GroupShuffleSplit
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import PolynomialFeatures

In [45]:
# Jupyter magic: render matplotlib figures inline in the notebook output.
%matplotlib inline

In [46]:
def rmse(y_true, y_pred):
    """Return the root-mean-squared error between ``y_true`` and ``y_pred``.

    The mean squared error is taken from ``metrics.mean_squared_error``; this
    wrapper only applies the square root to it.
    """
    mean_sq_err = metrics.mean_squared_error(y_true, y_pred)
    return np.sqrt(mean_sq_err)

def lcc(y_true, y_pred):
    """Return the Pearson linear correlation coefficient of the two inputs.

    ``stats.pearsonr`` yields a (coefficient, p-value) pair; only the
    coefficient is returned and the p-value is discarded.
    """
    return stats.pearsonr(y_true, y_pred)[0]

def srocc(y_true, y_pred):
    """Return the Spearman rank-order correlation coefficient of the two inputs.

    ``stats.spearmanr`` yields a (coefficient, p-value) pair; only the
    coefficient is returned and the p-value is discarded.
    """
    return stats.spearmanr(y_true, y_pred)[0]

In [47]:
def accuracy(y_true, y_pred):
    """Return one minus the mean absolute relative error of the predictions.

    Parameters
    ----------
    y_true : array-like
        Reference values. Each element is used as the denominator of its own
        relative error, so a zero entry yields ``inf``/``nan`` in the result.
    y_pred : array-like
        Predicted values, one per element of ``y_true``.

    Returns
    -------
    float
        ``1 - mean(|y_true - y_pred| / |y_true|)``.
    """
    # Flatten both inputs so a column vector and a flat vector are compared
    # element by element instead of being broadcast into an (n, n) matrix.
    y_true = np.ravel(np.asarray(y_true, dtype=float))
    y_pred = np.ravel(np.asarray(y_pred, dtype=float))
    # Divide by the magnitude of the reference: with a signed denominator a
    # negative label would produce a negative "error" and inflate the score.
    relative_error = np.abs(y_true - y_pred) / np.abs(y_true)
    return 1 - np.mean(relative_error)

In [48]:
# Scorers keyed by metric name. RMSE is an error, so it is registered with
# greater_is_better=False; the two correlation metrics are higher-is-better.
scorer = {
    'rmse': metrics.make_scorer(rmse, greater_is_better=False),
    'lcc': metrics.make_scorer(lcc, greater_is_better=True),
    'srocc': metrics.make_scorer(srocc, greater_is_better=True),
}
scorer

{'rmse': make_scorer(rmse, greater_is_better=False),
 'lcc': make_scorer(lcc),
 'srocc': make_scorer(srocc)}

In [49]:
# Load the precomputed features and labels from pickle files in the working directory.
# Presumably the two objects are row-aligned — TODO confirm.
# NOTE(review): unpickling can execute arbitrary code; only load files from a trusted source.
features = pd.read_pickle('features.pkl')
labels = pd.read_pickle('labels.pkl')

In [50]:
# Keep only the three columns that are fed to the model.
selected_columns = ['msssim', 'TA', 'psnrhvsm']
features_selected = features.loc[:, selected_columns]

In [51]:
# Feature-augmentation transformer.
# NOTE(review): with degree=1 this presumably adds no interaction terms, only a
# constant bias column — confirm that is intended.
Aug_tool = PolynomialFeatures(degree=1)

In [52]:
# Fit the augmentation transformer on the selected columns and transform them in one step.
features_aug = Aug_tool.fit_transform(features_selected)

In [53]:
# One group id per run of three consecutive rows (0, 0, 0, 1, 1, 1, ...), kept as
# an (n_rows, 1) column. It is later passed as `groups` to the group-aware splitter.
# Integer division replaces the np.matlib.repmat construction for two reasons:
#   * numpy.matlib is not imported explicitly by this notebook, so np.matlib is
#     only reachable if some other package happened to import it;
#   * np.arange(len / 3) takes a float bound and yields more labels than rows
#     whenever the row count is not a multiple of three.
group_label = (np.arange(len(features.index)) // 3).reshape(-1, 1)

In [54]:
# Base regressor handed to the grid search: fixed random_state for repeatable
# fits, n_jobs=-1 to parallelise across the available cores.
Reg_video = RandomForestRegressor(random_state=8, n_jobs=-1)

In [55]:
def CV_Generator(features, labels, group_label, n=8, test_ratio=0.2):
    """Yield ``(train, test)`` index pairs from a seeded ``GroupShuffleSplit``.

    ``n`` is forwarded as ``n_splits`` and ``test_ratio`` as ``test_size``;
    ``group_label`` is passed as the ``groups`` argument of ``split``. The
    splitter uses ``random_state=8``.

    This is a generator: it can be iterated only once per call.
    """
    splitter = GroupShuffleSplit(n_splits=n, test_size=test_ratio, random_state=8)
    yield from splitter.split(features, labels, groups=group_label)

In [56]:
# Hyper-parameter grid for the grid search; starts empty and is filled in below.
parameters_grid_GCV_3MET = {}

In [57]:
# Candidate values per RandomForestRegressor parameter. Entries with a single
# value are held fixed; the others span 2 * 3 * 3 * 3 * 2 = 108 combinations.
# NOTE(review): 'mae'/'mse' and max_features='auto' are spellings from older
# scikit-learn releases (the recorded outputs come from such a release); they
# would need renaming before running on a current version — confirm target version.
parameters_grid_GCV_3MET.update({
    'n_estimators': [181],
    'criterion': ['mae', 'mse'],
    'max_depth': [3, 4, 5],
    'min_samples_split': [2, 3, 4],
    'min_samples_leaf': [3, 4, 5],
    'max_features': ['sqrt', 'auto'],
    'bootstrap': [True],
    'verbose': [0],
    'oob_score': [True],
})

In [58]:
# Display the assembled grid as a visual check before launching the search.
parameters_grid_GCV_3MET

{'n_estimators': [181],
 'criterion': ['mae', 'mse'],
 'max_depth': [3, 4, 5],
 'min_samples_split': [2, 3, 4],
 'min_samples_leaf': [3, 4, 5],
 'max_features': ['sqrt', 'auto'],
 'bootstrap': [True],
 'verbose': [0],
 'oob_score': [True]}

In [59]:
# Exhaustive search over parameters_grid_GCV_3MET, evaluated on grouped shuffle splits.
# The splits are materialised into a list: a bare generator is consumed by the
# first fit, so re-running the fit cell on its own would find no folds left.
# All three scorers are recorded; the model that is refit at the end is the one
# ranked best on 'rmse'.
# NOTE(review): `iid` is kept because the recorded outputs come from a
# scikit-learn release that accepts it; later releases dropped the parameter.
parameters_grid_search_GCV_3MET = GridSearchCV(
    estimator=Reg_video,
    param_grid=parameters_grid_GCV_3MET,
    cv=list(CV_Generator(features_aug, labels, group_label)),
    n_jobs=-1,
    verbose=1,
    return_train_score=True,
    error_score=np.nan,
    scoring=scorer,
    refit='rmse',
    iid=False,
)

In [60]:
# Run the grid search on the augmented features. The recorded run below
# performed 864 fits (108 candidates x 8 folds).
parameters_grid_search_GCV_3MET.fit(features_aug, labels)

Fitting 8 folds for each of 108 candidates, totalling 864 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  26 tasks      | elapsed:    5.6s
[Parallel(n_jobs=-1)]: Done 176 tasks      | elapsed:   20.1s
[Parallel(n_jobs=-1)]: Done 426 tasks      | elapsed:   47.2s
[Parallel(n_jobs=-1)]: Done 776 tasks      | elapsed:  1.2min
[Parallel(n_jobs=-1)]: Done 864 out of 864 | elapsed:  1.4min finished


GridSearchCV(cv=<generator object CV_Generator at 0x1a0d199f68>,
       error_score=nan,
       estimator=RandomForestRegressor(bootstrap=True, criterion='mse', max_depth=None,
           max_features='auto', max_leaf_nodes=None,
           min_impurity_decrease=0.0, min_impurity_split=None,
           min_samples_leaf=1, min_samples_split=2,
           min_weight_fraction_leaf=0.0, n_estimators='warn', n_jobs=-1,
           oob_score=False, random_state=8, verbose=0, warm_start=False),
       fit_params=None, iid=False, n_jobs=-1,
       param_grid={'n_estimators': [181], 'criterion': ['mae', 'mse'], 'max_depth': [3, 4, 5], 'min_samples_split': [2, 3, 4], 'min_samples_leaf': [3, 4, 5], 'max_features': ['sqrt', 'auto'], 'bootstrap': [True], 'verbose': [0], 'oob_score': [True]},
       pre_dispatch='2*n_jobs', refit='rmse', return_train_score=True,
       scoring={'rmse': make_scorer(rmse, greater_is_better=False), 'lcc': make_scorer(lcc), 'srocc': make_scorer(srocc)},
       verbose=1)

In [61]:
# Show the winning parameter combination (selected on the 'rmse' scorer via refit='rmse').
parameters_grid_search_GCV_3MET.best_params_

{'bootstrap': True,
 'criterion': 'mae',
 'max_depth': 4,
 'max_features': 'sqrt',
 'min_samples_leaf': 5,
 'min_samples_split': 4,
 'n_estimators': 181,
 'oob_score': True,
 'verbose': 0}

In [62]:
# Re-evaluate the tuned configuration on 16 grouped shuffle splits and print
# train/test RMSE, LCC and SROCC for each split.
#
# A fresh clone is fitted per split. Calling .fit() directly on best_estimator_
# returns that same object every time, so `final_model` (captured at idx 5) would
# be silently overwritten by the later splits, and the grid search's own refit
# model would be clobbered as well.
#
# NOTE(review): the search was run on features_aug, while this loop fits on
# features_selected — confirm the difference is intended.
idx = 0
for train_idx, test_idx in CV_Generator(features_selected, labels, group_label, 16, test_ratio=0.2):
    train_features = features_selected.iloc[train_idx]
    train_labels = labels.iloc[train_idx]
    test_features = features_selected.iloc[test_idx]
    test_labels = labels.iloc[test_idx]
    # Unfitted copy with the tuned hyper-parameters (including random_state).
    best_model = clone(parameters_grid_search_GCV_3MET.best_estimator_)
    best_model.fit(train_features, train_labels)
    train_pred = best_model.predict(train_features)
    test_pred = best_model.predict(test_features)
    if idx == 5:
        # Keep the model trained on the sixth split as the final model.
        final_model = best_model
        #print('Model saved')
    print('RMSE:', rmse(train_labels, train_pred), rmse(test_labels, test_pred))
    print('LCC:', lcc(train_labels, train_pred), lcc(test_labels, test_pred))
    print('SROCC:', srocc(train_labels, train_pred), srocc(test_labels, test_pred))
    #print('Accuracy:', accuracy(train_labels, train_pred), accuracy(test_labels, test_pred))
    print('\n')
    idx += 1

RMSE: 6.803582613538476 8.04838668026069
LCC: 0.8319591677526218 0.7743754058193858
SROCC: 0.8508797465644932 0.7489542457107392


RMSE: 6.69389247742919 9.49204830063548
LCC: 0.8162366840813029 0.7951172857897284
SROCC: 0.828725544757909 0.7791505791505792


RMSE: 6.58292680001967 9.524238550358113
LCC: 0.8530049845499608 0.5261221263070012
SROCC: 0.8746045404615461 0.5444015444015444


RMSE: 6.908027251219146 7.7722058073036395
LCC: 0.8350509928341951 0.741345918650052
SROCC: 0.8523083739766578 0.7642213642213642


RMSE: 6.7000005563393685 8.469949652086225
LCC: 0.842268978382449 0.7067464654269381
SROCC: 0.8522621950064367 0.6748182007666963


RMSE: 7.21016573426127 5.1994655838282355
LCC: 0.8183041391124182 0.8998010628202033
SROCC: 0.8348901922171326 0.8916344916344917


RMSE: 6.855516662439892 7.975049846514465
LCC: 0.8231856471821565 0.8392829534109656
SROCC: 0.8372542031493762 0.8682110682110683


RMSE: 7.094420138455658 6.889633969356672
LCC: 0.8222163337415498 0.8086478799785