In [None]:
import pandas as pd
import numpy as np
import os
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import GridSearchCV
from scipy import stats
import joblib
from sklearn.model_selection import train_test_split
import sklearn.metrics as metrics
from sklearn.model_selection import GroupShuffleSplit

In [None]:
def rmse(y_true, y_pred):
    """Return the root-mean-squared error between targets and predictions.

    The mean squared error is computed by ``sklearn.metrics`` and its square
    root is returned.
    """
    mean_sq_err = metrics.mean_squared_error(y_true, y_pred)
    return np.sqrt(mean_sq_err)

def lcc(y_true, y_pred):
    """Return the Pearson linear correlation coefficient of the two inputs.

    ``scipy.stats.pearsonr`` also reports a p-value; only the coefficient
    (the first element of the result) is returned here.
    """
    return stats.pearsonr(y_true, y_pred)[0]

def srocc(y_true, y_pred):
    """Return the Spearman rank-order correlation coefficient of the two inputs.

    ``scipy.stats.spearmanr`` also reports a p-value; only the coefficient
    (the first element of the result) is returned here.
    """
    return stats.spearmanr(y_true, y_pred)[0]

In [None]:
# Named scorers used for multi-metric evaluation. RMSE is an error, so it is
# registered with greater_is_better=False; the two correlation
# coefficients are registered with greater_is_better=True.
scorer = {
    'rmse': metrics.make_scorer(rmse, greater_is_better=False),
    'lcc': metrics.make_scorer(lcc, greater_is_better=True),
    'srocc': metrics.make_scorer(srocc, greater_is_better=True),
}
scorer

In [None]:
# Load the model inputs and the regression targets from pickle files located
# in the current working directory.
# NOTE(review): unpickling can execute arbitrary code; only load files that
# come from a trusted source.
features = pd.read_pickle('features.pkl')
labels = pd.read_pickle('labels.pkl')

In [None]:
# Load a previously saved object from disk (presumably an earlier grid-search
# result, judging by the file name — TODO confirm). It is not referenced by any
# of the cells visible in this notebook section.
parameters_grid_search_3rd = joblib.load('parameters_grid_search_3rd.sav')

In [None]:
# Display the index of the loaded labels for a quick sanity check.
labels.index

In [None]:
# Preview the first rows of the loaded features.
features.head()

In [None]:
# One group id per run of three consecutive rows: 0, 0, 0, 1, 1, 1, 2, ...
# These ids are handed to GroupShuffleSplit so that rows sharing an id are
# never split between the train and the test side of a fold.
# NOTE(review): assumes the rows of `features` are ordered so that every three
# consecutive rows belong together — confirm against how features.pkl is built.
#
# Integer floor division is used instead of np.matlib.repmat + reshape because
#   * `np.matlib` is only available after an explicit `import numpy.matlib`,
#     so the previous code could raise AttributeError on a fresh kernel;
#   * it always yields exactly one label per row, even when the row count is
#     not a multiple of three;
#   * the result is 1-D, the shape GroupShuffleSplit documents for `groups`.
group_label = np.arange(len(features.index)) // 3

In [None]:
# Base estimator that the grid search clones and tunes. The seed is fixed
# (random_state=8) and n_jobs=-1 lets the forest use all available cores.
Reg_video = RandomForestRegressor(random_state=8, n_jobs=-1)

In [None]:
def CV_Generator(features, labels, group_label, n=8, test_ratio=0.2):
    """Yield (train, test) index pairs from a seeded group-aware shuffle split.

    Parameters
    ----------
    features, labels :
        Data and targets forwarded to ``GroupShuffleSplit.split``.
    group_label :
        Group id per sample, passed as ``groups``.
    n : int, default 8
        Number of splits to generate.
    test_ratio : float, default 0.2
        Value forwarded as ``test_size``.

    Yields
    ------
    tuple
        ``(train, test)`` as produced by ``GroupShuffleSplit.split``.
    """
    splitter = GroupShuffleSplit(n_splits=n, test_size=test_ratio, random_state=8)
    fold_iter = splitter.split(features, labels, groups=group_label)
    for train_idx, test_idx in fold_iter:
        yield train_idx, test_idx

In [None]:
# Holder for the hyper-parameter grid that is later passed to GridSearchCV as
# `param_grid`.
parameters_grid_GCV_3MET = {}

In [None]:
# Hyper-parameter grid for the random-forest search. The dict is built in one
# literal so that re-running this cell always produces the same grid.
#
# `verbose` is deliberately NOT part of the grid: it only controls logging of
# the estimator, so searching over [0, 1] doubled the number of fits while
# producing identical models.
parameters_grid_GCV_3MET = {
    # 140, 142, ..., 168 trees (plain Python ints).
    'n_estimators': list(range(140, 170, 2)),
    # NOTE(review): scikit-learn >= 1.0 renames these criteria to
    # 'squared_error' / 'absolute_error'; keep in sync with the installed version.
    'criterion': ['mse', 'mae'],
    # Depth limits 7..12 plus None (no limit).
    'max_depth': list(range(7, 13)) + [None],
    'min_samples_split': [2, 3, 4, 5, 6],
    'min_samples_leaf': [1, 2, 3, 4, 5],
    'max_features': ['auto', 'sqrt', 'log2'],
    'bootstrap': [True, False],
}

In [None]:
# Display the assembled grid for review before launching the search.
parameters_grid_GCV_3MET

In [None]:
# Exhaustive multi-metric grid search over `parameters_grid_GCV_3MET`.
#
# * The group-aware splits are materialised into a list. GridSearchCV keeps
#   `cv` as an attribute, and a bare generator there (a) is exhausted after the
#   first `fit`, so re-running the fit cell would find no splits, and (b) cannot
#   be pickled, which makes a later `joblib.dump` of the search object fail.
# * `refit='rmse'` refits on the whole data with the best combination under the
#   'rmse' scorer. That scorer is registered with greater_is_better=False, so
#   its scores are negated and "best" means lowest RMSE.
# * `iid` is intentionally omitted: it was deprecated in scikit-learn 0.22 and
#   removed in 0.24, where passing it raises TypeError.
parameters_grid_search_GCV_3MET = GridSearchCV(
    estimator=Reg_video,
    param_grid=parameters_grid_GCV_3MET,
    cv=list(CV_Generator(features, labels, group_label)),
    scoring=scorer,
    refit='rmse',
    n_jobs=-1,
    verbose=1,
    return_train_score=True,
    error_score=np.nan,
)

In [None]:
# Run the grid search on the loaded features and labels. This evaluates every
# grid combination on every CV split, so expect it to be long-running.
parameters_grid_search_GCV_3MET.fit(features, labels)

In [None]:
# Show the parameter combination selected by the refit metric ('rmse').
parameters_grid_search_GCV_3MET.best_params_

In [None]:
# Persist the fitted search object to disk so the results can be reloaded
# with joblib.load without repeating the search.
joblib.dump(parameters_grid_search_GCV_3MET, 'parameters_grid_search_GCV_3MET.sav')