## Training

In [1]:
import numpy as np
import pandas as pd
import pickle
from collections import namedtuple
from IPython.display import display, HTML
from sklearn.linear_model import LinearRegression, Ridge, Lasso, LogisticRegression
from sklearn.svm import SVR
from sklearn.kernel_ridge import KernelRidge
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.feature_selection import RFECV
from sklearn.metrics import r2_score,make_scorer
from sklearn.model_selection import TimeSeriesSplit
import matplotlib.pyplot as plt
from collections import Counter ,OrderedDict
from itertools import compress
%matplotlib inline

In [2]:
# Lightweight record types used throughout the notebook.
# NOTE(review): the pickles loaded below presumably contain instances of these
# types, so the type and field names should stay as they are — confirm.
Dataset = namedtuple('Dataset', ['exchange', 'df'])
DatasetMLModel = namedtuple(
    'DatasetMLModel',
    ['exchange', 'df', 'train_size', 'tscv_split', 'test_size'],
)
Regressor = namedtuple(
    'Regressor',
    ['name', 'regressor_class', 'params', 'type'],
)
FeatureSelection = namedtuple(
    'FeatureSelection',
    ['dataset', 'regressor', 'params', 'RFECV'],
)

In [3]:
# Restore the pickled inputs of this notebook. Unpickling can execute arbitrary
# code, so these files must come from a trusted source.
with open('datasets/set_of_datasets.pkl', 'rb') as datasets_file:
    set_of_datasets = pickle.load(datasets_file)

with open('datasets/fs_results.pkl', 'rb') as fs_results_file:
    fs_results = pickle.load(fs_results_file)
  

In [4]:
# Regressors that fit_all looks up by the regressor key found in the
# fs_results score index.
# LogisticRegression is deliberately left out: it is a classifier (categorical
# targets), not a regressor.
regressors = {
    'LinearRegression': Regressor('LinearRegression', LinearRegression, None, 'linear_model'),
    'Lasso': Regressor('Lasso', Lasso, None, 'linear_model'),
    'DecisionTreeRegressor': Regressor('DecisionTreeRegressor', DecisionTreeRegressor, None, 'tree'),
    'GradientBoostingRegressor': Regressor('GradientBoostingRegressor', GradientBoostingRegressor, None, 'ensemble'),
}

# Additional regressors that fit_all evaluates for every row of the
# feature-selection scores, using that row's features.
regressors_plus = {
    # 'Ridge': Regressor('Ridge', Ridge, None, 'linear_model'),  # currently disabled
    'SVR': Regressor('SVR', SVR, None, 'svm'),
    # NOTE(review): KernelRidge lives in sklearn.kernel_ridge yet is tagged
    # 'svm' — confirm the intended type label before relying on it.
    'KernelRidge': Regressor('KernelRidge', KernelRidge, None, 'svm'),
}

# Hyper-parameter grids handed to GridSearchCV, keyed by regressor name.
# An empty dict means "fit once with the estimator's defaults".
parameters_per_regressor = {
    'LinearRegression': {},
    # Duplicate grid values removed (100 was listed twice): identical
    # candidates only add fits without changing the selected model.
    # NOTE(review): a smaller value such as 10 may have been intended — confirm.
    'Lasso': {'alpha': [100, 1000, 10000]},
    'SVR': {
        'C': [1, 10, 100, 1000, 10000],
        'gamma': [0.01, 0.1, 1, 10, 100],
        'kernel': ['rbf', 'sigmoid', 'poly'],
    },
    # Duplicate 100 removed here as well.
    # NOTE(review): 1000 may have been intended as the last value — confirm.
    'KernelRidge': {
        'alpha': [1, 10, 100],
        'gamma': [0.01, 0.1, 1, 10, 100],
        'kernel': ['rbf', 'sigmoid', 'poly'],
    },
    # NOTE(review): 'mse' was renamed to 'squared_error' in scikit-learn 1.0 and
    # removed in 1.2; switch the value when the environment is upgraded.
    'DecisionTreeRegressor': {'max_depth': range(1, 15), 'criterion': ['mse', 'friedman_mse']},
    'GradientBoostingRegressor': {},
}


In [5]:
# make_scorer and DecisionTreeRegressor are already imported in the first cell; GridSearchCV is new here.
from sklearn.metrics import make_scorer
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import GridSearchCV

def make_X_Y(dataset, features=None, target=None):
    """Split a dataset's frame into a feature selection and a target selection.

    Args:
        dataset: Object exposing its data through a ``df`` attribute.
        features: Column label(s) used to index ``dataset.df`` for X.
            Defaults to ``['close']``.
        target: Column label(s) used to index ``dataset.df`` for y.
            Defaults to ``['close_Target']``.

    Returns:
        Tuple ``(X, y)`` holding the two selections from ``dataset.df``.
    """
    # Build the defaults per call instead of sharing mutable list objects
    # through the function signature.
    if features is None:
        features = ['close']
    if target is None:
        target = ['close_Target']
    return dataset.df[features], dataset.df[target]

def performance_metric(y_true, y_predict):
    """Score predictions against the true values with ``r2_score``.

    Args:
        y_true: Ground-truth target values.
        y_predict: Predicted target values.

    Returns:
        The value returned by ``r2_score(y_true, y_predict)``.
    """
    return r2_score(y_true, y_predict)


In [None]:
def _grid_search_row(regressor, param_grid, scoring, cv, X, y, labels, features):
    """Grid-search one regressor and return its row for the results table."""
    # `scoring` and `cv` are passed by keyword: scikit-learn >= 1.0 rejects
    # positional arguments after `param_grid`.
    grid = GridSearchCV(regressor.regressor_class(), param_grid, scoring=scoring, cv=cv)
    grid.fit(X, y)
    return list(labels) + [grid.best_score_, features, grid.best_estimator_]


def fit_all(regressors, parameters_per_regressor, fs_results, set_of_datasets,
            extra_regressors=None):
    """Grid-search every selected regressor on every feature-selection result.

    For each ``(dataset_key, fs_scores, target)`` entry of ``fs_results`` and
    each row of ``fs_scores`` (indexed by ``(currency_pair, regressor_key)``),
    the row's regressor is tuned with ``GridSearchCV`` on the row's
    ``'features'`` using a ``TimeSeriesSplit`` with ``dataset.tscv_split``
    splits. Every regressor in ``extra_regressors`` is then tuned on the same
    data, features and splitter.

    Args:
        regressors: Mapping of regressor key to an object exposing
            ``regressor_class``.
        parameters_per_regressor: Mapping of regressor key to the parameter
            grid passed to ``GridSearchCV``.
        fs_results: Iterable of ``(dataset_key, fs_scores, target)`` triples.
        set_of_datasets: Mapping ``dataset_key -> currency_pair -> dataset``;
            each dataset exposes ``df`` and ``tscv_split``.
        extra_regressors: Optional mapping of additional regressors to tune
            for every row. Defaults to the notebook-level ``regressors_plus``.

    Returns:
        DataFrame indexed by ``['Currency Pair', 'Regressor', 'Target',
        'Dataset']`` with the columns ``Score``, ``Features`` and
        ``BestEstimator``.
    """
    if extra_regressors is None:
        # Backward-compatible default: the dictionary defined at notebook level.
        extra_regressors = regressors_plus

    index_columns = ['Currency Pair', 'Regressor', 'Target', 'Dataset']
    columns = index_columns + ['Score', 'Features', 'BestEstimator']
    scoring_fnc = make_scorer(performance_metric)

    data = []
    for dataset_key, fs_scores, target in fs_results:
        set_data = set_of_datasets[dataset_key]
        for (currency_pair, regressor_key), row in fs_scores.iterrows():
            dataset = set_data[currency_pair]
            features = row['features']
            X, y = make_X_Y(dataset, features, target)
            cvts = TimeSeriesSplit(n_splits=dataset.tscv_split)
            # Progress trace: one line per feature-selection row.
            print(currency_pair, regressor_key, target, dataset_key)

            # The row's own regressor first, then the extra ones, all on the
            # same features and the same splitter.
            candidates = [(regressor_key, regressors[regressor_key])]
            candidates.extend(extra_regressors.items())
            for candidate_key, candidate in candidates:
                labels = (currency_pair, candidate_key, target, dataset_key)
                data.append(_grid_search_row(
                    candidate,
                    parameters_per_regressor[candidate_key],
                    scoring_fnc,
                    cvts,
                    X,
                    y,
                    labels,
                    features,
                ))

    result_cv = pd.DataFrame(data, columns=columns)
    return result_cv.set_index(index_columns)

# Run the full grid search; one result row per (pair, regressor, target, dataset).
scores = fit_all(
    regressors=regressors,
    parameters_per_regressor=parameters_per_regressor,
    fs_results=fs_results,
    set_of_datasets=set_of_datasets,
)
            
            
    
    


In [10]:
# Show the results ranked from the highest to the lowest cross-validation score.
scores.sort_values(by='Score', ascending=False)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Score,Features,BestEstimator
Currency Pair,Regressor,Target,Dataset,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
btc_brl,LinearRegression,close_Target,log_close_series,0.975044,"[close, EWMA26, EWMA12, EWMA9, Returns, log_re...","LinearRegression(copy_X=True, fit_intercept=Tr..."
btc_brl,LinearRegression,close_Target,log_close_series,0.975044,"[close, EWMA26, EWMA12, EWMA9, Returns, log_re...","LinearRegression(copy_X=True, fit_intercept=Tr..."
btc_brl,LinearRegression,close_Target,log_close_series,0.975044,"[close, EWMA26, EWMA12, EWMA9, Returns, log_re...","LinearRegression(copy_X=True, fit_intercept=Tr..."
btc_brl,LinearRegression,close_Target,clean_close_series,0.972352,"[close, Returns, log_return, log_MACD]","LinearRegression(copy_X=True, fit_intercept=Tr..."
btc_brl,LinearRegression,close_Target,clean_close_series,0.972352,"[close, Returns, log_return, log_MACD]","LinearRegression(copy_X=True, fit_intercept=Tr..."
btc_brl,LinearRegression,close_Target,clean_close_series,0.972352,"[close, Returns, log_return, log_MACD]","LinearRegression(copy_X=True, fit_intercept=Tr..."
btc_brl,Lasso,close_Target,clean_close_series,0.972159,[close],"Lasso(alpha=100, copy_X=True, fit_intercept=Tr..."
btc_brl,Lasso,close_Target,clean_close_series,0.972159,[close],"Lasso(alpha=100, copy_X=True, fit_intercept=Tr..."
btc_brl,Lasso,close_Target,clean_close_series,0.972159,[close],"Lasso(alpha=100, copy_X=True, fit_intercept=Tr..."
btc_brl,LinearRegression,close_Target,rescaled_close_series,0.971900,"[close, log_return, log_MACD]","LinearRegression(copy_X=True, fit_intercept=Tr..."
