# ``fit`` and ``predict`` methods for the dynamic regression-model selection model

In [10]:
class Dyn_reg_model():
    """Regressor that picks, per sample, which base regressor to trust.

    A classifier is trained to predict which of several base regressors has
    the smallest out-of-fold squared error for a given sample; at prediction
    time each sample is routed to the regressor the classifier selects.

    Parameters
    ----------
    reg_models : dict
        Mapping ``name -> regressor``.  Each regressor must expose ``fit`` and
        ``predict``.  The estimators are fitted in place, so a dict shared
        between several ``Dyn_reg_model`` instances is refitted by each of
        them.
    clf_model : classifier
        Object with ``fit(X, labels)`` and ``predict(X)`` used to select the
        regressor for each sample.

    Attributes
    ----------
    label_encoder : LabelEncoder or None
        Encoder mapping regressor names to the integer classes given to
        ``clf_model``; set by ``fit``.
    data_frame : pandas.DataFrame or None
        One column of out-of-fold squared errors per regressor plus a
        ``best_model_index`` column naming the best regressor of each row;
        set by ``fit``.
    fitted_ : bool
        ``True`` once ``fit`` has completed.
    """

    def __init__(self, reg_models, clf_model):
        self.reg_models = reg_models
        self.clf_model = clf_model
        self.label_encoder = None
        self.data_frame = None
        self.fitted_ = False

    def fit(self, X, y):
        """Fit the selector classifier and all base regressors.

        Parameters
        ----------
        X : array-like, one row per sample
        y : array-like of regression targets, one per row of ``X``

        Returns
        -------
        self

        Raises
        ------
        ValueError
            If ``reg_models`` is empty.
        """
        if not self.reg_models:
            raise ValueError('reg_models must contain at least one regressor')

        # Positional fold indices are applied below, so work on plain arrays.
        X = np.asarray(X)
        y = np.asarray(y)

        # A failed refit must not leave the model marked as usable.
        self.fitted_ = False
        self.data_frame = pd.DataFrame()
        kf = KFold(n_splits=5)

        # Out-of-fold squared error of every regressor on every sample.
        for name, regressor in self.reg_models.items():
            y_predicted = np.zeros(X.shape[0])
            for train_index, test_index in kf.split(X):
                regressor.fit(X[train_index], y[train_index])
                y_predicted[test_index] = np.asarray(regressor.predict(X[test_index]))
            self.data_frame[name] = (y - y_predicted) ** 2

        # Name of the lowest-error regressor for each row (first one on ties).
        best_model_index = self.data_frame.idxmin(axis=1)
        self.data_frame['best_model_index'] = best_model_index

        self.label_encoder = LabelEncoder()
        labels = self.label_encoder.fit_transform(self.data_frame['best_model_index'])
        self.clf_model.fit(X, labels)

        # Final regressors are trained on the full data set.
        for regressor in self.reg_models.values():
            regressor.fit(X, y)

        self.fitted_ = True
        return self

    def predict(self, X):
        """Predict targets by routing each row to its selected regressor.

        Parameters
        ----------
        X : array-like, one row per sample

        Returns
        -------
        numpy.ndarray
            One float64 prediction per row of ``X``.

        Raises
        ------
        RuntimeError
            If the model has not been fitted.
        """
        if not self.fitted_:
            raise RuntimeError('Model is not fitted yet!!!')

        X = np.asarray(X)
        if X.shape[0] == 0:
            return np.array([])

        # One classifier call for all rows, then one regressor call per
        # selected model, instead of two estimator calls per row.
        model_index = self.clf_model.predict(X)
        model_names = np.asarray(self.label_encoder.inverse_transform(model_index))

        predicted = np.empty(X.shape[0], dtype=float)
        for model_name in np.unique(model_names):
            rows = model_names == model_name
            predicted[rows] = self.reg_models[model_name].predict(X[rows])
        return predicted

## Cross-validation function

In [11]:
def cv_score(model, X, y, cv=4):
    """Cross-validate ``model`` with an unshuffled K-fold split.

    The model is refitted in place on the training part of every fold, so on
    return it holds the fit from the last fold.

    Parameters
    ----------
    model : estimator exposing ``fit(X, y)`` and ``predict(X)``
    X : numpy array of samples, indexable by integer index arrays
    y : numpy array of targets, indexable by integer index arrays
    cv : int
        Number of folds.

    Returns
    -------
    tuple of numpy.ndarray
        ``(test_mse, train_mse, y_pred)``: per-fold mean squared error on the
        held-out part, per-fold mean squared error on the training part, and
        the out-of-fold prediction for every sample (same shape as ``y``).
    """
    kf = KFold(n_splits=cv)
    result_test = []
    result_train = []
    y_pred = np.zeros(y.shape)
    for train_index, test_index in kf.split(X, y):
        model.fit(X[train_index], y[train_index])
        result_train.append(mean_squared_error(y[train_index], model.predict(X[train_index])))
        # Predict the held-out fold once and reuse it for both the score and
        # the out-of-fold prediction vector.
        fold_pred = model.predict(X[test_index])
        result_test.append(mean_squared_error(y[test_index], fold_pred))
        y_pred[test_index] = fold_pred
    return np.array(result_test), np.array(result_train), y_pred

### Import Libraries

In [12]:
import pandas as pd
import numpy as np
import matplotlib
import math
from time import time
from copy import deepcopy
import matplotlib.pyplot as plt
from scipy.stats import skew


%matplotlib inline

import warnings
warnings.filterwarnings('ignore')

from sklearn.model_selection import KFold
from sklearn.metrics import mean_squared_error, accuracy_score
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import cross_val_score

#Regression Models
from sklearn.linear_model import LassoCV, RidgeCV, ElasticNetCV
from sklearn.kernel_ridge import KernelRidge
from lightgbm import LGBMRegressor
from sklearn.tree import DecisionTreeRegressor
import xgboost
from sklearn.ensemble import GradientBoostingRegressor, RandomForestRegressor, AdaBoostRegressor, ExtraTreesRegressor

#Classification models
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.ensemble import AdaBoostClassifier, BaggingClassifier,ExtraTreesClassifier, GradientBoostingClassifier, RandomForestClassifier, VotingClassifier
from sklearn.svm import SVC
from lightgbm import LGBMClassifier

### Set of models

In [29]:
def _make_xgbr():
    """Return a fresh XGBRegressor with the settings used by both model sets."""
    return xgboost.XGBRegressor(
        max_depth=2,
        learning_rate=0.2,
        n_estimators=100,
        silent=True,
        objective='reg:linear',
        booster='gbtree',
        n_jobs=1,
        nthread=None,
        gamma=0,
        min_child_weight=1,
        max_delta_step=0,
        subsample=1,
        colsample_bytree=1,
        colsample_bylevel=1,
        reg_alpha=0,
        reg_lambda=1,
        scale_pos_weight=1,
        base_score=0.5,
        random_state=0,
        seed=None,
        missing=None,
    )


# Base regressors handed to the dynamic-selection model.  Only 'gbr' and 'xgbr'
# are active; the disabled candidates are kept below for reference:
#   'lr'    : LassoCV(alphas = [1, 0.1, 0.001, 0.0005], cv = 4)
#   'kr'    : KernelRidge(alpha=0.4, kernel='linear')
#   'rr'    : RidgeCV(alphas=(0.1, 1.0, 10.0))
#   'en'    : ElasticNetCV(l1_ratio=0.8, eps=0.00001, n_alphas=100, cv = 4)
#   'lgbmr' : LGBMRegressor(objective='regression', num_leaves=5, learning_rate=0.05, n_estimators=600,
#                           max_bin = 50, bagging_fraction = 0.6, bagging_freq = 5, feature_fraction = 0.25,
#                           feature_fraction_seed=9, bagging_seed=9, min_data_in_leaf = 6,
#                           min_sum_hessian_in_leaf = 11)
#   'dtr'   : DecisionTreeRegressor()
#   'rfr'   : RandomForestRegressor(criterion='mse', max_depth=None, min_samples_split=2, min_samples_leaf=1,
#                                   min_weight_fraction_leaf=0.0, max_features='auto', max_leaf_nodes=None,
#                                   min_impurity_decrease=0.0, min_impurity_split=None, bootstrap=True,
#                                   oob_score=False)
#   'etr'   : ExtraTreesRegressor()
#   'abr'   : AdaBoostRegressor()
mr_models = {
    'gbr': GradientBoostingRegressor(
        loss='ls',
        learning_rate=0.2,
        n_estimators=100,
        random_state=103,
    ),
    'xgbr': _make_xgbr(),
}

# Every model that is cross-validated; insertion order is the report order.
model = {
    'lasso': LassoCV(alphas=[1, 0.1, 0.001, 0.0005], cv=4),
    'kr': KernelRidge(alpha=0.4, kernel='linear'),
    'ridge': RidgeCV(alphas=(0.1, 1.0, 10.0)),
    'elast': ElasticNetCV(l1_ratio=0.8, eps=0.00001, n_alphas=100, cv=4),
    'lgbm': LGBMRegressor(
        objective='regression',
        num_leaves=5,
        learning_rate=0.05,
        n_estimators=600,
        max_bin=50,
        bagging_fraction=0.6,
        bagging_freq=5,
        feature_fraction=0.25,
        feature_fraction_seed=9,
        bagging_seed=9,
        min_data_in_leaf=6,
        min_sum_hessian_in_leaf=11,
    ),
    'dtr': DecisionTreeRegressor(),
    'gbr': GradientBoostingRegressor(
        loss='ls',
        learning_rate=0.1,
        n_estimators=100,
        random_state=103,
    ),
    'xgbr': _make_xgbr(),
    'rfr': RandomForestRegressor(
        criterion='mse',
        max_depth=None,
        min_samples_split=2,
        min_samples_leaf=1,
        min_weight_fraction_leaf=0.0,
        max_features='auto',
        max_leaf_nodes=None,
        min_impurity_decrease=0.0,
        min_impurity_split=None,
        bootstrap=True,
        oob_score=False,
    ),
    'abr': AdaBoostRegressor(),
    'etr': ExtraTreesRegressor(),
    # The dynamic-selection model under test, built on the regressors above.
    'meta-learning_r': Dyn_reg_model(
        mr_models,
        clf_model=ExtraTreesClassifier(
            n_estimators=50,
            max_depth=None,
            class_weight='balanced',
            random_state=101,
        ),
    ),
}

### Load Data

In [30]:
from sklearn.datasets import load_boston

# The whole data set is loaded as (features, target); folds are carved out of
# it later by the cross-validation helper.
# NOTE(review): load_boston was removed in scikit-learn 1.2 -- confirm the
# pinned scikit-learn version before re-running this cell.
X_train, y_train = load_boston(return_X_y=True)
print(*(part.shape for part in (X_train, y_train)))

(506, 13) (506,)


### Results

In [31]:
# Cross-validate every configured model, keeping the per-fold scores in `res`
# and the out-of-fold predictions in `pred`.
# NOTE(review): cv_score is called for mean squared errors, so the values
# printed under the "rmse" labels look like MSE, not RMSE -- confirm before
# quoting them.
res, pred = {}, {}
for name, estimator in model.items():
    start = time()
    res_test, res_train, y_pred = cv_score(estimator, X_train, y_train)
    res[name + '_train'] = res_train
    res[name + '_test'] = res_test
    pred[name] = y_pred
    summary = (
        name,
        res_train.mean(), res_train.std(),
        res_test.mean(), res_test.std(),
        time() - start,
    )
    print('%5s : rmse.train = %6.2f (%6.2f) rmse.test = %6.2f (%6.2f) cv_time = %6.2f s' % summary)

lasso : rmse.train =  22.99 (  5.62) rmse.test =  42.95 ( 17.41) cv_time =   0.08 s
   kr : rmse.train =  22.80 (  5.28) rmse.test =  40.45 ( 22.49) cv_time =   0.06 s
ridge : rmse.train =  20.21 (  4.09) rmse.test =  42.18 ( 19.58) cv_time =   0.03 s
elast : rmse.train =  22.78 (  5.75) rmse.test =  42.05 ( 16.98) cv_time =   0.54 s
 lgbm : rmse.train =   2.83 (  0.49) rmse.test =  24.44 ( 12.27) cv_time =   0.57 s
  dtr : rmse.train =   0.00 (  0.00) rmse.test =  33.97 ( 13.51) cv_time =   0.02 s
  gbr : rmse.train =   1.62 (  0.11) rmse.test =  19.79 ( 11.80) cv_time =   0.27 s
 xgbr : rmse.train =   2.56 (  0.26) rmse.test =  21.34 ( 11.41) cv_time =   0.23 s
  rfr : rmse.train =   1.99 (  0.41) rmse.test =  22.81 ( 10.86) cv_time =   0.14 s
  abr : rmse.train =   7.28 (  0.77) rmse.test =  24.93 ( 12.58) cv_time =   0.46 s
  etr : rmse.train =   0.00 (  0.00) rmse.test =  22.84 ( 11.55) cv_time =   0.11 s
meta-learning_r : rmse.train =   1.88 (  0.39) rmse.test =  18.69 ( 10.00) cv_time =   3.54 s

### Results of all models and the new model (using all regression models and the GaussianNB classifier)

lasso : rmse.train =  22.99 (  5.62) rmse.test =  **42.95 ( 17.41)** cv_time =   0.09 s  
   kr : rmse.train =  22.80 (  5.28) rmse.test =  **40.45 ( 22.49)** cv_time =   0.07 s  
ridge : rmse.train =  20.21 (  4.09) rmse.test =  **42.18 ( 19.58)** cv_time =   0.02 s  
elast : rmse.train =  22.78 (  5.75) rmse.test =  **42.05 ( 16.98)** cv_time =   0.55 s  
 lgbm : rmse.train =   2.83 (  0.49) rmse.test =  **24.44 ( 12.27)** cv_time =   0.67 s  
  dtr : rmse.train =   0.00 (  0.00) rmse.test =  **39.02 ( 17.00)** cv_time =   0.02 s  
  gbr : rmse.train =   1.62 (  0.11) rmse.test =  **19.79 ( 11.80)** cv_time =   0.26 s  
 xgbr : rmse.train =   2.56 (  0.26) rmse.test =  **21.34 ( 11.41)** cv_time =   0.23 s  
  rfr : rmse.train =   2.14 (  0.35) rmse.test =  **24.06 ( 11.67)** cv_time =   0.15 s  
  abr : rmse.train =   7.41 (  0.93) rmse.test =  **26.06 ( 12.70)** cv_time =   0.54 s  
  etr : rmse.train =   0.00 (  0.00) rmse.test =  **28.83 ( 15.31)** cv_time =   0.12 s  
**meta-learning_r** : rmse.train =   8.74 (  2.28) rmse.test =  **23.96 ( 11.21)** cv_time =  14.99 s

### Results using 2 best regression models (gbr, xgbr)

lasso : rmse.train =  22.99 (  5.62) rmse.test =  **42.95 ( 17.41)** cv_time =   0.08 s  
   kr : rmse.train =  22.80 (  5.28) rmse.test =  **40.45 ( 22.49)** cv_time =   0.06 s  
ridge : rmse.train =  20.21 (  4.09) rmse.test =  **42.18 ( 19.58)** cv_time =   0.03 s  
elast : rmse.train =  22.78 (  5.75) rmse.test =  **42.05 ( 16.98)** cv_time =   0.54 s  
 lgbm : rmse.train =   2.83 (  0.49) rmse.test =  **24.44 ( 12.27)** cv_time =   0.57 s  
  dtr : rmse.train =   0.00 (  0.00) rmse.test =  **33.97 ( 13.51)** cv_time =   0.02 s  
  gbr : rmse.train =   1.62 (  0.11) rmse.test =  **19.79 ( 11.80)** cv_time =   0.27 s  
 xgbr : rmse.train =   2.56 (  0.26) rmse.test =  **21.34 ( 11.41)** cv_time =   0.23 s  
  rfr : rmse.train =   1.99 (  0.41) rmse.test =  **22.81 ( 10.86)** cv_time =   0.14 s  
  abr : rmse.train =   7.28 (  0.77) rmse.test =  **24.93 ( 12.58)** cv_time =   0.46 s  
  etr : rmse.train =   0.00 (  0.00) rmse.test =  **22.84 ( 11.55)** cv_time =   0.11 s  
meta-learning_r : rmse.train =   1.88 (  0.39) rmse.test =  **18.69 ( 10.00)** cv_time =   3.54 s

### Best classification model selection

In [32]:
# Fit the dynamic-selection model once, only to obtain the per-sample
# "best regressor" labels that the candidate classifiers must learn.
mr = Dyn_reg_model(mr_models, clf_model=GaussianNB())
mr.fit(X_train, y_train)
# Result is not assigned; presumably meant for interactive inspection.
mr.data_frame['best_model_index'].value_counts()

# `df` aliases mr.data_frame, so the new 'class' column is added to it too.
df = mr.data_frame
le = LabelEncoder()
df['class'] = le.fit_transform(df['best_model_index'])
df.head()
target_train = df['class']

# Candidate selector classifiers, keyed by the short name used in the report.
clfs = {
    'dtc': DecisionTreeClassifier(criterion='entropy', max_depth=None, random_state=101),
    'gnb': GaussianNB(),
    'knc': KNeighborsClassifier(n_neighbors=11),
    'logr': LogisticRegression(
        C=2.7,
        class_weight='balanced',
        multi_class='multinomial',
        solver='lbfgs',
        random_state=101,
    ),
    'abc': AdaBoostClassifier(n_estimators=100, learning_rate=0.1, random_state=101),
    'bc': BaggingClassifier(n_estimators=20, bootstrap_features=True, random_state=101),
    'etc': ExtraTreesClassifier(
        n_estimators=50,
        max_depth=None,
        class_weight='balanced',
        random_state=101,
    ),
    'gbc': GradientBoostingClassifier(
        learning_rate=2.0,
        n_estimators=25,
        max_depth=5,
        random_state=101,
    ),
    'lgbmc': LGBMClassifier(
        boosting_type='goss',
        num_leaves=5,
        max_depth=4,
        learning_rate=0.02,
        n_estimators=80,
        class_weight='balanced',
    ),
    'rfc': RandomForestClassifier(
        n_estimators=10,
        max_depth=6,
        class_weight='balanced',
        random_state=101,
    ),
    'vc': VotingClassifier(
        estimators=[
            ('nb', GaussianNB()),
            ('ab', AdaBoostClassifier(n_estimators=100, learning_rate=2.1, random_state=101)),
            ('gp', GaussianProcessClassifier(kernel=None, multi_class='one_vs_rest', random_state=101)),
        ],
        voting='hard',
    ),
    'svc': SVC(C=0.5, kernel='sigmoid', class_weight='balanced', random_state=101),
    'gpc': GaussianProcessClassifier(kernel=None, multi_class='one_vs_rest', random_state=101),
}

# 4-fold accuracy of each candidate at predicting the best-regressor label.
for c, clf in clfs.items():
    start = time()
    score = cross_val_score(clf, X_train, target_train, cv=4, scoring='accuracy')
    print('%2s : %.3f (+/-%.3f) %6.2f s' % (c, score.mean(), score.std(), time() - start))

dtc : 0.460 (+/-0.049)   0.04 s
gnb : 0.443 (+/-0.092)   0.01 s
knc : 0.482 (+/-0.045)   0.02 s
logr : 0.395 (+/-0.063)   0.36 s
abc : 0.462 (+/-0.013)   0.86 s
bc : 0.466 (+/-0.039)   0.29 s
etc : 0.512 (+/-0.018)   0.39 s
gbc : 0.451 (+/-0.034)   0.38 s
lgbmc : 0.472 (+/-0.038)   0.13 s
rfc : 0.470 (+/-0.031)   0.10 s
vc : 0.486 (+/-0.034)   1.77 s
svc : 0.502 (+/-0.010)   0.04 s
gpc : 0.470 (+/-0.030)   0.62 s


### Classification models results

dtc : 0.460 (+/-0.049)   0.04 s  
gnb : 0.443 (+/-0.092)   0.01 s  
knc : 0.482 (+/-0.045)   0.02 s  
logr : 0.395 (+/-0.063)   0.36 s  
abc : 0.462 (+/-0.013)   0.86 s  
bc : 0.466 (+/-0.039)   0.29 s  
**etc : 0.512 (+/-0.018)   0.39 s**  
gbc : 0.451 (+/-0.034)   0.38 s  
lgbmc : 0.472 (+/-0.038)   0.13 s  
rfc : 0.470 (+/-0.031)   0.10 s  
vc : 0.486 (+/-0.034)   1.77 s  
svc : 0.502 (+/-0.010)   0.04 s  
gpc : 0.470 (+/-0.030)   0.62 s  