### Tuning model

A partir de los experimentos, y basándonos en las observaciones realizadas, de ahora en adelante trabajaremos solo con los modelos SVM y LGB.

Para el entrenamiento de cada modelo utilizaremos las siguientes variantes:

- Train: 2 seasons ; Test: 1 season ; Ventana deslizante de: 1 season
- Train: 3 seasons ; Test: 1 season ; Ventana deslizante de: 1 season

#### Setup

In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
# Reset the accumulators that collect results across experiment runs.
results_total = []
utils_exp.exp_results = []
experiment_name = f"{exp_prefix}2_season_tunning"

# Candidate models, configured with hyperparameters found by earlier searches.
best_models = [
    (
        "RF",
        RandomForestClassifier(
            n_estimators=300,
            max_depth=11,
            n_jobs=-1,
            random_state=0,
            criterion='entropy',
            max_features=19,
            min_samples_leaf=9,
        ),
    ),
    # The search also reported gamma=76.1465194934807 and
    # degree=0.4300244876201068. Both are only used by non-linear kernels,
    # and a non-integer `degree` is rejected by parameter validation in
    # recent scikit-learn releases, so they are not passed to the linear SVC.
    ('SVM', SVC(kernel='linear', random_state=0, C=63.513891775842986)),
]

# Season-based folds built with train_size=2 and test_size=1.
folds, train_seasons, test_seasons = sscv.split(train_size=2, test_size=1)
X, y = train.X_y_values(df, exp_X_columns, exp_y_columns)
#params = (experiment_name, best_models, folds, train_seasons, test_seasons, X, y)
#names, results = utils_exp.run_experiment(*params)
#results_total.append((experiment_name, results))

In [None]:
from yellowbrick.style import set_palette
import warnings
import io
from sklearn import base, metrics, model_selection, preprocessing, tree
from yellowbrick.classifier import ROCAUC
import yellowbrick.classifier

# Select the first configured model and keep only the final fold
# (as a one-element slice so it can still be iterated like `folds`).
name, model = best_models[0]
fold_last_season = folds[-1:]

# Use yellowbrick's 'flatui' palette for the plots drawn afterwards.
set_palette('flatui')

#### Explore model performance

In [None]:


#warnings.filterwarnings('ignore')
# Evaluate every tuned model on the final fold only.
fold_last_season = folds[len(folds)-1:]
# Position of the first selected fold inside `folds`. Without this offset the
# loop index restarts at 0 and the label printed is always test_seasons[0],
# not the season actually evaluated.
# Assumes test_seasons is parallel to folds -- TODO confirm against sscv.split.
season_offset = len(folds) - len(fold_last_season)
for name, model in best_models:
    for i, idx_data in enumerate(fold_last_season):
        print(f"Test season: {test_seasons[season_offset + i]}")
        train_idx, test_idx = idx_data
        X_train, X_test = X[train_idx], X[test_idx]
        y_train, y_test = y[train_idx].ravel(), y[test_idx].ravel()
        y_true = y_test
        fit_info = model.fit(X_train, y_train)

        # Confusion matrix (as percentages) on the held-out season.
        #with sns.plotting_context('paper'):
        #    fig, ax = plt.subplots(figsize=(2, 2), dpi=150)
        cm_viz = classifier.ConfusionMatrix(model, percent=True)
        cm_viz.fit(X_train, y_train)
        cm_viz.score(X_test, y_test)
        cm_viz.show()
        #cm_viz.poof()

        # Feature importances, drawn on a large dedicated figure.
        #with sns.plotting_context('talk'):
            #fig, ax = plt.subplots(figsize=(20, 20), dpi=300)
        plt.figure(figsize=(20, 20), dpi=300)
        fi_viz = features.FeatureImportances(model, labels=exp_X_columns, relative=False)
        fi_viz.fit(X_train, y_train)
        fi_viz.score(X_test, y_test)
        #fi_viz.poof()
        fi_viz.show()

#### ROC AUC

In [None]:
from sklearn.metrics import roc_auc_score

# Score `model` (whatever the previous cells left bound to that name) on the
# final fold and plot its ROC curve and class prediction error.
# Offset so the printed season matches the evaluated fold instead of always
# being test_seasons[0]; assumes test_seasons is parallel to folds -- TODO confirm.
season_offset = len(folds) - len(fold_last_season)
for i, idx_data in enumerate(fold_last_season):
    print(f"Test season: {test_seasons[season_offset + i]}")
    train_idx, test_idx = idx_data
    X_train, X_test = X[train_idx], X[test_idx]
    y_train, y_test = y[train_idx].ravel(), y[test_idx].ravel()
    y_true = y_test
    fit_info = model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    # Keep the computed score: previously the result was discarded and the
    # f-string printed the `roc_auc_score` function object itself.
    roc_auc = roc_auc_score(y_true, y_pred, average='weighted')
    print(f'roc_auc: {roc_auc}')

    # NOTE(review): unlike the confusion-matrix cell, the visualizers below
    # are scored without calling their own fit(); confirm this is supported
    # by the installed yellowbrick version.
    roc_viz = ROCAUC(model, classes=['LOSS', 'WIN'])
    roc_viz.score(X_test, y_test)
    roc_viz.show()

    # The name `roc_viz` is reused here for the class-prediction-error plot.
    roc_viz = classifier.ClassPredictionError(model, classes=['LOSS', 'WIN'])
    roc_viz.score(X_test, y_test)
    roc_viz.show()

#### Hyperopt

In [None]:
# Hold out the final fold; every earlier fold is used for validation
# during the hyperparameter search.
test_fold = folds[-1]
v_folds = folds[:-1]
#name, model = best_models[0]

In [None]:
from sklearn.metrics import roc_auc_score
from hyperopt import fmin, tpe, hp, STATUS_OK, Trials
from sklearn.svm import SVC
import lightgbm as lgb
import xgboost as xgb
from sklearn.ensemble import RandomForestClassifier

##### SVM

In [None]:
def hyperopt_cv(params):
    """Return the mean ROC AUC of an ``SVC`` over the validation folds.

    A fresh ``SVC(**params)`` is trained and evaluated on every
    ``(train_idx, test_idx)`` pair in the module-level ``v_folds``.

    Args:
        params: Keyword arguments forwarded unchanged to ``SVC``.

    Returns:
        The mean of the per-fold weighted ROC AUC scores, computed from the
        hard class predictions of ``model.predict``.
    """
    fold_scores = []

    #del params['normalize']
    #del params['scale']

    for train_idx, test_idx in v_folds:
        # Scale into local arrays only. Writing the scaled values back into
        # the shared X made the scaling compound across overlapping folds
        # and across every hyperopt trial.
        # The meaning of the third argument (5) is defined by
        # utils.feature_scaling, which is not visible here.
        X_train, X_test = utils.feature_scaling(X[train_idx], X[test_idx], 5)
        y_train, y_test = y[train_idx].ravel(), y[test_idx].ravel()
        model = SVC(**params)
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)
        fold_scores.append(roc_auc_score(y_test, y_pred, average='weighted'))

    return np.mean(fold_scores)

# Hyperopt search space for the SVC hyperparameters; each key is forwarded
# as a keyword argument to SVC by hyperopt_cv.
# NOTE(review): the only kernel option is 'linear', for which scikit-learn
# documents `gamma` and `degree` as unused; `degree` is also sampled as a
# float, which recent scikit-learn releases may reject -- confirm with the
# installed version.
space4svm = {
    'C': hp.uniform('C', 0, 100),
    'kernel': hp.choice('kernel', ['linear']),
    'gamma': hp.uniform('gamma', 0, 100),
    'degree': hp.uniform('degree', 0, 6)
    #'scale': hp.choice('scale', [0, 1]),
    #'normalize': hp.choice('normalize', [0, 1])
}

# Result recorded from a previous run:
# best: {'C': 63.513891775842986, 'degree': 0.4300244876201068, 'gamma': 76.1465194934807, 'kernel': 0}
def f(params):
    """Hyperopt objective: the negated CV score, so minimising maximises it."""
    score = hyperopt_cv(params)
    return {'loss': -score, 'status': STATUS_OK}


# Run the TPE search over the SVM space and report the best parameters found.
trials = Trials()
best = fmin(fn=f, space=space4svm, algo=tpe.suggest, max_evals=100, trials=trials)
print("best:", best)

##### RandomForest

In [None]:
def hyperopt_train_test(params):
    """Return the mean ROC AUC of a random forest over the validation folds.

    A fresh ``RandomForestClassifier(**params)`` is trained and evaluated on
    every ``(train_idx, test_idx)`` pair in the module-level ``v_folds``.

    Args:
        params: Keyword arguments forwarded unchanged to
            ``RandomForestClassifier``.

    Returns:
        The mean of the per-fold weighted ROC AUC scores, computed from the
        hard class predictions of ``model.predict``.
    """
    fold_scores = []

    #del params['normalize']
    #del params['scale']

    for train_idx, test_idx in v_folds:
        # Scale into local arrays only. Writing the scaled values back into
        # the shared X made the scaling compound across overlapping folds
        # and across every hyperopt trial.
        # The meaning of the third argument (5) is defined by
        # utils.feature_scaling, which is not visible here.
        X_train, X_test = utils.feature_scaling(X[train_idx], X[test_idx], 5)
        y_train, y_test = y[train_idx].ravel(), y[test_idx].ravel()
        model = RandomForestClassifier(**params)
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)
        fold_scores.append(roc_auc_score(y_test, y_pred, average='weighted'))

    return np.mean(fold_scores)

# Hyperopt search space for the random forest; each key is forwarded as a
# keyword argument to RandomForestClassifier by hyperopt_train_test.
# Every dimension uses hp.choice, so the dict returned by fmin holds the index
# of the selected option rather than its value (hyperopt.space_eval maps it
# back).
space4rf = {
    'max_depth': hp.choice('max_depth', range(1,20)),
    'max_features': hp.choice('max_features', range(1,20)),
    'n_estimators': hp.choice('n_estimators', range(100, 500, 50)),
    'criterion': hp.choice('criterion', ["gini", "entropy"]),
    'min_samples_leaf':  hp.choice('min_samples_leaf',  np.arange(1, 20, step=1, dtype=int)),
    #'min_samples_split': None,
    #'max_leaf_nodes': None
    #'scale': hp.choice('scale', [0, 1]),
    #'normalize': hp.choice('normalize', [0, 1])
}
# Best score seen so far by the objective. Rebound below to the parameter
# dict returned by fmin once the search has finished.
best = 0
def f(params):
    """Hyperopt objective for the random forest search.

    Evaluates ``params`` with ``hyperopt_train_test``, tracks the best score
    seen so far in the module-level ``best`` and returns the negated score as
    the loss, so that minimising the loss maximises the score.
    """
    global best
    acc = hyperopt_train_test(params)
    if acc > best:
        best = acc
        # Report only real improvements; the message used to be printed on
        # every trial because it sat outside this branch.
        print('new best:', best, params)
    return {'loss': -acc, 'status': STATUS_OK}
trials = Trials()
best = fmin(f, space4rf, algo=tpe.suggest, max_evals=300, trials=trials)
print("best:", best)

In [None]:
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
import lightgbm as lgb
import xgboost as xgboost

def hyperopt_train_test(params):
    """Return the mean ROC AUC of the classifier described by ``params``.

    ``params['type']`` selects the classifier ('RF', 'SVM', 'XGB' or 'LGB');
    the remaining entries are forwarded as keyword arguments to its
    constructor. The classifier is then trained and evaluated on every
    ``(train_idx, test_idx)`` pair in the module-level ``v_folds``.

    Args:
        params: Mapping with a ``'type'`` key plus the constructor keyword
            arguments for that classifier. It is not modified.

    Returns:
        The mean of the per-fold weighted ROC AUC scores, computed from hard
        class predictions, or ``0`` when ``'type'`` is not recognised.
    """
    # Work on a copy so the caller's dict keeps its 'type' entry.
    params = dict(params)
    t = params.pop('type')
    # NOTE(review): `xgb` is the alias imported in an earlier cell; this cell
    # imports xgboost under the name `xgboost`, so the XGB branch depends on
    # that earlier cell having run.
    if t == 'RF':
        clf = RandomForestClassifier(**params)
    elif t == 'SVM':
        clf = SVC(**params)
    elif t == 'XGB':
        clf = xgb.XGBClassifier(**params)
    elif t == 'LGB':
        clf = lgb.LGBMClassifier(**params)
    else:
        return 0

    fold_scores = []

    #del params['normalize']
    #del params['scale']

    for train_idx, test_idx in v_folds:
        # Scale into local arrays only. Writing the scaled values back into
        # the shared X made the scaling compound across overlapping folds
        # and across every hyperopt trial.
        # The meaning of the third argument (5) is defined by
        # utils.feature_scaling, which is not visible here.
        X_train, X_test = utils.feature_scaling(X[train_idx], X[test_idx], 5)
        y_train, y_test = y[train_idx].ravel(), y[test_idx].ravel()
        clf.fit(X_train, y_train)
        y_pred = clf.predict(X_test)
        fold_scores.append(roc_auc_score(y_test, y_pred, average='weighted'))

    return np.mean(fold_scores)

# Joint search space: hyperopt first picks one classifier branch, then samples
# that branch's hyperparameters. The 'type' entry tells hyperopt_train_test
# which classifier to build; the other keys become constructor arguments.
# Labels carry a numeric suffix (1 = XGB, 2 = LGB) so they stay unique across
# branches.
space = hp.choice('classifier_type', [
    # LightGBM branch.
    {
        'type': 'LGB',
        'n_estimators': hp.choice('n_estimators2', range(100, 500, 50)),
        'max_depth': hp.choice('max_depth2', range(1,20)),
        'num_leaves': hp.choice('num_leaves2', np.arange( 30, 150, 1, dtype=int)),
        'reg_alpha': hp.quniform('reg_alpha2', 0.0, 1.0, 0.1),
        'reg_lambda': hp.quniform('reg_lambda2', 0.0, 1.0, 0.1),
        'learning_rate': hp.loguniform('learning_rate2', np.log(0.01), np.log(0.2)),
        'min_child_weight': hp.choice('min_child_weight2', [1e-5, 1e-3, 1e-2, 1e-1, 1, 1e1, 1e2, 1e3, 1e4]),
        'min_child_samples': hp.choice('min_child_samples2', np.arange( 20, 500, 5, dtype=int))
    },
    # SVM branch (linear kernel only).
    # NOTE(review): `degree` is sampled as a float and, like `gamma`, is
    # documented by scikit-learn as unused for the linear kernel -- confirm
    # it is accepted by the installed version.
    {
        'type': 'SVM',
        'C': hp.uniform('C', 0, 100),
        'kernel': hp.choice('kernel', ['linear']),
        'gamma': hp.uniform('gamma', 0, 100),
        'degree': hp.uniform('degree', 0, 6)
    },{
        # XGBoost branch; the LightGBM-only options are left disabled.
        'type': 'XGB',
        'n_estimators': hp.choice('n_estimators1', range(100, 500, 50)),
        'max_depth': hp.choice('max_depth1', range(1,20)),
        #'num_leaves': hp.choice('num_leaves1', np.arange( 30, 150, 1, dtype=int)),
        'reg_alpha': hp.quniform('reg_alpha1', 0.0, 1.0, 0.1),
        'reg_lambda': hp.quniform('reg_lambda1', 0.0, 1.0, 0.1),
        'learning_rate': hp.loguniform('learning_rate1', np.log(0.01), np.log(0.2)),
        'min_child_weight': hp.choice('min_child_weight1', [1e-5, 1e-3, 1e-2, 1e-1, 1, 1e1, 1e2, 1e3, 1e4]),
        #'min_child_samples': hp.choice('min_child_samples1', np.arange( 20, 500, 5, dtype=int)),
    },
    # Random forest branch, currently disabled.
#     {
#         'type': 'RF',
#         'max_depth': hp.choice('max_depth', range(1,20)),
#         'max_features': hp.choice('max_features', range(1,20)),
#         'n_estimators': hp.choice('n_estimators', range(100, 500, 50)),
#         'criterion': hp.choice('criterion', ["gini", "entropy"]),
#         'min_samples_leaf':  hp.choice('min_samples_leaf',  np.arange(1, 20, step=1, dtype=int))
#         #'scale': hp.choice('scale', [0, 1]),
#         #'normalize': hp.choice('normalize', [0, 1])
#     }
])
# Running state shared with the objective: best score so far and number of
# evaluations performed. `best` is rebound to fmin's result once the search
# has finished.
best = 0
count = 0


def f(params):
    """Hyperopt objective for the joint multi-classifier search.

    Scores a copy of ``params`` with ``hyperopt_train_test``, reports each
    improvement and every 50th evaluation, and returns the negated score as
    the loss.
    """
    global best, count
    count += 1
    score = hyperopt_train_test(params.copy())
    if score > best:
        print('new best:', score, 'using', params['type'])
        best = score
    if not count % 50:
        print('iters:', count, ', acc:', score, 'using', params)
    return {'loss': -score, 'status': STATUS_OK}


trials = Trials()
best = fmin(fn=f, space=space, algo=tpe.suggest, max_evals=1000, trials=trials)
print('best:', best)