In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import LogisticRegression
from sklearn.decomposition import PCA
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score, roc_auc_score, roc_curve

In [3]:
pd.options.display.max_columns
df = pd.read_csv('df_all_rs.csv')
df = df.drop('Unnamed: 0', axis=1)
df = df.drop('index', axis=1)
df

Unnamed: 0,GAME_DATE_EST,GAME_ID,HOME_TEAM_ID,VISITOR_TEAM_ID,SEASON,TEAM_ID_home,PTS_home,FG_PCT_home,FT_PCT_home,FG3_PCT_home,AST_home,REB_home,TEAM_ID_away,PTS_away,FG_PCT_away,FT_PCT_away,FG3_PCT_away,AST_away,REB_away,HOME_TEAM_WINS
0,2004-04-14,20301188,1610612746,1610612760,2003,1610612746,87.0,0.423,0.727,0.214,17.0,37.0,1610612760,118.0,0.542,1.000,0.375,32.0,34.0,0
1,2004-04-14,20301184,1610612759,1610612743,2003,1610612759,93.0,0.424,0.679,0.100,15.0,58.0,1610612743,67.0,0.325,0.611,0.222,11.0,47.0,1
2,2004-04-14,20301181,1610612754,1610612741,2003,1610612754,101.0,0.420,0.794,0.316,24.0,58.0,1610612741,96.0,0.420,0.667,0.357,20.0,41.0,1
3,2004-04-14,20301177,1610612764,1610612740,2003,1610612764,78.0,0.375,0.714,0.211,13.0,39.0,1610612740,94.0,0.451,0.600,0.364,24.0,48.0,0
4,2004-04-14,20301179,1610612752,1610612739,2003,1610612752,90.0,0.481,0.714,0.400,13.0,42.0,1610612739,100.0,0.488,0.900,0.364,22.0,40.0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19393,2018-10-17,21800011,1610612758,1610612762,2018,1610612758,117.0,0.516,0.667,0.368,17.0,37.0,1610612762,123.0,0.519,0.737,0.481,21.0,44.0,0
19394,2018-10-17,21800012,1610612746,1610612743,2018,1610612746,98.0,0.398,0.833,0.286,21.0,47.0,1610612743,107.0,0.379,0.786,0.333,20.0,56.0,0
19395,2018-10-17,21800013,1610612756,1610612742,2018,1610612756,121.0,0.543,0.875,0.559,35.0,44.0,1610612742,100.0,0.432,0.700,0.303,28.0,38.0,1
19396,2018-10-16,21800001,1610612738,1610612755,2018,1610612738,105.0,0.433,0.714,0.297,21.0,55.0,1610612755,87.0,0.391,0.609,0.192,18.0,47.0,1


In [4]:
target = df['HOME_TEAM_WINS']
data = df[['FG_PCT_home', 'FG_PCT_away', 'FG3_PCT_home', 'FG3_PCT_away']]
data_copy = data.copy()
normalized_data = (data_copy - data_copy.mean()) / data_copy.std()

In [5]:
def evaluar_metricas(estimator, data, target, name):
    
    X_train, X_test, y_train, y_test = train_test_split(data,target)

    estimator.fit(X_train, y_train)
    y_hat = estimator.predict(X_test)

    errors = y_test - y_hat
    mae = np.mean(np.abs(errors))
    mse = np.mean(errors ** 2)
    rmse = np.sqrt(mse)
    
    return [name, mse, rmse, mae]

In [6]:
metrics_results = pd.DataFrame(columns=['NAME', 'MSE','RMSE', 'MAE'])
metrics_results

Unnamed: 0,NAME,MSE,RMSE,MAE


In [None]:
#Kn
for i in range(1, 51):
    kn = KNeighborsRegressor(n_neighbors=i)
    metrics_results.loc[len(metrics_results)+1] = \
        evaluar_metricas(kn, 
                         normalized_data, 
                         target, 
                         'kn_normalized_neighbors_'+str(i))

In [7]:
#kn n times:
metrics_results_kn = pd.DataFrame(columns=['NAME', 'MSE','RMSE', 'MAE'])

for n in range(1, 11):
    metrics_results_kn_2 = pd.DataFrame(columns=['NAME','MSE','RMSE', 'MAE'])
    for i in range(1, 51):
        kn = KNeighborsRegressor(n_neighbors=i)
        metrics_results_kn_2.loc[len(metrics_results_kn_2)+1] = \
            evaluar_metricas(kn, 
                             normalized_data, 
                             target, 
                             'kn_normalized_neighbors_'+str(i))
    
    
    if n==1:
        metrics_results_kn = metrics_results_kn_2
    else: 
        metrics_results_kn = pd.concat([metrics_results_kn, metrics_results_kn_2[['MSE','RMSE', 'MAE']]], axis=1)
metrics_results_kn

Unnamed: 0,NAME,MSE,RMSE,MAE,MSE.1,RMSE.1,MAE.1,MSE.2,RMSE.2,MAE.2,...,MAE.3,MSE.3,RMSE.3,MAE.4,MSE.4,RMSE.4,MAE.5,MSE.5,RMSE.5,MAE.6
1,kn_normalized_neighbors_1,0.255052,0.505026,0.255052,0.256289,0.50625,0.256289,0.26,0.509902,0.26,...,0.262887,0.271959,0.521497,0.271959,0.264124,0.51393,0.264124,0.261856,0.511718,0.261856
2,kn_normalized_neighbors_2,0.192835,0.43913,0.260928,0.197526,0.444439,0.263711,0.189742,0.435594,0.255773,...,0.263711,0.193711,0.440127,0.261237,0.199381,0.446521,0.264536,0.203711,0.451344,0.26701
3,kn_normalized_neighbors_3,0.169278,0.411435,0.258213,0.173631,0.416691,0.259725,0.179084,0.423183,0.264811,...,0.261512,0.174502,0.417734,0.264536,0.173494,0.416526,0.261512,0.173356,0.416361,0.260962
4,kn_normalized_neighbors_4,0.162719,0.403385,0.261495,0.162281,0.402841,0.26232,0.163131,0.403895,0.265928,...,0.262577,0.164858,0.406027,0.263247,0.161314,0.40164,0.260515,0.16384,0.404772,0.260825
5,kn_normalized_neighbors_5,0.155373,0.394174,0.260412,0.153221,0.391434,0.260454,0.152726,0.390801,0.259216,...,0.260454,0.157328,0.396646,0.264495,0.157847,0.3973,0.261897,0.152586,0.390622,0.259258
6,kn_normalized_neighbors_6,0.144588,0.380247,0.257904,0.153259,0.391483,0.263952,0.154422,0.392965,0.267079,...,0.261443,0.148562,0.385438,0.26299,0.147199,0.383666,0.261065,0.152194,0.39012,0.265189
7,kn_normalized_neighbors_7,0.151302,0.388976,0.265773,0.148285,0.385078,0.262916,0.149695,0.386904,0.263181,...,0.264065,0.152266,0.390213,0.268454,0.143345,0.37861,0.256377,0.139874,0.373997,0.2557
8,kn_normalized_neighbors_8,0.147477,0.384028,0.265747,0.148972,0.385969,0.266572,0.143805,0.379216,0.260284,...,0.260979,0.143367,0.378638,0.261469,0.148276,0.385067,0.265541,0.150383,0.387793,0.268943
9,kn_normalized_neighbors_9,0.144915,0.380678,0.26378,0.141525,0.376198,0.261993,0.147596,0.384182,0.266552,...,0.261558,0.146155,0.382302,0.263666,0.141767,0.376519,0.260504,0.145516,0.381466,0.266942
10,kn_normalized_neighbors_10,0.140953,0.375437,0.260907,0.143361,0.37863,0.265443,0.138584,0.372268,0.257959,...,0.258804,0.141287,0.375881,0.262515,0.142602,0.377627,0.261773,0.140569,0.374925,0.260082


In [21]:
metrics_results_kn.style.highlight_min(subset=[
    
], color = 'lightgreen', axis = 0)

KeyError: '`Styler.apply` and `.applymap` are not compatible with non-unique index or columns.'

<pandas.io.formats.style.Styler at 0x7fa59bbf9670>

In [None]:
#dt
metrics_results = pd.DataFrame(columns=['NAME', 'MSE','RMSE', 'MAE'])
for i in range(1, 11):
    for j in range(1,11):
        dt = DecisionTreeRegressor(max_depth=i, min_samples_leaf=j)
        metrics_results.loc[len(metrics_results)+1] = \
            evaluar_metricas(dt, 
                             normalized_data, 
                             target, 
                             'dt_normalized__maxdepth_'+str(i)+'__minsamplesleaf_'+str(j))
        
metrics_results

In [8]:
#dt n times:
metrics_results_dt = pd.DataFrame(columns=['NAME', 'MSE','RMSE', 'MAE'])

for n in range(1, 11):
    metrics_results_dt_2 = pd.DataFrame(columns=['NAME','MSE','RMSE', 'MAE'])
    for i in range(1, 11):
        for j in range(1,11):
            dt = DecisionTreeRegressor(max_depth=i, min_samples_leaf=j)
            metrics_results_dt_2.loc[len(metrics_results_dt_2)+1] = \
                evaluar_metricas(dt, 
                                 normalized_data, 
                                 target, 
                                 'dt_normalized__maxdepth_'+str(i)+'__minsamplesleaf_'+str(j))
    
    
    if n==1:
        metrics_results_dt = metrics_results_dt_2
    else: 
        metrics_results_dt = pd.concat([metrics_results_dt, metrics_results_dt_2[['MSE','RMSE', 'MAE']]], axis=1)
metrics_results_dt

Unnamed: 0,NAME,MSE,RMSE,MAE,MSE.1,RMSE.1,MAE.1,MSE.2,RMSE.2,MAE.2,...,MAE.3,MSE.3,RMSE.3,MAE.4,MSE.4,RMSE.4,MAE.5,MSE.5,RMSE.5,MAE.6
1,dt_normalized__maxdepth_1__minsamplesleaf_1,0.211160,0.459521,0.417998,0.208492,0.456609,0.416755,0.211638,0.460041,0.418784,...,0.418823,0.207076,0.455056,0.416267,0.208071,0.456148,0.416397,0.207474,0.455494,0.415277
2,dt_normalized__maxdepth_1__minsamplesleaf_2,0.209962,0.458216,0.417782,0.209585,0.457805,0.417907,0.209978,0.458234,0.418726,...,0.417058,0.208347,0.456451,0.416328,0.214001,0.462602,0.421006,0.211891,0.460316,0.418905
3,dt_normalized__maxdepth_1__minsamplesleaf_3,0.209170,0.457352,0.417641,0.211741,0.460153,0.418528,0.212406,0.460875,0.417888,...,0.414389,0.210219,0.458497,0.417688,0.210265,0.458546,0.417746,0.215985,0.464742,0.422121
4,dt_normalized__maxdepth_1__minsamplesleaf_4,0.210757,0.459083,0.419232,0.209625,0.457848,0.417232,0.210568,0.458877,0.417301,...,0.414579,0.215010,0.463692,0.421562,0.210424,0.458720,0.417774,0.208177,0.456265,0.416575
5,dt_normalized__maxdepth_1__minsamplesleaf_5,0.206884,0.454845,0.413821,0.211333,0.459710,0.419009,0.208464,0.456579,0.417352,...,0.420055,0.209037,0.457205,0.417629,0.207957,0.456023,0.415544,0.211410,0.459794,0.418477
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
96,dt_normalized__maxdepth_10__minsamplesleaf_6,0.159407,0.399258,0.264179,0.159902,0.399877,0.265906,0.149210,0.386277,0.256034,...,0.264358,0.157098,0.396355,0.264037,0.150307,0.387695,0.256218,0.157852,0.397306,0.265517
97,dt_normalized__maxdepth_10__minsamplesleaf_7,0.163468,0.404312,0.267767,0.153573,0.391884,0.257835,0.159163,0.398952,0.265746,...,0.259068,0.155636,0.394508,0.264776,0.154035,0.392473,0.261477,0.158635,0.398291,0.265508
98,dt_normalized__maxdepth_10__minsamplesleaf_8,0.153754,0.392115,0.259577,0.153593,0.391910,0.261237,0.147512,0.384073,0.256589,...,0.271118,0.164657,0.405779,0.270258,0.152903,0.391028,0.262046,0.154409,0.392949,0.262120
99,dt_normalized__maxdepth_10__minsamplesleaf_9,0.152285,0.390238,0.258641,0.149793,0.387031,0.259376,0.145879,0.381941,0.256279,...,0.261126,0.153553,0.391858,0.261978,0.156195,0.395215,0.261432,0.153316,0.391557,0.260158


In [None]:
#rf
for k in range(1,10):
    for i in range(1, 10):
        for j in range(1,10):
            rf = RandomForestRegressor(n_estimators=k, max_depth=i, min_samples_leaf=j)
            metrics_results.loc[len(metrics_results)+1] = \
                evaluar_metricas(rf, 
                                 normalized_data,target,
                                 'rf_normalized__estimators_'+str(k)+
                                 '__maxdepth_'+str(i)+
                                 '__minsamplesleaf_'+str(j))

In [9]:
#rf n times:
metrics_results_rf = pd.DataFrame(columns=['NAME', 'MSE','RMSE', 'MAE'])

for n in range(1, 11):
    metrics_results_rf_2 = pd.DataFrame(columns=['NAME','MSE','RMSE', 'MAE'])
    for k in range(1,101):
        for i in range(1, 6):
            for j in range(1,6):
                rf = RandomForestRegressor(n_estimators=k, max_depth=i, min_samples_leaf=j)
                metrics_results_rf_2.loc[len(metrics_results_rf_2)+1] = \
                    evaluar_metricas(rf, 
                                     normalized_data,target,
                                     'rf_normalized__estimators_'+str(k)+
                                     '__maxdepth_'+str(i)+
                                     '__minsamplesleaf_'+str(j))
        k = k + 25
    
    if n==1:
        metrics_results_rf = metrics_results_rf_2
    else: 
        metrics_results_rf = pd.concat([metrics_results_rf, metrics_results_rf_2[['MSE','RMSE', 'MAE']]], axis=1)
metrics_results_rf

Unnamed: 0,NAME,MSE,RMSE,MAE,MSE.1,RMSE.1,MAE.1,MSE.2,RMSE.2,MAE.2,...,MAE.3,MSE.3,RMSE.3,MAE.4,MSE.4,RMSE.4,MAE.5,MSE.5,RMSE.5,MAE.6
1,rf_normalized__estimators_1__maxdepth_1__minsa...,0.211604,0.460005,0.418317,0.206735,0.454681,0.413946,0.210909,0.459248,0.415274,...,0.415902,0.212976,0.461493,0.418568,0.208983,0.457146,0.415181,0.207634,0.455669,0.410757
2,rf_normalized__estimators_1__maxdepth_1__minsa...,0.216667,0.465475,0.421403,0.209778,0.458015,0.418814,0.203366,0.450962,0.411022,...,0.419115,0.211512,0.459905,0.419697,0.211458,0.459845,0.416796,0.209508,0.457720,0.416520
3,rf_normalized__estimators_1__maxdepth_1__minsa...,0.208046,0.456121,0.413670,0.208025,0.456097,0.417015,0.207966,0.456033,0.414622,...,0.416140,0.210710,0.459031,0.416593,0.207803,0.455854,0.415481,0.208090,0.456168,0.415195
4,rf_normalized__estimators_1__maxdepth_1__minsa...,0.211343,0.459721,0.419111,0.211682,0.460089,0.418211,0.212720,0.461216,0.419147,...,0.423477,0.213484,0.462043,0.421313,0.210108,0.458375,0.416806,0.207466,0.455484,0.415083
5,rf_normalized__estimators_1__maxdepth_1__minsa...,0.209769,0.458005,0.416857,0.209327,0.457523,0.418307,0.209835,0.458077,0.419483,...,0.420826,0.205446,0.453261,0.410810,0.210494,0.458796,0.418048,0.208130,0.456213,0.415848
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2496,rf_normalized__estimators_100__maxdepth_5__min...,0.135309,0.367844,0.282237,0.134068,0.366153,0.279110,0.138744,0.372484,0.284031,...,0.284098,0.135742,0.368432,0.282475,0.134186,0.366314,0.280054,0.136570,0.369553,0.283387
2497,rf_normalized__estimators_100__maxdepth_5__min...,0.138870,0.372653,0.286156,0.135880,0.368619,0.281323,0.136349,0.369255,0.284009,...,0.283393,0.137851,0.371283,0.282183,0.135679,0.368347,0.284510,0.136861,0.369947,0.282112
2498,rf_normalized__estimators_100__maxdepth_5__min...,0.136796,0.369859,0.281557,0.138748,0.372489,0.284283,0.136392,0.369312,0.282202,...,0.284965,0.137532,0.370853,0.283066,0.133234,0.365012,0.277456,0.139042,0.372884,0.281899
2499,rf_normalized__estimators_100__maxdepth_5__min...,0.141644,0.376356,0.289088,0.139188,0.373079,0.284739,0.135528,0.368142,0.282432,...,0.281874,0.137847,0.371278,0.283733,0.136784,0.369844,0.282317,0.139083,0.372938,0.284450


In [15]:
pd.set_option('max_columns', None)
pd.set_option('max_colwidth', None)
pd.set_option('max_rows', None)
metrics_results_rf.head(100)

Unnamed: 0,NAME,MSE,RMSE,MAE,MSE.1,RMSE.1,MAE.1,MSE.2,RMSE.2,MAE.2,MSE.3,RMSE.3,MAE.3,MSE.4,RMSE.4,MAE.4,MSE.5,RMSE.5,MAE.5,MSE.6,RMSE.6,MAE.6,MSE.7,RMSE.7,MAE.7,MSE.8,RMSE.8,MAE.8,MSE.9,RMSE.9,MAE.9
1,rf_normalized__estimators_1__maxdepth_1__minsamplesleaf_1,0.211604,0.460005,0.418317,0.206735,0.454681,0.413946,0.210909,0.459248,0.415274,0.212652,0.461142,0.420209,0.204288,0.451982,0.411139,0.210629,0.458944,0.416494,0.20839,0.456498,0.415902,0.212976,0.461493,0.418568,0.208983,0.457146,0.415181,0.207634,0.455669,0.410757
2,rf_normalized__estimators_1__maxdepth_1__minsamplesleaf_2,0.216667,0.465475,0.421403,0.209778,0.458015,0.418814,0.203366,0.450962,0.411022,0.212038,0.460476,0.420155,0.21321,0.461746,0.420612,0.211094,0.459449,0.418627,0.21373,0.46231,0.419115,0.211512,0.459905,0.419697,0.211458,0.459845,0.416796,0.209508,0.45772,0.41652
3,rf_normalized__estimators_1__maxdepth_1__minsamplesleaf_3,0.208046,0.456121,0.41367,0.208025,0.456097,0.417015,0.207966,0.456033,0.414622,0.20873,0.45687,0.415889,0.210857,0.459191,0.419365,0.213406,0.461959,0.418646,0.207928,0.455991,0.41614,0.21071,0.459031,0.416593,0.207803,0.455854,0.415481,0.20809,0.456168,0.415195
4,rf_normalized__estimators_1__maxdepth_1__minsamplesleaf_4,0.211343,0.459721,0.419111,0.211682,0.460089,0.418211,0.21272,0.461216,0.419147,0.209539,0.457754,0.416904,0.208951,0.457111,0.417621,0.206208,0.454101,0.412911,0.21328,0.461823,0.423477,0.213484,0.462043,0.421313,0.210108,0.458375,0.416806,0.207466,0.455484,0.415083
5,rf_normalized__estimators_1__maxdepth_1__minsamplesleaf_5,0.209769,0.458005,0.416857,0.209327,0.457523,0.418307,0.209835,0.458077,0.419483,0.211853,0.460275,0.418341,0.215252,0.463952,0.421743,0.211276,0.459647,0.417871,0.211595,0.459994,0.420826,0.205446,0.453261,0.41081,0.210494,0.458796,0.418048,0.20813,0.456213,0.415848
6,rf_normalized__estimators_1__maxdepth_2__minsamplesleaf_1,0.171386,0.413988,0.341534,0.170208,0.412563,0.338191,0.167796,0.40963,0.338981,0.168926,0.411006,0.339224,0.175198,0.418567,0.342832,0.170961,0.413474,0.340325,0.178319,0.422278,0.34416,0.171268,0.413845,0.340289,0.171709,0.414378,0.34484,0.175367,0.418768,0.344156
7,rf_normalized__estimators_1__maxdepth_2__minsamplesleaf_2,0.172984,0.415913,0.345287,0.176583,0.420218,0.345912,0.173901,0.417015,0.343981,0.171844,0.414541,0.342202,0.169874,0.412158,0.337148,0.172801,0.415693,0.342829,0.176089,0.41963,0.348249,0.175924,0.419433,0.341533,0.168395,0.410359,0.332909,0.173949,0.417072,0.341245
8,rf_normalized__estimators_1__maxdepth_2__minsamplesleaf_3,0.172469,0.415294,0.345049,0.169021,0.411122,0.334363,0.170363,0.41275,0.33559,0.170774,0.413248,0.339641,0.172315,0.415109,0.341014,0.171069,0.413605,0.34225,0.174417,0.417633,0.341567,0.169632,0.411864,0.339194,0.172081,0.414827,0.348446,0.168765,0.410811,0.339138
9,rf_normalized__estimators_1__maxdepth_2__minsamplesleaf_4,0.177926,0.421813,0.345564,0.176617,0.420259,0.343081,0.171993,0.414721,0.341428,0.173433,0.416453,0.340107,0.180175,0.42447,0.351189,0.176755,0.420422,0.346524,0.175213,0.418585,0.346263,0.171188,0.413749,0.337504,0.17303,0.415969,0.340805,0.168129,0.410036,0.335598
10,rf_normalized__estimators_1__maxdepth_2__minsamplesleaf_5,0.174522,0.417758,0.342723,0.175404,0.418812,0.345735,0.172416,0.41523,0.342462,0.175753,0.419229,0.341392,0.167003,0.40866,0.336969,0.169941,0.41224,0.338285,0.172756,0.415639,0.343053,0.174893,0.418202,0.341464,0.175449,0.418867,0.346628,0.166778,0.408385,0.33847


In [None]:
#mlpr
metrics_results_mlpr = pd.DataFrame(columns=['NAME', 'MSE','RMSE', 'MAE'])
activations = ['relu', 'logistic', 'tanh']
solvers = ['adam', 'sgd']
learning_rates = ['constant', 'invscaling', 'adaptive']

for act in activations:
    for sol in solvers:
        for rate in learning_rates:
            mlpr = MLPRegressor(activation=act, solver=sol, learning_rate=rate)
            metrics_results_mlpr.loc[len(metrics_results_mlpr)+1] = \
                evaluar_metricas(mlpr, 
                                 normalized_data,
                                 target, 
                                 'mlpr_normalized_activation_'+act+
                                 '__solver_'+sol+
                                 '__batchsize_100'+
                                 '__learningrate_'+rate)
    
metrics_results_mlpr

In [17]:
#mlpr n times:
metrics_results_mlpr = pd.DataFrame(columns=['NAME', 'MSE','RMSE', 'MAE'])
activations = ['relu', 'logistic', 'tanh']
solvers = ['adam', 'sgd']
learning_rates = ['constant', 'invscaling', 'adaptive']
batch_sizes = [50, 100, 150, 200, 250, 300, 350, 400, 450, 500]

for n in range(1, 11):
    metrics_results_mlpr_2 = pd.DataFrame(columns=['NAME','MSE','RMSE', 'MAE'])
    for act in activations:
        for sol in solvers:
            for rate in learning_rates:
                for size in batch_sizes:
                    mlpr = MLPRegressor(activation=act, solver=sol, batch_size=size, learning_rate=rate)
                    metrics_results_mlpr_2.loc[len(metrics_results_mlpr_2)+1] = \
                        evaluar_metricas(mlpr, 
                                         normalized_data,
                                         target, 
                                         'mlpr_normalized__activation_'+act+
                                         '__solver_'+sol+
                                         '__batchsize_'+str(size)+
                                         '__learningrate_'+rate)
    
    if n==1:
        metrics_results_mlpr = metrics_results_mlpr_2
    else: 
        metrics_results_mlpr = pd.concat([metrics_results_mlpr, metrics_results_mlpr_2[['MSE','RMSE', 'MAE']]], axis=1)
metrics_results_mlpr

Unnamed: 0,NAME,MSE,RMSE,MAE,MSE.1,RMSE.1,MAE.1,MSE.2,RMSE.2,MAE.2,MSE.3,RMSE.3,MAE.3,MSE.4,RMSE.4,MAE.4,MSE.5,RMSE.5,MAE.5,MSE.6,RMSE.6,MAE.6,MSE.7,RMSE.7,MAE.7,MSE.8,RMSE.8,MAE.8,MSE.9,RMSE.9,MAE.9
1,mlpr_normalized__activation_relu__solver_adam__batchsize_50__learningrate_constant,0.135195,0.367689,0.268829,0.131076,0.362044,0.259552,0.1316,0.362767,0.267392,0.133688,0.365634,0.276199,0.133647,0.365577,0.26611,0.134388,0.366589,0.272078,0.129149,0.359374,0.269664,0.131647,0.362832,0.270302,0.131932,0.363225,0.276516,0.134338,0.366521,0.269202
2,mlpr_normalized__activation_relu__solver_adam__batchsize_100__learningrate_constant,0.130805,0.36167,0.259172,0.133269,0.36506,0.269453,0.131346,0.362417,0.267264,0.131716,0.362928,0.269625,0.132582,0.364118,0.264877,0.124942,0.353472,0.260566,0.135087,0.367542,0.265374,0.132124,0.363489,0.267077,0.132477,0.363973,0.269854,0.130268,0.360927,0.264919
3,mlpr_normalized__activation_relu__solver_adam__batchsize_150__learningrate_constant,0.135441,0.368023,0.273508,0.12981,0.360291,0.260486,0.132045,0.36338,0.262902,0.126395,0.355521,0.257364,0.130096,0.360688,0.265635,0.131546,0.362693,0.262709,0.13454,0.366797,0.26858,0.129864,0.360366,0.258616,0.126247,0.355313,0.259177,0.133603,0.365517,0.264
4,mlpr_normalized__activation_relu__solver_adam__batchsize_200__learningrate_constant,0.129643,0.36006,0.258005,0.132231,0.363636,0.26592,0.134568,0.366835,0.267729,0.129731,0.360182,0.261025,0.131136,0.362127,0.263391,0.132586,0.364124,0.263375,0.130257,0.360912,0.259291,0.134962,0.367372,0.267708,0.13472,0.367042,0.266584,0.137526,0.370845,0.269589
5,mlpr_normalized__activation_relu__solver_adam__batchsize_250__learningrate_constant,0.133882,0.365899,0.267931,0.129039,0.35922,0.261691,0.132108,0.363466,0.269861,0.128979,0.359136,0.262864,0.131845,0.363105,0.266822,0.131655,0.362843,0.263307,0.134537,0.366792,0.263353,0.134797,0.367147,0.26714,0.133148,0.364894,0.265943,0.132942,0.364613,0.263851
6,mlpr_normalized__activation_relu__solver_adam__batchsize_300__learningrate_constant,0.132618,0.364167,0.269463,0.133922,0.365954,0.267572,0.127641,0.357269,0.26484,0.131899,0.363179,0.261204,0.131152,0.36215,0.26423,0.131639,0.362821,0.267261,0.131429,0.362531,0.265834,0.130223,0.360864,0.262974,0.131107,0.362087,0.262614,0.136531,0.369501,0.267833
7,mlpr_normalized__activation_relu__solver_adam__batchsize_350__learningrate_constant,0.131532,0.362674,0.267119,0.132712,0.364297,0.266449,0.137213,0.370422,0.266331,0.129867,0.360371,0.265793,0.128162,0.357998,0.262124,0.130624,0.361419,0.262448,0.130242,0.360891,0.265349,0.129694,0.360131,0.26265,0.129255,0.359521,0.261397,0.130719,0.361551,0.262269
8,mlpr_normalized__activation_relu__solver_adam__batchsize_400__learningrate_constant,0.129428,0.359761,0.266567,0.129431,0.359765,0.263445,0.125605,0.354407,0.25778,0.133314,0.365121,0.268409,0.133316,0.365125,0.269494,0.132825,0.364451,0.268898,0.132734,0.364326,0.270301,0.133515,0.365397,0.263205,0.133976,0.366027,0.269083,0.12871,0.358761,0.264667
9,mlpr_normalized__activation_relu__solver_adam__batchsize_450__learningrate_constant,0.131976,0.363284,0.263612,0.128927,0.359063,0.263666,0.133461,0.365323,0.268599,0.131134,0.362124,0.268493,0.132767,0.364372,0.265729,0.132983,0.364669,0.27125,0.132038,0.36337,0.266887,0.131543,0.362688,0.265919,0.133658,0.365593,0.268504,0.130036,0.360605,0.26645
10,mlpr_normalized__activation_relu__solver_adam__batchsize_500__learningrate_constant,0.133308,0.365114,0.269268,0.130118,0.360718,0.267311,0.133231,0.365009,0.272982,0.128703,0.358752,0.263933,0.133273,0.365066,0.26699,0.129851,0.360349,0.263804,0.129481,0.359835,0.265924,0.132476,0.363972,0.265172,0.127451,0.357003,0.259775,0.130949,0.361869,0.264527


In [None]:
df_metrics_list = [metrics_results_kn, metrics_results_dt, metrics_results_rf, metrics_results_mlpr]

df_all_metrics = pd.concat(df_metrics_list)

df_all_metrics.to_csv('df_all_metrics.csv')

In [None]:
#logR
metrics_results_logR = pd.DataFrame(columns=['NAME', 'MSE','RMSE', 'MAE'])
logR = LogisticRegression()

for i in range(1,51):
    metrics_results_logR.loc[len(metrics_results_logR)+1] = \
        evaluar_metricas(logR, 
                         normalized_data, 
                         target, 
                         'logR_norm_'+str(i))
metrics_results_logR

In [None]:
#logR n times:
metrics_results_logR = pd.DataFrame(columns=['NAME', 'MSE','RMSE', 'MAE'])

for n in range(1, 11):
    metrics_results_logR_2 = pd.DataFrame(columns=['NAME','MSE','RMSE', 'MAE'])
    for i in range(1,51):
        logR = LogisticRegression()
        metrics_results_logR.loc[len(metrics_results_logR)+1] = \
            evaluar_metricas(logR, 
                             normalized_data, 
                             target, 
                             'logR_norm_'+str(i))
    
    
    if n==1:
        metrics_results_logR = metrics_results_logR_2
    else: 
        metrics_results_logR = pd.concat([metrics_results_logR, metrics_results_logR_2[['MSE','RMSE', 'MAE']]], axis=1)
metrics_results_logR