## Imports and Setup

In [1]:
import tensorflow as tf
import pandas as pd
from tensorflow.keras import layers
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, MinMaxScaler
import matplotlib.pyplot as plt
from sklearn.metrics import classification_report, confusion_matrix

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [2]:
pd.set_option('display.max_columns', 500)

## Data Import 

In [10]:
from json import dump

In [11]:
data = pd.read_csv('PredictionData.csv')
data_invert = pd.read_csv('PredictionDataInvert.csv')
data['Conference_team_one'] = data['Conference_team_one'].astype('category')
data['Conference_team_two'] = data['Conference_team_two'].astype('category')

data_invert['Conference_team_one'] = data_invert['Conference_team_one'].astype('category')
data_invert['Conference_team_two'] = data_invert['Conference_team_two'].astype('category')

cat_columns = data.select_dtypes(['category']).columns

mapping = dict(enumerate(data['Conference_team_one'].cat.categories))
# print (mapping)

with open('conferences.json', 'wt') as fp:
    dump(mapping, fp)


data[cat_columns] = data[cat_columns].apply(lambda x: x.cat.codes)

cat_columns = data_invert.select_dtypes(['category']).columns
data_invert[cat_columns] = data_invert[cat_columns].apply(lambda x: x.cat.codes)

{0: 'A10', 1: 'ACC', 2: 'AE', 3: 'ASun', 4: 'Amer', 5: 'B10', 6: 'B12', 7: 'BE', 8: 'BSth', 9: 'BW', 10: 'CAA', 11: 'CUSA', 12: 'Horz', 13: 'Ivy', 14: 'MAAC', 15: 'MAC', 16: 'MCon', 17: 'MEAC', 18: 'MVC', 19: 'MWC', 20: 'NEC', 21: 'OVC', 22: 'P10', 23: 'P12', 24: 'Pat', 25: 'SB', 26: 'SC', 27: 'SEC', 28: 'SWAC', 29: 'Slnd', 30: 'WAC', 31: 'WCC'}


In [4]:
data.head()

Unnamed: 0,Seed_team_one,Team_team_one,Conference_team_one,AdjEM_team_one,AdjO_team_one,AdjD_team_one,AdjT_team_one,Luck_team_one,Opp AdjEM_team_one,OppO_team_one,OppD_team_one,NCSOS AdjEM_team_one,W_team_one,L_team_one,ast_pct_team_one,blk_pct_team_one,efg_pct_team_one,fg3a_per_fga_pct_team_one,ft_rate_team_one,fta_per_fga_pct_team_one,losses_conf_team_one,losses_home_team_one,losses_visitor_team_one,opp_pts_team_one,pts_team_one,sos_team_one,srs_team_one,tov_pct_team_one,trb_pct_team_one,ts_pct_team_one,wins_conf_team_one,wins_home_team_one,wins_visitor_team_one,def_rtg_team_one,opp_fta_per_fga_pct_team_one,opp_fg3a_per_fga_pct_team_one,opp_ts_pct_team_one,opp_trb_pct_team_one,opp_ast_pct_team_one,opp_stl_pct_team_one,opp_blk_pct_team_one,opp_efg_pct_team_one,opp_tov_pct_team_one,opp_orb_pct_team_one,opp_ft_rate_team_one,num_FR_team_one,num_SO_team_one,num_JR_team_one,num_F_team_one,num_G_team_one,num_C_team_one,ast_per_g_1_team_one,ast_per_g_2_team_one,ast_per_g_3_team_one,ast_per_g_4_team_one,ast_per_g_5_team_one,ast_per_g_6_team_one,ast_per_g_7_team_one,ast_per_g_8_team_one,ast_per_g_9_team_one,ast_per_g_10_team_one,ast_per_g_11_team_one,ast_per_g_12_team_one,ast_per_g_13_team_one,ast_per_g_14_team_one,blk_per_g_1_team_one,blk_per_g_2_team_one,blk_per_g_3_team_one,blk_per_g_4_team_one,blk_per_g_5_team_one,blk_per_g_6_team_one,blk_per_g_7_team_one,blk_per_g_8_team_one,blk_per_g_9_team_one,blk_per_g_10_team_one,blk_per_g_11_team_one,blk_per_g_12_team_one,blk_per_g_13_team_one,blk_per_g_14_team_one,drb_per_g_1_team_one,drb_per_g_2_team_one,drb_per_g_3_team_one,drb_per_g_4_team_one,drb_per_g_5_team_one,drb_per_g_6_team_one,drb_per_g_7_team_one,drb_per_g_8_team_one,drb_per_g_9_team_one,drb_per_g_10_team_one,drb_per_g_11_team_one,drb_per_g_12_team_one,drb_per_g_13_team_one,drb_per_g_14_team_one,fg2_pct_1_team_one,fg2_pct_2_team_one,fg2_pct_3_team_one,fg2_pct_4_team_one,fg2_pct_5_team_one,fg2_pct_6_team_one,fg2_pct_7_team_one,fg2_pct_8_team_one,fg2_pct_9_team_one,fg2_pct_10_team_one,fg2_pct_11_team_one,fg2_pct_12_team_one,fg2_pct_13_team_one,fg2_pct_14_team_one,fg2_per_g_1_team_one,fg2_per_g_2_team_one,fg2_per_g_3_team_one,fg2_per_g_4_team_one,fg2_per_g_5_team_one,fg2_per_g_6_team_one,fg2_per_g_7_team_one,fg2_per_g_8_team_one,fg2_per_g_9_team_one,fg2_per_g_10_team_one,fg2_per_g_11_team_one,fg2_per_g_12_team_one,fg2_per_g_13_team_one,fg2_per_g_14_team_one,fg2a_per_g_1_team_one,fg2a_per_g_2_team_one,fg2a_per_g_3_team_one,fg2a_per_g_4_team_one,fg2a_per_g_5_team_one,fg2a_per_g_6_team_one,fg2a_per_g_7_team_one,fg2a_per_g_8_team_one,fg2a_per_g_9_team_one,fg2a_per_g_10_team_one,fg2a_per_g_11_team_one,fg2a_per_g_12_team_one,fg2a_per_g_13_team_one,fg2a_per_g_14_team_one,fg3_pct_1_team_one,fg3_pct_2_team_one,fg3_pct_3_team_one,fg3_pct_4_team_one,fg3_pct_5_team_one,fg3_pct_6_team_one,fg3_pct_7_team_one,fg3_pct_8_team_one,fg3_pct_9_team_one,fg3_pct_10_team_one,fg3_pct_11_team_one,fg3_pct_12_team_one,fg3_pct_13_team_one,fg3_pct_14_team_one,fg3_per_g_1_team_one,fg3_per_g_2_team_one,fg3_per_g_3_team_one,fg3_per_g_4_team_one,fg3_per_g_5_team_one,fg3_per_g_6_team_one,fg3_per_g_7_team_one,fg3_per_g_8_team_one,fg3_per_g_9_team_one,fg3_per_g_10_team_one,fg3_per_g_11_team_one,fg3_per_g_12_team_one,fg3_per_g_13_team_one,fg3_per_g_14_team_one,fg3a_per_g_1_team_one,fg3a_per_g_2_team_one,fg3a_per_g_3_team_one,fg3a_per_g_4_team_one,fg3a_per_g_5_team_one,fg3a_per_g_6_team_one,fg3a_per_g_7_team_one,fg3a_per_g_8_team_one,fg3a_per_g_9_team_one,fg3a_per_g_10_team_one,fg3a_per_g_11_team_one,fg3a_per_g_12_team_one,fg3a_per_g_13_team_one,fg3a_per_g_14_team_one,fg_pct_1_team_one,fg_pct_2_team_one,fg_pct_3_team_one,fg_pct_4_team_one,fg_pct_5_team_one,fg_pct_6_team_one,fg_pct_7_team_one,fg_pct_8_team_one,fg_pct_9_team_one,fg_pct_10_team_one,fg_pct_11_team_one,fg_pct_12_team_one,fg_pct_13_team_one,fg_pct_14_team_one,fg_per_g_1_team_one,fg_per_g_2_team_one,fg_per_g_3_team_one,fg_per_g_4_team_one,fg_per_g_5_team_one,fg_per_g_6_team_one,fg_per_g_7_team_one,fg_per_g_8_team_one,fg_per_g_9_team_one,fg_per_g_10_team_one,fg_per_g_11_team_one,fg_per_g_12_team_one,fg_per_g_13_team_one,fg_per_g_14_team_one,fga_per_g_1_team_one,fga_per_g_2_team_one,fga_per_g_3_team_one,fga_per_g_4_team_one,fga_per_g_5_team_one,fga_per_g_6_team_one,fga_per_g_7_team_one,fga_per_g_8_team_one,fga_per_g_9_team_one,fga_per_g_10_team_one,fga_per_g_11_team_one,fga_per_g_12_team_one,fga_per_g_13_team_one,fga_per_g_14_team_one,ft_pct_1_team_one,ft_pct_2_team_one,ft_pct_3_team_one,ft_pct_4_team_one,ft_pct_5_team_one,ft_pct_6_team_one,ft_pct_7_team_one,ft_pct_8_team_one,ft_pct_9_team_one,ft_pct_10_team_one,ft_pct_11_team_one,ft_pct_12_team_one,ft_pct_13_team_one,ft_pct_14_team_one,ft_per_g_1_team_one,ft_per_g_2_team_one,ft_per_g_3_team_one,ft_per_g_4_team_one,ft_per_g_5_team_one,ft_per_g_6_team_one,ft_per_g_7_team_one,ft_per_g_8_team_one,ft_per_g_9_team_one,ft_per_g_10_team_one,ft_per_g_11_team_one,ft_per_g_12_team_one,ft_per_g_13_team_one,ft_per_g_14_team_one,fta_per_g_1_team_one,fta_per_g_2_team_one,fta_per_g_3_team_one,...,fga_per_g_6_team_two,fga_per_g_7_team_two,fga_per_g_8_team_two,fga_per_g_9_team_two,fga_per_g_10_team_two,fga_per_g_11_team_two,fga_per_g_12_team_two,fga_per_g_13_team_two,fga_per_g_14_team_two,ft_pct_1_team_two,ft_pct_2_team_two,ft_pct_3_team_two,ft_pct_4_team_two,ft_pct_5_team_two,ft_pct_6_team_two,ft_pct_7_team_two,ft_pct_8_team_two,ft_pct_9_team_two,ft_pct_10_team_two,ft_pct_11_team_two,ft_pct_12_team_two,ft_pct_13_team_two,ft_pct_14_team_two,ft_per_g_1_team_two,ft_per_g_2_team_two,ft_per_g_3_team_two,ft_per_g_4_team_two,ft_per_g_5_team_two,ft_per_g_6_team_two,ft_per_g_7_team_two,ft_per_g_8_team_two,ft_per_g_9_team_two,ft_per_g_10_team_two,ft_per_g_11_team_two,ft_per_g_12_team_two,ft_per_g_13_team_two,ft_per_g_14_team_two,fta_per_g_1_team_two,fta_per_g_2_team_two,fta_per_g_3_team_two,fta_per_g_4_team_two,fta_per_g_5_team_two,fta_per_g_6_team_two,fta_per_g_7_team_two,fta_per_g_8_team_two,fta_per_g_9_team_two,fta_per_g_10_team_two,fta_per_g_11_team_two,fta_per_g_12_team_two,fta_per_g_13_team_two,fta_per_g_14_team_two,height_1_team_two,height_2_team_two,height_3_team_two,height_4_team_two,height_5_team_two,height_6_team_two,height_7_team_two,height_8_team_two,height_9_team_two,height_10_team_two,height_11_team_two,height_12_team_two,height_13_team_two,height_14_team_two,mp_per_g_1_team_two,mp_per_g_2_team_two,mp_per_g_3_team_two,mp_per_g_4_team_two,mp_per_g_5_team_two,mp_per_g_6_team_two,mp_per_g_7_team_two,mp_per_g_8_team_two,mp_per_g_9_team_two,mp_per_g_10_team_two,mp_per_g_11_team_two,mp_per_g_12_team_two,mp_per_g_13_team_two,mp_per_g_14_team_two,orb_per_g_1_team_two,orb_per_g_2_team_two,orb_per_g_3_team_two,orb_per_g_4_team_two,orb_per_g_5_team_two,orb_per_g_6_team_two,orb_per_g_7_team_two,orb_per_g_8_team_two,orb_per_g_9_team_two,orb_per_g_10_team_two,orb_per_g_11_team_two,orb_per_g_12_team_two,orb_per_g_13_team_two,orb_per_g_14_team_two,pf_per_g_1_team_two,pf_per_g_2_team_two,pf_per_g_3_team_two,pf_per_g_4_team_two,pf_per_g_5_team_two,pf_per_g_6_team_two,pf_per_g_7_team_two,pf_per_g_8_team_two,pf_per_g_9_team_two,pf_per_g_10_team_two,pf_per_g_11_team_two,pf_per_g_12_team_two,pf_per_g_13_team_two,pf_per_g_14_team_two,pts_per_g_1_team_two,pts_per_g_2_team_two,pts_per_g_3_team_two,pts_per_g_4_team_two,pts_per_g_5_team_two,pts_per_g_6_team_two,pts_per_g_7_team_two,pts_per_g_8_team_two,pts_per_g_9_team_two,pts_per_g_10_team_two,pts_per_g_11_team_two,pts_per_g_12_team_two,pts_per_g_13_team_two,pts_per_g_14_team_two,stl_per_g_1_team_two,stl_per_g_2_team_two,stl_per_g_3_team_two,stl_per_g_4_team_two,stl_per_g_5_team_two,stl_per_g_6_team_two,stl_per_g_7_team_two,stl_per_g_8_team_two,stl_per_g_9_team_two,stl_per_g_10_team_two,stl_per_g_11_team_two,stl_per_g_12_team_two,stl_per_g_13_team_two,stl_per_g_14_team_two,tov_per_g_1_team_two,tov_per_g_2_team_two,tov_per_g_3_team_two,tov_per_g_4_team_two,tov_per_g_5_team_two,tov_per_g_6_team_two,tov_per_g_7_team_two,tov_per_g_8_team_two,tov_per_g_9_team_two,tov_per_g_10_team_two,tov_per_g_11_team_two,tov_per_g_12_team_two,tov_per_g_13_team_two,tov_per_g_14_team_two,trb_per_g_1_team_two,trb_per_g_2_team_two,trb_per_g_3_team_two,trb_per_g_4_team_two,trb_per_g_5_team_two,trb_per_g_6_team_two,trb_per_g_7_team_two,trb_per_g_8_team_two,trb_per_g_9_team_two,trb_per_g_10_team_two,trb_per_g_11_team_two,trb_per_g_12_team_two,trb_per_g_13_team_two,trb_per_g_14_team_two,weight_1_team_two,weight_2_team_two,weight_3_team_two,weight_4_team_two,weight_5_team_two,weight_6_team_two,weight_7_team_two,weight_8_team_two,weight_9_team_two,weight_10_team_two,weight_11_team_two,weight_12_team_two,weight_13_team_two,weight_14_team_two,num_SR_team_two,ast_per_g_15_team_two,ast_per_g_16_team_two,blk_per_g_15_team_two,blk_per_g_16_team_two,drb_per_g_15_team_two,drb_per_g_16_team_two,fg2_pct_15_team_two,fg2_pct_16_team_two,fg2_per_g_15_team_two,fg2_per_g_16_team_two,fg2a_per_g_15_team_two,fg2a_per_g_16_team_two,fg3_pct_15_team_two,fg3_pct_16_team_two,fg3_per_g_15_team_two,fg3_per_g_16_team_two,fg3a_per_g_15_team_two,fg3a_per_g_16_team_two,fg_pct_15_team_two,fg_pct_16_team_two,fg_per_g_15_team_two,fg_per_g_16_team_two,fga_per_g_15_team_two,fga_per_g_16_team_two,ft_pct_15_team_two,ft_pct_16_team_two,ft_per_g_15_team_two,ft_per_g_16_team_two,fta_per_g_15_team_two,fta_per_g_16_team_two,height_15_team_two,height_16_team_two,mp_per_g_15_team_two,mp_per_g_16_team_two,orb_per_g_15_team_two,orb_per_g_16_team_two,pf_per_g_15_team_two,pf_per_g_16_team_two,pts_per_g_15_team_two,pts_per_g_16_team_two,stl_per_g_15_team_two,stl_per_g_16_team_two,tov_per_g_15_team_two,tov_per_g_16_team_two,trb_per_g_15_team_two,trb_per_g_16_team_two,weight_15_team_two,weight_16_team_two,year_team_two,week1_team_two,week2_team_two,week3_team_two,week4_team_two,week5_team_two,week6_team_two,week7_team_two,week8_team_two,week9_team_two,week10_team_two,week11_team_two,week12_team_two,week13_team_two,week14_team_two,week15_team_two,week16_team_two,week17_team_two,week18_team_two,max_team_two,final_team_two,result,score_one,score_two
0,1,Maryland,1,29.25,119.2,89.9,73.7,0.025,9.88,109.1,99.3,1.62,32,4,65.9,9.4,0.53,0.258,0.301,0.415,1,0,2,2552,3060,9.38,23.5,15.5,52.4,0.569,15,15,7,0.0,0.0,0.0,0.0,47.6,0.0,0.0,0.0,0.399,0.0,0.0,0.0,2.0,1.0,5.0,2,7,3.0,7.9,2.9,2.4,2.1,1.5,1.2,0.9,0.8,0.8,0.3,0.1,0.1,0.0,0.0,2.0,1.5,0.9,0.5,0.4,0.3,0.2,0.2,0.1,0.0,0.0,0.0,0.0,0.0,5.9,4.4,3.3,3.3,2.5,2.1,2.0,1.8,0.7,0.5,0.3,0.1,0.0,0.0,0.765,0.667,0.579,0.556,0.547,0.525,0.519,0.516,0.507,0.464,0.427,0.333,0.0,0.0,5.5,4.8,4.4,3.6,1.6,1.3,1.3,1.2,0.6,0.3,0.2,0.1,0.0,0.0,10.1,9.5,8.4,7.0,3.1,3.1,2.7,2.1,0.8,0.5,0.3,0.2,0.0,0.0,1.0,0.5,0.5,0.425,0.397,0.396,0.344,0.255,0.25,0.0,0.0,0.0,0.0,0.0,2.6,1.2,1.1,0.5,0.4,0.3,0.2,0.1,0.0,0.0,0.0,0.0,0.0,0.0,6.4,3.6,2.7,1.5,1.1,0.5,0.5,0.3,0.1,0.1,0.0,0.0,0.0,0.0,0.667,0.545,0.526,0.524,0.504,0.5,0.477,0.469,0.469,0.453,0.382,0.286,0.0,0.0,7.0,5.5,4.8,4.0,2.5,2.3,1.7,1.6,0.8,0.5,0.2,0.1,0.0,0.0,14.9,10.1,9.5,8.5,6.6,4.8,3.8,3.1,1.2,1.0,0.4,0.3,0.0,0.0,1.0,0.898,0.836,0.824,0.803,0.778,0.767,0.623,0.585,0.563,0.5,0.0,0.0,0.0,4.2,3.9,2.8,2.4,1.7,1.7,1.5,0.5,0.3,0.1,0.1,0.0,0.0,0.0,6.7,4.4,4.1,...,4.8,4.7,4.2,2.0,1.8,1.5,0.6,0.4,0.3,0.833,0.824,0.814,0.786,0.78,0.754,0.746,0.629,0.605,0.565,0.525,0.0,0.0,0.0,4.3,2.7,2.5,1.7,1.7,1.3,0.7,0.6,0.5,0.5,0.4,0.0,0.0,0.0,5.2,3.3,3.2,2.7,2.3,1.7,1.2,1.2,0.9,0.6,0.5,0.0,0.0,0.0,82.0,82.0,81.0,81.0,81.0,81.0,80.0,79.0,78.0,78.0,76.0,75.0,75.0,73.0,35.1,27.1,26.9,23.7,21.1,19.9,19.4,19.0,11.4,11.2,9.7,1.9,1.3,0.6,1.8,1.6,1.4,1.0,1.0,0.9,0.7,0.6,0.6,0.5,0.5,0.2,0.1,0.1,3.1,3.0,2.6,2.5,2.5,1.9,1.8,1.6,1.6,1.5,1.0,0.5,0.4,0.2,19.9,11.1,8.8,6.9,6.4,6.3,5.3,4.6,2.2,2.0,1.6,0.6,0.3,0.3,1.2,0.9,0.6,0.6,0.5,0.5,0.5,0.5,0.4,0.2,0.1,0.0,0.0,0.0,3.1,2.7,2.5,1.5,1.4,1.3,1.1,0.8,0.7,0.7,0.7,0.2,0.2,0.0,7.2,5.0,5.0,3.7,3.2,2.7,2.3,2.2,2.2,1.9,1.5,0.4,0.3,0.1,275.0,235.0,230.0,215.0,210.0,205.0,205.0,200.0,200.0,190.0,190.0,185.0,185.0,180.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2002,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1,85,70
1,1,Maryland,1,29.25,119.2,89.9,73.7,0.025,9.88,109.1,99.3,1.62,32,4,65.9,9.4,0.53,0.258,0.301,0.415,1,0,2,2552,3060,9.38,23.5,15.5,52.4,0.569,15,15,7,0.0,0.0,0.0,0.0,47.6,0.0,0.0,0.0,0.399,0.0,0.0,0.0,2.0,1.0,5.0,2,7,3.0,7.9,2.9,2.4,2.1,1.5,1.2,0.9,0.8,0.8,0.3,0.1,0.1,0.0,0.0,2.0,1.5,0.9,0.5,0.4,0.3,0.2,0.2,0.1,0.0,0.0,0.0,0.0,0.0,5.9,4.4,3.3,3.3,2.5,2.1,2.0,1.8,0.7,0.5,0.3,0.1,0.0,0.0,0.765,0.667,0.579,0.556,0.547,0.525,0.519,0.516,0.507,0.464,0.427,0.333,0.0,0.0,5.5,4.8,4.4,3.6,1.6,1.3,1.3,1.2,0.6,0.3,0.2,0.1,0.0,0.0,10.1,9.5,8.4,7.0,3.1,3.1,2.7,2.1,0.8,0.5,0.3,0.2,0.0,0.0,1.0,0.5,0.5,0.425,0.397,0.396,0.344,0.255,0.25,0.0,0.0,0.0,0.0,0.0,2.6,1.2,1.1,0.5,0.4,0.3,0.2,0.1,0.0,0.0,0.0,0.0,0.0,0.0,6.4,3.6,2.7,1.5,1.1,0.5,0.5,0.3,0.1,0.1,0.0,0.0,0.0,0.0,0.667,0.545,0.526,0.524,0.504,0.5,0.477,0.469,0.469,0.453,0.382,0.286,0.0,0.0,7.0,5.5,4.8,4.0,2.5,2.3,1.7,1.6,0.8,0.5,0.2,0.1,0.0,0.0,14.9,10.1,9.5,8.5,6.6,4.8,3.8,3.1,1.2,1.0,0.4,0.3,0.0,0.0,1.0,0.898,0.836,0.824,0.803,0.778,0.767,0.623,0.585,0.563,0.5,0.0,0.0,0.0,4.2,3.9,2.8,2.4,1.7,1.7,1.5,0.5,0.3,0.1,0.1,0.0,0.0,0.0,6.7,4.4,4.1,...,6.0,3.1,1.3,0.9,0.5,0.0,0.0,0.0,0.0,1.0,0.777,0.768,0.756,0.714,0.67,0.667,0.66,0.6,0.5,0.0,0.0,0.0,0.0,3.2,2.9,2.7,2.3,2.2,0.8,0.5,0.4,0.2,0.1,0.0,0.0,0.0,0.0,4.1,3.8,3.5,3.5,3.3,1.3,1.0,0.7,0.3,0.1,0.0,0.0,0.0,0.0,83.0,80.0,80.0,78.0,77.0,77.0,77.0,77.0,77.0,76.0,75.0,75.0,74.0,70.0,34.2,32.5,32.3,30.1,28.2,23.8,14.6,6.4,2.6,0.8,0.5,0.5,0.5,0.0,2.3,1.4,1.3,1.0,0.8,0.6,0.6,0.4,0.1,0.0,0.0,0.0,0.0,0.0,2.8,2.4,2.4,2.3,2.1,2.1,2.1,0.5,0.5,0.2,0.0,0.0,0.0,0.0,15.1,12.3,10.2,9.3,8.1,7.8,3.0,1.0,0.9,0.9,0.5,0.0,0.0,0.0,1.3,1.3,0.9,0.7,0.6,0.6,0.3,0.2,0.1,0.0,0.0,0.0,0.0,0.0,2.5,2.1,1.9,1.7,1.6,1.4,1.2,0.4,0.3,0.2,0.0,0.0,0.0,0.0,5.3,5.2,4.9,4.3,3.3,2.7,2.6,0.9,0.3,0.0,0.0,0.0,0.0,0.0,265.0,240.0,230.0,220.0,208.0,205.0,200.0,195.0,195.0,190.0,190.0,190.0,185.0,180.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2002,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1,87,57
2,1,Maryland,1,29.25,119.2,89.9,73.7,0.025,9.88,109.1,99.3,1.62,32,4,65.9,9.4,0.53,0.258,0.301,0.415,1,0,2,2552,3060,9.38,23.5,15.5,52.4,0.569,15,15,7,0.0,0.0,0.0,0.0,47.6,0.0,0.0,0.0,0.399,0.0,0.0,0.0,2.0,1.0,5.0,2,7,3.0,7.9,2.9,2.4,2.1,1.5,1.2,0.9,0.8,0.8,0.3,0.1,0.1,0.0,0.0,2.0,1.5,0.9,0.5,0.4,0.3,0.2,0.2,0.1,0.0,0.0,0.0,0.0,0.0,5.9,4.4,3.3,3.3,2.5,2.1,2.0,1.8,0.7,0.5,0.3,0.1,0.0,0.0,0.765,0.667,0.579,0.556,0.547,0.525,0.519,0.516,0.507,0.464,0.427,0.333,0.0,0.0,5.5,4.8,4.4,3.6,1.6,1.3,1.3,1.2,0.6,0.3,0.2,0.1,0.0,0.0,10.1,9.5,8.4,7.0,3.1,3.1,2.7,2.1,0.8,0.5,0.3,0.2,0.0,0.0,1.0,0.5,0.5,0.425,0.397,0.396,0.344,0.255,0.25,0.0,0.0,0.0,0.0,0.0,2.6,1.2,1.1,0.5,0.4,0.3,0.2,0.1,0.0,0.0,0.0,0.0,0.0,0.0,6.4,3.6,2.7,1.5,1.1,0.5,0.5,0.3,0.1,0.1,0.0,0.0,0.0,0.0,0.667,0.545,0.526,0.524,0.504,0.5,0.477,0.469,0.469,0.453,0.382,0.286,0.0,0.0,7.0,5.5,4.8,4.0,2.5,2.3,1.7,1.6,0.8,0.5,0.2,0.1,0.0,0.0,14.9,10.1,9.5,8.5,6.6,4.8,3.8,3.1,1.2,1.0,0.4,0.3,0.0,0.0,1.0,0.898,0.836,0.824,0.803,0.778,0.767,0.623,0.585,0.563,0.5,0.0,0.0,0.0,4.2,3.9,2.8,2.4,1.7,1.7,1.5,0.5,0.3,0.1,0.1,0.0,0.0,0.0,6.7,4.4,4.1,...,5.4,4.8,4.7,4.5,3.3,2.1,1.3,1.2,0.8,1.0,1.0,0.75,0.738,0.706,0.703,0.698,0.696,0.67,0.667,0.605,0.556,0.548,0.5,2.6,1.9,1.9,1.7,1.5,1.5,1.1,0.8,0.6,0.5,0.4,0.3,0.3,0.3,3.7,2.8,2.7,2.5,2.5,2.0,2.0,1.1,1.0,0.7,0.5,0.4,0.4,0.3,83.0,82.0,81.0,81.0,80.0,78.0,78.0,77.0,77.0,76.0,75.0,75.0,74.0,73.0,33.3,27.4,27.1,25.9,17.2,16.8,16.7,14.1,13.9,13.1,12.7,5.3,4.3,2.3,2.1,2.1,2.0,1.9,1.7,1.4,1.3,1.1,0.9,0.3,0.2,0.2,0.1,0.1,2.8,2.3,2.2,2.1,1.9,1.9,1.8,1.6,1.5,0.9,0.9,0.5,0.4,0.1,17.5,11.6,8.9,8.9,7.1,6.1,5.9,5.5,5.3,3.7,2.1,1.1,0.8,0.8,1.6,1.5,1.1,1.0,0.8,0.6,0.6,0.5,0.4,0.3,0.3,0.2,0.1,0.0,3.0,2.0,1.8,1.6,1.5,1.3,1.1,0.9,0.8,0.7,0.6,0.6,0.3,0.3,6.3,5.4,4.5,4.3,4.3,4.0,4.0,2.9,2.6,1.1,1.0,0.7,0.6,0.5,240.0,240.0,236.0,215.0,215.0,214.0,211.0,205.0,195.0,194.0,193.0,188.0,187.0,185.0,2.0,0.1,0.0,0.0,0.0,0.1,0.0,0.167,0.0,0.1,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.211,0.0,0.2,0.0,0.8,0.0,0.4,0.0,0.1,0.0,0.3,0.0,72.0,0.0,2.1,0.0,0.1,0.0,0.1,0.0,0.7,0.0,0.0,0.0,0.3,0.0,0.3,0.0,180.0,0.0,2002,4.0,10.0,13.0,11.0,9.0,7.0,6.0,6.0,8.0,12.0,8.0,10.0,7.0,10.0,12.0,11.0,12.0,16.0,4.0,16.0,1,78,68
3,1,Maryland,1,29.25,119.2,89.9,73.7,0.025,9.88,109.1,99.3,1.62,32,4,65.9,9.4,0.53,0.258,0.301,0.415,1,0,2,2552,3060,9.38,23.5,15.5,52.4,0.569,15,15,7,0.0,0.0,0.0,0.0,47.6,0.0,0.0,0.0,0.399,0.0,0.0,0.0,2.0,1.0,5.0,2,7,3.0,7.9,2.9,2.4,2.1,1.5,1.2,0.9,0.8,0.8,0.3,0.1,0.1,0.0,0.0,2.0,1.5,0.9,0.5,0.4,0.3,0.2,0.2,0.1,0.0,0.0,0.0,0.0,0.0,5.9,4.4,3.3,3.3,2.5,2.1,2.0,1.8,0.7,0.5,0.3,0.1,0.0,0.0,0.765,0.667,0.579,0.556,0.547,0.525,0.519,0.516,0.507,0.464,0.427,0.333,0.0,0.0,5.5,4.8,4.4,3.6,1.6,1.3,1.3,1.2,0.6,0.3,0.2,0.1,0.0,0.0,10.1,9.5,8.4,7.0,3.1,3.1,2.7,2.1,0.8,0.5,0.3,0.2,0.0,0.0,1.0,0.5,0.5,0.425,0.397,0.396,0.344,0.255,0.25,0.0,0.0,0.0,0.0,0.0,2.6,1.2,1.1,0.5,0.4,0.3,0.2,0.1,0.0,0.0,0.0,0.0,0.0,0.0,6.4,3.6,2.7,1.5,1.1,0.5,0.5,0.3,0.1,0.1,0.0,0.0,0.0,0.0,0.667,0.545,0.526,0.524,0.504,0.5,0.477,0.469,0.469,0.453,0.382,0.286,0.0,0.0,7.0,5.5,4.8,4.0,2.5,2.3,1.7,1.6,0.8,0.5,0.2,0.1,0.0,0.0,14.9,10.1,9.5,8.5,6.6,4.8,3.8,3.1,1.2,1.0,0.4,0.3,0.0,0.0,1.0,0.898,0.836,0.824,0.803,0.778,0.767,0.623,0.585,0.563,0.5,0.0,0.0,0.0,4.2,3.9,2.8,2.4,1.7,1.7,1.5,0.5,0.3,0.1,0.1,0.0,0.0,0.0,6.7,4.4,4.1,...,5.2,2.3,1.2,1.1,1.1,0.8,0.5,0.5,0.4,0.9,0.789,0.779,0.779,0.75,0.726,0.667,0.65,0.62,0.579,0.533,0.0,0.0,0.0,4.6,2.5,2.2,1.9,1.8,1.7,0.5,0.5,0.5,0.3,0.3,0.0,0.0,0.0,5.9,3.8,3.4,2.9,2.7,2.3,0.8,0.6,0.6,0.5,0.4,0.2,0.0,0.0,84.0,82.0,80.0,80.0,79.0,79.0,77.0,77.0,77.0,76.0,75.0,74.0,73.0,72.0,36.0,31.4,30.0,28.7,27.7,24.8,8.5,8.0,7.2,4.4,2.8,2.0,1.5,1.4,3.2,2.6,2.3,0.7,0.7,0.6,0.6,0.5,0.4,0.3,0.2,0.2,0.1,0.0,2.8,2.6,2.6,2.4,2.3,2.2,1.0,1.0,1.0,0.8,0.6,0.3,0.3,0.0,20.3,12.6,11.4,11.3,9.2,7.9,2.4,1.6,1.2,1.2,1.0,0.9,0.3,0.0,2.1,1.3,1.2,1.0,0.8,0.6,0.2,0.2,0.2,0.1,0.1,0.1,0.0,0.0,3.3,2.9,2.4,2.4,1.7,1.4,0.6,0.6,0.4,0.3,0.2,0.2,0.0,0.0,9.0,7.5,6.0,3.3,2.7,2.3,1.9,1.8,1.5,0.8,0.5,0.4,0.4,0.3,252.0,245.0,235.0,230.0,225.0,220.0,220.0,217.0,215.0,215.0,200.0,200.0,195.0,190.0,2.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.3,0.3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.3,1.3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.3,0.0,175.0,150.0,2002,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,25.0,17.0,0.0,0.0,0.0,23.0,19.0,10.0,10.0,10.0,1,90,82
4,1,Maryland,1,29.25,119.2,89.9,73.7,0.025,9.88,109.1,99.3,1.62,32,4,65.9,9.4,0.53,0.258,0.301,0.415,1,0,2,2552,3060,9.38,23.5,15.5,52.4,0.569,15,15,7,0.0,0.0,0.0,0.0,47.6,0.0,0.0,0.0,0.399,0.0,0.0,0.0,2.0,1.0,5.0,2,7,3.0,7.9,2.9,2.4,2.1,1.5,1.2,0.9,0.8,0.8,0.3,0.1,0.1,0.0,0.0,2.0,1.5,0.9,0.5,0.4,0.3,0.2,0.2,0.1,0.0,0.0,0.0,0.0,0.0,5.9,4.4,3.3,3.3,2.5,2.1,2.0,1.8,0.7,0.5,0.3,0.1,0.0,0.0,0.765,0.667,0.579,0.556,0.547,0.525,0.519,0.516,0.507,0.464,0.427,0.333,0.0,0.0,5.5,4.8,4.4,3.6,1.6,1.3,1.3,1.2,0.6,0.3,0.2,0.1,0.0,0.0,10.1,9.5,8.4,7.0,3.1,3.1,2.7,2.1,0.8,0.5,0.3,0.2,0.0,0.0,1.0,0.5,0.5,0.425,0.397,0.396,0.344,0.255,0.25,0.0,0.0,0.0,0.0,0.0,2.6,1.2,1.1,0.5,0.4,0.3,0.2,0.1,0.0,0.0,0.0,0.0,0.0,0.0,6.4,3.6,2.7,1.5,1.1,0.5,0.5,0.3,0.1,0.1,0.0,0.0,0.0,0.0,0.667,0.545,0.526,0.524,0.504,0.5,0.477,0.469,0.469,0.453,0.382,0.286,0.0,0.0,7.0,5.5,4.8,4.0,2.5,2.3,1.7,1.6,0.8,0.5,0.2,0.1,0.0,0.0,14.9,10.1,9.5,8.5,6.6,4.8,3.8,3.1,1.2,1.0,0.4,0.3,0.0,0.0,1.0,0.898,0.836,0.824,0.803,0.778,0.767,0.623,0.585,0.563,0.5,0.0,0.0,0.0,4.2,3.9,2.8,2.4,1.7,1.7,1.5,0.5,0.3,0.1,0.1,0.0,0.0,0.0,6.7,4.4,4.1,...,5.6,5.6,1.3,1.2,1.0,0.8,0.8,0.7,0.5,1.0,0.808,0.793,0.787,0.755,0.738,0.699,0.68,0.667,0.667,0.579,0.575,0.5,0.0,4.2,2.3,2.3,2.1,1.9,1.9,1.3,0.5,0.4,0.4,0.1,0.1,0.1,0.0,5.5,3.9,3.1,2.8,2.6,2.4,1.6,0.7,0.6,0.6,0.2,0.1,0.1,0.0,83.0,82.0,81.0,81.0,81.0,78.0,77.0,76.0,75.0,75.0,75.0,73.0,73.0,72.0,31.4,30.9,30.2,27.5,26.8,20.9,15.3,7.0,4.2,3.9,3.3,2.3,2.2,2.2,3.4,3.4,2.0,1.4,1.2,0.7,0.7,0.6,0.5,0.3,0.3,0.1,0.0,0.0,2.9,2.6,2.5,2.1,2.0,1.8,1.8,1.0,0.6,0.6,0.5,0.3,0.2,0.1,19.8,15.6,14.8,13.4,8.1,7.9,7.1,1.9,1.2,1.0,1.0,0.8,0.7,0.2,1.8,1.6,1.6,1.3,1.1,0.9,0.6,0.3,0.3,0.2,0.1,0.1,0.1,0.0,3.0,2.9,2.7,2.5,1.6,1.1,1.0,0.4,0.4,0.4,0.3,0.2,0.2,0.1,11.4,8.3,5.3,4.8,3.3,2.7,2.6,1.6,0.9,0.7,0.6,0.5,0.4,0.2,255.0,255.0,250.0,235.0,230.0,230.0,215.0,215.0,205.0,190.0,185.0,175.0,175.0,165.0,6.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2002,7.0,4.0,8.0,4.0,4.0,3.0,2.0,2.0,1.0,4.0,2.0,2.0,2.0,2.0,1.0,1.0,1.0,2.0,1.0,2.0,1,97,88


In [5]:
labels = np.array(data['result'])
labels = np.reshape(labels, newshape=(labels.shape[0], 1))
scores = data[['score_one', 'score_two']].to_numpy()
labels_invert = np.array(data_invert['result'])
labels_invert = np.reshape(labels_invert, newshape=(labels_invert.shape[0], 1))
scores_invert = data_invert[['score_one', 'score_two']].to_numpy()

data = data.drop(columns=['result', 'score_one', 'score_two'])
data_invert = data_invert.drop(columns=['result', 'score_one', 'score_two'])

In [6]:
cols_to_drop = ['Team_team_one', 'Team_team_two', 'year_team_one', 'year_team_two']

cols_not = [col for col in data.columns if col not in cols_to_drop]

data = data[cols_to_drop + cols_not]
data_invert = data_invert[cols_to_drop + cols_not]

data_np = data.drop(columns=cols_to_drop).to_numpy()

data_invert_np = data_invert.drop(columns=cols_to_drop).to_numpy()

## Tensorflow Experiment

In [7]:
num_features = data_np.shape[1]
num_neurons = num_features

In [8]:
x = tf.placeholder(dtype=tf.float32, shape=(None, num_features))
y = tf.placeholder(dtype=tf.float32, shape=(None, 1))

b_hidden = tf.Variable(tf.zeros([num_neurons]))
W_hidden = tf.Variable(tf.random_normal([num_features, num_neurons]))

z_hidden = tf.add(tf.matmul(x, W_hidden), b_hidden)
a_hidden = tf.nn.relu(z_hidden) # shape is (910)

b_out = tf.Variable(tf.zeros([1]))
W_out = tf.Variable(tf.random_normal([num_features, 1]))

z_out = tf.add(tf.matmul(a_hidden, W_out), b_out)
# a_out = tf.sigmoid(z_out)

In [9]:
training_epochs = 3000
learning_rate = 0.01

cross_entropy = tf.nn.sigmoid_cross_entropy_with_logits(labels=y, logits=z_out)
cost = tf.reduce_mean(cross_entropy)
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

predicted = tf.nn.sigmoid(z_out)
correct_pred = tf.equal(tf.round(predicted), y)
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


In [10]:
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    
    for step in range(training_epochs + 1):
        sess.run(optimizer, feed_dict={x: data_np, y: labels})
        loss, _, acc = sess.run([cost, optimizer, accuracy], feed_dict={x: data_np, y: labels})
        if step % 500 == 0:
            print("Step: {:5}\tLoss: {:.3f}\tAcc: {:.2%}".format(step, loss, acc))

Step:     0	Loss: 8863.375	Acc: 70.41%
Step:   500	Loss: 85.288	Acc: 89.30%
Step:  1000	Loss: 14.753	Acc: 97.57%
Step:  1500	Loss: 29.572	Acc: 93.53%
Step:  2000	Loss: 0.000	Acc: 100.00%
Step:  2500	Loss: 0.000	Acc: 100.00%
Step:  3000	Loss: 0.000	Acc: 100.00%


## Define Tensorflow Models and Data

In [11]:
def build_model(num_features, num_hidden_layers=3, learning_rate=0.001):
    model = tf.keras.Sequential()
    for _ in range(num_hidden_layers):
        model.add(layers.Dense(units=num_features, activation='relu'))
    
    model.add(layers.Dense(units=1, activation='sigmoid'))
    
    model.compile(optimizer=tf.train.AdamOptimizer(learning_rate),
                  loss='binary_crossentropy', 
                  metrics=['binary_crossentropy', 'accuracy', tf.keras.metrics.Recall(), tf.keras.metrics.Precision()])
    
    return model

def get_callbacks(scaler=True, split=True):
    return [
        tf.keras.callbacks.EarlyStopping(monitor='val_loss' if split else 'loss', patience=20 if scaler else 100)
    ]

In [12]:
class Data:
    def __init__(self, data, labels):
        self.data = data
        self.labels = labels
    
    def split(self, split=False):
        if split:
            X_train, X_test, y_train, y_test = train_test_split(self.data, self.labels)
        else:
            X_train, X_test, y_train, y_test = self.data, None, self.labels, None
        
        self.X_train = X_train
        self.X_test = X_test
        self.y_train = y_train
        self.y_test = y_test
        
        return self
        
    def clone(self):
        data_new = Data(self.data, self.labels)
        if hasattr(self, 'X_train'):
            data_new.X_train = self.X_train
            data_new.X_test = self.X_test
            data_new.y_train = self.y_train
            data_new.y_test = self.y_test
        if hasattr(self, 'scaler'):
            data_new.scaler = self.scaler
        if hasattr(self, 'model'):
            data_new.model = self.model
        return data_new
    
    def add_scaler(self, scaler):
        self.scaler = scaler
    
    def fit_scaler(self):
        if not self.scaler:
            return
        self.scaler = self.scaler.fit(self.X_train)
        self.X_train = self.scaler.transform(self.X_train)
        if self.X_test is not None:
            self.X_test = self.scaler.transform(self.X_test)
        self.data = self.scaler.transform(self.data)
    
    def add_model(self, model):
        self.model = model
        
    def fit_model(self, verbose=3):
        if verbose == 0:
            print (f'Fitting model {self.label}')
        if self.scaler:
            if self.X_test is None and self.y_test is None:
                self.history = self.model.fit(x=self.X_train, 
                                              y=self.y_train, 
                                              batch_size=100, 
                                              callbacks=get_callbacks(True, False), 
                                              epochs=100, 
                                             verbose=verbose)
            else:
                self.history = self.model.fit(x=self.X_train, 
                                              y=self.y_train, 
                                              batch_size=100,
                                              validation_data=(self.X_test, self.y_test), 
                                              callbacks=get_callbacks(True, True), 
                                              epochs=2000, 
                                             verbose=verbose)
        else:
            if self.X_test is None and self.y_test is None:
                self.history = self.model.fit(x=self.X_train,
                                             y=self.y_train, 
                                             batch_size=100,
                                             callbacks=get_callbacks(False, False),
                                             epochs=100, 
                                             verbose=verbose)
            else:
                self.history = self.model.fit(x=self.X_train, 
                                             y=self.y_train,
                                             batch_size=100,
                                             validation_data=(self.X_test, self.y_test),
                                             callbacks=get_callbacks(False, True),
                                             epochs=2000, 
                                             verbose=verbose)
        if verbose == 0:
            print ('Done!')
        return self.history
    
    def predict_classes(self):
        self.predictions = self.model.predict_classes(x=self.data)
    
    def __str__(self):
        return f'{self.X_train.shape}, {self.X_test}, {self.y_train.shape}, {self.y_test}'

## Define Tensorflow Possibilities

In [13]:
data_obj = Data(data_np, labels)
data_invert_obj = Data(data_invert_np, labels_invert)

datas = [data_obj, data_invert_obj]

In [14]:
possibilities = []
for poss in datas:
    for val in [True, False]:
        possibilities.append(poss.clone().split(val))


label_models = ['Normal Split', 'Normal Non-Split', 'Invert Split', 'Invert Non-Split']
for poss, label in zip(possibilities, label_models):
    poss.label = label

In [59]:
possibilities_min_max = []
possibilities_standard = []

for poss in possibilities:
    poss.add_scaler(None)

for poss in possibilities:
    min_max_poss = poss.clone()
    min_max_poss.add_scaler(MinMaxScaler())
    min_max_poss.fit_scaler()
    min_max_poss.label = poss.label + ' Min Max'
    possibilities_min_max.append(min_max_poss)
    
    standard_poss = poss.clone()
    standard_poss.add_scaler(StandardScaler())
    standard_poss.fit_scaler()
    standard_poss.label = poss.label + ' Standard'
    possibilities_standard.append(standard_poss)


possibilities = possibilities + possibilities_min_max + possibilities_standard

## Add Models and Fit

In [60]:
for poss in possibilities:
    poss.add_model(build_model(poss.X_train.shape[1]))

Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor


In [61]:
# for poss in possibilities:
#     poss.fit_model(0)    

## Plot Training of Models

In [62]:
num_plots = len(possibilities)
fig = plt.figure(figsize=(10, 20))
num_y = num_plots // 2

for idx, poss in enumerate(possibilities):
    history = poss.history
    plt.subplot(num_y, 2, idx+1)
    plt.plot(history.history['loss'], label='Loss')
    plt.plot(history.history['acc'], label='Accuracy')
    
    if history.history.get('val_loss') is not None:
        plt.plot(history.history['val_loss'], label='Validation Loss')
        plt.plot(history.history['val_acc'], label='Validation Accuracy')
    
    plt.title(f'Training History ({poss.label})')
    plt.legend()
    plt.ylim((0, 1))

plt.tight_layout()
plt.show()

NameError: name 'plt' is not defined

## Evaluate TF Models by Observing Prediction for UVA-UMBC game and viewing Classification Reports and Confusion Matrices

In [56]:
row = data[(data['Team_team_one'] == 'Virginia') & (data['year_team_one'] == 2018)]
row_unc = data[(data['Team_team_two'] == 'North Carolina') & (data['year_team_one'] == 2017) & (data['Team_team_one'] == 'Gonzaga')]

# print (row.head())
# print (row_unc.head())

row = row.drop(columns=cols_to_drop).to_numpy()
row_unc = row_unc.drop(columns=cols_to_drop).to_numpy()

In [31]:
print (row.shape)
print (row_unc.shape)
print (data_np.shape)

(1, 910)
(1, 910)
(1112, 910)


In [83]:
for poss in possibilities:
    prediction = poss.model.predict_proba(x=row if not poss.scaler else poss.scaler.transform(row))
    print (poss.label)
    print (prediction)

NameError: name 'possibilities' is not defined

In [21]:
for poss in possibilities:
    poss.predict_classes()

In [23]:
for poss in possibilities:
    print (poss.label)
    print (classification_report(poss.labels, poss.predictions))

Normal Split
              precision    recall  f1-score   support

           0       0.88      0.74      0.80       329
           1       0.90      0.96      0.93       783

    accuracy                           0.89      1112
   macro avg       0.89      0.85      0.86      1112
weighted avg       0.89      0.89      0.89      1112

Normal Non-Split
              precision    recall  f1-score   support

           0       0.75      0.67      0.71       329
           1       0.87      0.91      0.89       783

    accuracy                           0.84      1112
   macro avg       0.81      0.79      0.80      1112
weighted avg       0.83      0.84      0.83      1112

Invert Split
              precision    recall  f1-score   support

           0       0.96      0.74      0.83      1112
           1       0.79      0.97      0.87      1112

    accuracy                           0.85      2224
   macro avg       0.87      0.85      0.85      2224
weighted avg       0.87      0.

In [24]:
for poss in possibilities:
    print (poss.label)
    print (confusion_matrix(poss.labels, poss.predictions))

Normal Split
[[242  87]
 [ 34 749]]
Normal Non-Split
[[220 109]
 [ 73 710]]
Invert Split
[[ 820  292]
 [  36 1076]]
Invert Non-Split
[[ 762  350]
 [  68 1044]]
Normal Split Min Max
[[286  43]
 [ 64 719]]
Normal Non-Split Min Max
[[329   0]
 [  0 783]]
Invert Split Min Max
[[ 873  239]
 [  27 1085]]
Invert Non-Split Min Max
[[1101   11]
 [  55 1057]]
Normal Split Standard
[[287  42]
 [ 39 744]]
Normal Non-Split Standard
[[329   0]
 [  0 783]]
Invert Split Standard
[[1023   89]
 [  48 1064]]
Invert Non-Split Standard
[[1112    0]
 [   0 1112]]


## Now, let's examine some SKLearn Algorithms

In [45]:
# SKLearn Algorithms
from sklearn.linear_model import LogisticRegression, LinearRegression, Lasso, ElasticNet, Ridge
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.linear_model import Perceptron
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import GridSearchCV, StratifiedKFold
from sklearn.pipeline import Pipeline
from sklearn.multioutput import MultiOutputRegressor
from sklearn.neural_network import MLPClassifier
import joblib

In [16]:
labels_sklearn = labels.flatten()
labels_invert_sklearn = labels_invert.flatten()

print (labels_sklearn.shape)

sklearn_space = [(data_np, labels_sklearn, 'Data'), (data_invert_np, labels_invert_sklearn, 'Data Invert')]

sklearn_score_space = [(data_np, scores, 'Data'), (data_invert_np, scores_invert, 'Data Invert')]

(1112,)


### Random Forest

In [17]:
%%time

## Random Forest
param_space_random_forest = {
    'rf__bootstrap' : [True, False],
    'rf__n_estimators' : [100, 1000]
}

random_forests = []
for poss in sklearn_space:
    random_forest_min_max = RandomForestClassifier(n_jobs=-1)
    pipe = Pipeline(steps=[('scale', MinMaxScaler()), ('rf', random_forest_min_max)])
    
    grid_search_forest = GridSearchCV(estimator=pipe, param_grid=param_space_random_forest, cv=StratifiedKFold(n_splits=10))
    
    random_forest_standard = RandomForestClassifier(n_jobs=-1)
    pipe_standard = Pipeline(steps=[('scale', StandardScaler()), ('rf', random_forest_standard)])
    
    grid_search_forest_standard = GridSearchCV(estimator=pipe_standard, param_grid=param_space_random_forest, cv=StratifiedKFold(n_splits=10))
    
    random_forests.append((poss[0], poss[1], grid_search_forest, poss[2] + ' Min Max'))
    random_forests.append((poss[0], poss[1], grid_search_forest_standard, poss[2] + ' Standard'))

for X, y, forest, _ in random_forests:
    forest.fit(X, y)
    
for X, y, forest, label in random_forests:
    print (label)
    print ('{:.4f}'.format(forest.score(X, y)))

Data Min Max
1.0000
Data Standard
1.0000
Data Invert Min Max
1.0000
Data Invert Standard
1.0000
CPU times: user 3min 33s, sys: 21.4 s, total: 3min 54s
Wall time: 11min 6s


In [18]:
for _, _, forest, label in random_forests:
    print (label)
    print (forest.best_params_)

Data Min Max
{'rf__bootstrap': False, 'rf__n_estimators': 100}
Data Standard
{'rf__bootstrap': False, 'rf__n_estimators': 1000}
Data Invert Min Max
{'rf__bootstrap': False, 'rf__n_estimators': 1000}
Data Invert Standard
{'rf__bootstrap': False, 'rf__n_estimators': 1000}


In [19]:
for _, _, forest, label in random_forests:
    feature_importances = forest.best_estimator_.named_steps['rf'].feature_importances_
    feature_importances = [(idx, val) for idx, val in enumerate(feature_importances)]
    feature_importances.sort(key=lambda k: k[1], reverse=True)
    feature_importances = [idx+len(cols_to_drop) for idx, _ in feature_importances[:10]]
    print (label)
    for idx, col in enumerate(data.columns[feature_importances]):
        print (f'{idx+1}. {col}')

Data Min Max
1. AdjEM_team_two
2. W_team_two
3. AdjEM_team_one
4. srs_team_two
5. pts_team_one
6. pts_team_two
7. opp_pts_team_one
8. AdjD_team_two
9. srs_team_one
10. opp_pts_team_two
Data Standard
1. AdjEM_team_two
2. pts_team_two
3. W_team_two
4. AdjEM_team_one
5. pts_team_one
6. srs_team_two
7. W_team_one
8. AdjD_team_two
9. opp_pts_team_one
10. Opp AdjEM_team_two
Data Invert Min Max
1. AdjEM_team_two
2. AdjEM_team_one
3. W_team_two
4. W_team_one
5. srs_team_one
6. pts_team_two
7. pts_team_one
8. srs_team_two
9. Opp AdjEM_team_one
10. Opp AdjEM_team_two
Data Invert Standard
1. AdjEM_team_two
2. AdjEM_team_one
3. W_team_one
4. W_team_two
5. srs_team_one
6. pts_team_one
7. pts_team_two
8. srs_team_two
9. Opp AdjEM_team_one
10. opp_pts_team_one


In [20]:
for X, y, forest, label in random_forests:
    print (label)
    y_pred = forest.predict(X)
    print (confusion_matrix(y, y_pred))

Data Min Max
[[329   0]
 [  0 783]]
Data Standard
[[329   0]
 [  0 783]]
Data Invert Min Max
[[1112    0]
 [   0 1112]]
Data Invert Standard
[[1112    0]
 [   0 1112]]


In [24]:
for _, _, forest, label in random_forests:
    print (label)
    print (forest.predict(row))
    print (forest.predict(row_unc))

Data Min Max
[0]
[0]
Data Standard
[0]
[0]
Data Invert Min Max
[0]
[0]
Data Invert Standard
[0]
[0]


In [25]:
for _, _, forest, label in random_forests:
    print (label)
    scaled_row = forest.best_estimator_.named_steps['scale'].transform(row_unc)
    print (forest.best_estimator_.named_steps['rf'].predict(scaled_row))

Data Min Max
[0]
Data Standard
[0]
Data Invert Min Max
[0]
Data Invert Standard
[0]


In [47]:
forest_prefix = 'models/forest/'

for _, _, forest, label in random_forests:
    model_label = label.replace(' ', '')
    filename = f'{forest_prefix}{model_label}.pkl'
    joblib.dump(forest, filename)

### Logistic Regression

In [32]:
%%time

## Logistic Regrerssion
param_space_log_reg = {
    'lr__C' : [10**i for i in range(-3, 2)],
    'lr__penalty' : ['l1']
}

log_regs = []
for poss in sklearn_space:
    logreg_min_max = LogisticRegression(max_iter=int(1e4), solver='saga', n_jobs=-1)
    pipe = Pipeline(steps=[('scale', MinMaxScaler()), ('lr', logreg_min_max)])
    
    curr_labels = poss[1]
    curr_labels[curr_labels==0] = -1
    
    grid_search_logreg = GridSearchCV(estimator=pipe, param_grid=param_space_log_reg, cv=StratifiedKFold(n_splits=10))
    
    logreg_standard = LogisticRegression(max_iter=int(1e3), solver='saga', n_jobs=-1)
    pipe_standard = Pipeline(steps=[('scale', StandardScaler()), ('lr', logreg_standard)])
    
    grid_search_logreg_standard = GridSearchCV(estimator=pipe_standard, param_grid=param_space_log_reg, cv=StratifiedKFold(n_splits=10))
    
    log_regs.append((poss[0], curr_labels, grid_search_logreg, poss[2] + ' Min Max'))
    log_regs.append((poss[0], curr_labels, grid_search_logreg_standard, poss[2] + ' Standard'))
    
for X, y, logreg, _ in log_regs:
    logreg.fit(X, y)
    
for X, y, logreg, label in log_regs:
    print (label)
    print ('{:.4f}'.format(logreg.score(X, y)))



Data Min Max
0.9460
Data Standard
0.9074
Data Invert Min Max
0.9137
Data Invert Standard
0.9101
CPU times: user 2min 31s, sys: 37.3 s, total: 3min 8s
Wall time: 56min 12s




In [33]:
for _, _, logreg, label in log_regs:
    print (label)
    print (logreg.best_params_)

Data Min Max
{'lr__C': 1, 'lr__penalty': 'l1'}
Data Standard
{'lr__C': 0.1, 'lr__penalty': 'l1'}
Data Invert Min Max
{'lr__C': 1, 'lr__penalty': 'l1'}
Data Invert Standard
{'lr__C': 0.1, 'lr__penalty': 'l1'}


In [34]:
for _, _, logreg, label in log_regs:
    print (label)
    coefs = logreg.best_estimator_.named_steps['lr'].coef_[0]
    coefs = [(idx, coef) for idx, coef in enumerate(coefs)]
    coefs.sort(key=lambda k: k[1], reverse=True)
    coef_idxs = [idx for idx, _ in coefs]
    
    top_coefs = coef_idxs[:5]
    bottom_coefs = coef_idxs[-5:]
    print (list(data.columns[top_coefs]))
    print (list(data.columns[bottom_coefs]))

# coefs = [(idx, coef) for idx, coef in enumerate(logreg.coef_[0])]
# coefs.sort(key=lambda k : k[1], reverse=True)

# coef_idxs = [idx for idx, _ in coefs]
# for idx, col in enumerate(data.columns[coef_idxs]):
#     print (f'{col}: {coefs[idx][1]}')

Data Min Max
['Opp AdjEM_team_one', 'fta_per_fga_pct_team_one', 'Seed_team_two', 'losses_conf_team_one', 'Team_team_one']
['Conference_team_one', 'AdjO_team_two', 'Seed_team_one', 'fta_per_fga_pct_team_two', 'Opp AdjEM_team_two']
Data Standard
['Opp AdjEM_team_one', 'fta_per_fga_pct_team_one', 'Seed_team_two', 'losses_conf_team_one', 'Conference_team_two']
['AdjO_team_two', 'Conference_team_one', 'Seed_team_one', 'fta_per_fga_pct_team_two', 'Opp AdjEM_team_two']
Data Invert Min Max
['Opp AdjEM_team_one', 'fta_per_fga_pct_team_one', 'Seed_team_two', 'AdjO_team_one', 'Conference_team_two']
['Conference_team_one', 'AdjO_team_two', 'Seed_team_one', 'fta_per_fga_pct_team_two', 'Opp AdjEM_team_two']
Data Invert Standard
['Opp AdjEM_team_one', 'fta_per_fga_pct_team_one', 'Seed_team_two', 'Team_team_one', 'Conference_team_two']
['Conference_team_one', 'week17_team_one', 'Seed_team_one', 'fta_per_fga_pct_team_two', 'Opp AdjEM_team_two']


In [35]:
for X, y, logreg, label in log_regs:
    print (label)
    y_pred = logreg.predict(X)
    print (confusion_matrix(y, y_pred))

Data Min Max
[[288  41]
 [ 19 764]]
Data Standard
[[259  70]
 [ 33 750]]
Data Invert Min Max
[[1016   96]
 [  96 1016]]
Data Invert Standard
[[1012  100]
 [ 100 1012]]


In [37]:
for _, _, logreg, label in log_regs:
    print (label)
    print (logreg.predict(row))
    print (logreg.predict(row_unc))

Data Min Max
[1]
[-1]
Data Standard
[1]
[-1]
Data Invert Min Max
[1]
[-1]
Data Invert Standard
[1]
[-1]


In [48]:
prefix = 'models/logreg/'

for _, _, logreg, label in log_regs:
    model_label = label.replace(' ', '')
    filename = f'{prefix}{model_label}.pkl'
    joblib.dump(logreg, filename)

### Gaussian Naive Bayes

In [39]:
%%time

param_space_gaussian = {
    
}

gaussians = []
for poss in sklearn_space:
    gaussian_min_max = GaussianNB()
    pipe = Pipeline(steps=[('scale', MinMaxScaler()), ('gauss', gaussian_min_max)])
    
    grid_search_gaussian = GridSearchCV(estimator=pipe, param_grid=param_space_gaussian, cv=StratifiedKFold(n_splits=10))
    
    gaussian_standard = GaussianNB()
    pipe_standard = Pipeline(steps=[('scale', StandardScaler()), ('gauss', gaussian_standard)])
    
    grid_search_gaussian_standard = GridSearchCV(estimator=pipe_standard, param_grid=param_space_gaussian, cv=StratifiedKFold(n_splits=10))
    
    gaussians.append((poss[0], poss[1], grid_search_gaussian, poss[2] + ' Min Max'))
    gaussians.append((poss[0], poss[1], grid_search_gaussian_standard, poss[2] + ' Standard'))
    
for X, y, gaussian, _ in gaussians:
    gaussian.fit(X, y)
    
for X, y, gaussian, label in gaussians:
    print (label)
    print ('{:.4f}'.format(gaussian.score(X, y)))

Data Min Max
0.4263
Data Standard
0.3930
Data Invert Min Max
0.7203
Data Invert Standard
0.7203
CPU times: user 1.67 s, sys: 1.08 s, total: 2.75 s
Wall time: 2.77 s


### Perceptron

In [40]:
%%time

param_space_perceptron = {
    'perceptron__penalty' : [None, 'l2', 'l1', 'elasticnet'],
    'perceptron__alpha' : [10**-3]
}

perceptrons = []
for poss in sklearn_space:
    perceptron_min_max = Perceptron()
    pipe = Pipeline(steps=[('scale', MinMaxScaler()), ('perceptron', perceptron_min_max)])
    
    grid_search_perceptron = GridSearchCV(estimator=pipe, param_grid=param_space_perceptron, cv=StratifiedKFold(n_splits=10))
    
    perceptron_standard = Perceptron()
    pipe_standard = Pipeline(steps=[('scale', StandardScaler()), ('perceptron', perceptron_standard)])
    
    grid_search_perceptron_standard = GridSearchCV(estimator=pipe_standard, param_grid=param_space_perceptron, cv=StratifiedKFold(n_splits=10))
    
    perceptrons.append((poss[0], poss[1], grid_search_perceptron, poss[2] + ' Min Max'))
    perceptrons.append((poss[0], poss[1], grid_search_perceptron_standard, poss[2] + ' Standard'))
    
for X, y, perceptron, _ in perceptrons:
    perceptron.fit(X, y)
    
for X, y, perceptron, label in perceptrons:
    print (label)
    print ('{:.4f}'.format(perceptron.score(X, y)))

Data Min Max
0.7698
Data Standard
0.9397
Data Invert Min Max
0.8022
Data Invert Standard
0.8844
CPU times: user 41.3 s, sys: 22.4 s, total: 1min 3s
Wall time: 27 s


In [41]:
for _, _, perceptron, label in perceptrons:
    print (label)
    print (perceptron.best_params_)

Data Min Max
{'perceptron__alpha': 0.001, 'perceptron__penalty': 'l1'}
Data Standard
{'perceptron__alpha': 0.001, 'perceptron__penalty': 'l1'}
Data Invert Min Max
{'perceptron__alpha': 0.001, 'perceptron__penalty': 'l1'}
Data Invert Standard
{'perceptron__alpha': 0.001, 'perceptron__penalty': 'l1'}


In [49]:
prefix = 'models/perceptron/'

for _, _, perceptron, label in perceptrons:
    model_label = label.replace(' ', '')
    filename = f'{prefix}{model_label}.pkl'
    joblib.dump(perceptron, filename)

### MLPClassifier

In [42]:
%%time

## Logistic Regrerssion
param_space_nn = {
#     'lr__C' : [10**i for i in range(-5, 2)]
}

nns = []
for poss in sklearn_space:
    num_features = poss[0].shape[1]
    nn_min_max = MLPClassifier(hidden_layer_sizes=(num_features,), 
#                                verbose=True, 
                               max_iter=int(1e3))
    pipe = Pipeline(steps=[('scale', MinMaxScaler()), ('nn', nn_min_max)])
    
    grid_search_nn = GridSearchCV(estimator=pipe, param_grid=param_space_nn, cv=StratifiedKFold(n_splits=10))
    
    nn_standard = MLPClassifier(hidden_layer_sizes=(num_features,), 
#                                 verbose=True, 
                                max_iter=int(1e3))
    pipe_standard = Pipeline(steps=[('scale', StandardScaler()), ('nn', nn_standard)])
    
    grid_search_nn_standard = GridSearchCV(estimator=pipe_standard, param_grid=param_space_nn, cv=StratifiedKFold(n_splits=10))
    
    nns.append((poss[0], poss[1], grid_search_nn, poss[2] + ' Min Max'))
    nns.append((poss[0], poss[1], grid_search_nn_standard, poss[2] + ' Standard'))
    
for X, y, nn, _ in nns:
    nn.fit(X, y)
    
for X, y, nn, label in nns:
    print (label)
    print ('{:.4f}'.format(logreg.score(X, y)))

Data Min Max
0.9101
Data Standard
0.9101
Data Invert Min Max
0.9101
Data Invert Standard
0.9101
CPU times: user 40min 5s, sys: 15min 28s, total: 55min 34s
Wall time: 16min 34s


In [43]:
for X, y, nn, label in nns:
    print (label)
    y_pred = nn.predict(X)
    print (confusion_matrix(y, y_pred))

Data Min Max
[[329   0]
 [  0 783]]
Data Standard
[[329   0]
 [  0 783]]
Data Invert Min Max
[[1112    0]
 [   0 1112]]
Data Invert Standard
[[1112    0]
 [   0 1112]]


In [50]:
prefix = 'models/mlp/'

for _, _, nn, label in nns:
    model_label = label.replace(' ', '')
    filename = f'{prefix}{model_label}.pkl'
    joblib.dump(nn, filename)

## Predict Scores - Regression

In [51]:
%%time

## Random Forest
param_space_score_random_forest = {
#     'multi__estimator__n_estimators' : [100, 1000],
#     'multi__estimator__criterion' : ['mse', 'mae'],
#     'multi__estimator__bootstrap' : [True, False]
}

random_score_forests = []
for poss in sklearn_score_space:
    random_forest_min_max = RandomForestRegressor(n_jobs=-1)
    multi_output_min_max = MultiOutputRegressor(random_forest_min_max, n_jobs=-1)
    
    pipe = Pipeline(steps=[('scale', MinMaxScaler()),
                           ('multi', multi_output_min_max)])
    
    grid_search_forest = GridSearchCV(estimator=pipe, 
                                      param_grid=param_space_score_random_forest, 
                                      cv=10)
    
    random_forest_standard = RandomForestRegressor(n_jobs=-1)
    multi_output_standard = MultiOutputRegressor(random_forest_standard, n_jobs=-1)
    
    pipe_standard = Pipeline(steps=[('scale', StandardScaler()),
                                    ('multi', multi_output_standard)])
    
    grid_search_forest_standard = GridSearchCV(estimator=pipe_standard, 
                                               param_grid=param_space_score_random_forest, 
                                               cv=10)
    
    random_score_forests.append((poss[0], poss[1], grid_search_forest, poss[2] + ' Min Max'))
    random_score_forests.append((poss[0], poss[1], grid_search_forest_standard, poss[2] + ' Standard'))

for X, y, forest, _ in random_score_forests:
    forest.fit(X, y)
    
for X, y, forest, label in random_score_forests:
    print (label)
    # this returns R^2 (not accuracy)
    print ('{:.4f}'.format(forest.score(X, y)))

Data Min Max
0.8778
Data Standard
0.8765
Data Invert Min Max
0.8886
Data Invert Standard
0.8877
CPU times: user 6.48 s, sys: 4.98 s, total: 11.5 s
Wall time: 23min 13s


In [52]:
for _, _, forest, label in random_score_forests:
    print (label)
    print (forest.best_params_)

Data Min Max
{}
Data Standard
{}
Data Invert Min Max
{}
Data Invert Standard
{}


In [53]:
for _, _, forest, label in random_score_forests:
    print (label)
    print (forest.predict(X=row))
    print (forest.predict(X=row_unc))

Data Min Max
[[63.81 71.84]]
[[69.11 71.06]]
Data Standard
[[62.25 70.08]]
[[68.02 71.18]]
Data Invert Min Max
[[60.24 67.59]]
[[67.72 70.53]]
Data Invert Standard
[[63.59 67.43]]
[[66.74 69.13]]


### Linear Regression

In [48]:
%%time

## Random Forest
param_space_score_linear_regression = {
    'multi__estimator__fit_intercept' : [True, False]
}

score_linear_regressions = []
for poss in sklearn_score_space:
    linear_none = LinearRegression()
    multi_none = MultiOutputRegressor(linear_none, n_jobs=-1)
    pipe_none = Pipeline(steps=[('multi', multi_none)])
    grid_search_none = GridSearchCV(estimator=pipe_none,
                                   param_grid=param_space_score_linear_regression,
                                   cv=10)
    
    linear_min_max = LinearRegression()
    multi_output_min_max = MultiOutputRegressor(linear_min_max, n_jobs=-1)
    
    pipe = Pipeline(steps=[('scale', MinMaxScaler()),
                           ('multi', multi_output_min_max)])
    
    grid_search_linear = GridSearchCV(estimator=pipe, 
                                      param_grid=param_space_score_linear_regression, 
                                      cv=10)
    
    linear_standard = LinearRegression()
    multi_output_standard = MultiOutputRegressor(linear_standard, n_jobs=-1)
    
    pipe_standard = Pipeline(steps=[('scale', StandardScaler()),
                                    ('multi', multi_output_standard)])
    
    grid_search_linear_standard = GridSearchCV(estimator=pipe_standard, 
                                               param_grid=param_space_score_linear_regression, 
                                               cv=10)
    
    score_linear_regressions.append((poss[0], poss[1], grid_search_none, poss[2] + ' None'))
    score_linear_regressions.append((poss[0], poss[1], grid_search_linear, poss[2] + ' Min Max'))
    score_linear_regressions.append((poss[0], poss[1], grid_search_linear_standard, poss[2] + ' Standard'))

for X, y, lin, _ in score_linear_regressions:
    lin.fit(X, y)
    
for X, y, lin, label in score_linear_regressions:
    print (label)
    # this returns R^2 (not accuracy)
    print ('{:.4f}'.format(lin.score(X, y)))

Data None
0.8458
Data Min Max
0.8458
Data Standard
0.8458
Data Invert None
0.5756
Data Invert Min Max
0.5748
Data Invert Standard
0.5755
CPU times: user 3.73 s, sys: 10.5 s, total: 14.2 s
Wall time: 1min 58s


In [49]:
for _, _, lin, label in score_linear_regressions:
    print (label)
    print (lin.best_params_)

Data None
{'multi__estimator__fit_intercept': True}
Data Min Max
{'multi__estimator__fit_intercept': False}
Data Standard
{'multi__estimator__fit_intercept': True}
Data Invert None
{'multi__estimator__fit_intercept': True}
Data Invert Min Max
{'multi__estimator__fit_intercept': False}
Data Invert Standard
{'multi__estimator__fit_intercept': True}


In [50]:
for _, _, lin, label in score_linear_regressions:
    print (label)
    print (lin.predict(X=row))
    print (lin.predict(X=row_unc))

Data None
[[58.44418803 71.07457488]]
[[73.13338337 67.15193635]]
Data Min Max
[[58.44418803 71.07457488]]
[[73.13338337 67.15193635]]
Data Standard
[[58.44418803 71.07457488]]
[[73.13338337 67.15193635]]
Data Invert None
[[57.13170851 44.14608095]]
[[74.74826886 74.63820606]]
Data Invert Min Max
[[57.3678851  44.38225753]]
[[74.73944021 74.6293774 ]]
Data Invert Standard
[[57.13319458 44.1465042 ]]
[[74.7504211  74.66476279]]


### Ridge, Lasso, ElasticNet

In [54]:
%%time

## Random Forest
param_space_score_lasso = {
    'multi__estimator__alpha' : [10**i for i in range(-5, 2) if i != 0],
    'multi__estimator__positive' : [True, False],
    'multi__estimator__fit_intercept' : [True, False]
}

score_lassos = []
for poss in sklearn_score_space:
    lasso_min_max = Lasso()
    multi_output_min_max = MultiOutputRegressor(lasso_min_max, n_jobs=-1)
    
    pipe = Pipeline(steps=[('scale', MinMaxScaler()),
                           ('multi', multi_output_min_max)])
    
    grid_search_lasso = GridSearchCV(estimator=pipe, 
                                      param_grid=param_space_score_lasso, 
                                      cv=10)
    
    lasso_standard = Lasso()
    multi_output_standard = MultiOutputRegressor(lasso_standard, n_jobs=-1)
    
    pipe_standard = Pipeline(steps=[('scale', StandardScaler()),
                                    ('multi', multi_output_standard)])
    
    grid_search_lasso_standard = GridSearchCV(estimator=pipe_standard, 
                                               param_grid=param_space_score_lasso, 
                                               cv=10)
    
    score_lassos.append((poss[0], poss[1], grid_search_lasso, poss[2] + ' Min Max'))
    score_lassos.append((poss[0], poss[1], grid_search_lasso_standard, poss[2] + ' Standard'))

for X, y, lin, _ in score_lassos:
    lin.fit(X, y)
    
for X, y, lin, label in score_lassos:
    print (label)
    # this returns R^2 (not accuracy)
    print ('{:.4f}'.format(lin.score(X, y)))

Data Min Max
0.2617
Data Standard
0.3156
Data Invert Min Max
0.3015
Data Invert Standard
0.3210
CPU times: user 32.2 s, sys: 24.8 s, total: 57 s
Wall time: 16min 50s


In [55]:
for _, _, lin, label in score_lassos:
    print (label)
    print (lin.best_params_)

Data Min Max
{'multi__estimator__alpha': 0.1, 'multi__estimator__fit_intercept': True, 'multi__estimator__positive': True}
Data Standard
{'multi__estimator__alpha': 0.1, 'multi__estimator__fit_intercept': True, 'multi__estimator__positive': True}
Data Invert Min Max
{'multi__estimator__alpha': 0.1, 'multi__estimator__fit_intercept': False, 'multi__estimator__positive': True}
Data Invert Standard
{'multi__estimator__alpha': 0.1, 'multi__estimator__fit_intercept': True, 'multi__estimator__positive': True}


In [56]:
%%time

## Random Forest
param_space_score_ridge = {
    'multi__estimator__alpha' : [10**i for i in range(-5, 5) if i != 0],
    'multi__estimator__fit_intercept' : [True, False]
}

score_ridges = []
for poss in sklearn_score_space:
    ridge_min_max = Ridge()
    multi_output_min_max = MultiOutputRegressor(ridge_min_max, n_jobs=-1)
    
    pipe = Pipeline(steps=[('scale', MinMaxScaler()),
                           ('multi', multi_output_min_max)])
    
    grid_search_ridge = GridSearchCV(estimator=pipe, 
                                      param_grid=param_space_score_ridge, 
                                      cv=10)
    
    ridge_standard = Ridge()
    multi_output_standard = MultiOutputRegressor(ridge_standard, n_jobs=-1)
    
    pipe_standard = Pipeline(steps=[('scale', StandardScaler()),
                                    ('multi', multi_output_standard)])
    
    grid_search_ridge_standard = GridSearchCV(estimator=pipe_standard, 
                                               param_grid=param_space_score_ridge, 
                                               cv=10)
    
    score_ridges.append((poss[0], poss[1], grid_search_ridge, poss[2] + ' Min Max'))
    score_ridges.append((poss[0], poss[1], grid_search_ridge_standard, poss[2] + ' Standard'))

for X, y, lin, _ in score_ridges:
    lin.fit(X, y)
    
for X, y, lin, label in score_ridges:
    print (label)
    # this returns R^2 (not accuracy)
    print ('{:.4f}'.format(lin.score(X, y)))

Data Min Max
0.3259
Data Standard
0.4517
Data Invert Min Max
0.3417
Data Invert Standard
0.4275
CPU times: user 21.2 s, sys: 12.4 s, total: 33.6 s
Wall time: 1min 48s


In [57]:
for _, _, lin, label in score_ridges:
    print (label)
    print (lin.best_params_)

Data Min Max
{'multi__estimator__alpha': 100, 'multi__estimator__fit_intercept': True}
Data Standard
{'multi__estimator__alpha': 1000, 'multi__estimator__fit_intercept': True}
Data Invert Min Max
{'multi__estimator__alpha': 100, 'multi__estimator__fit_intercept': True}
Data Invert Standard
{'multi__estimator__alpha': 1000, 'multi__estimator__fit_intercept': True}


In [61]:
%%time

## Random Forest
param_space_score_elastic = {
    'multi__estimator__alpha' : [10**i for i in range(-5, 5) if i != 0],
    'multi__estimator__l1_ratio' : np.linspace(0, 1, num=5)
}

score_elastic_nets = []
for poss in sklearn_score_space:
    elastic_min_max = ElasticNet()
    multi_output_min_max = MultiOutputRegressor(elastic_min_max, n_jobs=-1)
    
    pipe = Pipeline(steps=[('scale', MinMaxScaler()),
                           ('multi', multi_output_min_max)])
    
    grid_search_elastic = GridSearchCV(estimator=pipe, 
                                      param_grid=param_space_score_elastic, 
                                      cv=10)
    
    elastic_standard = ElasticNet()
    multi_output_standard = MultiOutputRegressor(elastic_standard, n_jobs=-1)
    
    pipe_standard = Pipeline(steps=[('scale', StandardScaler()),
                                    ('multi', multi_output_standard)])
    
    grid_search_elastic_standard = GridSearchCV(estimator=pipe_standard, 
                                               param_grid=param_space_score_elastic, 
                                               cv=10)
    
    score_elastic_nets.append((poss[0], poss[1], grid_search_elastic, poss[2] + ' Min Max'))
    score_elastic_nets.append((poss[0], poss[1], grid_search_elastic_standard, poss[2] + ' Standard'))

for X, y, lin, _ in score_elastic_nets:
    lin.fit(X, y)
    
for X, y, lin, label in score_elastic_nets:
    print (label)
    # this returns R^2 (not accuracy)
    print ('{:.4f}'.format(lin.score(X, y)))

Data Min Max
0.2820
Data Standard
0.2200
Data Invert Min Max
0.2961
Data Invert Standard
0.4257
CPU times: user 1min, sys: 42.4 s, total: 1min 43s
Wall time: 33min 22s


In [131]:
for _, _, lin, label in score_elastic_nets:
    print (label)
    print (lin.best_params_)

Data Min Max
{'multi__estimator__alpha': 0.1}
Data Standard
{'multi__estimator__alpha': 10}
Data Invert Min Max
{'multi__estimator__alpha': 0.1}
Data Invert Standard
{'multi__estimator__alpha': 0.1}


In [60]:
np.linspace(0, 1, num=5)

array([0.  , 0.25, 0.5 , 0.75, 1.  ])