In [1]:
import tensorflow as tf
from tensorflow import keras
import externalTensor as exT
from sklearn.model_selection import KFold

import numpy as np


import pandas as pd
from boxScore import boxScore
import winsound
import wandb
from wandb.keras import WandbCallback

# Preparation of Data

In [2]:
# Season and box-score flavour to load. Both strings are reused further down
# to label the dataset in every experiment config and to build the W&B
# project name.
years = "2018-19"
stats = "traditional"
# stats='advance'
box_score = boxScore(years, stats)

# separation() is unpacked as (train features, test features, train labels,
# test labels).
x_train, x_test, y_train, y_test = box_score.separation()

# Labels are converted to NumPy arrays; the feature tables are kept as returned.
y_train, y_test = np.array(y_train), np.array(y_test)

# Number of feature columns; handed to every model builder as the input width.
input_dimension = len(x_train.columns)

In [36]:
# df=pd.DataFrame(y_train,columns=["WinHome","WinAway"])
# df.to_csv("test1819ytrain.csv")

# Possible Hyperparameters

In [3]:
    
# Search space shared by the grid-search cells below.
activation = ["relu", "sigmoid"]
number_neurons = [10, 30, 50, 100, 150, 200]
# number_neurons=[10,30]
possible_learning_rate = [0.0001, 0.001, 0.01]
# possible_learning_rate=[0.0001]

# Training budget passed to the cross-validation helper.
epochs = 500
batch_size = 150

# 10-fold splitter; shuffling with a fixed seed keeps the folds identical
# across runs.
kf = KFold(n_splits=10, shuffle=True, random_state=1)

# First experiment: one hidden layer

In [20]:

# Grid search for the plain one-hidden-layer network.
# For every hidden-layer width, each (learning rate, activation) pair is
# trained through exT.trainModelCrossValidation, and the run with the highest
# 'val_acc' in the returned config is kept as that width's winner.
best_model_array=[]
best_config_array=[]
best_history_array=[]
for num in number_neurons:
    # Per-width winners. bestModel is reset together with the config/history
    # so that a width for which no run beats the 0 baseline cannot silently
    # re-append the model selected for the previous width.
    best_config_tmp={'val_acc':0}
    best_history_tmp=[]
    bestModel=None
    for el in possible_learning_rate:
        for act in activation:
            # Initialize model
            model = exT.makeModelSimple(num,act,input_dimension)

            # Instantiate an optimizer to train the model.
            optimizer = keras.optimizers.SGD(learning_rate=el)
            # Instantiate loss function.
            loss_fn = tf.keras.losses.BinaryCrossentropy()
            model.compile(optimizer=optimizer,loss=loss_fn,metrics=['accuracy'])

            # Metadata describing this run; the helper returns it enriched
            # with the metric used for selection below ('val_acc').
            config={
                "learning_rate":el,
                'num_neurons':num,
                'acti_fun':act,
                "batch_size": batch_size,
                "epochs": epochs,
                "architecture": "OneHiddenLayer",
                "dataset":years+"_"+stats
            }

            # Train the model
            config,history=exT.trainModelCrossValidation(
                config,x_train,y_train,
                x_test, y_test,
                model,
                kf,
                epochs,
                batch_size
            )

            # Keep the run with the highest validation accuracy for this width.
            if config['val_acc']>best_config_tmp['val_acc']:
                bestModel=model
                best_config_tmp=config
                best_history_tmp=history

    print("Best model",best_config_tmp)
    best_model_array.append(bestModel)
    best_config_array.append(best_config_tmp)
    best_history_array.append(best_history_tmp)

# Audible signal that the search has finished (winsound is Windows-only).
winsound.Beep(440,2500)

Best model {'learning_rate': 0.01, 'num_neurons': 10, 'acti_fun': 'sigmoid', 'batch_size': 200, 'epochs': 500, 'architecture': 'OneHiddenLayer', 'dataset': '2018-19_advance', 'loss': 0.20900047829970717, 'acc': 0.9599929609715938, 'val_loss': 0.8176719854831695, 'val_acc': 0.6437520051121711}
Best model {'learning_rate': 0.01, 'num_neurons': 30, 'acti_fun': 'relu', 'batch_size': 200, 'epochs': 500, 'architecture': 'OneHiddenLayer', 'dataset': '2018-19_advance', 'loss': 0.06979430980128236, 'acc': 0.975337403947115, 'val_loss': 1.9508475945949555, 'val_acc': 0.656120004528761}
Best model {'learning_rate': 0.001, 'num_neurons': 50, 'acti_fun': 'sigmoid', 'batch_size': 200, 'epochs': 500, 'architecture': 'OneHiddenLayer', 'dataset': '2018-19_advance', 'loss': 0.30580070467293263, 'acc': 0.9791448065102101, 'val_loss': 0.6325237509012223, 'val_acc': 0.7059440090596676}
Best model {'learning_rate': 0.001, 'num_neurons': 100, 'acti_fun': 'sigmoid', 'batch_size': 200, 'epochs': 500, 'architec

In [4]:
def wandbWrite(project_name,best_config_array,best_history_array,name_runs):
    """Replay stored training histories to Weights & Biases, one run per config.

    Args:
        project_name: W&B project that receives the runs.
        best_config_array: sequence of config dicts; each must contain the
            key "num_neurons", which is appended to ``name_runs`` to form the
            run name. The whole dict is passed to ``wandb.init`` as the run
            config.
        best_history_array: sequence of histories, paired positionally with
            ``best_config_array``. Each history is indexed as
            ``[loss, acc, val_loss, val_acc]``, every entry being a per-epoch
            series.
        name_runs: prefix for the run names.

    Every logged metric is rounded to 3 decimals. The number of logged epochs
    is the length of the loss series.
    """
    for model_history,config in zip(best_history_array,best_config_array):
        run = wandb.init(project=project_name, config=config)
        try:
            run.name=name_runs+str(config["num_neurons"])

            losses=model_history[0]
            accs=model_history[1]
            val_losses=model_history[2]
            val_accs=model_history[3]
            for epoch in range(len(losses)):
                wandb.log({'epochs': epoch,
                    'loss': round(losses[epoch],3),
                    'acc': round(accs[epoch],3),
                    'val_loss': round(val_losses[epoch],3),
                    'val_acc': round(val_accs[epoch],3)
                    })
        finally:
            # Always close the run, even when a malformed config/history
            # raises, so a failed upload does not leave an open run behind.
            run.finish()

In [None]:
# Authenticate with Weights & Biases before any run is created.
wandb.login()

# Project name is derived from the dataset: "T" + first three letters of the
# stats flavour (capitalised) + season digits + "runs",
# e.g. "TTra1819runs" for stats="traditional", years="2018-19".
project_name = f"T{stats[:3].capitalize()}{years[2:].replace('-', '')}runs"

# Upload the per-width winners of the one-hidden-layer search.
wandbWrite(project_name, best_config_array, best_history_array, "onehidden_")


<!-- Best model {'learning_rate': 0.001, 'num_neurons': 10, 'acti_fun': 'relu', 'loss': 0.0193, 'acc': 0.9987, 'val_loss': 1.4627, 'val_acc': 0.6}
Best model {'learning_rate': 0.0001, 'num_neurons': 30, 'acti_fun': 'sigmoid', 'loss': 0.4884, 'acc': 0.9517, 'val_loss': 0.6466, 'val_acc': 0.64}
Best model {'learning_rate': 0.0001, 'num_neurons': 50, 'acti_fun': 'sigmoid', 'loss': 0.5163, 'acc': 0.9437, 'val_loss': 0.656, 'val_acc': 0.66}
Best model {'learning_rate': 0.0001, 'num_neurons': 100, 'acti_fun': 'relu', 'loss': 0.0628, 'acc': 0.9951, 'val_loss': 0.775, 'val_acc': 0.64}
Best model {'learning_rate': 0.001, 'num_neurons': 150, 'acti_fun': 'relu', 'loss': 0.0135, 'acc': 0.9998, 'val_loss': 1.4478, 'val_acc': 0.64}
Best model {'learning_rate': 0.0001, 'num_neurons': 200, 'acti_fun': 'relu', 'loss': 0.0487, 'acc': 1.0, 'val_loss': 0.8261, 'val_acc': 0.64} -->

# L2 Regularization


In [5]:
# Grid search for the one-hidden-layer network with L2 regularisation.
# For every hidden-layer width, each (learning rate, activation, L2 factor)
# triple is trained through exT.trainModelCrossValidation, and the run with
# the highest 'val_acc' in the returned config is kept as that width's winner.
best_model_array=[]
best_config_array=[]
best_history_array=[]
possible_pL2=[0.0001,0.001,0.01]
# Batch size of this experiment. It is recorded in the config AND passed to
# the helper, so the logged value is the one actually used. The helper's own
# default is not visible from this notebook.
l2_batch_size=200
for num in number_neurons:
    # Per-width winners. bestModel is reset together with the config/history
    # so that a width for which no run beats the 0 baseline cannot silently
    # re-append the model selected for the previous width.
    best_config_tmp={'val_acc':0}
    best_history_tmp=[]
    bestModel=None
    for el in possible_learning_rate:
        for act in activation:
            for pl2 in possible_pL2:
                # Initialize model
                model = exT.makeModelL2(num,act,input_dimension,pl2)
                # Instantiate an optimizer to train the model.
                optimizer = keras.optimizers.SGD(learning_rate=el)
                # Instantiate loss function.
                loss_fn = tf.keras.losses.BinaryCrossentropy()
                model.compile(optimizer=optimizer,loss=loss_fn,metrics=['accuracy'])
                config={
                    "learning_rate":el,
                    'num_neurons':num,
                    'acti_fun':act,
                    "batch_size": l2_batch_size,
                    "epochs": epochs,
                    "architecture": "OneHiddenLayerL2",
                    "pl2":pl2,
                    "dataset":years+"_"+stats
                }

                # Train with the same epoch budget that is written to the
                # config (previously a separate literal 500).
                config,history=exT.trainModelCrossValidation(
                    config,x_train,y_train,
                    x_test, y_test,
                    model,
                    kf,
                    epochs,
                    l2_batch_size
                )

                # Keep the run with the highest validation accuracy for this width.
                if config['val_acc']>best_config_tmp['val_acc']:
                    bestModel=model
                    best_config_tmp=config
                    best_history_tmp=history

    print("Best model",best_config_tmp)
    best_model_array.append(bestModel)
    best_config_array.append(best_config_tmp)
    best_history_array.append(best_history_tmp)


Best model {'learning_rate': 0.0001, 'num_neurons': 10, 'acti_fun': 'sigmoid', 'batch_size': 200, 'epochs': 500, 'architecture': 'OneHiddenLayerL2', 'pl2': 0.001, 'dataset': '2018-19_traditional', 'loss': 0.6163995435774327, 'acc': 0.7627303644359111, 'val_loss': 0.7921850082039833, 'val_acc': 0.5745880062580109}
Best model {'learning_rate': 0.0001, 'num_neurons': 30, 'acti_fun': 'sigmoid', 'batch_size': 200, 'epochs': 500, 'architecture': 'OneHiddenLayerL2', 'pl2': 0.0001, 'dataset': '2018-19_traditional', 'loss': 0.8071493991732598, 'acc': 0.7861262950897216, 'val_loss': 1.147052707529068, 'val_acc': 0.5704319981098175}


In [None]:
# Audible notification: a 440 Hz tone for 2500 ms, presumably to signal that
# the long-running search above has finished. winsound is Windows-only.
winsound.Beep(440,2500)

In [None]:
# Authenticate with Weights & Biases before any run is created.
wandb.login()

# Project name is derived from the dataset: "T" + first three letters of the
# stats flavour (capitalised) + season digits + "runs",
# e.g. "TTra1819runs" for stats="traditional", years="2018-19".
project_name = f"T{stats[:3].capitalize()}{years[2:].replace('-', '')}runs"

# Upload the per-width winners of the L2-regularised search.
wandbWrite(project_name, best_config_array, best_history_array, "onehiddenL2_")

# Dropout

In [None]:
# Grid search for the one-hidden-layer network with dropout.
# For every hidden-layer width, each (learning rate, activation, dropout
# probability) triple is trained through exT.trainModelCrossValidation, and
# the run with the highest 'val_acc' in the returned config is kept as that
# width's winner.
best_model_array=[]
best_config_array=[]
best_history_array=[]
# Dropout probabilities to try. The last entry used to be 65, which is not a
# probability; 0.65 continues the 0.5 / 0.6 progression.
# NOTE(review): p is presumably forwarded to a Keras Dropout layer by
# exT.makeModelDropout - confirm.
possible_p=[0.5,0.6,0.65]
# Batch size of this experiment. It is recorded in the config AND passed to
# the helper, so the logged value is the one actually used. The helper's own
# default is not visible from this notebook.
dropout_batch_size=200
for num in number_neurons:
    # Per-width winners. bestModel is reset together with the config/history
    # so that a width for which no run beats the 0 baseline cannot silently
    # re-append the model selected for the previous width.
    best_config_tmp={'val_acc':0}
    best_history_tmp=[]
    bestModel=None
    for el in possible_learning_rate:
        for act in activation:
            for p in possible_p:
                # Initialize model
                model = exT.makeModelDropout(num,act,input_dimension,p)

                # Instantiate an optimizer to train the model.
                optimizer = keras.optimizers.SGD(learning_rate=el)
                # Instantiate loss function.
                loss_fn = tf.keras.losses.BinaryCrossentropy()
                model.compile(optimizer=optimizer,loss=loss_fn,metrics=['accuracy'])
                config={
                    "learning_rate":el,
                    'num_neurons':num,
                    'acti_fun':act,
                    "batch_size": dropout_batch_size,
                    "epochs": epochs,
                    "architecture": "OneHiddenLayerDropout",
                    'p-dropout':p,
                    "dataset":years+"_"+stats
                }

                # Train with the same epoch budget that is written to the
                # config (previously a separate literal 500).
                config,history=exT.trainModelCrossValidation(
                    config,x_train,y_train,
                    x_test, y_test,
                    model,
                    kf,
                    epochs,
                    dropout_batch_size
                )

                # Keep the run with the highest validation accuracy for this width.
                if config['val_acc']>best_config_tmp['val_acc']:
                    bestModel=model
                    best_config_tmp=config
                    best_history_tmp=history

    print("Best model",best_config_tmp)
    best_model_array.append(bestModel)
    best_config_array.append(best_config_tmp)
    best_history_array.append(best_history_tmp)

# Audible signal that the search has finished (winsound is Windows-only).
winsound.Beep(440,2500)

In [None]:
# Authenticate with Weights & Biases before any run is created.
wandb.login()

# Project name is derived from the dataset: "T" + first three letters of the
# stats flavour (capitalised) + season digits + "runs",
# e.g. "TTra1819runs" for stats="traditional", years="2018-19".
project_name = f"T{stats[:3].capitalize()}{years[2:].replace('-', '')}runs"

# Upload the per-width winners of the dropout search.
wandbWrite(project_name, best_config_array, best_history_array, "onehiddenDrop_")

# AUTOENCODERS ONE LEVEL

In [None]:


# One-level autoencoder search: for each pair of layer sizes, train one
# autoencoder per learning rate to reconstruct its own input and keep the
# configuration with the highest final validation accuracy.
combination_of_neurons=[[20,10],[15,5]]
for comb in combination_of_neurons:
    best_config_tmp={'val_acc':0}
    # Reset per combination so the winner of a previous combination is never
    # mistaken for this one's.
    bestModel=None
    for el in possible_learning_rate:

        autoencoder,encoder,decoder = exT.autoencoderOneLevel(input_dimension,comb[0],comb[1])
        loss_fn = tf.keras.losses.BinaryCrossentropy()
        # Build Adam with the learning rate under test. The string shortcut
        # "adam" always uses the optimizer's default rate, which made every
        # iteration of this loop train the same setup.
        optimizer = keras.optimizers.Adam(learning_rate=el)
        autoencoder.compile(optimizer=optimizer,loss=loss_fn,metrics=['accuracy'])
        config={
                "learning_rate":el,
                'num_neurons':comb,
                'acti_fun':"relu"
            }
        # Input and target are the same table: the network learns to
        # reconstruct the features. x_test serves as the validation set.
        tik=autoencoder.fit(x_train,x_train,batch_size=200,verbose=False,epochs=50,validation_data=(x_test,x_test))
        # Score the run by the validation accuracy of the last epoch.
        config['val_acc']=tik.history['val_accuracy'][-1]
        if config['val_acc']>best_config_tmp['val_acc']:
            bestModel=autoencoder
            best_config_tmp=config

    print("Best model",best_config_tmp)

In [None]:
# Reload a previously saved model for the current stats flavour.
# NOTE(review): no cell visible in this notebook writes this directory -
# confirm where 'saved_model_<stats>/bestModel' comes from.
model = tf.keras.models.load_model('saved_model_'+stats+'/bestModel')

# The learning rate used to be read from `el`, a loop variable left over from
# an earlier cell, so this cell depended on execution order. Under
# Restart & Run All `el` holds the last entry of possible_learning_rate here;
# that value is now named explicitly.
# NOTE(review): replace with the saved model's tuned learning rate if it differs.
reload_learning_rate = possible_learning_rate[-1]
optimizer = keras.optimizers.SGD(learning_rate=reload_learning_rate)
# Instantiate loss function.
loss_fn = tf.keras.losses.BinaryCrossentropy()

model.compile(optimizer=optimizer,loss=loss_fn,metrics=['accuracy'])

# AUTOENCODERS TWO LEVELS

In [None]:
# Two-level autoencoder search: for each triple of layer sizes, train one
# autoencoder per learning rate to reconstruct its own input and keep the
# configuration with the highest final validation accuracy.
combination_of_neurons=[[20,15,5],[15,10,5]]
for comb in combination_of_neurons:
    best_config_tmp={'val_acc':0}
    # Reset per combination so the winner of a previous combination is never
    # mistaken for this one's.
    bestModel=None
    for el in possible_learning_rate:

        autoencoder,encoder,decoder = exT.autoencoderTwoLevels(input_dimension,comb[0],comb[1],comb[2])
        loss_fn = tf.keras.losses.BinaryCrossentropy()
        # Build Adam with the learning rate under test. The string shortcut
        # "adam" always uses the optimizer's default rate, which made every
        # iteration of this loop train the same setup.
        optimizer = keras.optimizers.Adam(learning_rate=el)
        autoencoder.compile(optimizer=optimizer,loss=loss_fn,metrics=['accuracy'])
        config={
                "learning_rate":el,
                'num_neurons':comb,
                'acti_fun':"relu"
            }
        # Input and target are the same table: the network learns to
        # reconstruct the features. x_test serves as the validation set.
        tik=autoencoder.fit(x_train,x_train,batch_size=200,verbose=False,epochs=50,validation_data=(x_test,x_test))
        # Score the run by the validation accuracy of the last epoch.
        config['val_acc']=tik.history['val_accuracy'][-1]
        if config['val_acc']>best_config_tmp['val_acc']:
            bestModel=autoencoder
            best_config_tmp=config

    print("Best model",best_config_tmp)

# Random Forest

In [None]:
from sklearn.ensemble import RandomForestClassifier

# Sweep the tree depth of a random forest trained on the two-column labels
# and report the best test accuracy together with the depth that produced it.
best_acc=-1
best_depth=None
for depth in range(2,10):
    clf=RandomForestClassifier(max_depth=depth, random_state=42)
    clf.fit(x_train,y_train)
    predictions=clf.predict(x_test)
    # A sample counts as correct when the first label column matches.
    count=sum(1 for predicted,actual in zip(predictions,y_test) if predicted[0]==actual[0])
    acc=count/len(x_test)
    if acc> best_acc:
        best_acc=acc
        # Remember which depth won. Printing the loop variable after the loop
        # always showed the last depth tried, not the best one.
        best_depth=depth
print(best_acc,best_depth)

In [None]:
# Collapse each training label row into a single class id:
# 0 when the first column equals 1, otherwise 1.
para_y_train=[0 if label[0]==1 else 1 for label in y_train]

In [None]:
from sklearn.ensemble import RandomForestClassifier

# Sweep the tree depth of a random forest trained on the single-column labels
# (para_y_train) and report the best test accuracy together with the depth
# that produced it.
best_acc=-1
best_depth=None
for depth in range(2,10):
    clf=RandomForestClassifier(n_estimators=100,max_depth=depth, random_state=42)
    clf.fit(x_train,para_y_train)
    predictions=clf.predict(x_test)
    # Predicted class ids are compared with the second column of the test labels.
    count=sum(1 for predicted,actual in zip(predictions,y_test) if predicted==actual[1])
    acc=count/len(x_test)
    if acc> best_acc:
        best_acc=acc
        # Remember which depth won. Printing the loop variable after the loop
        # always showed the last depth tried, not the best one.
        best_depth=depth
print(best_acc,best_depth)

# AdaBoost

In [None]:
from sklearn.ensemble import AdaBoostClassifier

# AdaBoost baseline trained on the single-column labels (para_y_train).
clf = AdaBoostClassifier(n_estimators=100, random_state=42)
clf.fit(x_train,para_y_train)
predictions=clf.predict(x_test)

# Predicted class ids are compared with the second column of the test labels.
hits=sum(1 for predicted,actual in zip(predictions,y_test) if predicted==actual[1])
acc=hits/len(x_test)

# Same bookkeeping as the sweeps above: start from -1 and keep the larger value.
best_acc=acc if acc> -1 else -1
print(best_acc)