In [1]:
import tensorflow as tf
from tensorflow import keras

import externalTensor as exT
import numpy as np
from sklearn.model_selection import KFold
from boxScore import boxScore
import lime
from lime import lime_tabular
import pandas as pd

# Preparation of Data

In [2]:
# Season identifier and statistics flavour handed to boxScore.
# The original notebook also kept stats="traditional" as a commented-out alternative.
years = "2021-22"
stats = 'advance'
box_score = boxScore(years, stats)

# separation() yields the four train/test pieces in this order.
# NOTE(review): x_train is later accessed through `.columns`, so it is
# presumably a pandas DataFrame; the types of the other three are not visible here.
(x_train, x_test, y_train, y_test) = box_score.separation()

# Only the training targets are converted to a NumPy array; y_test is left as returned.
y_train = np.array(y_train)


# Possible Hyperparameters

In [3]:
    
# Search space explored by the grid-search cells below.
# Each list has a single-value variant (kept as a comment) for a quick run.

# Hidden-layer activation functions to try.
activation = ["relu", "sigmoid"]
# activation = ["relu"]

# Values passed as the neuron-count argument of the exT.makeModel* builders.
number_neurons = [10, 30, 50, 100, 150, 200]
# number_neurons = [10]

# Learning rates handed to the SGD optimizer.
possible_learning_rate = [1e-4, 1e-3, 1e-2]
# possible_learning_rate = [1e-3]

# Greedy Approach

# Simple

In [None]:

# Grid search for the model built by exT.makeModelSimple.
# For every value in number_neurons, each (learning rate, activation) pair is
# compiled and trained, and the configuration with the highest 'val_acc' is
# kept. bestModelArray ends up with one entry per value of number_neurons,
# in the same order.
kf = KFold(n_splits=10, random_state=1, shuffle=True)
bestModelArray=[]

for nN in number_neurons:
    # Reset BOTH trackers for every width. Previously bestModel was never
    # reset and the score started at 0 with a strict '>' comparison, so a
    # width whose configurations all scored 0 either raised NameError (first
    # width) or silently appended the model selected for the previous width.
    # Seeding with -inf makes the first trained configuration always win.
    bestModel=None
    bestRes={'val_acc':float('-inf')}
    for el in possible_learning_rate:
        for act in activation:
            # Build a fresh model; the input size is the number of feature columns.
            model = exT.makeModelSimple(nN,act,len(x_train.columns))

            # Plain SGD with the candidate learning rate, binary cross-entropy loss.
            optimizer = keras.optimizers.SGD(learning_rate=el)
            loss_fn = tf.keras.losses.BinaryCrossentropy()

            # Hyperparameters of this run; train_alternative returns the dict
            # that is read back below (it must contain 'val_acc').
            res={
                "learning_rate":el,
                'num_neurons':nN,
                'acti_fun':act
            }

            model.compile(optimizer=optimizer,loss=loss_fn,metrics=['accuracy'])

            # NOTE(review): the trailing 500 is presumably the epoch budget, and
            # 'val_acc' may be measured on x_test/y_test, which would make this
            # selection optimistic. Confirm against externalTensor.train_alternative.
            res=exT.train_alternative(res,x_train,y_train,
                x_test, y_test,
                model,
                kf,
                500
            )

            # Keep the configuration with the best validation accuracy so far.
            if res['val_acc']>bestRes['val_acc']:
                bestModel=model
                bestRes=res

    print("Best model",bestRes)
    # bestModel is None only if no configuration produced a comparable
    # 'val_acc' (e.g. an empty grid or NaN scores); it is still appended so
    # that positions keep matching number_neurons.
    bestModelArray.append(bestModel)
    # f = open("saved_model_"+stats+"/summary.txt", "a")
    # tmpName=str(bestRes['num_neurons'])+"_"+str(bestRes['acti_fun'])+"_"+str(bestRes['learning_rate'])+"_LOSS_"+str(bestRes['loss'])+"_ACC_"+str(bestRes['acc'])+"_LOSSVAL_"+str(bestRes['val_loss'])+"_ACCVAL_"+str(bestRes['val_acc'])  
    # f.write(tmpName+"\n")
    # f.close()


# TwoLevel

In [None]:

# Grid search for the model built by exT.makeModelTwoLevel.
# For every value in number_neurons, each (learning rate, activation) pair is
# compiled and trained, and the configuration with the highest 'val_acc' is
# kept. bestModelArray ends up with one entry per value of number_neurons,
# in the same order.
kf = KFold(n_splits=10, random_state=1, shuffle=True)
bestModelArray=[]

for nN in number_neurons:
    # Reset BOTH trackers for every width. Previously bestModel was never
    # reset and the score started at 0 with a strict '>' comparison, so a
    # width whose configurations all scored 0 either raised NameError (first
    # width) or silently appended the model selected for the previous width.
    # Seeding with -inf makes the first trained configuration always win.
    bestModel=None
    bestRes={'val_acc':float('-inf')}
    for el in possible_learning_rate:
        for act in activation:
            # Build a fresh model; the input size is the number of feature columns.
            model = exT.makeModelTwoLevel(nN,act,len(x_train.columns))

            # Plain SGD with the candidate learning rate, binary cross-entropy loss.
            optimizer = keras.optimizers.SGD(learning_rate=el)
            loss_fn = tf.keras.losses.BinaryCrossentropy()

            # Hyperparameters of this run; train_alternative returns the dict
            # that is read back below (it must contain 'val_acc').
            res={
                "learning_rate":el,
                'num_neurons':nN,
                'acti_fun':act
            }

            model.compile(optimizer=optimizer,loss=loss_fn,metrics=['accuracy'])

            # NOTE(review): the trailing 500 is presumably the epoch budget, and
            # 'val_acc' may be measured on x_test/y_test, which would make this
            # selection optimistic. Confirm against externalTensor.train_alternative.
            res=exT.train_alternative(res,x_train,y_train,
                x_test, y_test,
                model,
                kf,
                500
            )

            # Keep the configuration with the best validation accuracy so far.
            if res['val_acc']>bestRes['val_acc']:
                bestModel=model
                bestRes=res

    print("Best model",bestRes)
    # bestModel is None only if no configuration produced a comparable
    # 'val_acc' (e.g. an empty grid or NaN scores); it is still appended so
    # that positions keep matching number_neurons.
    bestModelArray.append(bestModel)
    # f = open("saved_model_"+stats+"/summary.txt", "a")
    # tmpName=str(bestRes['num_neurons'])+"_"+str(bestRes['acti_fun'])+"_"+str(bestRes['learning_rate'])+"_LOSS_"+str(bestRes['loss'])+"_ACC_"+str(bestRes['acc'])+"_LOSSVAL_"+str(bestRes['val_loss'])+"_ACCVAL_"+str(bestRes['val_acc'])  
    # f.write(tmpName+"\n")
    # f.close()


<!-- Best model {'learning_rate': 0.001, 'num_neurons': 10, 'acti_fun': 'relu', 'loss': 0.0193, 'acc': 0.9987, 'val_loss': 1.4627, 'val_acc': 0.6}
Best model {'learning_rate': 0.0001, 'num_neurons': 30, 'acti_fun': 'sigmoid', 'loss': 0.4884, 'acc': 0.9517, 'val_loss': 0.6466, 'val_acc': 0.64}
Best model {'learning_rate': 0.0001, 'num_neurons': 50, 'acti_fun': 'sigmoid', 'loss': 0.5163, 'acc': 0.9437, 'val_loss': 0.656, 'val_acc': 0.66}
Best model {'learning_rate': 0.0001, 'num_neurons': 100, 'acti_fun': 'relu', 'loss': 0.0628, 'acc': 0.9951, 'val_loss': 0.775, 'val_acc': 0.64}
Best model {'learning_rate': 0.001, 'num_neurons': 150, 'acti_fun': 'relu', 'loss': 0.0135, 'acc': 0.9998, 'val_loss': 1.4478, 'val_acc': 0.64}
Best model {'learning_rate': 0.0001, 'num_neurons': 200, 'acti_fun': 'relu', 'loss': 0.0487, 'acc': 1.0, 'val_loss': 0.8261, 'val_acc': 0.64} -->

# L2 Regularization


In [4]:
# Grid search for the model built by exT.makeModelL2.
# For every value in number_neurons, each (learning rate, activation) pair is
# compiled and trained, and the configuration with the highest 'val_acc' is
# kept. bestModelArray ends up with one entry per value of number_neurons,
# in the same order.
kf = KFold(n_splits=10, random_state=1, shuffle=True)
bestModelArray=[]

for nN in number_neurons:
    # Reset BOTH trackers for every width. Previously bestModel was never
    # reset and the score started at 0 with a strict '>' comparison, so a
    # width whose configurations all scored 0 either raised NameError (first
    # width) or silently appended the model selected for the previous width.
    # Seeding with -inf makes the first trained configuration always win.
    bestModel=None
    bestRes={'val_acc':float('-inf')}
    for el in possible_learning_rate:
        for act in activation:
            # Build a fresh model; the input size is the number of feature columns.
            model = exT.makeModelL2(nN,act,len(x_train.columns))

            # Plain SGD with the candidate learning rate, binary cross-entropy loss.
            optimizer = keras.optimizers.SGD(learning_rate=el)
            loss_fn = tf.keras.losses.BinaryCrossentropy()

            # Hyperparameters of this run; train_alternative returns the dict
            # that is read back below (it must contain 'val_acc').
            res={
                "learning_rate":el,
                'num_neurons':nN,
                'acti_fun':act
            }

            model.compile(optimizer=optimizer,loss=loss_fn,metrics=['accuracy'])

            # NOTE(review): the trailing 500 is presumably the epoch budget, and
            # 'val_acc' may be measured on x_test/y_test, which would make this
            # selection optimistic. Confirm against externalTensor.train_alternative.
            res=exT.train_alternative(res,x_train,y_train,
                x_test, y_test,
                model,
                kf,
                500
            )

            # Keep the configuration with the best validation accuracy so far.
            if res['val_acc']>bestRes['val_acc']:
                bestModel=model
                bestRes=res

    print("Best model",bestRes)
    # bestModel is None only if no configuration produced a comparable
    # 'val_acc' (e.g. an empty grid or NaN scores); it is still appended so
    # that positions keep matching number_neurons.
    bestModelArray.append(bestModel)
    # f = open("saved_model_"+stats+"/summary.txt", "a")
    # tmpName=str(bestRes['num_neurons'])+"_"+str(bestRes['acti_fun'])+"_"+str(bestRes['learning_rate'])+"_LOSS_"+str(bestRes['loss'])+"_ACC_"+str(bestRes['acc'])+"_LOSSVAL_"+str(bestRes['val_loss'])+"_ACCVAL_"+str(bestRes['val_acc'])  
    # f.write(tmpName+"\n")
    # f.close()


Best model {'learning_rate': 0.0001, 'num_neurons': 10, 'acti_fun': 'relu', 'loss': 1.106, 'acc': 0.9864, 'val_loss': 1.7072, 'val_acc': 0.58}
Best model {'learning_rate': 0.001, 'num_neurons': 30, 'acti_fun': 'relu', 'loss': 0.2146, 'acc': 0.9959, 'val_loss': 0.8235, 'val_acc': 0.62}
Best model {'learning_rate': 0.0001, 'num_neurons': 50, 'acti_fun': 'relu', 'loss': 2.5388, 'acc': 0.9891, 'val_loss': 3.0013, 'val_acc': 0.62}
Best model {'learning_rate': 0.0001, 'num_neurons': 100, 'acti_fun': 'relu', 'loss': 3.0065, 'acc': 1.0, 'val_loss': 3.3593, 'val_acc': 0.64}
Best model {'learning_rate': 0.0001, 'num_neurons': 150, 'acti_fun': 'relu', 'loss': 3.2343, 'acc': 0.9943, 'val_loss': 3.6137, 'val_acc': 0.62}
Best model {'learning_rate': 0.0001, 'num_neurons': 200, 'acti_fun': 'sigmoid', 'loss': 4.1574, 'acc': 0.9944, 'val_loss': 3.6825, 'val_acc': 0.62}


# Dropout

In [None]:
# Grid search for the model built by exT.makeModelDropout.
# For every value in number_neurons, each (learning rate, activation) pair is
# compiled and trained, and the configuration with the highest 'val_acc' is
# kept. bestModelArray ends up with one entry per value of number_neurons,
# in the same order.
kf = KFold(n_splits=10, random_state=1, shuffle=True)
bestModelArray=[]

for nN in number_neurons:
    # Reset BOTH trackers for every width. Previously bestModel was never
    # reset and the score started at 0 with a strict '>' comparison, so a
    # width whose configurations all scored 0 either raised NameError (first
    # width) or silently appended the model selected for the previous width.
    # Seeding with -inf makes the first trained configuration always win.
    bestModel=None
    bestRes={'val_acc':float('-inf')}
    for el in possible_learning_rate:
        for act in activation:
            # Build a fresh model; the input size is the number of feature columns.
            model = exT.makeModelDropout(nN,act,len(x_train.columns))

            # Plain SGD with the candidate learning rate, binary cross-entropy loss.
            optimizer = keras.optimizers.SGD(learning_rate=el)
            loss_fn = tf.keras.losses.BinaryCrossentropy()

            # Hyperparameters of this run; train_alternative returns the dict
            # that is read back below (it must contain 'val_acc').
            res={
                "learning_rate":el,
                'num_neurons':nN,
                'acti_fun':act
            }

            model.compile(optimizer=optimizer,loss=loss_fn,metrics=['accuracy'])

            # NOTE(review): the trailing 500 is presumably the epoch budget, and
            # 'val_acc' may be measured on x_test/y_test, which would make this
            # selection optimistic. Confirm against externalTensor.train_alternative.
            res=exT.train_alternative(res,x_train,y_train,
                x_test, y_test,
                model,
                kf,
                500
            )

            # Keep the configuration with the best validation accuracy so far.
            if res['val_acc']>bestRes['val_acc']:
                bestModel=model
                bestRes=res

    print("Best model",bestRes)
    # bestModel is None only if no configuration produced a comparable
    # 'val_acc' (e.g. an empty grid or NaN scores); it is still appended so
    # that positions keep matching number_neurons.
    bestModelArray.append(bestModel)
    # f = open("saved_model_"+stats+"/summary.txt", "a")
    # tmpName=str(bestRes['num_neurons'])+"_"+str(bestRes['acti_fun'])+"_"+str(bestRes['learning_rate'])+"_LOSS_"+str(bestRes['loss'])+"_ACC_"+str(bestRes['acc'])+"_LOSSVAL_"+str(bestRes['val_loss'])+"_ACCVAL_"+str(bestRes['val_acc'])  
    # f.write(tmpName+"\n")
    # f.close()


# Random Forest

In [None]:
from sklearn.ensemble import RandomForestClassifier

# Random-forest baseline: fit on the training split and print the fraction of
# test rows whose first label matches the prediction.
clf=RandomForestClassifier(max_depth=2, random_state=42)

# A single-column (n, 1) target is flattened to 1-D before fitting, which is
# the shape scikit-learn expects for single-output classification.
# NOTE(review): the actual shape of y_train depends on boxScore.separation() - confirm.
y_fit=np.asarray(y_train)
if y_fit.ndim==2 and y_fit.shape[1]==1:
    y_fit=y_fit.ravel()
clf.fit(x_train,y_fit)

res=clf.predict(x_test)

# The previous element-wise loop indexed x[0]/y[0] on every zipped pair.
# That raises IndexError when predict() returns a 1-D array (elements are
# scalars), and iterates column labels instead of rows when y_test is a
# DataFrame. Converting both sides to arrays and viewing them as
# (n_samples, n_columns) compares the first label column for either layout.
y_pred=np.asarray(res)
y_true=np.asarray(y_test)
first_pred=y_pred.reshape(len(y_pred),-1)[:,0]
first_true=y_true.reshape(len(y_true),-1)[:,0]

count=int(np.count_nonzero(first_pred==first_true))
print(count/len(x_test))
