In [39]:
import tensorflow as tf
from tensorflow import keras

import externalTensor as exT
import numpy as np
from sklearn.model_selection import KFold
from boxScore import boxScore
import lime
from lime import lime_tabular
import pandas as pd

# Preparation of Data

In [40]:
years="2021-22"
# stats="traditional"
stats='advance'
box_score=boxScore(years,stats)

x_train, x_test, y_train, y_test=box_score.separation()
y_train=np.array(y_train)
y_test=np.array(y_test)

# Possible Hyperparameters

In [41]:
    
activation=['relu','sigmoid']
# activation=['relu']
number_neurons=[10,30,50,100,150,200]
# number_neurons=[10]
possible_learning_rate=[0.0001,0.001,0.01]
# possible_learning_rate=[0.001]

# Greedy Approch 

# Simple

In [19]:

kf = KFold(n_splits=10, random_state=1, shuffle=True)
bestModelArray=[]

for nN in number_neurons:
    bestRes={'val_acc':0}
    for el in possible_learning_rate:
        for act in activation:
                # Initialize model
                model = exT.makeModelSimple(nN,act,len(x_train.columns))
                
                # Instantiate an optimizer to train the model.
                optimizer = keras.optimizers.SGD(learning_rate=el)
                # Instantiate a loss function.
                loss_fn = tf.keras.losses.BinaryCrossentropy()
                res={
                    "learning_rate":el,
                    'num_neurons':nN,
                    'acti_fun':act
                }
                
                model.compile(optimizer=optimizer,loss=loss_fn,metrics=['accuracy'])

                res=exT.train_alternative(res,x_train,y_train,
                    x_test, y_test, 
                    model,
                    kf,
                    500
                   
                )
                
                #Saved the best model based on the accuracy
                if(res['val_acc']>bestRes['val_acc']):
                    bestModel=model
                    bestRes=res
                        
                    
                
    print("Best model",bestRes)
    bestModelArray.append(bestModel)
    # f = open("saved_model_"+stats+"/summary.txt", "a")
    # tmpName=str(bestRes['num_neurons'])+"_"+str(bestRes['acti_fun'])+"_"+str(bestRes['learning_rate'])+"_LOSS_"+str(bestRes['loss'])+"_ACC_"+str(bestRes['acc'])+"_LOSSVAL_"+str(bestRes['val_loss'])+"_ACCVAL_"+str(bestRes['val_acc'])  
    # f.write(tmpName+"\n")
    # f.close()


Best model {'learning_rate': 0.01, 'num_neurons': 10, 'acti_fun': 'relu', 'loss': 0.1255, 'acc': 0.9617, 'val_loss': 0.632, 'val_acc': 0.72}
Best model {'learning_rate': 0.001, 'num_neurons': 30, 'acti_fun': 'relu', 'loss': 0.5526, 'acc': 0.8823, 'val_loss': 0.6531, 'val_acc': 0.72}
Best model {'learning_rate': 0.01, 'num_neurons': 50, 'acti_fun': 'sigmoid', 'loss': 0.4192, 'acc': 0.92, 'val_loss': 0.6064, 'val_acc': 0.72}
Best model {'learning_rate': 0.01, 'num_neurons': 100, 'acti_fun': 'sigmoid', 'loss': 0.4257, 'acc': 0.9182, 'val_loss': 0.6053, 'val_acc': 0.76}
Best model {'learning_rate': 0.01, 'num_neurons': 150, 'acti_fun': 'relu', 'loss': 0.1184, 'acc': 0.9641, 'val_loss': 0.5609, 'val_acc': 0.74}
Best model {'learning_rate': 0.001, 'num_neurons': 200, 'acti_fun': 'relu', 'loss': 0.5886, 'acc': 0.8949, 'val_loss': 0.6478, 'val_acc': 0.74}


# TwoLevel

In [None]:

kf = KFold(n_splits=10, random_state=1, shuffle=True)
bestModelArray=[]

for nN in number_neurons:
    bestRes={'val_acc':0}
    for el in possible_learning_rate:
        for act in activation:
                # Initialize model
                model = exT.makeModelTwoLevel(nN,act,len(x_train.columns))
                
                # Instantiate an optimizer to train the model.
                optimizer = keras.optimizers.SGD(learning_rate=el)
                # Instantiate a loss function.
                loss_fn = tf.keras.losses.BinaryCrossentropy()
                res={
                    "learning_rate":el,
                    'num_neurons':nN,
                    'acti_fun':act
                }
                
                model.compile(optimizer=optimizer,loss=loss_fn,metrics=['accuracy'])

                res=exT.train_alternative(res,x_train,y_train,
                    x_test, y_test, 
                    model,
                    kf,
                    500
                   
                )
                
                #Saved the best model based on the accuracy
                if(res['val_acc']>bestRes['val_acc']):
                    bestModel=model
                    bestRes=res
                        
                    
                
    print("Best model",bestRes)
    bestModelArray.append(bestModel)
    # f = open("saved_model_"+stats+"/summary.txt", "a")
    # tmpName=str(bestRes['num_neurons'])+"_"+str(bestRes['acti_fun'])+"_"+str(bestRes['learning_rate'])+"_LOSS_"+str(bestRes['loss'])+"_ACC_"+str(bestRes['acc'])+"_LOSSVAL_"+str(bestRes['val_loss'])+"_ACCVAL_"+str(bestRes['val_acc'])  
    # f.write(tmpName+"\n")
    # f.close()


<!-- Best model {'learning_rate': 0.001, 'num_neurons': 10, 'acti_fun': 'relu', 'loss': 0.0193, 'acc': 0.9987, 'val_loss': 1.4627, 'val_acc': 0.6}
Best model {'learning_rate': 0.0001, 'num_neurons': 30, 'acti_fun': 'sigmoid', 'loss': 0.4884, 'acc': 0.9517, 'val_loss': 0.6466, 'val_acc': 0.64}
Best model {'learning_rate': 0.0001, 'num_neurons': 50, 'acti_fun': 'sigmoid', 'loss': 0.5163, 'acc': 0.9437, 'val_loss': 0.656, 'val_acc': 0.66}
Best model {'learning_rate': 0.0001, 'num_neurons': 100, 'acti_fun': 'relu', 'loss': 0.0628, 'acc': 0.9951, 'val_loss': 0.775, 'val_acc': 0.64}
Best model {'learning_rate': 0.001, 'num_neurons': 150, 'acti_fun': 'relu', 'loss': 0.0135, 'acc': 0.9998, 'val_loss': 1.4478, 'val_acc': 0.64}
Best model {'learning_rate': 0.0001, 'num_neurons': 200, 'acti_fun': 'relu', 'loss': 0.0487, 'acc': 1.0, 'val_loss': 0.8261, 'val_acc': 0.64} -->

# L2 Regolarization


In [None]:
kf = KFold(n_splits=10, random_state=1, shuffle=True)
bestModelArray=[]

for nN in number_neurons:
    bestRes={'val_acc':0}
    for el in possible_learning_rate:
        for act in activation:
                # Initialize model
                model = exT.makeModelL2(nN,act,len(x_train.columns))
                
                # Instantiate an optimizer to train the model.
                optimizer = keras.optimizers.SGD(learning_rate=el)
                # Instantiate a loss function.
                loss_fn = tf.keras.losses.BinaryCrossentropy()
                res={
                    "learning_rate":el,
                    'num_neurons':nN,
                    'acti_fun':act
                }
                
                model.compile(optimizer=optimizer,loss=loss_fn,metrics=['accuracy'])

                res=exT.train_alternative(res,x_train,y_train,
                    x_test, y_test, 
                    model,
                    kf,
                    500
                   
                )
                
                #Saved the best model based on the accuracy
                if(res['val_acc']>bestRes['val_acc']):
                    bestModel=model
                    bestRes=res
                        
                    
                
    print("Best model",bestRes)
    bestModelArray.append(bestModel)
    # f = open("saved_model_"+stats+"/summary.txt", "a")
    # tmpName=str(bestRes['num_neurons'])+"_"+str(bestRes['acti_fun'])+"_"+str(bestRes['learning_rate'])+"_LOSS_"+str(bestRes['loss'])+"_ACC_"+str(bestRes['acc'])+"_LOSSVAL_"+str(bestRes['val_loss'])+"_ACCVAL_"+str(bestRes['val_acc'])  
    # f.write(tmpName+"\n")
    # f.close()


# Dropout

In [None]:
kf = KFold(n_splits=10, random_state=1, shuffle=True)
bestModelArray=[]

for nN in number_neurons:
    bestRes={'val_acc':0}
    for el in possible_learning_rate:
        for act in activation:
                # Initialize model
                model = exT.makeModelDropout(nN,act,len(x_train.columns))
                
                # Instantiate an optimizer to train the model.
                optimizer = keras.optimizers.SGD(learning_rate=el)
                # Instantiate a loss function.
                loss_fn = tf.keras.losses.BinaryCrossentropy()
                res={
                    "learning_rate":el,
                    'num_neurons':nN,
                    'acti_fun':act
                }
                
                model.compile(optimizer=optimizer,loss=loss_fn,metrics=['accuracy'])

                res=exT.train_alternative(res,x_train,y_train,
                    x_test, y_test, 
                    model,
                    kf,
                    500
                   
                )
                
                #Saved the best model based on the accuracy
                if(res['val_acc']>bestRes['val_acc']):
                    bestModel=model
                    bestRes=res
                        
                    
                
    print("Best model",bestRes)
    bestModelArray.append(bestModel)
    # f = open("saved_model_"+stats+"/summary.txt", "a")
    # tmpName=str(bestRes['num_neurons'])+"_"+str(bestRes['acti_fun'])+"_"+str(bestRes['learning_rate'])+"_LOSS_"+str(bestRes['loss'])+"_ACC_"+str(bestRes['acc'])+"_LOSSVAL_"+str(bestRes['val_loss'])+"_ACCVAL_"+str(bestRes['val_acc'])  
    # f.write(tmpName+"\n")
    # f.close()


# AUTOENCODERS

In [None]:
# Regularize label to 0 (win first team ) and 1 (win second team)
# Second Regularizion:  [1,0] (win first team ) and [0,1] (win second team)
# label_train=[]
# for el in y_train:
#     x=1    
#     if np.all(el==[1,0]):
#         x=0
    
#     label_train.append(x)
# label_test=[]
# for el in y_test:
#     x=1    
#     if np.all(el==[1,0]):
#         x=0
    
#     label_test.append(x)

# x_train["RESULT"]=label_train
# x_test["RESULT"]=label_test


In [42]:
from keras.datasets import mnist
import numpy as np


autoencoder,encoder,decoder = exT.autoencoder("relu",len(x_train.columns),20)
loss_fn = tf.keras.losses.BinaryCrossentropy()               
autoencoder.compile(optimizer="adam",loss=loss_fn,metrics=['accuracy'])
tik=autoencoder.fit(x_train,x_train,batch_size=200,verbose=False,epochs=50,validation_data=(x_test,x_test))



In [43]:
decoded_match_test = autoencoder.predict(x_test)
decoded_match_train = autoencoder.predict(x_train)



In [44]:
model = tf.keras.models.load_model('saved_model_'+stats+'/bestModel')

model.evaluate(np.asarray(x_test),np.asarray(y_test))

model.evaluate(decoded_match_test,np.asarray(y_test))



[1.3615206480026245, 0.5799999833106995]

# Random Forest

In [37]:
from sklearn.ensemble import RandomForestClassifier
best_acc=-1
for el in range(2,10):
    clf=RandomForestClassifier(max_depth=el, random_state=42)
    clf.fit(x_train,y_train)
    res=clf.predict(x_test)
    count=0
    for x,y in zip(res,y_test):
        if(x[0]==y[0]):
            count+=1
    acc=count/len(x_test)
    if acc> best_acc:
        best_acc=acc
print(best_acc,el)

0.5 9
