In [1]:
import tensorflow as tf
from tensorflow import keras

import externalTensor as exT
import numpy as np
from sklearn.model_selection import KFold
from boxScore import boxScore
import lime
from lime import lime_tabular
import pandas as pd

# Preparation of Data

In [2]:
# Season and box-score flavour to analyse.
years = "2020-21"
# stats = "traditional"
stats = 'advance'

# boxScore.separation() hands back the train/test features and labels.
box_score = boxScore(years, stats)
(x_train, x_test,
 y_train, y_test) = box_score.separation()

# Only the training labels are converted to a NumPy array here.
y_train = np.array(y_train)


# Possible Hyperparameters

In [3]:
    
# Search space explored by the greedy search below.
# The commented single-value variants are handy for a quick smoke run.

# Hidden-layer activation functions.
activation = [
    'relu',
    'sigmoid',
]
# activation=['relu']

# Candidate hidden-layer widths.
number_neurons = [
    10, 30, 50,
    100, 150, 200,
]
# number_neurons=[10]

# Candidate SGD learning rates.
possible_learning_rate = [
    0.0001,
    0.001,
    0.01,
]
# possible_learning_rate=[0.001]


# Greedy Approach

In [4]:
# Greedy search: for every hidden-layer width, try each
# (learning rate, activation) pair and keep the model with the best
# validation accuracy reported by exT.train_alternative.
kf = KFold(n_splits=10, random_state=1, shuffle=True)
bestModelArray=[]

for nN in number_neurons:
    # Reset BOTH trackers for every width. Without resetting bestModel, a
    # width for which no run beats val_acc == 0 would silently re-append the
    # model of the previous width (or raise NameError on the first width).
    bestRes={'val_acc':0}
    bestModel=None
    for el in possible_learning_rate:
        for act in activation:
            # Initialize model
            model = exT.makeModel(nN,act,len(x_train.columns))

            # Instantiate an optimizer to train the model.
            optimizer = keras.optimizers.SGD(learning_rate=el)
            # Instantiate a loss function.
            loss_fn = tf.keras.losses.BinaryCrossentropy()
            # Describes this configuration; train_alternative returns it
            # enriched with the metrics read below ('val_acc', ...).
            res={
                "learning_rate":el,
                'num_neurons':nN,
                'acti_fun':act
            }

            model.compile(optimizer=optimizer,loss=loss_fn,metrics=['accuracy'])

            # NOTE(review): the trailing 500 is presumably the number of
            # epochs -- confirm against exT.train_alternative.
            res=exT.train_alternative(res,x_train,y_train,
                x_test, y_test,
                model,
                kf,
                500
            )

            # Keep the best model of this width, ranked by validation accuracy.
            if(res['val_acc']>bestRes['val_acc']):
                bestModel=model
                bestRes=res

    print("Best model",bestRes)
    # bestModelArray stays index-aligned with number_neurons; an entry is None
    # when no configuration of this width scored above 0.
    bestModelArray.append(bestModel)
    # f = open("saved_model_"+stats+"/summary.txt", "a")
    # tmpName=str(bestRes['num_neurons'])+"_"+str(bestRes['acti_fun'])+"_"+str(bestRes['learning_rate'])+"_LOSS_"+str(bestRes['loss'])+"_ACC_"+str(bestRes['acc'])+"_LOSSVAL_"+str(bestRes['val_loss'])+"_ACCVAL_"+str(bestRes['val_acc'])  
    # f.write(tmpName+"\n")
    # f.close()


Best model {'learning_rate': 0.0001, 'num_neurons': 10, 'acti_fun': 'sigmoid', 'loss': 0.9825, 'acc': 0.6198, 'val_loss': 0.6689, 'val_acc': 0.64}
Best model {'learning_rate': 0.0001, 'num_neurons': 30, 'acti_fun': 'sigmoid', 'loss': 0.9953, 'acc': 0.5448, 'val_loss': 0.6656, 'val_acc': 0.66}
Best model {'learning_rate': 0.0001, 'num_neurons': 50, 'acti_fun': 'relu', 'loss': 0.9979, 'acc': 0.0575, 'val_loss': 0.9606, 'val_acc': 0.6}
Best model {'learning_rate': 0.0001, 'num_neurons': 100, 'acti_fun': 'relu', 'loss': 0.998, 'acc': 0.0639, 'val_loss': 0.8693, 'val_acc': 0.62}
Best model {'learning_rate': 0.0001, 'num_neurons': 150, 'acti_fun': 'relu', 'loss': 0.998, 'acc': 0.068, 'val_loss': 0.8127, 'val_acc': 0.64}
Best model {'learning_rate': 0.0001, 'num_neurons': 200, 'acti_fun': 'sigmoid', 'loss': 0.9977, 'acc': 0.4604, 'val_loss': 0.6632, 'val_acc': 0.62}


# Save / Load Model

In [None]:
def myModel(neuronNumbers,activation,input_dimension):
    """Build an uncompiled feed-forward classifier with two hidden layers.

    Architecture: input -> Dense(neuronNumbers) -> Dense(2 * neuronNumbers)
    -> Dropout(0.1) -> Dense(2, softmax).

    Args:
        neuronNumbers: Units of the first hidden layer; the second hidden
            layer uses twice as many.
        activation: Activation passed to both hidden Dense layers.
        input_dimension: Number of input features per sample.

    Returns:
        A keras.Model mapping the "input" tensor to the "predictions" tensor.
    """
    inputs = keras.Input(shape=(input_dimension,), name="input")
    hidden = keras.layers.Dense(neuronNumbers, activation=activation, name="hidden")(inputs)
    hidden1 = keras.layers.Dense(neuronNumbers*2, activation=activation, name="hidden1")(hidden)

    # In a functional model the layer takes its shape from the tensor it is
    # called on, so no input_shape argument is given to Dropout.
    dropout = keras.layers.Dropout(.1)(hidden1)

    # NOTE(review): a 2-unit softmax head is trained with BinaryCrossentropy
    # in this notebook -- confirm the labels are one-hot encoded.
    outputs = keras.layers.Dense(2, activation=tf.keras.activations.softmax, name="predictions")(dropout)

    return keras.Model(inputs=inputs, outputs=outputs)

In [None]:
# bestModelArray[2].save("saved_model_"+stats+"/"+"bestModel")

# --- Reload the persisted model and score it on the held-out split ---------
model = tf.keras.models.load_model(f"saved_model_{stats}/bestModel")
validation_data = (np.asarray(x_test), np.asarray(y_test))
gg = model.evaluate(validation_data[0], validation_data[1], batch_size=200)
# evaluate() returns [loss, accuracy] because the model tracks one metric.
print("Validation accuracy", gg[1])
# model.summary()

# --- Train a fresh two-hidden-layer network for comparison -----------------
newModel = myModel(50, "relu", len(x_train.columns))
optimizer = keras.optimizers.SGD(learning_rate=0.001)
loss_fn = tf.keras.losses.BinaryCrossentropy()
newModel.compile(
    optimizer=optimizer,
    loss=loss_fn,
    metrics=['accuracy'],
)
res = newModel.fit(
    x_train,
    y_train,
    batch_size=200,
    validation_data=validation_data,
    verbose=False,
    epochs=500,
)

# Final-epoch training and validation accuracy.
print(res.history["accuracy"][-1])
print(res.history["val_accuracy"][-1])
# newModel.evaluate(,batch_size=200)



# LIME 

In [37]:


from locale import atof


def _parse_lime_rule(rule):
    """Split a LIME rule string into ``((low, high), feature_name)``.

    Handles two-sided rules ('101.9 < OFF_RATING <= 109.45') as well as
    one-sided ones ('OFF_RATING <= 101.9', 'OFF_RATING > 117.53'); the missing
    side of a one-sided rule is reported as -inf / +inf. ``feature_name`` is
    the last token starting with a letter, or None if there is none.
    """
    comparison_ops = ('<', '<=', '>', '>=')
    feature = None
    bounds = []
    last_op = None
    for token in rule.split():
        if token in comparison_ops:
            last_op = token
        elif token[0].isalpha():
            feature = token
        else:
            bounds.append(atof(token))

    if len(bounds) >= 2:
        value_range = (bounds[0], bounds[1])
    elif len(bounds) == 1:
        # One-sided rule: the operator tells which side the number bounds.
        if last_op in ('<', '<='):
            value_range = (float('-inf'), bounds[0])
        else:
            value_range = (bounds[0], float('inf'))
    else:
        value_range = (float('-inf'), float('inf'))
    return value_range, feature


model = tf.keras.models.load_model('saved_model_'+stats+'/bestModel')
explainer = lime_tabular.LimeTabularExplainer(
    training_data=np.array(x_train),
    feature_names=x_train.columns,
    class_names=['win first team', 'win second team'],
    mode='classification'
)

# answer : one entry per explained row -> [[rule, normalised weight], ...]
# big_one: one entry per explained row -> [[range, feature, weight, correct], ...]
answer=[]
big_one=[]

list_index=[0,1,2,6]
# list_index=[6]
top_k=4  # number of leading rules kept per explained row

# Converted once, outside the loop, and indexed positionally so that labels
# and features always refer to the same row as x_test.iloc[i].
# NOTE(review): assumes y_test is row-aligned with x_test -- confirm.
x_test_values=np.asarray(x_test)
y_test_values=np.asarray(y_test)

for i in list_index:
    exp = explainer.explain_instance(
        data_row=x_test.iloc[i],
        predict_fn=model.predict
    )
    prediction=model.predict(x_test_values[i][None,...])

    # The prediction counts as correct only if every rounded output matches.
    is_correct=(np.rint(prediction)==y_test_values[i]).all()

    explanation=exp.as_list()
    rules=[rule for rule,_ in explanation]
    weights=np.array([weight for _,weight in explanation],dtype=float)
    # Signed weights scaled so that their absolute values sum to 1.
    normalised=weights/np.sum(np.abs(weights))

    # Accumulate (not overwrite) so every explained row is kept.
    answer.append([[rule,weight] for rule,weight in zip(rules,normalised)])

    rows=[]
    for rule,weight in zip(rules[:top_k],normalised):
        value_range,feature=_parse_lime_rule(rule)
        rows.append([value_range,feature,weight,is_correct])
    big_one.append(rows)




In [77]:
# One DataFrame row per (explained game, rule) pair. The nested list is
# flattened directly instead of relying on ndarray.reshape, whose result was
# previously discarded, and without hard-coding the number of rows or
# overwriting big_one (so the cell can be re-run safely).
lime_rows=[row for game_rows in big_one for row in game_rows]
result=pd.DataFrame(
    lime_rows,
    columns=["Range","Stats", "Percentage of decision", "Correct prediction"],
)
# Rules that involve either team's offensive rating.
result[result["Stats"].str.contains("OFF_RATING", na=False)]

Unnamed: 0,Range,Stats,Percentage of decision,Correct prediction
0,"(109.45, 117.0)",OFF_RATING,-0.181291,True
2,"(101.9, 109.1)",OFF_RATING_O,-0.172608,True
4,"(109.1, 117.53)",OFF_RATING_O,0.227653,True
5,"(109.45, 117.0)",OFF_RATING,-0.167011,True
8,"(101.9, 109.45)",OFF_RATING,0.208852,True
9,"(109.1, 117.53)",OFF_RATING_O,0.19711,True
12,"(101.9, 109.45)",OFF_RATING,0.20163,False
14,"(109.1, 117.53)",OFF_RATING_O,0.162764,False


In [None]:
# Sum the absolute normalised LIME weight of every feature over the top rules
# of each explanation, then rank the features by that total.
conclusion=[]   # [feature, |weight|] for every rule considered
counts={}       # feature -> accumulated |weight|

# `answer` may hold a single explanation ([[rule, weight], ...]) or a list of
# explanations; normalise to a list of explanations.
if answer and answer[0] and isinstance(answer[0][0], str):
    explanations=[answer]
else:
    explanations=answer

for explanation in explanations:
    for rule,weight in explanation[0:4]:
        # The feature name is the first token of the rule starting with a letter.
        names=[token for token in rule.split() if token[0].isalpha()]
        feature=names[0]
        magnitude=abs(weight)
        conclusion.append([feature,magnitude])
        counts[feature]=counts.get(feature,0)+magnitude

# Built with a comprehension so it still works if the builtin name `dict`
# has been rebound earlier in the session.
sortdict={name:total for name,total in sorted(counts.items(), key=lambda item:item[1], reverse=True)}


In [None]:
# sortdict

# TODO

//Create two corpora, one for correct predictions and one for wrong ones, and work out which value ranges drive the decision in each case.

Re-initialize the network using only the four main features of the advanced split (defensive rating and offensive rating for both teams).

Try this process for the traditional split as well.

Looking for a new tool? 

# Impression

Understand whether it is possible to improve the accuracy of the network.

Learn to use the LIME tool to better understand the network.