## This file contains the Logistic Regression model using the original variables

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
#import tensorflow as tf
#import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import recall_score, precision_score, f1_score, accuracy_score
from sklearn.model_selection import KFold
from tabulate import tabulate

### Import Data

In [6]:
# Folder holding the training/validation spreadsheets. Kept in one place so the
# location only has to be edited once when the notebook runs on another machine.
TRAIN_VAL_DIR = r'C:\Users\linhc\OneDrive\Documents\Sproj\NgocLinhChiNguyen_SeniorProject2022\DataCollection\Importantfiles\TrainingValData'

# One spreadsheet per group (Average, Skilled, Elite).
df_Average = pd.read_excel(TRAIN_VAL_DIR + r'\AverageTrainingVal.xlsx')
df_Skilled = pd.read_excel(TRAIN_VAL_DIR + r'\SkilledTrainingVal.xlsx')
df_Elite = pd.read_excel(TRAIN_VAL_DIR + r'\EliteTrainingVal.xlsx')

In [7]:
df_Elite

Unnamed: 0,blueWins,blueWardsPlaced,blueWardsDestroyed,blueKills,blueDeaths,blueAssists,blueEliteMonsters,blueDragons,blueHeraldsBarons,blueTowersDestroyed,...,redEliteMonsters,redDragons,redHeraldsBarons,redTowersDestroyed,redTotalGold,redTotalDamageToChampions,redAvgLevel,redTotalMinionsKilled,redTotalJungleMinionsKilled,redGoldDiff
0,0,32,5,10,15,15,0,0,0,1,...,2,2,0,1,26016,22836,8.4,279,69,1063
1,1,49,6,10,14,6,1,1,0,0,...,0,0,0,1,25835,20998,8.6,301,77,3740
2,1,26,2,17,10,24,2,1,1,0,...,0,0,0,1,23499,25089,8.6,268,80,-5804
3,1,106,8,10,8,18,2,1,1,0,...,1,1,0,0,21625,21831,8.4,297,76,-2583
4,1,31,10,22,7,24,3,2,1,2,...,0,0,0,0,21954,19995,8.4,293,79,-9229
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8631,1,24,7,6,7,8,1,1,0,0,...,2,1,1,0,23558,19954,8.6,340,100,722
8632,1,20,8,15,9,13,1,1,0,1,...,1,0,1,1,24741,23249,8.8,317,79,-1783
8633,1,26,4,17,16,21,3,2,1,1,...,0,0,0,0,25491,31258,8.6,246,62,-2358
8634,1,27,9,7,13,11,1,1,0,0,...,1,0,1,1,25270,21193,9.0,297,68,2430


In [8]:
#Removing the redGoldDiff variable as it is a duplicate with blueGoldDiff
# errors='ignore' keeps this cell re-runnable: the frames are reassigned in place,
# so on a second run the column is already gone and a plain drop would raise KeyError.
df_Elite = df_Elite.drop(columns='redGoldDiff', errors='ignore')
df_Average = df_Average.drop(columns='redGoldDiff', errors='ignore')
df_Skilled = df_Skilled.drop(columns='redGoldDiff', errors='ignore')

### Checking for bias

In [9]:
def winPercentage(df):
    """Return the share of rows won by blue and the complementary share for red.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a 'blueWins' column. The column is summed, so it is
        presumably coded 1 = blue win, 0 = red win (confirm against the data).

    Returns
    -------
    tuple
        ``(blue, red)`` where ``blue`` is ``sum(blueWins) / number of rows``
        and ``red`` is ``1 - blue``.

    Raises
    ------
    KeyError
        If ``df`` has no 'blueWins' column.
    ValueError
        If ``df`` has no rows, so no share can be computed.
    """
    # The column is only read, so the frame does not need to be copied first.
    y = df['blueWins']
    if len(y) == 0:
        raise ValueError('cannot compute win percentages for an empty DataFrame')
    # Vectorised sum instead of iterating the Series element by element.
    blue = y.sum() / len(y)
    red = 1 - blue
    return blue, red

In [10]:
# Elite: share of games won by each side
blue, red = winPercentage(df_Elite)
print('blue wins:', blue)
print('red wins:', red)

blue wins: 0.5020842982862437
red wins: 0.4979157017137563


In [11]:
# Skilled: share of games won by each side
blue, red = winPercentage(df_Skilled)
print('blue wins:', blue)
print('red wins:', red)

blue wins: 0.49506564455018065
red wins: 0.5049343554498193


In [12]:
# Average: share of games won by each side
blue, red = winPercentage(df_Average)
print('blue wins:', blue)
print('red wins:', red)

blue wins: 0.5033655044695324
red wins: 0.49663449553046757


# Cross-validation: splitting the data

## Getting training/validation data

In [13]:
def splitingDataFold(df):
    """Standardise the features and split the rows into 5 shuffled folds.

    Column 0 of ``df`` is taken as the target and columns 1-29 as the features.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with the target in the first column followed by the features.

    Returns
    -------
    tuple of four lists
        ``(xTrain, xTest, yTrain, yTest)``, each holding one entry per fold.
        The x entries are arrays of scaled features; the y entries are
        pandas Series taken from the target column.
    """
    # Only slices are taken below, so the input frame is never modified.
    x = df.iloc[:, 1:30]
    y = df.iloc[:, 0]

    # NOTE(review): the scaler is fit on every row before the folds are made,
    # so each fold's held-out rows contribute to the mean/variance used to
    # scale its training rows. Behaviour kept as-is; consider fitting per fold.
    x = StandardScaler().fit_transform(x)

    # random_state=42 keeps the folds identical across re-runs of the notebook.
    kf = KFold(n_splits=5, random_state=42, shuffle=True)
    xTrain, xTest, yTrain, yTest = [], [], [], []
    for train_index, test_index in kf.split(x):
        print("TRAIN:", train_index, "TEST:", test_index)
        xTrain.append(x[train_index])
        xTest.append(x[test_index])
        # KFold yields row positions, not index labels, so select with .iloc.
        # Plain y[...] only worked while the frame had a default 0..n-1 index.
        yTrain.append(y.iloc[train_index])
        yTest.append(y.iloc[test_index])

    return xTrain, xTest, yTrain, yTest
    

In [14]:
## Splitting the 3 groups

In [15]:
# Build the 5 cross-validation folds for the Elite group and report the size of fold 1
(xTrain_Elite,
 xTest_Elite,
 yTrain_Elite,
 yTest_Elite) = splitingDataFold(df_Elite)
print("Training set:", len(xTrain_Elite[0]), "samples")
print("Test set:", len(xTest_Elite[0]), "samples")

TRAIN: [   0    1    2 ... 8632 8633 8635] TEST: [   8   14   17 ... 8626 8629 8634]
TRAIN: [   1    2    3 ... 8633 8634 8635] TEST: [   0   15   29 ... 8601 8611 8630]
TRAIN: [   0    1    2 ... 8633 8634 8635] TEST: [   6   12   18 ... 8627 8628 8632]
TRAIN: [   0    2    3 ... 8630 8632 8634] TEST: [   1    7   11 ... 8631 8633 8635]
TRAIN: [   0    1    6 ... 8633 8634 8635] TEST: [   2    3    4 ... 8618 8621 8623]
Training set: 6908 samples
Test set: 1728 samples


In [16]:
# Build the 5 cross-validation folds for the Skilled group and report the size of fold 1
(xTrain_Skilled,
 xTest_Skilled,
 yTrain_Skilled,
 yTest_Skilled) = splitingDataFold(df_Skilled)
print("Training set:", len(xTrain_Skilled[0]), "samples")
print("Test set:", len(xTest_Skilled[0]), "samples")

TRAIN: [    0     1     2 ... 22694 22695 22697] TEST: [    3    17    31 ... 22686 22691 22696]
TRAIN: [    1     2     3 ... 22695 22696 22697] TEST: [    0     4     5 ... 22685 22689 22693]
TRAIN: [    0     1     3 ... 22695 22696 22697] TEST: [    2     7    10 ... 22679 22690 22692]
TRAIN: [    0     2     3 ... 22695 22696 22697] TEST: [    1    12    18 ... 22673 22687 22694]
TRAIN: [    0     1     2 ... 22693 22694 22696] TEST: [    9    11    13 ... 22688 22695 22697]
Training set: 18158 samples
Test set: 4540 samples


In [17]:
# Build the 5 cross-validation folds for the Average group and report the size of fold 1
(xTrain_Average,
 xTest_Average,
 yTrain_Average,
 yTest_Average) = splitingDataFold(df_Average)
print("Training set:", len(xTrain_Average[0]), "samples")
print("Test set:", len(xTest_Average[0]), "samples")

TRAIN: [    0     1     2 ... 56155 56156 56157] TEST: [    4     6    31 ... 56127 56146 56151]
TRAIN: [    2     3     4 ... 56155 56156 56157] TEST: [    0     1     7 ... 56143 56144 56154]
TRAIN: [    0     1     2 ... 56152 56154 56155] TEST: [    3     8    14 ... 56153 56156 56157]
TRAIN: [    0     1     2 ... 56155 56156 56157] TEST: [    5    15    18 ... 56145 56150 56152]
TRAIN: [    0     1     3 ... 56154 56156 56157] TEST: [    2     9    10 ... 56136 56141 56155]
Training set: 44926 samples
Test set: 11232 samples


### Training logistic regression

In [18]:
def trainingModelLogisticRegression(x_train,y_train,reg,sol):
    """Fit and return a LogisticRegression model (max_iter=1000) on one training set.

    Parameters
    ----------
    x_train, y_train
        Training features and labels, passed straight to ``fit``.
    reg
        Penalty handed to ``LogisticRegression`` (e.g. 'l1', 'l2').
        Either ``'none'`` or ``None`` requests an unpenalised model.
    sol
        Solver name handed to ``LogisticRegression``.

    Returns
    -------
    LogisticRegression
        The fitted estimator.

    Raises
    ------
    ValueError
        Whatever ``fit`` raises for the last penalty spelling that was tried.
    """
    # scikit-learn spells "no penalty" as None from version 1.2 on, and as the
    # string 'none' in older releases (the string was later removed). Try the
    # modern spelling first and fall back to the legacy one, so the notebook
    # runs on either side of that change.
    if reg is None or reg == 'none':
        candidates = [None, 'none']
    else:
        candidates = [reg]

    for position, penalty in enumerate(candidates):
        lr = LogisticRegression(penalty=penalty, solver=sol, max_iter=1000)
        try:
            lr.fit(x_train, y_train)
        except ValueError:
            # Parameter validation happens inside fit(); only swallow the
            # error while another spelling is still left to try.
            if position == len(candidates) - 1:
                raise
        else:
            return lr

#### L1 regularization

In [19]:
# One fitted L1-penalised model per fold and per group.
trainedEliteLrL1, trainedAverageLrL1, trainedSkilledLrL1 = [], [], []
# (result list, per-fold features, per-fold labels), listed in the order the
# groups are fitted within each fold.
l1_jobs = (
    (trainedEliteLrL1, xTrain_Elite, yTrain_Elite),
    (trainedAverageLrL1, xTrain_Average, yTrain_Average),
    (trainedSkilledLrL1, xTrain_Skilled, yTrain_Skilled),
)
for fold in range(5):
    for fitted, fold_x, fold_y in l1_jobs:
        fitted.append(trainingModelLogisticRegression(fold_x[fold], fold_y[fold], 'l1', 'saga'))

#### L2 regularization

In [20]:
# One fitted L2-penalised model per fold and per group.
trainedEliteLrL2, trainedAverageLrL2, trainedSkilledLrL2 = [], [], []
# (result list, per-fold features, per-fold labels), listed in the order the
# groups are fitted within each fold.
l2_jobs = (
    (trainedEliteLrL2, xTrain_Elite, yTrain_Elite),
    (trainedAverageLrL2, xTrain_Average, yTrain_Average),
    (trainedSkilledLrL2, xTrain_Skilled, yTrain_Skilled),
)
for fold in range(5):
    for fitted, fold_x, fold_y in l2_jobs:
        fitted.append(trainingModelLogisticRegression(fold_x[fold], fold_y[fold], 'l2', 'saga'))

#### No regularization

In [21]:
# One fitted model without a penalty term per fold and per group.
trainedEliteLrNone, trainedAverageLrNone, trainedSkilledLrNone = [], [], []
# (result list, per-fold features, per-fold labels), listed in the order the
# groups are fitted within each fold.
none_jobs = (
    (trainedEliteLrNone, xTrain_Elite, yTrain_Elite),
    (trainedAverageLrNone, xTrain_Average, yTrain_Average),
    (trainedSkilledLrNone, xTrain_Skilled, yTrain_Skilled),
)
for fold in range(5):
    for fitted, fold_x, fold_y in none_jobs:
        fitted.append(trainingModelLogisticRegression(fold_x[fold], fold_y[fold], 'none', 'saga'))

### Testing Model

In [22]:
def testingLr(lr,x_test,y_test):
    """Print a per-fold table of accuracy, precision, recall and F1 with their averages.

    Parameters
    ----------
    lr
        Sequence of fitted classifiers, one per fold.
    x_test, y_test
        Sequences of feature sets and true labels; entry ``i`` is scored
        with ``lr[i]``.

    Returns
    -------
    None
        The table is printed with ``tabulate``: one column per fold plus a
        final 'Average' column, one row per metric.

    Raises
    ------
    ValueError
        If there is no fold to evaluate.
    """
    folds = list(zip(lr, x_test, y_test))
    if not folds:
        raise ValueError('testingLr needs at least one fold to evaluate')

    # Row label -> scoring function, in the order the rows are printed.
    metrics = [
        ('Accuracy', accuracy_score),
        ('Precision', precision_score),
        ('Recall', recall_score),
        ('F1 Score', f1_score),
    ]
    scores = {label: [] for label, _ in metrics}

    for model, features, labels in folds:
        # Only hard predictions are needed; class probabilities are not used.
        predicted = model.predict(features)
        for label, metric in metrics:
            scores[label].append(metric(labels, predicted))

    # The header grows with the number of folds instead of assuming exactly five.
    header = [''] + ['fold {}'.format(k) for k in range(1, len(folds) + 1)] + ['Average']
    table = [header]
    for label, _ in metrics:
        values = scores[label]
        cells = [' {:.5f}'.format(value) for value in values]
        cells.append(' {:.5f}'.format(sum(values) / len(values)))
        table.append([label] + cells)

    print(tabulate(table))


## Importing Testing Data 
Load the 10% of each group's data that was held out for testing and has not been touched so far.

In [23]:
# Folder holding the held-out test spreadsheets. Kept in one place so the
# location only has to be edited once when the notebook runs on another machine.
TESTING_DIR = r'C:\Users\linhc\OneDrive\Documents\Sproj\NgocLinhChiNguyen_SeniorProject2022\DataCollection\Importantfiles\TestingData'

# One spreadsheet per group (Average, Skilled, Elite).
df_AverageTest = pd.read_excel(TESTING_DIR + r'\AverageTesting.xlsx')
df_SkilledTest = pd.read_excel(TESTING_DIR + r'\SkilledTesting.xlsx')
df_EliteTest = pd.read_excel(TESTING_DIR + r'\EliteTesting.xlsx')

In [24]:
df_EliteTest

Unnamed: 0,blueWins,blueWardsPlaced,blueWardsDestroyed,blueKills,blueDeaths,blueAssists,blueEliteMonsters,blueDragons,blueHeraldsBarons,blueTowersDestroyed,...,redEliteMonsters,redDragons,redHeraldsBarons,redTowersDestroyed,redTotalGold,redTotalDamageToChampions,redAvgLevel,redTotalMinionsKilled,redTotalJungleMinionsKilled,redGoldDiff
0,0,22,11,9,12,13,1,0,1,1,...,2,2,0,1,25616,25521,8.8,280,92,1102
1,1,21,2,21,9,27,2,1,1,2,...,1,1,0,0,24381,25609,8.8,303,87,-6699
2,0,23,3,10,13,12,0,0,0,0,...,1,1,0,1,26278,22317,9.0,299,97,2178
3,1,26,7,15,9,15,1,0,1,3,...,1,1,0,0,23139,21203,8.6,303,67,-5808
4,1,29,5,21,17,23,1,0,1,1,...,1,1,0,0,28603,28449,8.8,254,81,-1797
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
954,1,24,2,15,10,17,1,0,1,0,...,1,1,0,0,22568,25715,8.4,265,69,-2611
955,0,26,5,16,11,23,3,2,1,0,...,0,0,0,1,24944,21386,9.0,292,72,-3159
956,1,117,13,10,9,9,2,1,1,1,...,0,0,0,0,22375,22877,8.4,310,55,-4104
957,0,23,1,7,8,9,1,0,1,2,...,1,1,0,0,22897,24149,8.6,310,82,-746


### Scale and separate Testing Data

In [26]:
def splittingDataTesting(df, scaler=None):
    """Separate a held-out frame into scaled features and target.

    Column 0 of ``df`` is taken as the target and columns 1-29 as the
    features, the same slice ``splitingDataFold`` uses for training.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with the target in the first column followed by the features.
    scaler : fitted scaler, optional
        Object with a ``transform`` method, e.g. the ``StandardScaler`` fitted
        on the training data. When given, it is applied as-is. When omitted,
        a new ``StandardScaler`` is fit on ``df`` itself, which is the
        original behaviour.

    Returns
    -------
    tuple
        ``(x, y)``: the scaled feature array and the target Series.
    """
    # Only slices are taken below, so the input frame is never modified.
    # 1:30 matches the training slice; the previous 1:33 only worked because
    # iloc clamps an out-of-range stop to the last column.
    x = df.iloc[:, 1:30]
    y = df.iloc[:, 0]

    if scaler is None:
        # NOTE(review): this standardises the test set with its own mean and
        # variance rather than the training ones, so the models see test
        # features on a slightly different scale than they were trained on.
        x = StandardScaler().fit_transform(x)
    else:
        x = scaler.transform(x)
    return x, y

In [54]:
#Removing the redGoldDiff variable as it is a duplicate with blueGoldDiff
# errors='ignore' keeps this cell re-runnable: the frames are reassigned in place,
# so on a second run the column is already gone and a plain drop would raise KeyError.
df_EliteTest = df_EliteTest.drop(columns='redGoldDiff', errors='ignore')
df_AverageTest = df_AverageTest.drop(columns='redGoldDiff', errors='ignore')
df_SkilledTest = df_SkilledTest.drop(columns='redGoldDiff', errors='ignore')
#Splitting the testing data
xTest_EliteTest,yTest_EliteTest = splittingDataTesting(df_EliteTest)
xTest_AverageTest,yTest_AverageTest = splittingDataTesting(df_AverageTest)
xTest_SkilledTest,yTest_SkilledTest = splittingDataTesting(df_SkilledTest)

### Coefficients

In [55]:
def coefficientsLr(model):
    """Return the feature names and the fitted parameters of ``model``.

    Returns ``(name, coefficients)``. ``name`` lists the 29 feature names:
    15 blue statistics followed by 14 red ones. ``coefficients`` is the
    intercept followed by ``model.coef_[0]``, so it holds one more value than
    ``name`` and callers have to prepend a label for the intercept themselves.
    """
    # Statistics that exist for both teams, in column order.
    shared_stats = [
        'WardsPlaced', 'WardsDestroyed', 'Kills', 'Deaths', 'Assists',
        'EliteMonsters', 'Dragons', 'HeraldsBarons', 'TowersDestroyed',
        'TotalGold', 'TotalDamageToChampions', 'AvgLevel',
        'TotalMinionsKilled', 'TotalJungleMinionsKilled',
    ]
    name = ['blue' + stat for stat in shared_stats]
    # The gold difference is listed for blue only.
    name.append('blueGoldDiff')
    name.extend('red' + stat for stat in shared_stats)

    coefficients = np.hstack((model.intercept_, model.coef_[0]))
    return name, coefficients

### Elite Data

#### Testing data

In [56]:
#No regularization
testingLr(trainedEliteLrNone,xTest_Elite,yTest_Elite)

---------  -------  -------  -------  -------  -------  -------
           fold 1   fold 2   fold 3   fold 4   fold 5   Average
Accuracy   0.79630  0.78402  0.78112  0.77302  0.76954  0.78080
Precision  0.80930  0.79172  0.78281  0.76297  0.77588  0.78453
Recall     0.77956  0.77252  0.78815  0.77208  0.77237  0.77694
F1 Score   0.79415  0.78200  0.78547  0.76750  0.77412  0.78065
---------  -------  -------  -------  -------  -------  -------


In [57]:
#L1 regularization
testingLr(trainedEliteLrL1,xTest_Elite,yTest_Elite)

---------  -------  -------  -------  -------  -------  -------
           fold 1   fold 2   fold 3   fold 4   fold 5   Average
Accuracy   0.79572  0.78344  0.78112  0.77302  0.76896  0.78045
Precision  0.80907  0.79147  0.78281  0.76359  0.77500  0.78439
Recall     0.77842  0.77136  0.78815  0.77088  0.77237  0.77624
F1 Score   0.79345  0.78129  0.78547  0.76722  0.77368  0.78022
---------  -------  -------  -------  -------  -------  -------


In [58]:
#L2 regularization
testingLr(trainedEliteLrL2,xTest_Elite,yTest_Elite)

---------  -------  -------  -------  -------  -------  -------
           fold 1   fold 2   fold 3   fold 4   fold 5   Average
Accuracy   0.79630  0.78402  0.78112  0.77360  0.76954  0.78092
Precision  0.80930  0.79172  0.78281  0.76387  0.77588  0.78471
Recall     0.77956  0.77252  0.78815  0.77208  0.77237  0.77694
F1 Score   0.79415  0.78200  0.78547  0.76795  0.77412  0.78074
---------  -------  -------  -------  -------  -------  -------


#### Training data

In [59]:
#No regularization
testingLr(trainedEliteLrNone,xTrain_Elite,yTrain_Elite)

---------  -------  -------  -------  -------  -------  -------
           fold 1   fold 2   fold 3   fold 4   fold 5   Average
Accuracy   0.77721  0.78000  0.78318  0.78231  0.78463  0.78147
Precision  0.78076  0.78376  0.78672  0.78699  0.78720  0.78508
Recall     0.77287  0.77608  0.77762  0.78159  0.77990  0.77761
F1 Score   0.77679  0.77990  0.78214  0.78428  0.78353  0.78133
---------  -------  -------  -------  -------  -------  -------


In [60]:
#L1 regularization
testingLr(trainedEliteLrL1,xTrain_Elite,yTrain_Elite)

---------  -------  -------  -------  -------  -------  -------
           fold 1   fold 2   fold 3   fold 4   fold 5   Average
Accuracy   0.77736  0.78043  0.78391  0.78260  0.78448  0.78176
Precision  0.78115  0.78411  0.78736  0.78728  0.78713  0.78541
Recall     0.77258  0.77666  0.77848  0.78188  0.77961  0.77784
F1 Score   0.77684  0.78037  0.78290  0.78457  0.78336  0.78161
---------  -------  -------  -------  -------  -------  -------


In [61]:
#L2 regularization
testingLr(trainedEliteLrL2,xTrain_Elite,yTrain_Elite)

---------  -------  -------  -------  -------  -------  -------
           fold 1   fold 2   fold 3   fold 4   fold 5   Average
Accuracy   0.77707  0.78014  0.78362  0.78231  0.78477  0.78158
Precision  0.78069  0.78382  0.78724  0.78699  0.78709  0.78517
Recall     0.77258  0.77637  0.77791  0.78159  0.78048  0.77779
F1 Score   0.77662  0.78008  0.78255  0.78428  0.78377  0.78146
---------  -------  -------  -------  -------  -------  -------


#### Coefficient

In [62]:
#Take fold 5 as L1 regularization has highest accuracy
# NOTE(review): in the saved Elite tables, fold 5 has the highest L1 accuracy on the
# training folds (0.78448) but the lowest on the held-out folds (0.76896) - confirm
# which criterion is intended.
#L1 regularization
# coef holds the intercept followed by one value per feature name, hence the
# 'Intercept' label prepended to the names below.
name,coef = coefficientsLr(trainedEliteLrL1[4])
df = pd.DataFrame(data={'variable': ['Intercept'] + name, 'coefficient': coef})
df = df.sort_values(by='coefficient')
print(df.to_markdown())

|    | variable                     |   coefficient |
|---:|:-----------------------------|--------------:|
| 25 | redTotalGold                 |   -0.197617   |
| 22 | redDragons                   |   -0.155888   |
| 27 | redAvgLevel                  |   -0.148006   |
| 28 | redTotalMinionsKilled        |   -0.0876467  |
| 26 | redTotalDamageToChampions    |   -0.0456926  |
| 21 | redEliteMonsters             |   -0.0361148  |
| 18 | redKills                     |   -0.0100832  |
|  4 | blueDeaths                   |   -0.0100832  |
| 16 | redWardsPlaced               |   -0.00108886 |
| 20 | redAssists                   |   -0.00107835 |
|  1 | blueWardsPlaced              |    0          |
| 23 | redHeraldsBarons             |    0          |
|  8 | blueHeraldsBarons            |    0          |
| 10 | blueTotalGold                |    0          |
| 13 | blueTotalMinionsKilled       |    0.00850454 |
|  6 | blueEliteMonsters            |    0.01411    |
| 29 | redTotalJungleMinions

### Skilled Data

#### Testing data

In [63]:
#No regularization
testingLr(trainedSkilledLrNone,xTest_Skilled,yTest_Skilled)

---------  -------  -------  -------  -------  -------  -------
           fold 1   fold 2   fold 3   fold 4   fold 5   Average
Accuracy   0.76278  0.75969  0.76674  0.75479  0.76184  0.76117
Precision  0.75632  0.75795  0.77317  0.75701  0.74989  0.75887
Recall     0.76344  0.75963  0.75930  0.74389  0.76718  0.75869
F1 Score   0.75987  0.75879  0.76617  0.75039  0.75844  0.75873
---------  -------  -------  -------  -------  -------  -------


In [64]:
#L1 regularization
testingLr(trainedSkilledLrL1,xTest_Skilled,yTest_Skilled)

---------  -------  -------  -------  -------  -------  -------
           fold 1   fold 2   fold 3   fold 4   fold 5   Average
Accuracy   0.76256  0.75925  0.76652  0.75435  0.76162  0.76086
Precision  0.75599  0.75774  0.77307  0.75633  0.74956  0.75854
Recall     0.76344  0.75874  0.75886  0.74389  0.76718  0.75842
F1 Score   0.75970  0.75824  0.76590  0.75006  0.75827  0.75843
---------  -------  -------  -------  -------  -------  -------


In [65]:
#L2 regularization
testingLr(trainedSkilledLrL2,xTest_Skilled,yTest_Skilled)

---------  -------  -------  -------  -------  -------  -------
           fold 1   fold 2   fold 3   fold 4   fold 5   Average
Accuracy   0.76278  0.75969  0.76652  0.75479  0.76206  0.76117
Precision  0.75632  0.75795  0.77283  0.75701  0.75022  0.75887
Recall     0.76344  0.75963  0.75930  0.74389  0.76718  0.75869
F1 Score   0.75987  0.75879  0.76600  0.75039  0.75861  0.75873
---------  -------  -------  -------  -------  -------  -------


#### Training data

In [66]:
#No regularization
testingLr(trainedSkilledLrNone,xTrain_Skilled,yTrain_Skilled)

---------  -------  -------  -------  -------  -------  -------
           fold 1   fold 2   fold 3   fold 4   fold 5   Average
Accuracy   0.76187  0.76291  0.75989  0.76381  0.76034  0.76176
Precision  0.75985  0.76074  0.75757  0.76149  0.75947  0.75983
Recall     0.76002  0.75930  0.75436  0.76124  0.75778  0.75854
F1 Score   0.75994  0.76002  0.75596  0.76136  0.75862  0.75918
---------  -------  -------  -------  -------  -------  -------


In [67]:
#L1 regularization
testingLr(trainedSkilledLrL1,xTrain_Skilled,yTrain_Skilled)

---------  -------  -------  -------  -------  -------  -------
           fold 1   fold 2   fold 3   fold 4   fold 5   Average
Accuracy   0.76181  0.76291  0.75994  0.76392  0.76056  0.76183
Precision  0.75977  0.76068  0.75754  0.76149  0.75952  0.75980
Recall     0.76002  0.75941  0.75458  0.76157  0.75834  0.75878
F1 Score   0.75990  0.76005  0.75606  0.76153  0.75893  0.75929
---------  -------  -------  -------  -------  -------  -------


In [68]:
#L2 regularization
testingLr(trainedSkilledLrL2,xTrain_Skilled,yTrain_Skilled)

---------  -------  -------  -------  -------  -------  -------
           fold 1   fold 2   fold 3   fold 4   fold 5   Average
Accuracy   0.76192  0.76302  0.75989  0.76381  0.76028  0.76179
Precision  0.75988  0.76091  0.75757  0.76149  0.75938  0.75985
Recall     0.76013  0.75930  0.75436  0.76124  0.75778  0.75856
F1 Score   0.76001  0.76010  0.75596  0.76136  0.75858  0.75920
---------  -------  -------  -------  -------  -------  -------


#### Coefficients

In [69]:
#Take fold 2 as L1 regularization has highest accuracy
# NOTE(review): in the saved Skilled tables, fold 2 is not the best L1 fold on either
# split - fold 3 is highest on the held-out folds (0.76652) and fold 4 on the training
# folds (0.76392). Confirm which fold is intended.
#L1 regularization
# coef holds the intercept followed by one value per feature name, hence the
# 'Intercept' label prepended to the names below.
name,coef = coefficientsLr(trainedSkilledLrL1[1])
df = pd.DataFrame(data={'variable': ['Intercept'] + name, 'coefficient': coef})
df = df.sort_values(by='coefficient')
print(df.to_markdown())

|    | variable                     |   coefficient |
|---:|:-----------------------------|--------------:|
| 25 | redTotalGold                 |   -0.234657   |
| 27 | redAvgLevel                  |   -0.174537   |
| 26 | redTotalDamageToChampions    |   -0.141368   |
| 22 | redDragons                   |   -0.088841   |
| 29 | redTotalJungleMinionsKilled  |   -0.0637724  |
| 21 | redEliteMonsters             |   -0.0522109  |
| 17 | redWardsDestroyed            |   -0.0288642  |
|  0 | Intercept                    |   -0.0241041  |
| 20 | redAssists                   |   -0.0185315  |
|  9 | blueTowersDestroyed          |   -0.0115989  |
| 16 | redWardsPlaced               |   -0.0078935  |
| 28 | redTotalMinionsKilled        |   -0.0015063  |
|  8 | blueHeraldsBarons            |    0          |
| 10 | blueTotalGold                |    0          |
| 23 | redHeraldsBarons             |    0          |
|  1 | blueWardsPlaced              |    0.00140638 |
|  4 | blueDeaths           

### Average Data

#### Testing data

In [70]:
#No regularization
testingLr(trainedAverageLrNone,xTest_Average,yTest_Average)

---------  -------  -------  -------  -------  -------  -------
           fold 1   fold 2   fold 3   fold 4   fold 5   Average
Accuracy   0.75908  0.76264  0.76006  0.76289  0.76761  0.76246
Precision  0.76412  0.76235  0.75999  0.76141  0.77412  0.76440
Recall     0.75739  0.77014  0.76256  0.76615  0.76100  0.76345
F1 Score   0.76074  0.76622  0.76127  0.76377  0.76750  0.76390
---------  -------  -------  -------  -------  -------  -------


In [71]:
#L1 regularization
testingLr(trainedAverageLrL1,xTest_Average,yTest_Average)

---------  -------  -------  -------  -------  -------  -------
           fold 1   fold 2   fold 3   fold 4   fold 5   Average
Accuracy   0.75908  0.76273  0.76006  0.76271  0.76770  0.76246
Precision  0.76412  0.76248  0.75999  0.76114  0.77416  0.76438
Recall     0.75739  0.77014  0.76256  0.76615  0.76117  0.76348
F1 Score   0.76074  0.76629  0.76127  0.76364  0.76761  0.76391
---------  -------  -------  -------  -------  -------  -------


In [72]:
#L2 regularization
testingLr(trainedAverageLrL2,xTest_Average,yTest_Average)

---------  -------  -------  -------  -------  -------  -------
           fold 1   fold 2   fold 3   fold 4   fold 5   Average
Accuracy   0.75908  0.76255  0.76015  0.76289  0.76779  0.76249
Precision  0.76412  0.76221  0.76004  0.76141  0.77421  0.76440
Recall     0.75739  0.77014  0.76273  0.76615  0.76135  0.76355
F1 Score   0.76074  0.76616  0.76138  0.76377  0.76772  0.76396
---------  -------  -------  -------  -------  -------  -------


#### Training data

In [73]:
#No regularization
testingLr(trainedAverageLrNone,xTrain_Average,yTrain_Average)

---------  -------  -------  -------  -------  -------  -------
           fold 1   fold 2   fold 3   fold 4   fold 5   Average
Accuracy   0.76417  0.76259  0.76301  0.76275  0.76184  0.76287
Precision  0.76567  0.76435  0.76476  0.76477  0.76342  0.76459
Recall     0.76510  0.76327  0.76486  0.76454  0.76321  0.76419
F1 Score   0.76538  0.76381  0.76481  0.76466  0.76332  0.76439
---------  -------  -------  -------  -------  -------  -------


In [74]:
#L1 regularization
testingLr(trainedAverageLrL1,xTrain_Average,yTrain_Average)

---------  -------  -------  -------  -------  -------  -------
           fold 1   fold 2   fold 3   fold 4   fold 5   Average
Accuracy   0.76412  0.76254  0.76308  0.76266  0.76190  0.76286
Precision  0.76563  0.76428  0.76483  0.76469  0.76352  0.76459
Recall     0.76505  0.76327  0.76490  0.76445  0.76321  0.76418
F1 Score   0.76534  0.76377  0.76487  0.76457  0.76337  0.76438
---------  -------  -------  -------  -------  -------  -------


In [75]:
#L2 regularization
testingLr(trainedAverageLrL2,xTrain_Average,yTrain_Average)

---------  -------  -------  -------  -------  -------  -------
           fold 1   fold 2   fold 3   fold 4   fold 5   Average
Accuracy   0.76421  0.76257  0.76303  0.76277  0.76186  0.76289
Precision  0.76572  0.76429  0.76479  0.76478  0.76343  0.76460
Recall     0.76514  0.76331  0.76486  0.76458  0.76326  0.76423
F1 Score   0.76543  0.76380  0.76482  0.76468  0.76334  0.76442
---------  -------  -------  -------  -------  -------  -------


#### Coefficients

In [76]:
#Take fold 2 as L1 regularization has highest accuracy
# NOTE(review): in the saved Average tables, fold 2 is not the best L1 fold on either
# split - fold 5 is highest on the held-out folds (0.76770) and fold 1 on the training
# folds (0.76412). Confirm which fold is intended.
#L1 regularization
# coef holds the intercept followed by one value per feature name, hence the
# 'Intercept' label prepended to the names below.
name,coef = coefficientsLr(trainedAverageLrL1[1])
df = pd.DataFrame(data={'variable': ['Intercept'] + name, 'coefficient': coef})
df = df.sort_values(by='coefficient')
print(df.to_markdown())

|    | variable                     |   coefficient |
|---:|:-----------------------------|--------------:|
| 22 | redDragons                   |  -0.152529    |
| 26 | redTotalDamageToChampions    |  -0.111351    |
| 27 | redAvgLevel                  |  -0.110439    |
| 28 | redTotalMinionsKilled        |  -0.0912376   |
|  9 | blueTowersDestroyed          |  -0.0572496   |
| 21 | redEliteMonsters             |  -0.0569013   |
| 18 | redKills                     |  -0.0446383   |
|  4 | blueDeaths                   |  -0.0446383   |
|  5 | blueAssists                  |  -0.0349912   |
| 29 | redTotalJungleMinionsKilled  |  -0.033057    |
|  2 | blueWardsDestroyed           |  -0.00762686  |
| 16 | redWardsPlaced               |  -0.000267576 |
| 25 | redTotalGold                 |   0           |
| 23 | redHeraldsBarons             |   0           |
|  8 | blueHeraldsBarons            |   0           |
|  1 | blueWardsPlaced              |   0.00131076  |
| 17 | redWardsDestroyed    

### Testing Data Logistic Regression

In [77]:
def testingTestData(lr,x_test,y_test):
    """Score one fitted model on a held-out set and print the four metrics.

    ``lr`` predicts labels for ``x_test``; accuracy, precision, recall and F1
    against ``y_test`` are then printed with five decimals, one per line.
    Nothing is returned.
    """
    predicted = lr.predict(x_test)

    # All four scores are computed before anything is printed.
    report = (
        ('Accuracy: {:.5f}', accuracy_score(y_test, predicted)),
        ('Precision {:.5f}', precision_score(y_test, predicted)),
        ('Recall {:.5f}', recall_score(y_test, predicted)),
        ('F1 Score {:.5f}', f1_score(y_test, predicted)),
    )
    for template, value in report:
        print(template.format(value))

#### Elite Data Testing

In [78]:
#Using Fold 4 L1 regularization model for Elite data set because it has highest Accuracy 
# NOTE(review): in the saved Elite tables, fold 4 is not the best L1 fold on either
# split - fold 1 is highest on the held-out folds (0.79572) and fold 5 on the training
# folds (0.78448). Confirm which fold is intended.
testingTestData(trainedEliteLrL1[3],xTest_EliteTest,yTest_EliteTest)

Accuracy: 0.79145
Precision 0.79630
Recall 0.79303
F1 Score 0.79466


In [79]:
#Coefficients
name,coef = coefficientsLr(trainedEliteLrL1[3])
df = pd.DataFrame(data={'variable': ['Intercept'] + name, 'coefficient': coef})
df = df.sort_values(by='coefficient')
print(df.to_markdown())

|    | variable                     |   coefficient |
|---:|:-----------------------------|--------------:|
| 25 | redTotalGold                 |   -0.278067   |
| 22 | redDragons                   |   -0.103444   |
| 27 | redAvgLevel                  |   -0.103443   |
| 26 | redTotalDamageToChampions    |   -0.0779063  |
| 28 | redTotalMinionsKilled        |   -0.0485224  |
| 29 | redTotalJungleMinionsKilled  |   -0.0333887  |
| 18 | redKills                     |   -0.00792012 |
|  4 | blueDeaths                   |   -0.00792012 |
| 21 | redEliteMonsters             |   -0.00658194 |
|  1 | blueWardsPlaced              |   -0.0043859  |
| 23 | redHeraldsBarons             |    0          |
|  8 | blueHeraldsBarons            |    0          |
| 10 | blueTotalGold                |    0          |
| 20 | redAssists                   |    0          |
|  6 | blueEliteMonsters            |    0.00086629 |
|  9 | blueTowersDestroyed          |    0.00412361 |
| 16 | redWardsPlaced       

#### Skilled Data Testing

In [80]:
#Using Fold 3 L1 regularization model for Skilled data set because it has highest Accuracy 
testingTestData(trainedSkilledLrL1[2],xTest_SkilledTest,yTest_SkilledTest)

Accuracy: 0.74346
Precision 0.77203
Recall 0.70675
F1 Score 0.73795


In [81]:
#Coefficients
name,coef = coefficientsLr(trainedSkilledLrL1[2])
df = pd.DataFrame(data={'variable': ['Intercept'] + name, 'coefficient': coef})
df = df.sort_values(by='coefficient')
print(df.to_markdown())

|    | variable                     |   coefficient |
|---:|:-----------------------------|--------------:|
| 27 | redAvgLevel                  |   -0.22197    |
| 26 | redTotalDamageToChampions    |   -0.16648    |
| 25 | redTotalGold                 |   -0.164727   |
| 22 | redDragons                   |   -0.093069   |
|  9 | blueTowersDestroyed          |   -0.0436108  |
|  0 | Intercept                    |   -0.0421096  |
| 29 | redTotalJungleMinionsKilled  |   -0.041291   |
| 20 | redAssists                   |   -0.0412462  |
| 17 | redWardsDestroyed            |   -0.0407748  |
| 21 | redEliteMonsters             |   -0.0404592  |
| 16 | redWardsPlaced               |   -0.0188793  |
|  8 | blueHeraldsBarons            |    0          |
| 10 | blueTotalGold                |    0          |
| 23 | redHeraldsBarons             |    0          |
| 28 | redTotalMinionsKilled        |    0.00132803 |
|  1 | blueWardsPlaced              |    0.00813174 |
| 24 | redTowersDestroyed   

#### Average Data Testing

In [82]:
#Using Fold 1 L1 regularization model for Average data set because it has highest Accuracy 
# NOTE(review): in the saved Average tables, fold 1 has the highest L1 accuracy only on
# the training folds (0.76412); on the held-out folds fold 5 is highest (0.76770).
# Confirm which criterion is intended.
testingTestData(trainedAverageLrL1[0],xTest_AverageTest,yTest_AverageTest)

Accuracy: 0.76907
Precision 0.78167
Recall 0.76737
F1 Score 0.77446


In [83]:
#Coefficients
name,coef = coefficientsLr(trainedAverageLrL1[0])
df = pd.DataFrame(data={'variable': ['Intercept'] + name, 'coefficient': coef})
df = df.sort_values(by='coefficient')
print(df.to_markdown())

|    | variable                     |   coefficient |
|---:|:-----------------------------|--------------:|
| 22 | redDragons                   |   -0.137392   |
| 26 | redTotalDamageToChampions    |   -0.112962   |
| 27 | redAvgLevel                  |   -0.111554   |
| 28 | redTotalMinionsKilled        |   -0.1046     |
|  4 | blueDeaths                   |   -0.0620036  |
| 18 | redKills                     |   -0.0620036  |
| 21 | redEliteMonsters             |   -0.0590069  |
|  5 | blueAssists                  |   -0.0574082  |
| 29 | redTotalJungleMinionsKilled  |   -0.0379955  |
|  9 | blueTowersDestroyed          |   -0.0286652  |
| 16 | redWardsPlaced               |   -0.00201346 |
| 25 | redTotalGold                 |    0          |
| 23 | redHeraldsBarons             |    0          |
|  8 | blueHeraldsBarons            |    0          |
|  1 | blueWardsPlaced              |    0.00108526 |
| 17 | redWardsDestroyed            |    0.00319905 |
|  2 | blueWardsDestroyed   