In [1]:
import os
import pandas as pd
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn import metrics
from sklearn.model_selection import train_test_split
from sklearn.feature_selection import RFE
import matplotlib.pyplot as plt
from matplotlib.pyplot import cm
import seaborn as sns
import statsmodels.api as sm

In [2]:
# Read data
df = pd.read_csv("./All Data.csv")
df.head()

Unnamed: 0,GP,W,L,AGT,K,D,KD,CKPM,GPR,GSPD,...,WPM,CWPM,WCPM,win_rate,Team,division,EGR,MLR,year,msi_placed
0,13,7,6,42.7,239,235,1.02,0.85,0.0,0.0,...,0.0,0.0,0.0,53.85,7th heaven,LJL,0.0,0.0,2015,0
1,14,6,8,35.8,214,269,0.8,0.96,-0.89,-0.062,...,2.21,0.32,0.69,42.86,ATLAS eSports Team,TCL,0.0,0.0,2015,0
2,6,1,5,39.3,80,121,0.66,0.85,0.0,0.0,...,0.0,0.0,0.0,16.67,Bencheados,LLA,0.0,0.0,2015,0
3,14,12,2,33.9,291,175,1.66,0.98,1.47,0.153,...,2.37,0.34,0.82,85.71,Beşiktaş Esports,TCL,0.0,0.0,2015,1
4,5,0,5,27.6,27,110,0.25,0.99,-4.84,-0.326,...,2.82,0.27,0.6,0.0,Beşiktaş Esports,MSI,0.0,0.0,2015,-1


In [3]:
# Separate playoffs games and MSI games
playMask = df['msi_placed'] >= 0
msiMask = df['msi_placed'] == -1
playoffs = df[playMask]
msi = df[msiMask]
playoffs

Unnamed: 0,GP,W,L,AGT,K,D,KD,CKPM,GPR,GSPD,...,WPM,CWPM,WCPM,win_rate,Team,division,EGR,MLR,year,msi_placed
0,13,7,6,42.7,239,235,1.02,0.85,0.00,0.000,...,0.00,0.00,0.00,53.85,7th heaven,LJL,0.0,0.0,2015,0
1,14,6,8,35.8,214,269,0.80,0.96,-0.89,-0.062,...,2.21,0.32,0.69,42.86,ATLAS eSports Team,TCL,0.0,0.0,2015,0
2,6,1,5,39.3,80,121,0.66,0.85,0.00,0.000,...,0.00,0.00,0.00,16.67,Bencheados,LLA,0.0,0.0,2015,0
3,14,12,2,33.9,291,175,1.66,0.98,1.47,0.153,...,2.37,0.34,0.82,85.71,Beşiktaş Esports,TCL,0.0,0.0,2015,1
5,14,4,10,36.8,214,263,0.81,0.92,-1.12,-0.092,...,2.47,0.31,0.75,28.57,Big Plays Incorporated,TCL,0.0,0.0,2015,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
392,8,6,2,34.1,143,80,1.79,0.82,0.84,0.075,...,2.80,0.96,1.30,75.00,Unicorns of Love.CIS,LCL,45.7,29.3,2021,1
393,17,10,7,31.7,207,182,1.14,0.72,0.54,0.040,...,3.20,1.40,1.47,58.82,V3 Esports,LJL,50.9,8.0,2021,0
394,8,4,4,30.7,97,84,1.15,0.74,-0.01,0.024,...,3.25,1.37,1.41,50.00,Vorax Liberty,CBLOL,48.0,2.0,2021,0
396,14,9,5,31.7,198,191,1.04,0.88,0.57,0.025,...,3.68,1.49,1.46,64.29,paiN Gaming,CBLOL,63.1,1.2,2021,1


In [4]:
# Remove Team, Division, Kill, Death, KD
remove = ['Team', 'division', 'K', 'D', 'KD', 'W', 'L', 'win_rate']
columns = playoffs.get(remove)
data = playoffs.drop(remove, axis = 1)
data.columns

Index(['GP', 'AGT', 'CKPM', 'GPR', 'GSPD', 'GD15', 'FB%', 'FT%', 'F3T%',
       'HLD%', 'FD%', 'DRG%', 'ELD%', 'FBN%', 'BN%', 'LNE%', 'JNG%', 'WPM',
       'CWPM', 'WCPM', 'EGR', 'MLR', 'year', 'msi_placed'],
      dtype='object')

In [5]:
# SKLearn Logistic Regression and statsmodels process derived from https://towardsdatascience.com/building-a-logistic-regression-in-python-step-by-step-becd4d56c9c8

# Get features and remove target
features = list(data.columns)
features.remove('msi_placed') 

# Separating out the features
X = pd.DataFrame(data.loc[:, features])
# Separating out the target
y = pd.DataFrame(data.loc[:,['msi_placed']])

# Split data into train and test
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.25,random_state=40)

In [6]:
# Instantiate the model
logMod = LogisticRegression(max_iter = 10000)

# Fit the model
logMod.fit(X_train, y_train.to_numpy().ravel())

# Get prediction
yPred = logMod.predict(X_test)

In [7]:
cnf_matrix = metrics.confusion_matrix(y_test, yPred)
cnf_matrix

array([[79,  3],
       [ 7,  1]], dtype=int64)

In [8]:
print("Accuracy:",metrics.accuracy_score(y_test, yPred))
print("Precision:",metrics.precision_score(y_test, yPred))
print("Recall:",metrics.recall_score(y_test, yPred))

Accuracy: 0.8888888888888888
Precision: 0.25
Recall: 0.125


In [9]:
rfe = RFE(logMod)
rfe = rfe.fit(X, y.values.ravel())

print(rfe.support_)
print(rfe.ranking_)

[False False False  True  True False  True  True False  True False  True
  True  True  True  True False False False  True False False False]
[ 9  5  2  1  1 13  1  1  7  1  3  1  1  1  1  1  4 11  6  1 10  8 12]


In [10]:
X = X.loc[:, rfe.support_]
logit_model=sm.Logit(y,X)
result=logit_model.fit(method = 'bfgs')
print(result.summary2())

         Current function value: 0.299490
         Iterations: 35
         Function evaluations: 36
         Gradient evaluations: 36
                         Results: Logit
Model:              Logit            Pseudo R-squared: 0.143     
Dependent Variable: msi_placed       AIC:              237.0335  
Date:               2022-05-01 13:55 BIC:              279.7500  
No. Observations:   359              Log-Likelihood:   -107.52   
Df Model:           10               LL-Null:          -125.46   
Df Residuals:       348              LLR p-value:      8.7930e-05
Converged:          0.0000           Scale:            1.0000    
-------------------------------------------------------------------
          Coef.    Std.Err.      z      P>|z|     [0.025     0.975]
-------------------------------------------------------------------
GPR       1.0845     0.5386    2.0134   0.0441     0.0288    2.1402
GSPD      1.9939     7.7662    0.2567   0.7974   -13.2275   17.2154
FB%      -2.5871     1.0

