In [1]:
import pandas as pd
import numpy as np
from datetime import datetime

from sklearn.model_selection import train_test_split, GridSearchCV, StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import SelectKBest,f_classif
from sklearn.metrics import accuracy_score, precision_score,recall_score,f1_score

In [2]:
df = pd.read_csv('data/secondary-prediction/secondary_prediction_full.csv')
df

Unnamed: 0,game,side,topPlayer,top,junglePlayer,jungle,midPlayer,mid,carryPlayer,carry,...,kills,deaths,firstBlood,firstTower,firstHerald,dragons,barons,inhibitors,towers,heralds
0,ESPORTSTMNT01_2690210,1,Soboro,58,Raptor,5,Feisty,7,Gamin,135,...,9,19,1.0,1.0,1.0,1.0,0.0,0.0,3.0,2.0
1,ESPORTSTMNT01_2690210,0,DnDn,79,Sylvie,108,FIESTA,112,vital,222,...,19,9,0.0,0.0,0.0,3.0,0.0,1.0,6.0,0.0
2,ESPORTSTMNT01_2690219,1,Photon,79,Forest,64,TolanD,61,Trigger,202,...,3,16,0.0,0.0,1.0,1.0,0.0,0.0,3.0,1.0
3,ESPORTSTMNT01_2690219,0,Meaning,41,HamBak,76,Ten10,58,KingKong,134,...,16,3,1.0,1.0,0.0,4.0,2.0,2.0,11.0,1.0
4,ESPORTSTMNT01_2690227,1,Castle,58,PangSu,91,VicLa,142,Noah,523,...,14,5,0.0,1.0,0.0,4.0,1.0,2.0,11.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7494,ESPORTSTMNT03_2656208,0,Bradley,114,Armao,56,Haeri,103,Yeon,124,...,11,9,0.0,0.0,1.0,2.0,0.0,1.0,7.0,2.0
7495,ESPORTSTMNT03_2656209,1,Darshan,150,Malice,28,Copy,104,k1ng,222,...,16,3,1.0,0.0,0.0,1.0,1.0,1.0,8.0,1.0
7496,ESPORTSTMNT03_2656209,0,Bradley,39,Armao,108,Haeri,103,Yeon,498,...,3,16,0.0,1.0,1.0,2.0,0.0,0.0,1.0,1.0
7497,ESPORTSTMNT03_2656210,1,Bradley,41,Armao,120,Haeri,70,Yeon,236,...,12,7,1.0,1.0,1.0,1.0,1.0,1.0,8.0,2.0


In [3]:
y = df['firstTower'].copy()
X = df.drop(['game', 'firstBlood', 'kills', 'deaths', 'firstTower', 'firstHerald', 'dragons', 'barons', 'inhibitors', 'towers', 'heralds', 'topPlayer', 'junglePlayer', 'midPlayer', 'carryPlayer', 'suppPlayer'], axis=1)
X

Unnamed: 0,side,top,jungle,mid,carry,supp
0,1,58,5,7,135,89
1,0,79,108,112,222,12
2,1,79,64,61,202,497
3,0,41,76,58,134,89
4,1,58,91,142,523,350
...,...,...,...,...,...,...
7494,0,114,56,103,124,497
7495,1,150,28,104,222,223
7496,0,39,108,103,498,201
7497,1,41,120,70,236,267


In [4]:
def preprocess_input(X,y):
    X = X.copy()
    X_train,X_test,y_train,y_test = train_test_split(X,y,train_size=0.8,random_state=42, stratify=y)
    scaler = StandardScaler()   
    print(X_train) 
    scaler.fit(X_train)
    X_train = scaler.transform(X_train)
    X_test = scaler.transform(X_test)
    return X_train,X_test,y_train,y_test
X_train,X_test,y_train,y_test = preprocess_input(X,y)

      side  top  jungle  mid  carry  supp
5585     1   23      77   61    222   223
4379     0  114       5  268    523   223
4935     1  104      48  123    222    89
4555     1   78     120   45     67   111
4862     1  164     106   26    124   201
...    ...  ...     ...  ...    ...   ...
4168     0  150       5   13    523    53
4055     1  875      59  517    222   111
3002     1  104     876   13    124   111
5155     0  126      48  103    498   223
2342     1   79       5  268    124   350

[5999 rows x 6 columns]


In [5]:
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import StandardScaler

from sklearn.linear_model import LogisticRegression
from sklearn.svm import LinearSVC, SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, AdaBoostClassifier

models = {
    'Logistic Regression': LogisticRegression(max_iter=1000),
    'Support Vector Machine (Linear Kernel)': LinearSVC(),
    'Support Vector Machine (RBF Kernel)': SVC(),
    'Decission Tree': DecisionTreeClassifier(),
    'Adaboost': AdaBoostClassifier(),
    'Random Forest': RandomForestClassifier(),
    'Gradient Boosting Classifier': GradientBoostingClassifier()
}

GBCmodel = GradientBoostingClassifier()
GBCmodel.fit(X_train,y_train)


for name, model in models.items():
  model.fit(X_train,y_train)
  print(name + ' trained')

Logistic Regression trained




Support Vector Machine (Linear Kernel) trained
Support Vector Machine (RBF Kernel) trained
Decission Tree trained
Adaboost trained
Random Forest trained
Gradient Boosting Classifier trained


In [6]:
scores_list = []

for name,model in models.items():    
    scores_list.append({
    'Model': name,
    'Accuracy': accuracy_score(y_test,model.predict(X_test)),
    'Precision':  precision_score(y_test,model.predict(X_test)),
    'Recall': recall_score(y_test,model.predict(X_test)),
    'F1-Score': f1_score(y_test,model.predict(X_test))
    })
scores = pd.DataFrame(scores_list)

In [7]:
scores

Unnamed: 0,Model,Accuracy,Precision,Recall,F1-Score
0,Logistic Regression,0.519333,0.522698,0.533069,0.527832
1,Support Vector Machine (Linear Kernel),0.518667,0.522078,0.531746,0.526868
2,Support Vector Machine (RBF Kernel),0.522667,0.524213,0.572751,0.547408
3,Decission Tree,0.509333,0.513021,0.521164,0.51706
4,Adaboost,0.546,0.548263,0.563492,0.555773
5,Random Forest,0.527333,0.530719,0.537037,0.533859
6,Gradient Boosting Classifier,0.548,0.552,0.547619,0.549801
