In [1]:
import pandas as pd
import numpy as np
from datetime import datetime

from sklearn.model_selection import train_test_split, GridSearchCV, StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import SelectKBest,f_classif
from sklearn.metrics import accuracy_score, precision_score,recall_score,f1_score

In [2]:
df = pd.read_csv('data/secondary-prediction/secondary_prediction_players_statistics.csv')
df

Unnamed: 0,game,flagSide,topGP,topWR,topKDA,jungleGP,jungleWR,jungleKDA,midGP,midWR,...,kills,deaths,firstBlood,firstTower,firstHerald,dragons,barons,inhibitors,towers,heralds
0,ESPORTSTMNT01_2690210,1,4,0.25,2.2,2,0.00,1.2,3,0.33,...,9,19,1.0,1.0,1.0,1.0,0.0,0.0,3.0,2.0
1,ESPORTSTMNT01_2690219,1,3,0.67,4.0,0,0.00,0.0,12,0.42,...,3,16,0.0,0.0,1.0,1.0,0.0,0.0,3.0,1.0
2,ESPORTSTMNT01_2690219,0,6,0.50,5.1,4,0.50,4.7,0,0.00,...,16,3,1.0,1.0,0.0,4.0,2.0,2.0,11.0,1.0
3,ESPORTSTMNT01_2690210,1,4,0.25,2.2,2,0.00,1.2,3,0.33,...,9,19,1.0,1.0,1.0,1.0,0.0,0.0,3.0,2.0
4,ESPORTSTMNT01_2690210,0,4,0.00,1.5,3,0.33,1.1,2,0.50,...,19,9,0.0,0.0,0.0,3.0,0.0,1.0,6.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
946,ESPORTSTMNT04_2130223,0,1,0.00,0.0,1,0.00,1.0,1,0.00,...,24,6,1.0,1.0,1.0,5.0,2.0,2.0,10.0,2.0
947,ESPORTSTMNT01_2697165,1,0,0.00,0.0,0,0.00,0.0,10,0.40,...,10,21,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0
948,ESPORTSTMNT01_2697165,0,0,0.00,0.0,5,0.80,5.7,25,0.72,...,21,10,0.0,1.0,0.0,4.0,1.0,1.0,8.0,1.0
949,ESPORTSTMNT01_2697166,1,0,0.00,0.0,0,0.00,0.0,2,0.00,...,26,15,1.0,1.0,0.0,2.0,2.0,2.0,9.0,0.0


In [3]:
y = df['firstTower'].copy()
X = df.drop(['game', 'firstBlood', 'kills', 'deaths', 'firstTower', 'firstHerald', 'dragons', 'barons', 'inhibitors', 'towers', 'heralds'], axis=1)
X

Unnamed: 0,flagSide,topGP,topWR,topKDA,jungleGP,jungleWR,jungleKDA,midGP,midWR,midKDA,carryGP,carryWR,carryKDA,suppGP,suppWR,suppKDA
0,1,4,0.25,2.2,2,0.00,1.2,3,0.33,4.1,0,0.00,0.0,0,0.00,0.0
1,1,3,0.67,4.0,0,0.00,0.0,12,0.42,4.2,1,0.00,4.0,1,1.00,9.0
2,0,6,0.50,5.1,4,0.50,4.7,0,0.00,0.0,1,0.00,1.3,11,0.36,2.4
3,1,4,0.25,2.2,2,0.00,1.2,3,0.33,4.1,0,0.00,0.0,0,0.00,0.0
4,0,4,0.00,1.5,3,0.33,1.1,2,0.50,4.2,4,0.50,3.0,0,0.00,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
946,0,1,0.00,0.0,1,0.00,1.0,1,0.00,1.5,2,0.00,1.4,0,0.00,0.0
947,1,0,0.00,0.0,0,0.00,0.0,10,0.40,2.3,1,0.00,3.8,9,0.44,2.0
948,0,0,0.00,0.0,5,0.80,5.7,25,0.72,10.2,6,0.83,8.9,34,0.62,4.1
949,1,0,0.00,0.0,0,0.00,0.0,2,0.00,2.8,0,0.00,0.0,0,0.00,0.0


In [4]:
def preprocess_input(X,y):
    X = X.copy()
    X_train,X_test,y_train,y_test = train_test_split(X,y,train_size=0.8,random_state=42, stratify=y)
    scaler = StandardScaler()   
    print(X_train) 
    scaler.fit(X_train)
    X_train = scaler.transform(X_train)
    X_test = scaler.transform(X_test)
    return X_train,X_test,y_train,y_test
X_train,X_test,y_train,y_test = preprocess_input(X,y)

     flagSide  topGP  topWR  topKDA  jungleGP  jungleWR  jungleKDA  midGP  \
812         1      1   1.00     4.0         2      0.50        1.3      7   
1           1      3   0.67     4.0         0      0.00        0.0     12   
934         1     36   0.69     3.9         3      0.33        5.3     17   
151         1      1   1.00     3.0         0      0.00        0.0      0   
271         1      1   1.00     5.0        27      0.70        4.3      0   
..        ...    ...    ...     ...       ...       ...        ...    ...   
535         0      6   0.50     2.3         2      0.50        5.0      3   
359         1     17   0.59     3.1        10      0.40        3.6      5   
49          0      1   1.00     5.3         2      1.00        8.2      6   
571         1      1   1.00     2.7         7      0.43        2.3      0   
48          0     11   0.64     3.2         0      0.00        0.0      4   

     midWR  midKDA  carryGP  carryWR  carryKDA  suppGP  suppWR  suppKDA  
8

In [5]:
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import StandardScaler

from sklearn.linear_model import LogisticRegression
from sklearn.svm import LinearSVC, SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, AdaBoostClassifier

models = {
    'Logistic Regression': LogisticRegression(max_iter=1000),
    'Support Vector Machine (Linear Kernel)': LinearSVC(),
    'Support Vector Machine (RBF Kernel)': SVC(),
    'Decission Tree': DecisionTreeClassifier(),
    'Adaboost': AdaBoostClassifier(),
    'Random Forest': RandomForestClassifier(),
    'Gradient Boosting Classifier': GradientBoostingClassifier()
}

GBCmodel = GradientBoostingClassifier()
GBCmodel.fit(X_train,y_train)


for name, model in models.items():
  model.fit(X_train,y_train)
  print(name + ' trained')

Logistic Regression trained
Support Vector Machine (Linear Kernel) trained
Support Vector Machine (RBF Kernel) trained
Decission Tree trained
Adaboost trained




Random Forest trained
Gradient Boosting Classifier trained


In [6]:
scores_list = []

for name,model in models.items():    
    scores_list.append({
    'Model': name,
    'Accuracy': accuracy_score(y_test,model.predict(X_test)),
    'Precision':  precision_score(y_test,model.predict(X_test)),
    'Recall': recall_score(y_test,model.predict(X_test)),
    'F1-Score': f1_score(y_test,model.predict(X_test))
    })
scores = pd.DataFrame(scores_list)

In [7]:
scores

Unnamed: 0,Model,Accuracy,Precision,Recall,F1-Score
0,Logistic Regression,0.513089,0.519231,0.556701,0.537313
1,Support Vector Machine (Linear Kernel),0.507853,0.514563,0.546392,0.53
2,Support Vector Machine (RBF Kernel),0.518325,0.52381,0.56701,0.544554
3,Decission Tree,0.52356,0.53,0.546392,0.538071
4,Adaboost,0.513089,0.520408,0.525773,0.523077
5,Random Forest,0.52356,0.53125,0.525773,0.528497
6,Gradient Boosting Classifier,0.513089,0.521739,0.494845,0.507937
