In [314]:
import pandas as pd
import numpy as np
from datetime import datetime

from sklearn.model_selection import train_test_split, GridSearchCV, StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import SelectKBest,f_classif
from sklearn.metrics import accuracy_score, precision_score,recall_score,f1_score
from sklearn.utils import shuffle

In [315]:
df = pd.read_csv('data/secondary-prediction/secondary_prediction_players_statistics.csv')
df.groupby(['game']).size().sort_values(ascending=True)

game
ESPORTSTMNT01_2690210    2
ESPORTSTMNT03_2543175    2
ESPORTSTMNT03_2543173    2
ESPORTSTMNT03_2543155    2
ESPORTSTMNT03_2543154    2
                        ..
ESPORTSTMNT02_2551249    2
ESPORTSTMNT02_2551235    2
ESPORTSTMNT02_2551187    2
ESPORTSTMNT01_2707931    2
NA1_4217590112           2
Length: 1418, dtype: int64

In [316]:
y = df['firstTower'].copy()
X = df.drop(['game', 'firstBlood', 'kills', 'deaths', 'firstTower', 'firstHerald', 'dragons', 'barons', 'inhibitors', 'towers', 'heralds'], axis=1)
X

Unnamed: 0,flagSide,topGP,topWR,topKDA,jungleGP,jungleWR,jungleKDA,midGP,midWR,midKDA,carryGP,carryWR,carryKDA,suppGP,suppWR,suppKDA
0,1,3,0.67,4.0,0,0.00,0.0,12,0.42,4.2,1,0.00,4.0,1,1.00,9.0
1,0,6,0.50,5.1,4,0.50,4.7,0,0.00,0.0,1,0.00,1.3,11,0.36,2.4
2,1,4,0.25,2.2,2,0.00,1.2,3,0.33,4.1,0,0.00,0.0,0,0.00,0.0
3,0,4,0.00,1.5,3,0.33,1.1,2,0.50,4.2,4,0.50,3.0,0,0.00,0.0
4,1,3,0.00,1.6,0,0.00,0.0,1,0.00,0.7,5,0.40,3.1,0,0.00,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2831,0,1,1.00,13.0,1,0.00,0.8,13,0.77,8.3,3,1.00,28.0,1,1.00,29.0
2832,1,11,0.45,2.8,1,1.00,8.0,14,0.64,4.8,0,0.00,0.0,21,0.76,4.6
2833,0,3,1.00,9.8,0,0.00,0.0,0,0.00,0.0,0,0.00,0.0,0,0.00,0.0
2834,1,3,0.33,1.2,2,1.00,5.5,8,0.38,2.9,25,0.64,4.6,6,0.67,6.2


In [317]:
def preprocess_input(X,y):
    X = X.copy()
    X_train,X_test,y_train,y_test = train_test_split(X,y,train_size=0.8,random_state=42, stratify=y)
    scaler = StandardScaler()   
    print(X_train) 
    scaler.fit(X_train)
    X_train = scaler.transform(X_train)
    X_test = scaler.transform(X_test)
    return X_train,X_test,y_train,y_test
X_train,X_test,y_train,y_test = preprocess_input(X,y)

      flagSide  topGP  topWR  topKDA  jungleGP  jungleWR  jungleKDA  midGP  \
663          0      6   0.33     3.9         5      0.40        2.8      4   
391          0     29   0.69     3.7         0      0.00        0.0      9   
781          0      4   0.50     4.2         8      0.50        2.5      1   
2117         0      4   0.75     5.0         9      0.33        2.3      0   
1956         1      2   0.00     0.4         5      0.40        2.1     11   
...        ...    ...    ...     ...       ...       ...        ...    ...   
114          1     26   0.69     4.1         0      0.00        0.0      0   
1959         0      2   0.50     3.3        10      0.50        3.6     10   
2083         0     10   0.50     3.3        15      0.67        3.3     11   
1137         0      5   0.20     1.0         0      0.00        0.0      1   
608          1      6   1.00     5.2        25      0.60        4.6     10   

      midWR  midKDA  carryGP  carryWR  carryKDA  suppGP  suppWR

In [318]:
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import StandardScaler

from sklearn.linear_model import LogisticRegression
from sklearn.svm import LinearSVC, SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, AdaBoostClassifier

models = {
    'Logistic Regression': LogisticRegression(max_iter=1000),
    'Support Vector Machine (Linear Kernel)': LinearSVC(),
    'Support Vector Machine (RBF Kernel)': SVC(),
    'Decission Tree': DecisionTreeClassifier(),
    'Adaboost': AdaBoostClassifier(),
    'Random Forest': RandomForestClassifier(),
    'Gradient Boosting Classifier': GradientBoostingClassifier()
}

GBCmodel = GradientBoostingClassifier()
GBCmodel.fit(X_train,y_train)


for name, model in models.items():
  model.fit(X_train,y_train)
  print(name + ' trained')

Logistic Regression trained
Support Vector Machine (Linear Kernel) trained




Support Vector Machine (RBF Kernel) trained
Decission Tree trained
Adaboost trained
Random Forest trained
Gradient Boosting Classifier trained


In [319]:
scores_list = []

for name,model in models.items():    
    scores_list.append({
    'Model': name,
    'Accuracy': accuracy_score(y_test,model.predict(X_test)),
    'Precision':  precision_score(y_test,model.predict(X_test)),
    'Recall': recall_score(y_test,model.predict(X_test)),
    'F1-Score': f1_score(y_test,model.predict(X_test))
    })
scores = pd.DataFrame(scores_list)

In [320]:
scores

Unnamed: 0,Model,Accuracy,Precision,Recall,F1-Score
0,Logistic Regression,0.540493,0.540351,0.542254,0.541301
1,Support Vector Machine (Linear Kernel),0.542254,0.541958,0.545775,0.54386
2,Support Vector Machine (RBF Kernel),0.517606,0.517606,0.517606,0.517606
3,Decission Tree,0.480634,0.480969,0.489437,0.485166
4,Adaboost,0.510563,0.510714,0.503521,0.507092
5,Random Forest,0.489437,0.489583,0.496479,0.493007
6,Gradient Boosting Classifier,0.521127,0.520408,0.538732,0.529412
