In [1]:
from sklearn.preprocessing import OneHotEncoder

import pandas as pd
import numpy as np
import seaborn as sns # data visualization library  
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.metrics import balanced_accuracy_score,precision_score,recall_score,f1_score, roc_auc_score

In [2]:
df = pd.read_csv('../data/crawler/unified-events-statistics.csv')
df = df.drop(['golId','game','event21','event22','event23','event24','event25','event26','event27','event28','event29','event30','event31','event32','event33','event34','event35','event36','event37','event38','event39','event40','event41','event42','event43','event44','event45'],axis=1)
y = df.result
x = df.drop(['result'],axis=1)
df.columns

Index(['event1', 'event2', 'event3', 'event4', 'event5', 'event6', 'event7',
       'event8', 'event9', 'event10', 'event11', 'event12', 'event13',
       'event14', 'event15', 'event16', 'event17', 'event18', 'event19',
       'event20', 'blueTopGP', 'blueTopWR', 'blueTopKDA', 'blueJungleGP',
       'blueJungleWR', 'blueJungleKDA', 'blueMidGP', 'blueMidWR', 'blueMidKDA',
       'blueADCGP', 'blueADCWR', 'blueADCKDA', 'blueSupportGP',
       'blueSupportWR', 'blueSupportKDA', 'redTopGP', 'redTopWR', 'redTopKDA',
       'redJungleGP', 'redJungleWR', 'redJungleKDA', 'redMidGP', 'redMidWR',
       'redMidKDA', 'redAdcGP', 'redAdcWR', 'redAdcKDA', 'redSupportGP',
       'redSupportWR', 'redSupportKDA', 'result'],
      dtype='object')

In [3]:
#creating instance of one-hot-encoder
encoder = OneHotEncoder(handle_unknown='ignore')

#perform one-hot encoding on 'team' column 
encoder_df = pd.DataFrame(encoder.fit_transform(df[['event1', 'event2', 'event3','event4','event5','event6','event7','event8','event9','event10','event11','event12','event13','event14','event15','event16','event17','event18','event19','event20']]).toarray())
encoder_df.columns = encoder.get_feature_names()
#merge one-hot encoded columns back with original DataFrame
transformed_df = df.join(encoder_df)
transformed_df = transformed_df.drop(['event1', 'event2', 'event3','event4','event5','event6','event7','event8','event9','event10','event11','event12','event13','event14','event15','event16','event17','event18','event19','event20'],axis=1)
transformed_df.head()

Unnamed: 0,blueTopGP,blueTopWR,blueTopKDA,blueJungleGP,blueJungleWR,blueJungleKDA,blueMidGP,blueMidWR,blueMidKDA,blueADCGP,...,x19_RED: inhibitor_top,x19_RED: nexus,x19_RED: nexus_tower,x19_RED: second_tower_bot,x19_RED: second_tower_mid,x19_RED: second_tower_top,x19_RED: third_tower_bot,x19_RED: third_tower_mid,x19_RED: third_tower_top,x19_nan
0,1,1.0,2.0,1,1.0,2.3,1,1.0,2.7,6,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,5,0.6,5.6,2,0.0,1.1,9,0.33,1.7,4,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0,0.0,0.0,1,1.0,3.3,0,0.0,0.0,2,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
3,23,0.61,3.6,5,0.2,1.6,8,0.63,4.2,5,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,3,1.0,9.8,2,1.0,7.0,1,0.0,2.0,7,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [4]:
corr = transformed_df.corr()

transformed_df[transformed_df.columns[1:]].corr()['result'][:-1].sort_values(ascending=False)

In [None]:
y = transformed_df['result'].copy()
X = transformed_df.drop(['result'],axis=1)

In [None]:
def preprocess_input(X,y):
    X = X.copy()
    X_train,X_test,y_train,y_test = train_test_split(X,y,train_size=0.8,random_state=42, stratify=y)
    return X_train,X_test,y_train,y_test
X_train,X_test,y_train,y_test = preprocess_input(X,y)

In [None]:
from sklearn.model_selection import train_test_split

from sklearn.linear_model import LogisticRegression
from sklearn.svm import LinearSVC, SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, AdaBoostClassifier

models = {
    'Logistic Regression': LogisticRegression(max_iter=10000),
    'Support Vector Machine (Linear Kernel)': LinearSVC(max_iter=100000),
    'Support Vector Machine (RBF Kernel)': SVC(max_iter=100000),
    'Decission Tree': DecisionTreeClassifier(),
    'Adaboost': AdaBoostClassifier(),
    'Random Forest': RandomForestClassifier(),
    'Gradient Boosting Classifier': GradientBoostingClassifier()
}


for name, model in models.items():
  model.fit(X_train,y_train)
  print(name + ' trained')

In [None]:
scores_list = []

for name,model in models.items():    
    scores_list.append({
    'Model': name,
    'Balanced Accuracy': balanced_accuracy_score(y_test,model.predict(X_test)),
    'Precision':  precision_score(y_test,model.predict(X_test)),
    'Recall': recall_score(y_test,model.predict(X_test)),
    'F1-Score': f1_score(y_test,model.predict(X_test)),
    'ROC/AUC Score': roc_auc_score(y_test,model.predict(X_test))
    })
scores = pd.DataFrame(scores_list)

In [None]:
scores