In [1]:
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import balanced_accuracy_score,precision_score,recall_score,f1_score,roc_auc_score

In [2]:
# Load the unified events/statistics table, keep only the matches that have at
# least MIN_EVENTS recorded events, and encode the event labels as integers.
N_EVENT_COLUMNS = 45  # the table has columns event1 .. event45
MIN_EVENTS = 30       # rows with fewer recorded events are discarded
EVENT_COLUMNS = [f'event{i}' for i in range(1, N_EVENT_COLUMNS + 1)]

# Order matters: a label's position in EVENT_LABELS is its integer code
# (all BLUE events first, then the same events for RED).
EVENT_KINDS = [
    'first_blood',
    'dragon',
    'herald',
    'first_tower_top',
    'first_tower_mid',
    'first_tower_bot',
    'second_tower_top',
    'second_tower_mid',
    'second_tower_bot',
    'third_tower_top',
    'third_tower_mid',
    'third_tower_bot',
    'inhibitor_top',
    'inhibitor_mid',
    'inhibitor_bot',
    'baron',
    'elder_dragon',
    'nexus_tower',
    'nexus',
]
EVENT_LABELS = [f'{team}: {kind}' for team in ('BLUE', 'RED') for kind in EVENT_KINDS]

df = pd.read_csv('../data/crawler/unified-events-statistics.csv')

# Count the events of each match from the event columns only. The previous
# formula (45 minus the number of nulls in the whole row) silently under-counted
# whenever a non-event column was missing a value.
qtd_eventos = df[EVENT_COLUMNS].notna().sum(axis=1).tolist()
df['qtd_eventos'] = qtd_eventos
df = df[df.qtd_eventos >= MIN_EVENTS]

# Map every event label to its integer code (0 .. len(EVENT_LABELS) - 1).
df = df.replace(EVENT_LABELS, list(range(len(EVENT_LABELS))))
df.head()

Unnamed: 0,golId,game,event1,event2,event3,event4,event5,event6,event7,event8,...,redMidWR,redMidKDA,redAdcGP,redAdcWR,redAdcKDA,redSupportGP,redSupportWR,redSupportKDA,result,qtd_eventos
2,36866,ESPORTSTMNT02_2557390,19,2,20,24,2,4,20,3,...,1.0,20.0,0,0.0,0.0,5,0.6,3.2,0,31
9,36894,ESPORTSTMNT02_2577994,0,20,2,1,5,3,6,20,...,0.0,0.0,1,1.0,12.0,0,0.0,0.0,1,30
26,36926,ESPORTSTMNT05_2530903,19,21,20,22,23,20,21,25,...,0.0,0.0,0,0.0,0.0,0,0.0,0.0,0,34
27,36928,ESPORTSTMNT05_2530937,0,20,21,1,22,3,24,2,...,1.0,12.0,2,0.0,1.8,0,0.0,0.0,1,35
29,36930,ESPORTSTMNT05_2530947,20,2,0,20,2,1,4,3,...,0.0,3.0,2,1.0,3.0,0,0.0,0.0,0,32


In [3]:
# Target vector: the `result` column.
y = df['result'].copy()

# Feature matrix: drop the golId/game columns, the target, the helper event
# count, and every event slot after the 30th.
late_event_columns = [f'event{slot}' for slot in range(31, 46)]
non_feature_columns = ['golId', 'result', 'game', *late_event_columns, 'qtd_eventos']
X = df.drop(columns=non_feature_columns)
X.head()

Unnamed: 0,event1,event2,event3,event4,event5,event6,event7,event8,event9,event10,...,redJungleKDA,redMidGP,redMidWR,redMidKDA,redAdcGP,redAdcWR,redAdcKDA,redSupportGP,redSupportWR,redSupportKDA
2,19,2,20,24,2,4,20,3,6,1,...,0.0,1,1.0,20.0,0,0.0,0.0,5,0.6,3.2
9,0,20,2,1,5,3,6,20,2,4,...,13.7,0,0.0,0.0,1,1.0,12.0,0,0.0,0.0
26,19,21,20,22,23,20,21,25,5,24,...,0.0,0,0.0,0.0,0,0.0,0.0,0,0.0,0.0
27,0,20,21,1,22,3,24,2,4,7,...,0.0,1,1.0,12.0,2,0.0,1.8,0,0.0,0.0
29,20,2,0,20,2,1,4,3,22,5,...,11.0,1,0.0,3.0,2,1.0,3.0,0,0.0,0.0


In [4]:
def preprocess_input(X, y, train_size=0.8, random_state=42, return_scaler=False):
    """Split the data into stratified train/test sets and standardize the features.

    The scaler is fitted on the training split only and then applied to both
    splits, so no statistics from the test split influence the scaling.

    Parameters
    ----------
    X : feature table accepted by ``train_test_split``.
    y : target labels; also used to stratify the split.
    train_size : fraction of the rows assigned to the training split.
    random_state : seed forwarded to ``train_test_split``.
    return_scaler : when True, the fitted ``StandardScaler`` is returned as a
        fifth element so it can be stored and reused on new data.

    Returns
    -------
    (X_train, X_test, y_train, y_test), plus the fitted scaler when
    ``return_scaler`` is True. ``X_train`` and ``X_test`` are the scaled arrays.
    """
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, train_size=train_size, random_state=random_state, stratify=y
    )
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)
    if return_scaler:
        return X_train, X_test, y_train, y_test, scaler
    return X_train, X_test, y_train, y_test


X_train, X_test, y_train, y_test = preprocess_input(X, y)

In [5]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, AdaBoostClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB

# Candidate classifiers, keyed by the label that is reported in the score table.
# NOTE(review): the SVC entry is labelled "RBF Kernel" but is configured with
# kernel='sigmoid' - confirm which one is intended before changing either,
# since the label is also what gets written to the results CSV.
models = {
    'Logistic Regression': LogisticRegression(
        random_state=42,
        max_iter=10000,
    ),
    'Support Vector Machine (RBF Kernel)': SVC(
        random_state=42,
        C=1,
        gamma=0.01,
        kernel='sigmoid',
        max_iter=50000,
        probability=True,  # needed so predict_proba is available
    ),
    'Decission Tree': DecisionTreeClassifier(random_state=42),
    'Adaboost': AdaBoostClassifier(random_state=42),
    'Random Forest': RandomForestClassifier(random_state=42),
    'Gradient Boosting Classifier': GradientBoostingClassifier(random_state=42),
    'KNN': KNeighborsClassifier(n_neighbors=3),
    'Gaussian NB': GaussianNB(),
}

# Fit every classifier on the same training split.
for label, estimator in models.items():
    estimator.fit(X_train, y_train)
    print(f'{label} trained')

Logistic Regression trained
Support Vector Machine (RBF Kernel) trained
Decission Tree trained
Adaboost trained
Random Forest trained
Gradient Boosting Classifier trained
KNN trained
Gaussian NB trained


In [6]:
# Evaluate every fitted model on the held-out test split and collect one row of
# metrics per model.
scores_list = []

for name, model in models.items():
    # Predict once per model and reuse the result for all label-based metrics
    # (previously predict() was re-run for each of the four metrics).
    y_pred = model.predict(X_test)
    # Probability of the positive class (second column), used for ROC AUC.
    y_score = model.predict_proba(X_test)[:, 1]
    scores_list.append({
        'model': name,
        'amount_events': '30',
        'Balanced Accuracy': balanced_accuracy_score(y_test, y_pred),
        'Precision': precision_score(y_test, y_pred),
        'Recall': recall_score(y_test, y_pred),
        'F1-Score': f1_score(y_test, y_pred),
        'auc': roc_auc_score(y_test, y_score),
    })
scores = pd.DataFrame(scores_list)

In [7]:
# Display the per-model metrics table (one row per entry of `models`).
scores

Unnamed: 0,model,amount_events,Balanced Accuracy,Precision,Recall,F1-Score,auc
0,Logistic Regression,30,0.840336,0.8,0.857143,0.827586,0.885504
1,Support Vector Machine (RBF Kernel),30,0.890756,0.83871,0.928571,0.881356,0.907563
2,Decission Tree,30,0.858193,0.806452,0.892857,0.847458,0.858193
3,Adaboost,30,0.872899,0.833333,0.892857,0.862069,0.92542
4,Random Forest,30,0.887605,0.862069,0.892857,0.877193,0.943803
5,Gradient Boosting Classifier,30,0.890756,0.83871,0.928571,0.881356,0.938025
6,KNN,30,0.837185,0.821429,0.821429,0.821429,0.871324
7,Gaussian NB,30,0.869748,0.857143,0.857143,0.857143,0.875525


In [8]:
# Append the model name, event count and AUC of each model to the shared results
# file. No header row is written, and because the file is opened in append mode
# every execution of this cell adds the same rows again.
header = ['model', 'amount_events', 'auc']
scores[header].to_csv(
    '../data/models-analysis/models.csv',
    mode='a',
    header=False,
    index=False,
)

In [9]:
import pickle

# Serialize the fitted Random Forest to disk.
# NOTE(review): the model was fitted on features standardized inside
# preprocess_input, and that scaler is not stored here - confirm that whatever
# loads this file applies the same scaling.
selected_model = models['Random Forest']
with open("../models/model-30.pkl", "wb") as model_file:
    pickle.dump(selected_model, model_file)