In [1]:
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import balanced_accuracy_score,precision_score,recall_score,f1_score,roc_auc_score

In [2]:
# Number subtracted from in the original event count (45); the feature-selection
# cell drops columns up to `event45`, so this presumably equals the number of
# event columns.
# NOTE(review): the null count below spans *all* columns, so this only equals
# the number of recorded events if no non-event column contains nulls - confirm.
TOTAL_EVENT_SLOTS = 45
# Games with fewer recorded events than this are discarded.
MIN_EVENTS = 25

TEAMS = ('BLUE', 'RED')
EVENT_NAMES = (
    'first_blood',
    'dragon',
    'herald',
    'first_tower_top',
    'first_tower_mid',
    'first_tower_bot',
    'second_tower_top',
    'second_tower_mid',
    'second_tower_bot',
    'third_tower_top',
    'third_tower_mid',
    'third_tower_bot',
    'inhibitor_top',
    'inhibitor_mid',
    'inhibitor_bot',
    'baron',
    'elder_dragon',
    'nexus_tower',
    'nexus',
)


def qtd_eventos(integer):
    """Return the event count for a given number of missing values.

    `integer` may be a scalar or a pandas Series of null counts; the
    subtraction is applied element-wise in the latter case.
    """
    return TOTAL_EVENT_SLOTS - integer


df = pd.read_csv('../data/crawler/unified-events-statistics.csv')

# Vectorised count; the helper is no longer overwritten by its own result,
# so `qtd_eventos` stays callable after this cell has run.
df['qtd_eventos'] = qtd_eventos(df.isnull().sum(axis=1))
df = df[df.qtd_eventos >= MIN_EVENTS]

# 'BLUE: <event>' labels map to codes 0..18 and 'RED: <event>' to 19..37,
# i.e. the same ordering as the original hand-written list. Deriving the code
# range from the label list keeps the two lengths in sync.
event_labels = [f'{team}: {event}' for team in TEAMS for event in EVENT_NAMES]
df = df.replace(event_labels, range(len(event_labels)))
df.head()

Unnamed: 0,golId,game,event1,event2,event3,event4,event5,event6,event7,event8,...,redMidWR,redMidKDA,redAdcGP,redAdcWR,redAdcKDA,redSupportGP,redSupportWR,redSupportKDA,result,qtd_eventos
0,36864,ESPORTSTMNT02_2556988,19,1,2,1,3,24,2,23,...,0.6,4.0,0,0.0,0.0,0,0.0,0.0,1,28
2,36866,ESPORTSTMNT02_2557390,19,2,20,24,2,4,20,3,...,1.0,20.0,0,0.0,0.0,5,0.6,3.2,0,31
3,36867,ESPORTSTMNT02_2557426,20,0,21,20,21,5,23,1,...,0.5,3.5,5,0.8,9.1,0,0.0,0.0,0,25
9,36894,ESPORTSTMNT02_2577994,0,20,2,1,5,3,6,20,...,0.0,0.0,1,1.0,12.0,0,0.0,0.0,1,30
11,36896,ESPORTSTMNT02_2576974,0,20,2,1,4,21,20,3,...,1.0,6.7,4,0.75,4.6,5,0.8,2.4,0,27


In [3]:
# Target vector: an independent copy of the `result` column.
y = df['result'].copy()

# Feature matrix: remove identifiers, the target, the helper count column and
# every event column from event26 through event45.
late_event_columns = [f'event{i}' for i in range(26, 46)]
X = df.drop(columns=['golId', 'result', 'game', *late_event_columns, 'qtd_eventos'])
X.head()

Unnamed: 0,event1,event2,event3,event4,event5,event6,event7,event8,event9,event10,...,redJungleKDA,redMidGP,redMidWR,redMidKDA,redAdcGP,redAdcWR,redAdcKDA,redSupportGP,redSupportWR,redSupportKDA
0,19,1,2,1,3,24,2,23,6,1,...,0.0,5,0.6,4.0,0,0.0,0.0,0,0.0,0.0
2,19,2,20,24,2,4,20,3,6,1,...,0.0,1,1.0,20.0,0,0.0,0.0,5,0.6,3.2
3,20,0,21,20,21,5,23,1,20,26,...,0.0,6,0.5,3.5,5,0.8,9.1,0,0.0,0.0
9,0,20,2,1,5,3,6,20,2,4,...,13.7,0,0.0,0.0,1,1.0,12.0,0,0.0,0.0
11,0,20,2,1,4,21,20,3,5,22,...,3.0,1,1.0,6.7,4,0.75,4.6,5,0.8,2.4


In [4]:
def preprocess_input(X, y, train_size=0.8, random_state=42):
    """Split the data into stratified train/test partitions and standardise the features.

    The scaler is fitted on the training partition only and then applied to
    both partitions, so test-set statistics never influence the scaling.

    Parameters
    ----------
    X : feature table (rows are samples).
    y : labels aligned with `X`; also used to stratify the split.
    train_size : proportion of samples placed in the training partition.
        Defaults to 0.8, the value that used to be hard-coded.
    random_state : seed for the shuffle performed by the split.
        Defaults to 42, the value that used to be hard-coded.

    Returns
    -------
    (X_train, X_test, y_train, y_test) where the two feature partitions are
    the scaler's transformed output and the label partitions come straight
    from `train_test_split`.
    """
    # No defensive copy of X is needed: the split and the scaler both return
    # new objects and leave the caller's data untouched.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, train_size=train_size, random_state=random_state, stratify=y
    )
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)
    return X_train, X_test, y_train, y_test


X_train, X_test, y_train, y_test = preprocess_input(X, y)

In [5]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, AdaBoostClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB

# Single seed shared by every estimator that accepts `random_state`.
SEED = 42

# Candidate classifiers keyed by the display name that is later written to the
# scores table.
# NOTE(review): the SVC entry is labelled "RBF Kernel" but is configured with
# kernel='sigmoid' - confirm which of the two is intended before renaming or
# changing it (the key is also used to look the model up when it is pickled).
models = {
    'Logistic Regression': LogisticRegression(max_iter=10000, random_state=SEED),
    'Support Vector Machine (RBF Kernel)': SVC(
        kernel='sigmoid',
        C=1,
        gamma=0.01,
        max_iter=50000,
        probability=True,
        random_state=SEED,
    ),
    'Decission Tree': DecisionTreeClassifier(random_state=SEED),
    'Adaboost': AdaBoostClassifier(random_state=SEED),
    'Random Forest': RandomForestClassifier(random_state=SEED),
    'Gradient Boosting Classifier': GradientBoostingClassifier(random_state=SEED),
    'KNN': KNeighborsClassifier(n_neighbors=3),
    'Gaussian NB': GaussianNB(),
}

# Fit every candidate on the training partition, reporting progress as we go.
for name, model in models.items():
    model.fit(X_train, y_train)
    print(f'{name} trained')

Logistic Regression trained
Support Vector Machine (RBF Kernel) trained
Decission Tree trained
Adaboost trained
Random Forest trained
Gradient Boosting Classifier trained
KNN trained
Gaussian NB trained


In [6]:
# Evaluate every fitted model on the held-out partition and collect one row
# of metrics per model.
scores_list = []

for name, model in models.items():
    # Predict once per model and reuse the result for every label-based
    # metric (previously predict() was re-run for each of the four metrics).
    y_pred = model.predict(X_test)
    # ROC AUC needs a continuous score: use the second predict_proba column.
    y_score = model.predict_proba(X_test)[:, 1]

    scores_list.append({
        'model': name,
        'amount_events': '25',
        'Balanced Accuracy': balanced_accuracy_score(y_test, y_pred),
        'Precision': precision_score(y_test, y_pred),
        'Recall': recall_score(y_test, y_pred),
        'F1-Score': f1_score(y_test, y_pred),
        'auc': roc_auc_score(y_test, y_score),
    })

# Build the frame in one go from the list of records.
scores = pd.DataFrame(scores_list)

In [7]:
# Show the per-model metrics table built in the previous cell.
scores

Unnamed: 0,model,amount_events,Balanced Accuracy,Precision,Recall,F1-Score,auc
0,Logistic Regression,25,0.926213,0.896,0.957265,0.92562,0.968983
1,Support Vector Machine (RBF Kernel),25,0.926454,0.889764,0.965812,0.92623,0.973256
2,Decission Tree,25,0.892266,0.882353,0.897436,0.889831,0.892266
3,Adaboost,25,0.917184,0.907563,0.923077,0.915254,0.958644
4,Random Forest,25,0.917425,0.900826,0.931624,0.915966,0.976289
5,Gradient Boosting Classifier,25,0.904846,0.891667,0.91453,0.902954,0.972084
6,KNN,25,0.921457,0.908333,0.931624,0.919831,0.942549
7,Gaussian NB,25,0.925248,0.923077,0.923077,0.923077,0.969327


In [8]:
# Columns persisted to the shared results file, in output order.
header = ['model', 'amount_events', 'auc']

# Rows are appended without a header line and without the index.
# NOTE(review): because the file is opened in append mode, re-running this
# cell adds the same rows again.
scores[header].to_csv(
    '../data/models-analysis/models.csv',
    mode='a',
    header=False,
    index=False,
)

In [9]:
import pickle

# Pick the fitted SVC out of the `models` dict and serialise it to disk.
selected_model = models['Support Vector Machine (RBF Kernel)']

with open("../models/model-25.pkl", "wb") as f:
    pickle.dump(selected_model, f)