In [None]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import balanced_accuracy_score,precision_score,recall_score,f1_score, roc_auc_score

In [None]:
# --- Load the per-match event table and encode event labels as integers ---

# Constant the event count is measured against (the original hard-coded 45).
# NOTE(review): nulls are counted across *all* columns, so this equals the number
# of recorded events only if the non-event columns are never null — TODO confirm.
N_EVENT_COLUMNS = 45
# Rows with this many events or fewer are discarded.
MIN_EVENTS = 15

# Event kinds in the order that defines their integer code. Every kind exists
# once per team; BLUE labels get codes 0-18 and RED labels get codes 19-37.
EVENT_KINDS = [
    'first_blood',
    'dragon',
    'herald',
    'first_tower_top',
    'first_tower_mid',
    'first_tower_bot',
    'second_tower_top',
    'second_tower_mid',
    'second_tower_bot',
    'third_tower_top',
    'third_tower_mid',
    'third_tower_bot',
    'inhibitor_top',
    'inhibitor_mid',
    'inhibitor_bot',
    'baron',
    'elder_dragon',
    'nexus_tower',
    'nexus',
]
EVENT_LABELS = [f'{team}: {kind}' for team in ('BLUE', 'RED') for kind in EVENT_KINDS]
# Explicit label -> code mapping, so labels and codes cannot drift out of sync.
EVENT_CODES = {label: code for code, label in enumerate(EVENT_LABELS)}

events_raw = pd.read_csv('../data/crawler/unified-events-statistics.csv')

df = (
    events_raw
    # Vectorised "constant minus nulls in the row"; computed before the new
    # column exists, exactly as the original did.
    .assign(qtd_eventos=lambda frame: N_EVENT_COLUMNS - frame.isnull().sum(axis=1))
    .loc[lambda frame: frame.qtd_eventos > MIN_EVENTS]
    # A flat dict replaces matching values in every column of the frame.
    .replace(EVENT_CODES)
)
df

In [None]:
# Target: the match result column.
y = df['result'].copy()

# Features: drop the identifiers, the target, and the event11..event45 columns,
# leaving every other column of df as a predictor.
dropped_event_columns = [f'event{position}' for position in range(11, 46)]
X = df.drop(columns=['golId', 'result', 'game', *dropped_event_columns])
X

In [None]:
def preprocess_input(X,y):
    """Split the data 80/20 and standardise the features.

    The split is stratified on ``y`` and seeded (``random_state=42``) so it is
    reproducible. The scaler is fitted on the training part only and then
    applied to both parts, so no test-set statistics leak into training.

    Returns:
        (X_train, X_test, y_train, y_test), where the two X parts are the
        standardised arrays and the two y parts are returned unscaled.
    """
    features = X.copy()
    train_X, test_X, train_y, test_y = train_test_split(
        features,
        y,
        train_size=0.8,
        random_state=42,
        stratify=y,
    )
    standardiser = StandardScaler()
    train_X = standardiser.fit_transform(train_X)
    test_X = standardiser.transform(test_X)
    return train_X, test_X, train_y, test_y
X_train,X_test,y_train,y_test = preprocess_input(X,y)

In [None]:
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import GridSearchCV

# The log-loss objective is spelled "deviance" in older scikit-learn releases and
# "log_loss" in newer ones. It is the estimator's default, so ask the installed
# version for the spelling it accepts instead of listing both aliases.
default_loss = GradientBoostingClassifier().loss

# Exhaustive search space. Alias values that only duplicated another entry
# ("deviance"/"log_loss", "auto"/"sqrt", "mse"/"squared_error") are listed once,
# and min_samples_split starts at 2 because scikit-learn rejects integers below 2.
# WARNING: this is still 10,368 candidates x 3 folds; expect a very long run.
parameters = {
    "loss": [default_loss, "exponential"],
    "learning_rate": [0.025, 0.05, 0.1],
    "min_samples_split": [2, 3, 8],
    "min_samples_leaf": [1, 3, 8],
    "max_depth": [1, 3, 5, 8],
    "max_features": ["log2", "sqrt"],
    "criterion": ["friedman_mse", "squared_error"],
    "subsample": [0.1, 0.5, 1.0],
    "n_estimators": [10, 100, 200, 300],
}

gbc = GradientBoostingClassifier(random_state=42)

# 3-fold cross-validated grid search on all available cores. No `scoring` is
# given, so candidates are ranked by the classifier's default score (accuracy).
cv = GridSearchCV(gbc, parameters, cv=3, n_jobs=-1)
cv.fit(X_train, y_train)

In [None]:
def display(results):
    """Print a plain-text report of a fitted search object.

    Prints ``results.best_params_`` first, then one line per candidate with its
    mean test score, the standard deviation of that score, and its parameters,
    all read from ``results.cv_results_``. Scores are rounded to 3 decimals.

    Note: this name shadows IPython's built-in ``display`` within the notebook.
    """
    print(f'Best parameters are: {results.best_params_}')
    print("\n")
    cv_table = results.cv_results_
    candidates = zip(
        cv_table['mean_test_score'],
        cv_table['std_test_score'],
        cv_table['params'],
    )
    for avg, spread, candidate in candidates:
        print(f'{round(avg,3)} + or -{round(spread,3)} for the {candidate}')

In [None]:
# Report the best parameter set, then mean and std of the test score for every candidate.
display(cv)

In [None]:
# Final model with hand-fixed hyper-parameters.
# NOTE(review): n_estimators=60 and min_samples_split=2 are not values searched
# by the grid above — presumably taken from a different run; confirm.
# `loss` is left at its default (the log-loss objective) rather than being passed
# as 'deviance', a spelling that newer scikit-learn releases no longer accept.
gbOpt = GradientBoostingClassifier(
    random_state=42,
    criterion='friedman_mse',
    learning_rate=0.025,
    max_depth=3,
    max_features='log2',
    min_samples_leaf=1,
    min_samples_split=2,
    n_estimators=60,
    subsample=1.0,
)
gbOpt.fit(X_train, y_train)

# Predict once and reuse the result for every label-based metric.
y_pred = gbOpt.predict(X_test)
# ROC AUC needs a ranking score, not hard labels: use the predicted probability
# of the positive class (column 1 corresponds to gbOpt.classes_[1]).
y_score = gbOpt.predict_proba(X_test)[:, 1]

scores_list = [{
    'Balanced Accuracy': balanced_accuracy_score(y_test, y_pred),
    'Precision': precision_score(y_test, y_pred),
    'Recall': recall_score(y_test, y_pred),
    'F1-Score': f1_score(y_test, y_pred),
    'ROC/AUC Score': roc_auc_score(y_test, y_score),
}]
scores = pd.DataFrame(scores_list)

In [None]:
# Show the one-row table of metrics computed on the held-out test split.
scores