In [39]:
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import balanced_accuracy_score,precision_score,recall_score,f1_score,roc_auc_score

In [40]:
df = pd.read_csv('../../data/crawler/unified-events-statistics.csv')
def qtd_eventos(integer):
    return 45 - integer

qtd_eventos = list(map(qtd_eventos, df.isnull().sum(axis=1).tolist()))
df['qtd_eventos'] = qtd_eventos
df = df[df.qtd_eventos >= 15]
df = df.replace(['BLUE: first_blood',
    'BLUE: dragon',
    'BLUE: herald',
    'BLUE: first_tower_top',
    'BLUE: first_tower_mid',
    'BLUE: first_tower_bot',
    'BLUE: second_tower_top',
    'BLUE: second_tower_mid',
    'BLUE: second_tower_bot',
    'BLUE: third_tower_top',
    'BLUE: third_tower_mid',
    'BLUE: third_tower_bot',
    'BLUE: inhibitor_top',
    'BLUE: inhibitor_mid',
    'BLUE: inhibitor_bot',
    'BLUE: baron',
    'BLUE: elder_dragon',
    'BLUE: nexus_tower',
    'BLUE: nexus',
    'RED: first_blood',
    'RED: dragon',
    'RED: herald',
    'RED: first_tower_top',
    'RED: first_tower_mid',
    'RED: first_tower_bot',
    'RED: second_tower_top',
    'RED: second_tower_mid',
    'RED: second_tower_bot',
    'RED: third_tower_top',
    'RED: third_tower_mid',
    'RED: third_tower_bot',
    'RED: inhibitor_top',
    'RED: inhibitor_mid',
    'RED: inhibitor_bot',
    'RED: baron',
    'RED: elder_dragon',
    'RED: nexus_tower',
    'RED: nexus'], range(38))
df.head()

Unnamed: 0,golId,game,event1,event2,event3,event4,event5,event6,event7,event8,...,redMidWR,redMidKDA,redAdcGP,redAdcWR,redAdcKDA,redSupportGP,redSupportWR,redSupportKDA,result,qtd_eventos
0,36864,ESPORTSTMNT02_2556988,19,1,2,1,3,24,2,23,...,0.2,4.0,0,0.0,0.0,0,0.0,0.0,1,28
1,36865,ESPORTSTMNT05_2520933,19,21,1,22,25,24,20,21,...,0.25,3.55,10,0.4,4.07,2,0.5,5.4,0,20
2,36866,ESPORTSTMNT02_2557390,19,2,20,24,2,4,20,3,...,0.0,20.0,0,0.0,0.0,4,1.0,2.8,0,31
3,36867,ESPORTSTMNT02_2557426,20,0,21,20,21,5,23,1,...,0.2,4.0,5,0.6,9.12,0,0.0,0.0,0,25
4,36868,ESPORTSTMNT02_2557463,19,1,2,1,2,4,20,24,...,0.0,2.35,2,0.0,24.0,0,0.0,0.0,0,23


In [41]:
y = df['result'].copy()
X = df.drop(['qtd_eventos','golId','result','game','event16','event17','event18','event19','event20','event21','event22','event23','event24','event25','event26','event27','event28','event29','event30','event31','event32','event33','event34','event35','event36','event37','event38','event39','event40','event41','event42','event43','event44','event45'],axis=1)
X.head()

Unnamed: 0,event1,event2,event3,event4,event5,event6,event7,event8,event9,event10,...,redJungleKDA,redMidGP,redMidWR,redMidKDA,redAdcGP,redAdcWR,redAdcKDA,redSupportGP,redSupportWR,redSupportKDA
0,19,1,2,1,3,24,2,23,6,1,...,0.0,5,0.2,4.0,0,0.0,0.0,0,0.0,0.0
1,19,21,1,22,25,24,20,21,23,26,...,4.0,4,0.25,3.55,10,0.4,4.07,2,0.5,5.4
2,19,2,20,24,2,4,20,3,6,1,...,0.0,1,0.0,20.0,0,0.0,0.0,4,1.0,2.8
3,20,0,21,20,21,5,23,1,20,26,...,0.0,5,0.2,4.0,5,0.6,9.12,0,0.0,0.0
4,19,1,2,1,2,4,20,24,5,22,...,6.6,5,0.0,2.35,2,0.0,24.0,0,0.0,0.0


In [42]:
def preprocess_input(X,y):
    X = X.copy()
    X_train,X_test,y_train,y_test = train_test_split(X,y,train_size=0.8,stratify=y,random_state=42)
    scaler = StandardScaler()   
    scaler.fit(X_train)
    X_train = scaler.transform(X_train)
    X_test = scaler.transform(X_test)
    return X_train,X_test,y_train,y_test,scaler

In [43]:
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, AdaBoostClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB

executions = pd.DataFrame()

for i in range(50):
  print('execução número ' + str(i))
  X_train,X_test,y_train,y_test,scaler = preprocess_input(X,y)

  models = {
      'Logistic Regression': LogisticRegression(max_iter=10000),
      'Support Vector Machine (RBF Kernel)': SVC(C=1,gamma=0.01,kernel='sigmoid',max_iter=50000,probability=True),
      'Decission Tree': DecisionTreeClassifier(),
      'Adaboost': AdaBoostClassifier(),
      'Random Forest': RandomForestClassifier(),
      'Gradient Boosting Classifier': GradientBoostingClassifier(),
      'KNN': KNeighborsClassifier(n_neighbors=3),
      'Gaussian NB': GaussianNB()
  }

  for name, model in models.items():
    model.fit(X_train,y_train)

  scores_list = []

  for name,model in models.items():    
      scores_list.append({
      'model': name,
      'amount_events': '0',
      'execution': i,
      'Balanced Accuracy': balanced_accuracy_score(y_test,model.predict(X_test)),
      'Precision':  precision_score(y_test,model.predict(X_test)),
      'Recall': recall_score(y_test,model.predict(X_test)),
      'F1-Score': f1_score(y_test,model.predict(X_test)),
      'auc': roc_auc_score(y_test,model.predict_proba(X_test)[:,1])
      })
  # scores = pd.DataFrame(scores_list)
  executions = executions.append(scores_list)

execução número 0


execução número 1
execução número 2
execução número 3
execução número 4
execução número 5
execução número 6
execução número 7
execução número 8
execução número 9
execução número 10
execução número 11
execução número 12
execução número 13
execução número 14
execução número 15
execução número 16
execução número 17
execução número 18
execução número 19
execução número 20
execução número 21
execução número 22
execução número 23
execução número 24
execução número 25
execução número 26
execução número 27
execução número 28
execução número 29
execução número 30
execução número 31
execução número 32
execução número 33
execução número 34
execução número 35
execução número 36
execução número 37
execução número 38
execução número 39
execução número 40
execução número 41
execução número 42
execução número 43
execução número 44
execução número 45
execução número 46
execução número 47
execução número 48
execução número 49


In [44]:
avg_scores_list = []

for name, model in models.items():

    avg_balanced_acc = 0

    model_metrics = executions.loc[executions['model'] == name]
    avg_balanced_acc = model_metrics['Balanced Accuracy'].sum() / len(model_metrics['Balanced Accuracy'])
    avg_precision = model_metrics['Precision'].sum() / len(model_metrics['Precision'])
    avg_recall = model_metrics['Recall'].sum() / len(model_metrics['Recall'])
    avg_f_score = model_metrics['F1-Score'].sum() / len(model_metrics['F1-Score'])
    avg_auc = model_metrics['auc'].sum() / len(model_metrics['auc'])

    avg_scores_list.append({
      'model': name,
      'model_obj': model,
      'amount_events': '0',
      'Balanced Accuracy': avg_balanced_acc,
      'Precision': avg_precision,
      'Recall': avg_recall,
      'F1-Score': avg_f_score,
      'auc': avg_auc
      })
avg_scores = pd.DataFrame(avg_scores_list)

In [45]:
ordered_scores = avg_scores.sort_values(by='auc', ascending=False)

In [46]:
ordered_scores

Unnamed: 0,model,model_obj,amount_events,Balanced Accuracy,Precision,Recall,F1-Score,auc
3,Adaboost,"(DecisionTreeClassifier(max_depth=1, random_st...",0,0.873792,0.880527,0.877029,0.878716,0.943895
5,Gradient Boosting Classifier,([DecisionTreeRegressor(criterion='friedman_ms...,0,0.873189,0.875798,0.882524,0.879091,0.943149
4,Random Forest,"(DecisionTreeClassifier(max_features='auto', r...",0,0.871395,0.874462,0.880256,0.877288,0.939918
0,Logistic Regression,LogisticRegression(max_iter=10000),0,0.868173,0.86394,0.888498,0.876004,0.936833
1,Support Vector Machine (RBF Kernel),"SVC(C=1, gamma=0.01, kernel='sigmoid', max_ite...",0,0.867162,0.863101,0.88738,0.875031,0.936574
7,Gaussian NB,GaussianNB(),0,0.855404,0.859364,0.864696,0.861972,0.929719
6,KNN,KNeighborsClassifier(n_neighbors=3),0,0.805606,0.788696,0.862428,0.82388,0.859717
2,Decission Tree,DecisionTreeClassifier(),0,0.794167,0.804804,0.799201,0.801904,0.794167


In [47]:
auc_modelo1 = ordered_scores.iloc[0]['auc']
auc_modelo2 = ordered_scores.iloc[1]['auc']
auc_modelo3 = ordered_scores.iloc[2]['auc']

# Calcule o somatório dos AUCs
soma_auc = auc_modelo1 + auc_modelo2 + auc_modelo3

# Calcule os pesos normalizados
peso_normalizado_modelo1 = auc_modelo1 / soma_auc
peso_normalizado_modelo2 = auc_modelo2 / soma_auc
peso_normalizado_modelo3 = auc_modelo3 / soma_auc

# Insira os pesos normalizados em uma lista
pesos_normalizados = [peso_normalizado_modelo1, peso_normalizado_modelo2, peso_normalizado_modelo3]

# Verifique se a soma dos pesos é 1 (para garantir a normalização)
soma_pesos = sum(pesos_normalizados)
soma_pesos

1.0

In [53]:
# Ensemble Voting Classifier
from sklearn.ensemble import VotingClassifier

# Criando o ensemble com os classificadores
ensemble_classifier = VotingClassifier(estimators=[
    (ordered_scores.iloc[0]['model'], ordered_scores.iloc[0]['model_obj']),
    (ordered_scores.iloc[1]['model'], ordered_scores.iloc[1]['model_obj']),
    (ordered_scores.iloc[2]['model'], ordered_scores.iloc[2]['model_obj'])
], voting='soft')

ensemble_classifier = ensemble_classifier.fit(X_train, y_train)

In [54]:
ensemble_scores_list = []
ensemble_scores_list.append({
    'model': 'Ensemble Classifier',
    'model_obj': ensemble_classifier,
    'amount_events': 0,
    'Balanced Accuracy': balanced_accuracy_score(y_test,ensemble_classifier.predict(X_test)),
    'Precision':  precision_score(y_test,ensemble_classifier.predict(X_test)),
    'Recall': recall_score(y_test,ensemble_classifier.predict(X_test)),
    'F1-Score': f1_score(y_test,ensemble_classifier.predict(X_test)),
    'auc': roc_auc_score(y_test,ensemble_classifier.predict_proba(X_test)[:,1])
})

In [55]:
ensemble_scores = pd.DataFrame(ensemble_scores_list)
ensemble_scores_list

[{'model': 'Ensemble Classifier',
  'model_obj': VotingClassifier(estimators=[('Adaboost', AdaBoostClassifier()),
                               ('Gradient Boosting Classifier',
                                GradientBoostingClassifier()),
                               ('Random Forest', RandomForestClassifier())],
                   voting='soft'),
  'amount_events': 0,
  'Balanced Accuracy': 0.8722488462903799,
  'Precision': 0.8736176935229067,
  'Recall': 0.8833865814696485,
  'F1-Score': 0.8784749801429707,
  'auc': 0.942761026801562}]

In [51]:
ensemble_scores

Unnamed: 0,model,model_obj,amount_events,Balanced Accuracy,Precision,Recall,F1-Score,auc
0,Ensemble Classifier,"VotingClassifier(estimators=[('Adaboost', AdaB...",0,0.869783,0.871835,0.880192,0.875994,0.943199


In [52]:
models = pd.concat([ensemble_scores,avg_scores])
ordered_models = models.sort_values(by='auc', ascending=False)
ordered_models

Unnamed: 0,model,model_obj,amount_events,Balanced Accuracy,Precision,Recall,F1-Score,auc
3,Adaboost,"(DecisionTreeClassifier(max_depth=1, random_st...",0,0.873792,0.880527,0.877029,0.878716,0.943895
0,Ensemble Classifier,"VotingClassifier(estimators=[('Adaboost', AdaB...",0,0.869783,0.871835,0.880192,0.875994,0.943199
5,Gradient Boosting Classifier,([DecisionTreeRegressor(criterion='friedman_ms...,0,0.873189,0.875798,0.882524,0.879091,0.943149
4,Random Forest,"(DecisionTreeClassifier(max_features='auto', r...",0,0.871395,0.874462,0.880256,0.877288,0.939918
0,Logistic Regression,LogisticRegression(max_iter=10000),0,0.868173,0.86394,0.888498,0.876004,0.936833
1,Support Vector Machine (RBF Kernel),"SVC(C=1, gamma=0.01, kernel='sigmoid', max_ite...",0,0.867162,0.863101,0.88738,0.875031,0.936574
7,Gaussian NB,GaussianNB(),0,0.855404,0.859364,0.864696,0.861972,0.929719
6,KNN,KNeighborsClassifier(n_neighbors=3),0,0.805606,0.788696,0.862428,0.82388,0.859717
2,Decission Tree,DecisionTreeClassifier(),0,0.794167,0.804804,0.799201,0.801904,0.794167
