In [12]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, AdaBoostClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.metrics import make_scorer, balanced_accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
from sklearn.model_selection import cross_validate
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier

def qtd_eventos(integer):
    """Return half of ``400 - integer`` as an int, truncated toward zero."""
    remaining = 400 - integer
    half = remaining / 2
    # int() truncates toward zero (it does not floor), which matters when
    # `remaining` is negative and odd.
    return int(half)

def preprocess_input(X, y, random_state=None):
    """Split ``X``/``y`` into stratified train/test sets and standardize the features.

    The scaler is fitted on the training portion only and then applied to both
    portions, so test-set statistics never influence the scaling.

    Parameters
    ----------
    X : feature table passed to ``train_test_split`` and ``StandardScaler``.
    y : labels; also used to stratify the 80/20 split.
    random_state : optional seed forwarded to ``train_test_split``. The default
        ``None`` keeps the previous behaviour (a different split on every call);
        pass an int to make a split reproducible.

    Returns
    -------
    tuple
        ``(X_train, X_test, y_train, y_test, scaler)`` where ``X_train`` and
        ``X_test`` are the outputs of the fitted ``scaler`` and ``scaler`` is
        returned so the same transformation can be reused on new data.
    """
    # No defensive copy of X is needed: neither the split nor the scaler
    # (with its default settings) modifies its input.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, train_size=0.8, stratify=y, random_state=random_state
    )

    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)
    return X_train, X_test, y_train, y_test, scaler

In [13]:
# Load the unified events-statistics CSV.
df = pd.read_csv('../../data/crawler/unified-events-statistics-with-kills-as-columns-early.csv')

# Features: every column except 'golId' and the 'result' label.
X = df.drop(columns=['golId', 'result'])
# Target: an independent copy of the 'result' column.
y = df['result'].copy()
X.head()

Unnamed: 0,blueTopGP,blueTopWR,blueTopKDA,blueJungleGP,blueJungleWR,blueJungleKDA,blueMidGP,blueMidWR,blueMidKDA,blueADCGP,...,RED:third_tower_top,RED:third_tower_mid,RED:third_tower_bot,RED:inhibitor_top,RED:inhibitor_mid,RED:inhibitor_bot,RED:baron,RED:elder_dragon,RED:nexus_tower,RED:nexus
0,0,0.0,0.0,13,0.54,2.44,3,0.33,3.0,0,...,0,0,0,0,0,0,0,0,0,0
1,4,0.75,6.2,0,0.0,0.0,1,1.0,9.0,2,...,0,0,0,0,0,0,0,0,0,0
2,3,0.33,13.5,2,0.5,6.67,2,0.5,23.0,2,...,0,0,0,0,0,0,0,0,0,0
3,4,0.5,5.17,6,0.83,7.0,1,1.0,9.0,8,...,0,0,0,0,0,0,0,0,0,0
4,11,0.55,5.12,16,0.44,5.11,3,1.0,9.33,4,...,0,0,0,0,0,0,0,0,0,0


In [14]:
# Define the model
model = RandomForestClassifier(max_depth=50, n_estimators=600, min_samples_split=2, min_samples_leaf=1)

# Define the metrics using scikit-learn's built-in scorer names.
# These replace the hand-built make_scorer(...) objects: the `needs_proba`
# argument used for the AUC scorer is deprecated/removed in recent scikit-learn
# releases, while the scorer names work on old and new versions alike.
# 'roc_auc' scores the positive-class probability for estimators that only
# expose predict_proba (as RandomForestClassifier does); the `average` option is
# ignored for a binary target, so the value is unchanged.
scoring = {
    'Balanced Accuracy': 'balanced_accuracy',
    'Precision': 'precision_weighted',
    'Recall': 'recall_weighted',
    'F1': 'f1_weighted',
    'AUC': 'roc_auc',
}

# Run 5-fold cross-validation; error_score='raise' surfaces any failure
# inside a fold instead of silently recording NaN.
cv_results = cross_validate(model, X, y, cv=5, scoring=scoring, error_score='raise')

# Report mean and standard deviation of every entry returned by
# cross_validate (this includes fit_time and score_time).
for metric, scores in cv_results.items():
    print(f'{metric}: {scores.mean()} (+/- {scores.std()})')


fit_time: 6.799415063858032 (+/- 0.5843438218984731)
score_time: 0.5583905220031739 (+/- 0.1445723685504604)
test_Balanced Accuracy: 0.677782402791467 (+/- 0.007743781117574003)
test_Precision: 0.6797319651755218 (+/- 0.008429901715484337)
test_Recall: 0.6795783696609834 (+/- 0.008125863173276036)
test_F1: 0.6787780019850088 (+/- 0.007785715413578749)
test_AUC: 0.7421438196930833 (+/- 0.006613463982077911)


In [15]:
# Experiment settings.
N_EXECUTIONS = 10    # number of independent train/test resamplings
AMOUNT_EVENTS = '3'  # label stored with every result row
METRIC_COLUMNS = ['Balanced Accuracy', 'Precision', 'Recall', 'F1-Score', 'auc']

# One dict per (execution, model). The DataFrame is built once after the loop:
# DataFrame.append was removed in pandas 2.0, and growing a frame inside a loop
# is quadratic anyway.
execution_records = []

for i in range(N_EXECUTIONS):
    # NOTE: the split is not seeded here, so the numbers vary between runs.
    X_train, X_test, y_train, y_test, scaler = preprocess_input(X, y)

    models = {
        'Logistic Regression': LogisticRegression(max_iter=50000),
        'Support Vector Machine (RBF Kernel)': SVC(C=100, gamma=0.001, kernel='rbf', max_iter=50000, probability=True),
        'Decission Tree': DecisionTreeClassifier(),
        'Adaboost': AdaBoostClassifier(),
        'Random Forest': RandomForestClassifier(max_depth=25, n_estimators=600, min_samples_split=2, min_samples_leaf=1),
        # `loss` is left at its default: that default is the same logistic
        # loss the old 'deviance' name selected, and the 'deviance' spelling
        # is rejected by newer scikit-learn releases.
        'Gradient Boosting Classifier': GradientBoostingClassifier(
            criterion='friedman_mse', learning_rate=0.025, max_depth=4,
            max_features='log2', min_samples_leaf=8, min_samples_split=3,
            n_estimators=100, subsample=0.5,
        ),
        'KNN': KNeighborsClassifier(n_neighbors=3),
        'Gaussian NB': GaussianNB(),
    }

    for name, model in models.items():
        model.fit(X_train, y_train)

        # Predict once per model and reuse the result for every metric.
        predictions = model.predict(X_test)
        positive_proba = model.predict_proba(X_test)[:, 1]

        execution_records.append({
            'model': name,
            'amount_events': AMOUNT_EVENTS,
            'execution': i,
            'Balanced Accuracy': balanced_accuracy_score(y_test, predictions),
            'Precision': precision_score(y_test, predictions),
            'Recall': recall_score(y_test, predictions),
            'F1-Score': f1_score(y_test, predictions),
            'auc': roc_auc_score(y_test, positive_proba),
        })

executions = pd.DataFrame(execution_records)

# Mean of every metric per model; sort=False keeps the order in which the
# models were declared.
mean_scores = executions.groupby('model', sort=False)[METRIC_COLUMNS].mean()

# `model_obj` holds the estimator fitted in the LAST execution only; the metric
# columns are averages over all executions.
avg_scores = pd.DataFrame([
    {
        'model': name,
        'model_obj': model,
        'amount_events': AMOUNT_EVENTS,
        **mean_scores.loc[name].to_dict(),
    }
    for name, model in models.items()
])

ordered_scores = avg_scores.sort_values(by='auc', ascending=False)
best_model = ordered_scores.iloc[0]['model_obj']
ordered_scores

Unnamed: 0,model,model_obj,amount_events,Balanced Accuracy,Precision,Recall,F1-Score,auc
0,Logistic Regression,LogisticRegression(max_iter=50000),3,0.689335,0.69767,0.717384,0.707316,0.761536
3,Adaboost,"(DecisionTreeClassifier(max_depth=1, random_st...",3,0.682523,0.692642,0.706539,0.699467,0.752909
1,Support Vector Machine (RBF Kernel),"SVC(C=100, gamma=0.001, max_iter=50000, probab...",3,0.681504,0.689178,0.713876,0.701171,0.75037
5,Gradient Boosting Classifier,([DecisionTreeRegressor(criterion='friedman_ms...,3,0.675306,0.673782,0.742105,0.70621,0.749736
4,Random Forest,"(DecisionTreeClassifier(max_depth=25, max_feat...",3,0.674782,0.681299,0.712759,0.696576,0.741452
7,Gaussian NB,GaussianNB(),3,0.580918,0.70776,0.374163,0.382069,0.715343
6,KNN,KNeighborsClassifier(n_neighbors=3),3,0.589409,0.604896,0.619617,0.612138,0.613759
2,Decission Tree,DecisionTreeClassifier(),3,0.585756,0.605084,0.592344,0.598519,0.585756


In [18]:
# Pick the highest-AUC estimator that exposes `feature_importances_`, because
# the feature-importance cell below reads that attribute. A hard-coded rank
# (e.g. iloc[1]) only works while the model at that rank happens to have it,
# and the ranking can change from run to run.
ranked_models = avg_scores.sort_values(by='auc', ascending=False)['model_obj']
best_model = next(
    (candidate for candidate in ranked_models if hasattr(candidate, 'feature_importances_')),
    None,
)
if best_model is None:
    raise ValueError('No evaluated model exposes feature_importances_')

# Predict on the test split
y_pred = best_model.predict(X_test)
# Renumber the labels 0..n-1 so they line up with positions in y_pred.
y_test = y_test.reset_index(drop=True)

In [19]:
# Importance score of each feature as reported by the selected model.
feature_importances = best_model.feature_importances_

# Pair every importance with its column name and rank from most to least
# important.
importance_df = pd.DataFrame(
    {'Feature': X.columns, 'Importance': feature_importances}
).sort_values(by='Importance', ascending=False)

# Display the 20 highest-ranked features.
importance_df.head(20)

Unnamed: 0,Feature,Importance
50,RED:kill,0.12
11,blueADCKDA,0.1
26,redAdcKDA,0.08
30,BLUE:kill,0.06
20,redJungleKDA,0.06
5,blueJungleKDA,0.06
24,redAdcGP,0.04
14,blueSupportKDA,0.04
53,RED:dragon,0.04
9,blueADCGP,0.04


In [20]:
# Accuracy and AUC of `best_model` on the current test split.
accuracy = accuracy_score(y_test, y_pred)
y_prob = best_model.predict_proba(X_test)[:, 1]
auc = roc_auc_score(y_test, y_prob)

# Find the mispredicted matches by POSITION. y_pred is a plain array, so
# comparing and indexing positionally stays correct whether or not y_test's
# index has been reset beforehand.
y_true = y_test.to_numpy()
partidas_erradas = (y_true != y_pred).nonzero()[0].tolist()

print('Testes realizados com ' + str(len(y_test)) + ' partidas.')
print('Acurácia: ' + str(accuracy))
print('AUC: ' + str(auc))
# One line per mispredicted match: expected label vs. predicted label.
for index in partidas_erradas:
    print('O resultado era ' + str(y_true[index]) + ' e o previsto foi ' + str(y_pred[index]))

Testes realizados com 1203 partidas.
Acurácia: 0.6774729842061513
AUC: 0.7419493731171363
O resultado era 1 e o previsto foi 0
O resultado era 0 e o previsto foi 1
O resultado era 0 e o previsto foi 1
O resultado era 1 e o previsto foi 0
O resultado era 1 e o previsto foi 0
O resultado era 0 e o previsto foi 1
O resultado era 0 e o previsto foi 1
O resultado era 0 e o previsto foi 1
O resultado era 1 e o previsto foi 0
O resultado era 1 e o previsto foi 0
O resultado era 1 e o previsto foi 0
O resultado era 0 e o previsto foi 1
O resultado era 0 e o previsto foi 1
O resultado era 1 e o previsto foi 0
O resultado era 1 e o previsto foi 0
O resultado era 1 e o previsto foi 0
O resultado era 1 e o previsto foi 0
O resultado era 0 e o previsto foi 1
O resultado era 1 e o previsto foi 0
O resultado era 1 e o previsto foi 0
O resultado era 0 e o previsto foi 1
O resultado era 0 e o previsto foi 1
O resultado era 0 e o previsto foi 1
O resultado era 0 e o previsto foi 1
O resultado era 1 e o 