In [110]:
import pandas as pd
from sklearn.model_selection import cross_validate
from sklearn.metrics import make_scorer, balanced_accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, AdaBoostClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

def qtd_eventos(integer):
    """Turn a row's count of empty cells into its number of events.

    Computes ``(400 - integer) / 2`` and truncates the quotient towards zero.
    NOTE(review): presumably 400 is 200 event columns plus 200 time columns,
    so every event accounts for two filled cells -- confirm against the CSV.

    Args:
        integer: Number of missing cells in the row.

    Returns:
        The truncated quotient as an ``int``.
    """
    filled_cells = 400 - integer
    half = filled_cells / 2
    return int(half)

def preprocess_input(X, y, train_size=0.8, random_state=None):
    """Split the data into stratified train/test parts and standardise the features.

    The scaler is fitted on the training part only and then applied to both
    parts, so no statistic of the test rows leaks into the transformation.

    Args:
        X: Feature table.
        y: Target labels; also used to stratify the split.
        train_size: Fraction of rows assigned to the training part
            (default 0.8, the value previously hard-coded).
        random_state: Optional seed forwarded to ``train_test_split``. The
            default ``None`` keeps the previous unseeded behaviour; pass an
            integer to make a run reproducible.

    Returns:
        Tuple ``(X_train, X_test, y_train, y_test, scaler)`` where the two
        feature parts are already scaled and ``scaler`` is the fitted
        ``StandardScaler``.
    """
    # train_test_split returns new objects, so no defensive copy of X is needed.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, train_size=train_size, stratify=y, random_state=random_state
    )
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)
    return X_train, X_test, y_train, y_test, scaler

In [111]:
# Load the unified per-match event table.
df = pd.read_csv('../../data/crawler/unified-events-statistics-with-kills.csv')

# Number of events per match, derived from the count of empty cells in each row.
# The values go straight into the column: binding them to a variable named
# `qtd_eventos` replaced the helper function of the same name, so running this
# cell a second time failed with a TypeError.
df['qtd_eventos'] = [qtd_eventos(n_missing) for n_missing in df.isnull().sum(axis=1).tolist()]

# Event kinds in the order that fixes their integer codes:
# BLUE events map to 1..21 and RED events to 22..42.
event_kinds = [
    'kill',
    'plate',
    'first_blood',
    'dragon',
    'herald',
    'first_tower_top',
    'first_tower_mid',
    'first_tower_bot',
    'second_tower_top',
    'second_tower_mid',
    'second_tower_bot',
    'third_tower_top',
    'third_tower_mid',
    'third_tower_bot',
    'inhibitor_top',
    'inhibitor_mid',
    'inhibitor_bot',
    'baron',
    'elder_dragon',
    'nexus_tower',
    'nexus',
]
event_labels = [f'{team}: {kind}' for team in ('BLUE', 'RED') for kind in event_kinds]
df = df.replace(event_labels, list(range(1, len(event_labels) + 1)))

# Drop the 'game' column, treat missing cells as 0 and make every column integer.
df = df.drop(['game'], axis=1)
df = df.fillna(0)
df = df.astype(int)

# Target and feature matrix used by the full-match experiments.
y = df['result'].copy()
X = df.drop(['golId', 'result', 'qtd_eventos'], axis=1)


  interactivity=interactivity, compiler=compiler, result=result)


In [112]:
# Model under evaluation: a random forest with fixed hyper-parameters.
model = RandomForestClassifier(max_depth=50, n_estimators=600, min_samples_split=2, min_samples_leaf=1)

# Metrics, given as scikit-learn's built-in scorer names. They replace
# make_scorer(..., needs_proba=True): that keyword was deprecated and later
# removed from make_scorer, while the names are accepted by old and new
# releases alike. For a binary target they compute the same quantities.
scoring = {
    'Balanced Accuracy': 'balanced_accuracy',
    'Precision': 'precision_weighted',
    'Recall': 'recall_weighted',
    'F1': 'f1_weighted',
    'AUC': 'roc_auc',
}

# 5-fold cross-validation on all features; any failing fold raises immediately.
cv_results = cross_validate(model, X, y, cv=5, scoring=scoring, error_score='raise')

# Mean and standard deviation of every timing and metric across the folds.
for metric, scores in cv_results.items():
    print(f'{metric}: {scores.mean()} (+/- {scores.std()})')


fit_time: 6.9963537693023685 (+/- 0.3426352684189381)
score_time: 0.3157546043395996 (+/- 0.019628157492911753)
test_Balanced Accuracy: 0.9796460474916138 (+/- 0.004724696557132967)
test_Precision: 0.9795316248034542 (+/- 0.004739408920280752)
test_Recall: 0.979214332443987 (+/- 0.004851426607578243)
test_F1: 0.9792212143448286 (+/- 0.004848814780228129)
test_AUC: 0.9960561311696952 (+/- 0.0025369719494443147)


In [113]:
# Repeated hold-out benchmark on all features: ten independent stratified
# splits, eight classifiers trained and scored on each split.
metric_columns = ['Balanced Accuracy', 'Precision', 'Recall', 'F1-Score', 'auc']
run_frames = []

for i in range(10):
    X_train, X_test, y_train, y_test, scaler = preprocess_input(X, y)

    models = {
        'Logistic Regression': LogisticRegression(max_iter=50000),
        'Support Vector Machine (RBF Kernel)': SVC(C=100, gamma=0.001, kernel='rbf', max_iter=50000, probability=True),
        'Decission Tree': DecisionTreeClassifier(),
        'Adaboost': AdaBoostClassifier(),
        'Random Forest': RandomForestClassifier(max_depth=25, n_estimators=600, min_samples_split=2, min_samples_leaf=1),
        # `loss` is left at its default, which is the deviance / log-loss
        # objective; the literal 'deviance' is rejected by newer scikit-learn.
        'Gradient Boosting Classifier': GradientBoostingClassifier(criterion='friedman_mse', learning_rate=0.025, max_depth=4, max_features='log2', min_samples_leaf=8, min_samples_split=3, n_estimators=100, subsample=0.5),
        'KNN': KNeighborsClassifier(n_neighbors=3),
        'Gaussian NB': GaussianNB()
    }

    scores_list = []
    for name, model in models.items():
        model.fit(X_train, y_train)
        # Predict once and reuse the labels for every label-based metric.
        predicted = model.predict(X_test)
        scores_list.append({
            'model': name,
            'amount_events': '3',
            'execution': i,
            'Balanced Accuracy': balanced_accuracy_score(y_test, predicted),
            'Precision': precision_score(y_test, predicted),
            'Recall': recall_score(y_test, predicted),
            'F1-Score': f1_score(y_test, predicted),
            'auc': roc_auc_score(y_test, model.predict_proba(X_test)[:, 1])
        })
    run_frames.append(pd.DataFrame(scores_list))

# DataFrame.append no longer exists in pandas 2.x; concatenating the per-run
# frames once yields the same table (including the repeated 0..7 index).
executions = pd.concat(run_frames)

# Average every metric over the runs, one row per classifier.
avg_scores_list = []
for name in models:
    model_metrics = executions.loc[executions['model'] == name]
    avg_scores_list.append({
        'model': name,
        'amount_events': '3',
        **model_metrics[metric_columns].mean().to_dict()
    })

avg_scores = pd.DataFrame(avg_scores_list)
ordered_scores = avg_scores.sort_values(by='auc', ascending=False)
# Name of the classifier with the highest mean AUC.
best_model = ordered_scores.iloc[0]['model']
ordered_scores

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Unnamed: 0,model,amount_events,Balanced Accuracy,Precision,Recall,F1-Score,auc
4,Random Forest,3,0.979415,0.990562,0.9689,0.979588,0.995298
5,Gradient Boosting Classifier,3,0.970956,0.983039,0.959968,0.971347,0.993459
1,Support Vector Machine (RBF Kernel),3,0.986674,0.988968,0.985327,0.987134,0.992215
6,KNN,3,0.972639,0.959712,0.99059,0.974897,0.990086
0,Logistic Regression,3,0.978662,0.977489,0.981978,0.979718,0.987861
3,Adaboost,3,0.965022,0.968177,0.964593,0.966364,0.987173
2,Decission Tree,3,0.921032,0.925602,0.922967,0.924234,0.921032
7,Gaussian NB,3,0.540191,0.293818,0.186284,0.155483,0.900799


In [114]:
# Early-game window; both bounds are inclusive and use the unit of the
# event<N>time columns.
early_start = 0
early_end = 10

# Names of the 200 event columns and of their matching time columns.
eventos = [f'event{numero}' for numero in range(1, 201)]
tempos = [f'event{numero}time' for numero in range(1, 201)]

# For every event slot keep only the rows whose time falls inside the window.
eventos_early = [
    df.loc[df[tempo].between(early_start, early_end), evento]
    for evento, tempo in zip(eventos, tempos)
]

# Player statistics (GP / WR / KDA per role and side) are kept for every match.
# The ADC role is spelled 'ADC' on the blue side and 'Adc' on the red side.
colunas_estatisticas = [
    f'{lado}{papel}{medida}'
    for lado, papeis in (
        ('blue', ('Top', 'Jungle', 'Mid', 'ADC', 'Support')),
        ('red', ('Top', 'Jungle', 'Mid', 'Adc', 'Support')),
    )
    for papel in papeis
    for medida in ('GP', 'WR', 'KDA')
]
eventos_early.append(df[colunas_estatisticas])

# Align everything on the match index; events outside the window become 0.
df_early = pd.concat(eventos_early, axis=1).fillna(0).astype(int)
X_early = df_early.copy()

# Repeated hold-out benchmark on the early-game features: ten independent
# stratified splits, eight classifiers trained and scored on each split.
metric_columns = ['Balanced Accuracy', 'Precision', 'Recall', 'F1-Score', 'auc']
run_frames = []

for i in range(10):
    X_train, X_test, y_train, y_test, scaler = preprocess_input(X_early, y)

    models = {
        'Logistic Regression': LogisticRegression(max_iter=50000),
        'Support Vector Machine (RBF Kernel)': SVC(C=100, gamma=0.001, kernel='rbf', max_iter=50000, probability=True),
        'Decission Tree': DecisionTreeClassifier(),
        'Adaboost': AdaBoostClassifier(),
        'Random Forest': RandomForestClassifier(max_depth=25, n_estimators=600, min_samples_split=2, min_samples_leaf=1),
        # `loss` is left at its default, which is the deviance / log-loss
        # objective; the literal 'deviance' is rejected by newer scikit-learn.
        'Gradient Boosting Classifier': GradientBoostingClassifier(criterion='friedman_mse', learning_rate=0.025, max_depth=4, max_features='log2', min_samples_leaf=8, min_samples_split=3, n_estimators=100, subsample=0.5),
        'KNN': KNeighborsClassifier(n_neighbors=3),
        'Gaussian NB': GaussianNB()
    }

    scores_list = []
    for name, model in models.items():
        model.fit(X_train, y_train)
        # Predict once and reuse the labels for every label-based metric.
        predicted = model.predict(X_test)
        scores_list.append({
            'model': name,
            'amount_events': '3',
            'execution': i,
            'Balanced Accuracy': balanced_accuracy_score(y_test, predicted),
            'Precision': precision_score(y_test, predicted),
            'Recall': recall_score(y_test, predicted),
            'F1-Score': f1_score(y_test, predicted),
            'auc': roc_auc_score(y_test, model.predict_proba(X_test)[:, 1])
        })
    run_frames.append(pd.DataFrame(scores_list))

# DataFrame.append no longer exists in pandas 2.x; concatenating the per-run
# frames once yields the same table (including the repeated 0..7 index).
executions = pd.concat(run_frames)

# Average every metric over the runs, one row per classifier.
avg_scores_list = []
for name in models:
    model_metrics = executions.loc[executions['model'] == name]
    avg_scores_list.append({
        'model': name,
        'amount_events': '3',
        **model_metrics[metric_columns].mean().to_dict()
    })

avg_scores = pd.DataFrame(avg_scores_list)
ordered_scores = avg_scores.sort_values(by='auc', ascending=False)
# Name of the classifier with the highest mean AUC.
best_model = ordered_scores.iloc[0]['model']
ordered_scores

Unnamed: 0,model,amount_events,Balanced Accuracy,Precision,Recall,F1-Score,auc
3,Adaboost,3,0.681848,0.688334,0.717863,0.702635,0.753401
4,Random Forest,3,0.678687,0.678555,0.738278,0.707034,0.751069
5,Gradient Boosting Classifier,3,0.669224,0.657336,0.783413,0.714742,0.747997
0,Logistic Regression,3,0.674024,0.680112,0.714195,0.696658,0.738987
1,Support Vector Machine (RBF Kernel),3,0.668284,0.670371,0.724242,0.696204,0.73368
6,KNN,3,0.597039,0.611125,0.631579,0.621101,0.622613
2,Decission Tree,3,0.578574,0.595055,0.607496,0.601055,0.578574
7,Gaussian NB,3,0.503976,0.523232,0.99059,0.68475,0.533795


In [115]:
# Helper that counts the events of each side in one row.
def contar_eventos(row):
    """Count how many values of ``row`` are blue-side and red-side event codes.

    Codes 1..21 are counted as blue, codes 22..42 as red; every other value
    (including the 0 used for "no event") is ignored.

    Returns:
        Tuple ``(azul, vermelho)`` with the two counts.
    """
    azul = 0
    vermelho = 0
    for evento in row:
        if 1 <= evento <= 21:
            azul += 1
        elif 22 <= evento <= 42:
            vermelho += 1
    return azul, vermelho

def calcular_porcentagem(X, Y):
    """Return ``X`` as a percentage of ``Y``, i.e. ``(X / Y) * 100``."""
    fracao = X / Y
    return fracao * 100

# Only the event-code columns (names starting with 'event') take part in the count.
colunas_event = [coluna for coluna in df_early.columns if coluna.startswith('event')]

# Per-match number of blue-side and red-side events inside the early window.
df_early['qtd_eventos_azul'], df_early['qtd_eventos_vermelho'] = zip(*df_early[colunas_event].apply(contar_eventos, axis=1))

# 1 when blue has strictly more events than red, otherwise 0 (ties count as 0).
df_early['vencedor_early'] = (df_early['qtd_eventos_azul'] > df_early['qtd_eventos_vermelho']).astype(int)

# Attach the match result by plain column assignment. Concatenating `y` added a
# second 'result' column every time the cell was re-run, which broke the
# comparison below; assignment overwrites the column and keeps the cell idempotent.
df_early['result'] = y.astype(int)

qtd_iguais = (df_early['vencedor_early'] == df_early['result']).sum()
resultado = calcular_porcentagem(qtd_iguais, df_early.shape[0])

print(f"Em {resultado:.2f}% de das partidas, o vencedor do early é igual ao vencedor da partida.")

Em 68.31% de das partidas, o vencedor do early é igual ao vencedor da partida.


In [116]:
# Model under evaluation: a random forest with fixed hyper-parameters.
model = RandomForestClassifier(max_depth=50, n_estimators=600, min_samples_split=2, min_samples_leaf=1)

# Metrics, given as scikit-learn's built-in scorer names. They replace
# make_scorer(..., needs_proba=True): that keyword was deprecated and later
# removed from make_scorer, while the names are accepted by old and new
# releases alike. For a binary target they compute the same quantities.
scoring = {
    'Balanced Accuracy': 'balanced_accuracy',
    'Precision': 'precision_weighted',
    'Recall': 'recall_weighted',
    'F1': 'f1_weighted',
    'AUC': 'roc_auc',
}

# 5-fold cross-validation on the early-game features; any failing fold raises.
cv_results = cross_validate(model, X_early, y, cv=5, scoring=scoring, error_score='raise')

# Mean and standard deviation of every timing and metric across the folds.
for metric, scores in cv_results.items():
    print(f'{metric}: {scores.mean()} (+/- {scores.std()})')


fit_time: 5.9539073467254635 (+/- 0.4043916640720693)
score_time: 0.4172987937927246 (+/- 0.0215199153597928)
test_Balanced Accuracy: 0.6792529841296684 (+/- 0.004155892229579242)
test_Precision: 0.6815076071160439 (+/- 0.005362712120534683)
test_Recall: 0.681242816419849 (+/- 0.004666401630771789)
test_F1: 0.6802847545896646 (+/- 0.004131868274394661)
test_AUC: 0.753183611552211 (+/- 0.003911822934272404)


In [117]:
# Mid-game window; both bounds are inclusive and use the unit of the
# event<N>time columns.
mid_start = 0
mid_end = 20

# Names of the 200 event columns and of their matching time columns.
eventos = [f'event{numero}' for numero in range(1, 201)]
tempos = [f'event{numero}time' for numero in range(1, 201)]

# For every event slot keep only the rows whose time falls inside the window.
eventos_mid = [
    df.loc[df[tempo].between(mid_start, mid_end), evento]
    for evento, tempo in zip(eventos, tempos)
]

# Player statistics (GP / WR / KDA per role and side) are kept for every match.
# The ADC role is spelled 'ADC' on the blue side and 'Adc' on the red side.
colunas_estatisticas = [
    f'{lado}{papel}{medida}'
    for lado, papeis in (
        ('blue', ('Top', 'Jungle', 'Mid', 'ADC', 'Support')),
        ('red', ('Top', 'Jungle', 'Mid', 'Adc', 'Support')),
    )
    for papel in papeis
    for medida in ('GP', 'WR', 'KDA')
]
eventos_mid.append(df[colunas_estatisticas])

# Align everything on the match index; events outside the window become 0.
df_mid = pd.concat(eventos_mid, axis=1).fillna(0).astype(int)
X_mid = df_mid.copy()

# Repeated hold-out benchmark on the mid-game features: ten independent
# stratified splits, eight classifiers trained and scored on each split.
metric_columns = ['Balanced Accuracy', 'Precision', 'Recall', 'F1-Score', 'auc']
run_frames = []

for i in range(10):
    X_train, X_test, y_train, y_test, scaler = preprocess_input(X_mid, y)

    models = {
        'Logistic Regression': LogisticRegression(max_iter=50000),
        'Support Vector Machine (RBF Kernel)': SVC(C=100, gamma=0.001, kernel='rbf', max_iter=50000, probability=True),
        'Decission Tree': DecisionTreeClassifier(),
        'Adaboost': AdaBoostClassifier(),
        'Random Forest': RandomForestClassifier(max_depth=25, n_estimators=600, min_samples_split=2, min_samples_leaf=1),
        # `loss` is left at its default, which is the deviance / log-loss
        # objective; the literal 'deviance' is rejected by newer scikit-learn.
        'Gradient Boosting Classifier': GradientBoostingClassifier(criterion='friedman_mse', learning_rate=0.025, max_depth=4, max_features='log2', min_samples_leaf=8, min_samples_split=3, n_estimators=100, subsample=0.5),
        'KNN': KNeighborsClassifier(n_neighbors=3),
        'Gaussian NB': GaussianNB()
    }

    scores_list = []
    for name, model in models.items():
        model.fit(X_train, y_train)
        # Predict once and reuse the labels for every label-based metric.
        predicted = model.predict(X_test)
        scores_list.append({
            'model': name,
            'amount_events': '3',
            'execution': i,
            'Balanced Accuracy': balanced_accuracy_score(y_test, predicted),
            'Precision': precision_score(y_test, predicted),
            'Recall': recall_score(y_test, predicted),
            'F1-Score': f1_score(y_test, predicted),
            'auc': roc_auc_score(y_test, model.predict_proba(X_test)[:, 1])
        })
    run_frames.append(pd.DataFrame(scores_list))

# DataFrame.append no longer exists in pandas 2.x; concatenating the per-run
# frames once yields the same table (including the repeated 0..7 index).
executions = pd.concat(run_frames)

# Average every metric over the runs, one row per classifier.
avg_scores_list = []
for name in models:
    model_metrics = executions.loc[executions['model'] == name]
    avg_scores_list.append({
        'model': name,
        'amount_events': '3',
        **model_metrics[metric_columns].mean().to_dict()
    })

avg_scores = pd.DataFrame(avg_scores_list)
ordered_scores = avg_scores.sort_values(by='auc', ascending=False)
# Name of the classifier with the highest mean AUC.
best_model = ordered_scores.iloc[0]['model']
ordered_scores



Unnamed: 0,model,amount_events,Balanced Accuracy,Precision,Recall,F1-Score,auc
4,Random Forest,3,0.764222,0.762466,0.800319,0.780769,0.854549
5,Gradient Boosting Classifier,3,0.752395,0.739089,0.820415,0.777475,0.84267
3,Adaboost,3,0.757745,0.763713,0.77799,0.770576,0.842052
1,Support Vector Machine (RBF Kernel),3,0.738815,0.738398,0.77815,0.757563,0.822387
0,Logistic Regression,3,0.737608,0.741574,0.766188,0.753563,0.816493
6,KNN,3,0.698636,0.696538,0.7563,0.725043,0.747745
7,Gaussian NB,3,0.522276,0.647867,0.116427,0.120075,0.730624
2,Decission Tree,3,0.641645,0.657789,0.653429,0.655487,0.641645


In [119]:
# Helper that counts the events of each side in one row.
def contar_eventos(row):
    """Return ``(azul, vermelho)``: how many values of ``row`` lie in 1..21 and in 22..42.

    Values outside both ranges, such as the 0 used for "no event", are not counted.
    """
    valores = list(row)
    azul = len([evento for evento in valores if 1 <= evento <= 21])
    vermelho = len([evento for evento in valores if 22 <= evento <= 42])
    return azul, vermelho

def calcular_porcentagem(X, Y):
    """Express the ratio ``X / Y`` as a percentage."""
    return 100 * (X / Y)

# Only the event-code columns (names starting with 'event') take part in the count.
colunas_event = [coluna for coluna in df_mid.columns if coluna.startswith('event')]

# Per-match number of blue-side and red-side events inside the mid-game window.
df_mid['qtd_eventos_azul'], df_mid['qtd_eventos_vermelho'] = zip(*df_mid[colunas_event].apply(contar_eventos, axis=1))

# 1 when blue has strictly more events than red, otherwise 0 (ties count as 0).
df_mid['vencedor_mid'] = (df_mid['qtd_eventos_azul'] > df_mid['qtd_eventos_vermelho']).astype(int)

# Attach the match result by plain column assignment. Concatenating `y` added a
# second 'result' column every time the cell was re-run, which broke the
# comparison below; assignment overwrites the column and keeps the cell idempotent.
df_mid['result'] = y.astype(int)

qtd_iguais = (df_mid['vencedor_mid'] == df_mid['result']).sum()
resultado = calcular_porcentagem(qtd_iguais, df_mid.shape[0])

print(f"Em {resultado:.2f}% de das partidas, o vencedor do mid é igual ao vencedor da partida.")

Em 76.89% de das partidas, o vencedor do mid é igual ao vencedor da partida.


In [120]:
# Model under evaluation: a random forest with fixed hyper-parameters.
model = RandomForestClassifier(max_depth=50, n_estimators=600, min_samples_split=2, min_samples_leaf=1)

# Metrics, given as scikit-learn's built-in scorer names. They replace
# make_scorer(..., needs_proba=True): that keyword was deprecated and later
# removed from make_scorer, while the names are accepted by old and new
# releases alike. For a binary target they compute the same quantities.
scoring = {
    'Balanced Accuracy': 'balanced_accuracy',
    'Precision': 'precision_weighted',
    'Recall': 'recall_weighted',
    'F1': 'f1_weighted',
    'AUC': 'roc_auc',
}

# 5-fold cross-validation on the mid-game features; any failing fold raises.
cv_results = cross_validate(model, X_mid, y, cv=5, scoring=scoring, error_score='raise')

# Mean and standard deviation of every timing and metric across the folds.
for metric, scores in cv_results.items():
    print(f'{metric}: {scores.mean()} (+/- {scores.std()})')


fit_time: 11.203036880493164 (+/- 3.351055117929954)
score_time: 0.6467308521270752 (+/- 0.1927222201227349)
test_Balanced Accuracy: 0.7680810564522295 (+/- 0.004892411808365414)
test_Precision: 0.7700471674469375 (+/- 0.005327606381584977)
test_Recall: 0.7695380240469264 (+/- 0.005012892276224945)
test_F1: 0.7690847706516597 (+/- 0.004932277247063487)
test_AUC: 0.8569570878761024 (+/- 0.007602098141100193)


In [121]:
# Late-game window; both bounds are inclusive and use the unit of the
# event<N>time columns.
late_start = 0
late_end = 90

# Names of the 200 event columns and of their matching time columns.
eventos = [f'event{numero}' for numero in range(1, 201)]
tempos = [f'event{numero}time' for numero in range(1, 201)]

# For every event slot keep only the rows whose time falls inside the window.
eventos_late = [
    df.loc[df[tempo].between(late_start, late_end), evento]
    for evento, tempo in zip(eventos, tempos)
]

# Player statistics (GP / WR / KDA per role and side) are kept for every match.
# The ADC role is spelled 'ADC' on the blue side and 'Adc' on the red side.
colunas_estatisticas = [
    f'{lado}{papel}{medida}'
    for lado, papeis in (
        ('blue', ('Top', 'Jungle', 'Mid', 'ADC', 'Support')),
        ('red', ('Top', 'Jungle', 'Mid', 'Adc', 'Support')),
    )
    for papel in papeis
    for medida in ('GP', 'WR', 'KDA')
]
eventos_late.append(df[colunas_estatisticas])

# Align everything on the match index; events outside the window become 0.
df_late = pd.concat(eventos_late, axis=1).fillna(0).astype(int)
X_late = df_late.copy()

# Repeated hold-out benchmark on the late-game features: ten independent
# stratified splits, eight classifiers trained and scored on each split.
metric_columns = ['Balanced Accuracy', 'Precision', 'Recall', 'F1-Score', 'auc']
run_frames = []

for i in range(10):
    X_train, X_test, y_train, y_test, scaler = preprocess_input(X_late, y)

    models = {
        'Logistic Regression': LogisticRegression(max_iter=50000),
        'Support Vector Machine (RBF Kernel)': SVC(C=100, gamma=0.001, kernel='rbf', max_iter=50000, probability=True),
        'Decission Tree': DecisionTreeClassifier(),
        'Adaboost': AdaBoostClassifier(),
        'Random Forest': RandomForestClassifier(max_depth=25, n_estimators=600, min_samples_split=2, min_samples_leaf=1),
        # `loss` is left at its default, which is the deviance / log-loss
        # objective; the literal 'deviance' is rejected by newer scikit-learn.
        'Gradient Boosting Classifier': GradientBoostingClassifier(criterion='friedman_mse', learning_rate=0.025, max_depth=4, max_features='log2', min_samples_leaf=8, min_samples_split=3, n_estimators=100, subsample=0.5),
        'KNN': KNeighborsClassifier(n_neighbors=3),
        'Gaussian NB': GaussianNB()
    }

    scores_list = []
    for name, model in models.items():
        model.fit(X_train, y_train)
        # Predict once and reuse the labels for every label-based metric.
        predicted = model.predict(X_test)
        scores_list.append({
            'model': name,
            'model_obj': model,
            'amount_events': '3',
            'execution': i,
            'Balanced Accuracy': balanced_accuracy_score(y_test, predicted),
            'Precision': precision_score(y_test, predicted),
            'Recall': recall_score(y_test, predicted),
            'F1-Score': f1_score(y_test, predicted),
            'auc': roc_auc_score(y_test, model.predict_proba(X_test)[:, 1])
        })
    run_frames.append(pd.DataFrame(scores_list))

# DataFrame.append no longer exists in pandas 2.x; concatenating the per-run
# frames once yields the same table (including the repeated 0..7 index).
executions = pd.concat(run_frames)

# Average every metric over the runs, one row per classifier. The estimator
# stored next to each average is the one fitted in the LAST run, i.e. on the
# split that X_train / X_test / scaler still refer to after the loop.
avg_scores_list = []
for name, model in models.items():
    model_metrics = executions.loc[executions['model'] == name]
    avg_scores_list.append({
        'model': name,
        'model_obj': model,
        'amount_events': '3',
        **model_metrics[metric_columns].mean().to_dict()
    })

avg_scores = pd.DataFrame(avg_scores_list)
ordered_scores = avg_scores.sort_values(by='auc', ascending=False)
# Fitted estimator of the classifier with the highest mean AUC.
best_model = ordered_scores.iloc[0]['model_obj']
ordered_scores

Unnamed: 0,model,model_obj,amount_events,Balanced Accuracy,Precision,Recall,F1-Score,auc
4,Random Forest,"(DecisionTreeClassifier(max_depth=25, max_feat...",3,0.977616,0.991313,0.964434,0.977683,0.995291
5,Gradient Boosting Classifier,([DecisionTreeRegressor(criterion='friedman_ms...,3,0.967008,0.980205,0.955024,0.967438,0.992712
1,Support Vector Machine (RBF Kernel),"SVC(C=100, gamma=0.001, max_iter=50000, probab...",3,0.981788,0.988379,0.976077,0.982183,0.991227
3,Adaboost,"(DecisionTreeClassifier(max_depth=1, random_st...",3,0.963178,0.96451,0.965072,0.964772,0.98743
6,KNN,KNeighborsClassifier(n_neighbors=3),3,0.974353,0.973789,0.977352,0.975562,0.986723
0,Logistic Regression,LogisticRegression(max_iter=50000),3,0.931902,0.946432,0.920574,0.933275,0.974468
7,Gaussian NB,GaussianNB(),3,0.530943,0.964967,0.116746,0.121468,0.928783
2,Decission Tree,DecisionTreeClassifier(),3,0.919438,0.919302,0.927592,0.923406,0.919438


In [122]:
# Helper that counts the events of each side in one row.
def contar_eventos(row):
    """Tally blue-side (codes 1..21) and red-side (codes 22..42) events in ``row``.

    Returns:
        Tuple ``(azul, vermelho)``; values outside both ranges are ignored.
    """
    valores = tuple(row)

    def na_faixa(minimo, maximo):
        # Number of values inside the inclusive range [minimo, maximo].
        return len([evento for evento in valores if minimo <= evento <= maximo])

    return na_faixa(1, 21), na_faixa(22, 42)

def calcular_porcentagem(X, Y):
    """Percentage that ``X`` represents of ``Y``: the quotient ``X / Y`` times 100."""
    quociente = X / Y
    porcentagem = quociente * 100
    return porcentagem

# Only the event-code columns (names starting with 'event') take part in the count.
colunas_event = [coluna for coluna in df_late.columns if coluna.startswith('event')]

# Per-match number of blue-side and red-side events inside the late-game window.
df_late['qtd_eventos_azul'], df_late['qtd_eventos_vermelho'] = zip(*df_late[colunas_event].apply(contar_eventos, axis=1))

# 1 when blue has strictly more events than red, otherwise 0 (ties count as 0).
df_late['vencedor_late'] = (df_late['qtd_eventos_azul'] > df_late['qtd_eventos_vermelho']).astype(int)

# Attach the match result by plain column assignment. Concatenating `y` added a
# second 'result' column every time the cell was re-run, which broke the
# comparison below; assignment overwrites the column and keeps the cell idempotent.
df_late['result'] = y.astype(int)

qtd_iguais = (df_late['vencedor_late'] == df_late['result']).sum()
resultado = calcular_porcentagem(qtd_iguais, df_late.shape[0])

print(f"Em {resultado:.2f}% de das partidas, o vencedor do late é igual ao vencedor da partida.")

Em 97.24% de das partidas, o vencedor do late é igual ao vencedor da partida.


In [123]:
import pickle

# Persist the selected estimator together with the scaler currently bound to
# `scaler`, each in its own pickle file.
# NOTE(review): pickle files execute code on load -- only load these from a trusted location.
for destino, artefato in (
    ("../../models/game-stages/model.pkl", best_model),
    ("../../scalers/game-stages/scaler.pkl", scaler),
):
    with open(destino, "wb") as f:
        pickle.dump(artefato, f)

In [124]:
# Model under evaluation: a random forest with fixed hyper-parameters.
model = RandomForestClassifier(max_depth=50, n_estimators=600, min_samples_split=2, min_samples_leaf=1)

# Metrics, given as scikit-learn's built-in scorer names. They replace
# make_scorer(..., needs_proba=True): that keyword was deprecated and later
# removed from make_scorer, while the names are accepted by old and new
# releases alike. For a binary target they compute the same quantities.
scoring = {
    'Balanced Accuracy': 'balanced_accuracy',
    'Precision': 'precision_weighted',
    'Recall': 'recall_weighted',
    'F1': 'f1_weighted',
    'AUC': 'roc_auc',
}

# 5-fold cross-validation on the late-game features; any failing fold raises.
cv_results = cross_validate(model, X_late, y, cv=5, scoring=scoring, error_score='raise')

# Mean and standard deviation of every timing and metric across the folds.
for metric, scores in cv_results.items():
    print(f'{metric}: {scores.mean()} (+/- {scores.std()})')


fit_time: 5.551531219482422 (+/- 0.38217359509065657)
score_time: 0.34269938468933103 (+/- 0.056593213067047066)
test_Balanced Accuracy: 0.9779587020555821 (+/- 0.004375606023011859)
test_Precision: 0.9778840432364347 (+/- 0.004324082857280062)
test_Recall: 0.9773851560781905 (+/- 0.004510052909269406)
test_F1: 0.9773936314185032 (+/- 0.0045075873844320985)
test_AUC: 0.9958930575601915 (+/- 0.0024022900373584875)


In [125]:
# Predict labels for the current test split with the selected model.
# NOTE(review): relies on `best_model`, `X_test` and `y_test` left behind by an
# earlier cell -- confirm they all come from the same split before trusting the result.
y_pred = best_model.predict(X_test)
# Replace y_test's index with 0..n-1 (the old index is discarded).
y_test = y_test.reset_index(drop=True)

In [137]:
# Per-feature importance scores exposed by the selected model.
feature_importances = best_model.feature_importances_

# One row per late-game feature, ranked from most to least important.
importance_df = (
    pd.DataFrame({'Feature': X_late.columns, 'Importance': feature_importances})
    .sort_values(by='Importance', ascending=False)
)

# Show the 20 highest-ranked features.
importance_df.head(20)

Unnamed: 0,Feature,Importance
44,event45,0.046113
45,event46,0.040576
46,event47,0.038998
47,event48,0.037346
43,event44,0.035508
41,event42,0.032501
39,event40,0.032155
42,event43,0.03119
49,event50,0.031099
48,event49,0.030971


In [150]:
# Raw event table, re-read so the event labels are still text.
df_eventos = pd.read_csv('../../data/crawler/unified-events-statistics-with-kills.csv')

# Event columns to inspect.
colunas = [f'event{numero}' for numero in (45, 46, 47, 48, 44)]

for coluna in colunas:
    # Five most frequent values of the column (value_counts sorts by frequency).
    contagem = df_eventos[coluna].value_counts().iloc[:5]
    print(f"Cinco valores mais frequentes em {coluna}:", contagem, "=" * 30, sep="\n")

Cinco valores mais frequentes em event45:
BLUE: kill           1448
RED: kill            1410
BLUE: nexus_tower     192
BLUE: dragon          160
RED: dragon           156
Name: event45, dtype: int64
Cinco valores mais frequentes em event46:
BLUE: kill           1487
RED: kill            1326
BLUE: nexus_tower     183
RED: nexus_tower      176
BLUE: dragon          150
Name: event46, dtype: int64
Cinco valores mais frequentes em event47:
BLUE: kill           1449
RED: kill            1304
BLUE: nexus_tower     219
RED: nexus_tower      184
BLUE: dragon          138
Name: event47, dtype: int64
Cinco valores mais frequentes em event48:
BLUE: kill           1348
RED: kill            1320
BLUE: nexus_tower     228
RED: nexus_tower      193
BLUE: dragon          127
Name: event48, dtype: int64
Cinco valores mais frequentes em event44:
BLUE: kill           1462
RED: kill            1425
BLUE: nexus_tower     194
BLUE: dragon          166
RED: dragon           165
Name: event44, dtype: int64


In [151]:
# Raw event table, re-read from the crawler output.
df_eventos = pd.read_csv('../../data/crawler/unified-events-statistics-with-kills.csv')

# Time columns of the event slots to inspect.
colunas = [f'event{numero}time' for numero in (45, 46, 47, 48, 44)]

for coluna in colunas:
    # Five most frequent values of the column (value_counts sorts by frequency).
    contagem = df_eventos[coluna].value_counts().iloc[:5]
    print(f"Cinco valores mais frequentes em {coluna}:", contagem, "=" * 30, sep="\n")

Cinco valores mais frequentes em event45time:
26.0    466
27.0    458
28.0    445
29.0    437
30.0    409
Name: event45time, dtype: int64
Cinco valores mais frequentes em event46time:
29.0    457
28.0    456
27.0    452
26.0    433
30.0    391
Name: event46time, dtype: int64
Cinco valores mais frequentes em event47time:
29.0    463
28.0    456
27.0    450
30.0    389
26.0    383
Name: event47time, dtype: int64
Cinco valores mais frequentes em event48time:
29.0    453
28.0    446
27.0    433
30.0    382
26.0    358
Name: event48time, dtype: int64
Cinco valores mais frequentes em event44time:
27.0    465
26.0    457
29.0    446
28.0    444
25.0    431
Name: event44time, dtype: int64


In [138]:
from sklearn.metrics import accuracy_score

# Accuracy from the stored predictions, AUC from the positive-class probability.
accuracy = accuracy_score(y_test, y_pred)
y_prob = best_model.predict_proba(X_test)[:, 1]
auc = roc_auc_score(y_test, y_prob)

# Index labels of the test matches whose prediction differs from the true result.
partidas_erradas = y_test.loc[y_test != y_pred].index.tolist()

print(f'Testes realizados com {len(y_test)!s} partidas.')
print(f'Acurácia: {accuracy!s}')
print(f'AUC: {auc!s}')
# One line per misclassified match: true result versus predicted result.
for index in partidas_erradas:
    print(f'O resultado era {y_test[index]!s} e o previsto foi {y_pred[index]!s}')

Testes realizados com 1203 partidas.
Acurácia: 0.9767248545303409
AUC: 0.9940385211766791
O resultado era 1 e o previsto foi 0
O resultado era 1 e o previsto foi 0
O resultado era 0 e o previsto foi 1
O resultado era 1 e o previsto foi 0
O resultado era 1 e o previsto foi 0
O resultado era 1 e o previsto foi 0
O resultado era 0 e o previsto foi 1
O resultado era 1 e o previsto foi 0
O resultado era 1 e o previsto foi 0
O resultado era 0 e o previsto foi 1
O resultado era 1 e o previsto foi 0
O resultado era 1 e o previsto foi 0
O resultado era 1 e o previsto foi 0
O resultado era 1 e o previsto foi 0
O resultado era 1 e o previsto foi 0
O resultado era 1 e o previsto foi 0
O resultado era 1 e o previsto foi 0
O resultado era 1 e o previsto foi 0
O resultado era 0 e o previsto foi 1
O resultado era 1 e o previsto foi 0
O resultado era 1 e o previsto foi 0
O resultado era 1 e o previsto foi 0
O resultado era 1 e o previsto foi 0
O resultado era 1 e o previsto foi 0
O resultado era 1 e o 