In [1]:
import pandas as pd
from sklearn.model_selection import cross_validate
from sklearn.metrics import make_scorer, balanced_accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, AdaBoostClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

def qtd_eventos(integer):
    """Turn a row's count of empty cells into a count of recorded events.

    ``integer`` is the number of null cells in one row. The result is
    ``(400 - integer) / 2`` truncated toward zero.

    NOTE(review): the constant 400 presumably stands for 200 events with two
    cells each (label and time); nulls in any other column would skew the
    count — confirm against the CSV schema.
    """
    filled_cells = 400 - integer
    return int(filled_cells / 2)

def preprocess_input(X,y,random_state=None):
    """Split the data into stratified train/test sets and standardize the features.

    Parameters
    ----------
    X : feature table accepted by ``train_test_split``.
    y : target labels; also used to stratify the split.
    random_state : optional seed forwarded to ``train_test_split``. The default
        ``None`` keeps the previous behavior (a different split on every call);
        pass an integer to make a split reproducible.

    Returns
    -------
    X_train, X_test : outputs of ``StandardScaler.transform`` (80% / 20% of the rows).
    y_train, y_test : the matching target subsets.
    scaler : the ``StandardScaler`` fitted on the training split.
    """
    X_train,X_test,y_train,y_test = train_test_split(
        X,y,train_size=0.8,stratify=y,random_state=random_state)

    # Fit on the training split only so no test-set statistics leak into the scaling.
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)
    return X_train,X_test,y_train,y_test,scaler

In [2]:
df = pd.read_csv('../../data/crawler/unified-events-statistics-with-kills.csv')

# Per-match event count, derived from the number of empty cells in each row.
# The values are written straight into the column: binding them to a variable
# named `qtd_eventos` replaced the helper function with a list, so running this
# cell a second time failed with "'list' object is not callable".
df['qtd_eventos'] = [qtd_eventos(n_nulls) for n_nulls in df.isnull().sum(axis=1).tolist()]

# Encode the event labels as integers: the 21 BLUE labels map to 1-21 and the
# 21 RED labels to 22-42, in the order listed below.
event_names = [
    'kill',
    'plate',
    'first_blood',
    'dragon',
    'herald',
    'first_tower_top',
    'first_tower_mid',
    'first_tower_bot',
    'second_tower_top',
    'second_tower_mid',
    'second_tower_bot',
    'third_tower_top',
    'third_tower_mid',
    'third_tower_bot',
    'inhibitor_top',
    'inhibitor_mid',
    'inhibitor_bot',
    'baron',
    'elder_dragon',
    'nexus_tower',
    'nexus',
]
event_labels = [f'{team}: {event_name}' for team in ('BLUE', 'RED') for event_name in event_names]
df = df.replace(event_labels, list(range(1, len(event_labels) + 1)))

# Drop the match identifier, fill missing cells with 0 and cast everything to int.
df = df.drop(['game'],axis=1).fillna(0).astype(int)

# Target and feature matrix used by the modelling cells.
y = df['result'].copy()
X = df.drop(['golId','result','qtd_eventos'],axis=1)


  interactivity=interactivity, compiler=compiler, result=result)


In [3]:
# Model under evaluation
model = RandomForestClassifier(
    n_estimators=600,
    max_depth=50,
    min_samples_leaf=1,
    min_samples_split=2,
)

# Metrics computed on every fold (weighted averages; AUC is scored from predicted probabilities)
scoring = dict([
    ('Balanced Accuracy', make_scorer(balanced_accuracy_score)),
    ('Precision', make_scorer(precision_score, average='weighted')),
    ('Recall', make_scorer(recall_score, average='weighted')),
    ('F1', make_scorer(f1_score, average='weighted')),
    ('AUC', make_scorer(roc_auc_score, needs_proba=True, average='weighted')),
])

# 5-fold cross-validation on the full feature set; scoring failures raise instead of producing NaN
cv_results = cross_validate(model, X, y, scoring=scoring, cv=5, error_score='raise')

# Mean and standard deviation of every timing/metric entry across the folds
for metric in cv_results:
    scores = cv_results[metric]
    print(f'{metric}: {scores.mean()} (+/- {scores.std()})')


fit_time: 7.6569136619567875 (+/- 0.5860570064756998)
score_time: 0.3426473617553711 (+/- 0.03792541358553678)
test_Balanced Accuracy: 0.9803532012050284 (+/- 0.005187580243418611)
test_Precision: 0.9802352640201779 (+/- 0.00519852539374372)
test_Recall: 0.979879336600263 (+/- 0.005350485383502701)
test_F1: 0.9798865729927364 (+/- 0.005347444803129479)
test_AUC: 0.9960870306899411 (+/- 0.0024319403787203416)


In [4]:
# Repeated hold-out comparison of the candidate classifiers on the full
# feature set: 10 stratified 80/20 splits, every model refitted on each split.
execution_frames = []

for i in range(10):
    X_train,X_test,y_train,y_test,scaler = preprocess_input(X,y)

    # Estimators are rebuilt on every repetition so each fit starts from scratch.
    # GradientBoostingClassifier keeps its default loss: 'deviance' was the
    # default where it is accepted, but it was later renamed 'log_loss' and
    # removed, so passing it explicitly breaks on newer scikit-learn.
    models = {
        'Logistic Regression': LogisticRegression(max_iter=50000),
        'Support Vector Machine (RBF Kernel)': SVC(C=100,gamma=0.001,kernel='rbf',max_iter=50000,probability=True),
        'Decission Tree': DecisionTreeClassifier(),
        'Adaboost': AdaBoostClassifier(),
        'Random Forest': RandomForestClassifier(max_depth=25,n_estimators=600,min_samples_split=2,min_samples_leaf=1),
        'Gradient Boosting Classifier': GradientBoostingClassifier(criterion='friedman_mse',learning_rate=0.025,max_depth=4,max_features='log2',min_samples_leaf=8,min_samples_split=3, n_estimators=100,subsample=0.5),
        'KNN': KNeighborsClassifier(n_neighbors=3),
        'Gaussian NB': GaussianNB()
    }

    scores_list = []

    for name, model in models.items():
        model.fit(X_train,y_train)

        # Predict once and reuse the labels for every label-based metric.
        test_pred = model.predict(X_test)
        scores_list.append({
            'model': name,
            'amount_events': '3',
            'execution': i,
            'Balanced Accuracy': balanced_accuracy_score(y_test,test_pred),
            'Precision': precision_score(y_test,test_pred),
            'Recall': recall_score(y_test,test_pred),
            'F1-Score': f1_score(y_test,test_pred),
            'auc': roc_auc_score(y_test,model.predict_proba(X_test)[:,1])
        })

    execution_frames.append(pd.DataFrame(scores_list))

# A single concat replaces the per-iteration DataFrame.append, which is
# deprecated since pandas 1.4 and removed in pandas 2.0.
executions = pd.concat(execution_frames)

# Average every metric over the repetitions, one row per model in the
# order the models are declared.
metric_columns = ['Balanced Accuracy', 'Precision', 'Recall', 'F1-Score', 'auc']
avg_scores_list = []

for name in models:
    model_metrics = executions.loc[executions['model'] == name]
    averaged = {'model': name, 'amount_events': '3'}
    for metric_name in metric_columns:
        averaged[metric_name] = model_metrics[metric_name].mean()
    avg_scores_list.append(averaged)

avg_scores = pd.DataFrame(avg_scores_list)

# Rank by mean AUC; the top row names the best model.
ordered_scores = avg_scores.sort_values(by='auc', ascending=False)
best_model = ordered_scores.iloc[0]['model']
ordered_scores

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Unnamed: 0,model,amount_events,Balanced Accuracy,Precision,Recall,F1-Score,auc
4,Random Forest,3,0.982406,0.993317,0.97193,0.982502,0.99602
5,Gradient Boosting Classifier,3,0.972716,0.984973,0.961404,0.973038,0.993953
1,Support Vector Machine (RBF Kernel),3,0.988454,0.991377,0.986284,0.988812,0.993747
6,KNN,3,0.971783,0.960084,0.988357,0.974,0.990451
0,Logistic Regression,3,0.980666,0.981228,0.981818,0.981512,0.989751
3,Adaboost,3,0.962914,0.967266,0.961244,0.964226,0.987494
2,Decission Tree,3,0.923213,0.926588,0.926635,0.926472,0.923213
7,Gaussian NB,3,0.572613,0.510319,0.273525,0.238283,0.906426


In [5]:
# Early-game window, in the same unit as the eventNtime columns (bounds inclusive).
early_start = 0
early_end = 10

# Names of the event columns and of their matching time columns
eventos = ['event{}'.format(i) for i in range(1, 201)]
tempos = ['event{}time'.format(i) for i in range(1, 201)]

# Keep each event only for the rows whose timestamp falls inside the window
eventos_early = []

for evento, tempo in zip(eventos, tempos):
    in_window = df[tempo].between(early_start, early_end)
    eventos_early.append(df.loc[in_window, evento])

# Per-player statistics are kept for every row regardless of the window
eventos_early.append(df[['blueTopGP','blueTopWR','blueTopKDA','blueJungleGP','blueJungleWR','blueJungleKDA','blueMidGP','blueMidWR','blueMidKDA','blueADCGP','blueADCWR','blueADCKDA','blueSupportGP','blueSupportWR','blueSupportKDA','redTopGP','redTopWR','redTopKDA','redJungleGP','redJungleWR','redJungleKDA','redMidGP','redMidWR','redMidKDA','redAdcGP','redAdcWR','redAdcKDA','redSupportGP','redSupportWR','redSupportKDA']])

# Align everything by row index; events outside the window become 0
df_early = pd.concat(eventos_early, axis=1).fillna(0).astype(int)
X_early = df_early.copy()

# Repeated hold-out comparison of the candidate classifiers on the early-game
# features: 10 stratified 80/20 splits, every model refitted on each split.
execution_frames = []

for i in range(10):
    X_train,X_test,y_train,y_test,scaler = preprocess_input(X_early,y)

    # Estimators are rebuilt on every repetition so each fit starts from scratch.
    # GradientBoostingClassifier keeps its default loss: 'deviance' was the
    # default where it is accepted, but it was later renamed 'log_loss' and
    # removed, so passing it explicitly breaks on newer scikit-learn.
    models = {
        'Logistic Regression': LogisticRegression(max_iter=50000),
        'Support Vector Machine (RBF Kernel)': SVC(C=100,gamma=0.001,kernel='rbf',max_iter=50000,probability=True),
        'Decission Tree': DecisionTreeClassifier(),
        'Adaboost': AdaBoostClassifier(),
        'Random Forest': RandomForestClassifier(max_depth=25,n_estimators=600,min_samples_split=2,min_samples_leaf=1),
        'Gradient Boosting Classifier': GradientBoostingClassifier(criterion='friedman_mse',learning_rate=0.025,max_depth=4,max_features='log2',min_samples_leaf=8,min_samples_split=3, n_estimators=100,subsample=0.5),
        'KNN': KNeighborsClassifier(n_neighbors=3),
        'Gaussian NB': GaussianNB()
    }

    scores_list = []

    for name, model in models.items():
        model.fit(X_train,y_train)

        # Predict once and reuse the labels for every label-based metric.
        test_pred = model.predict(X_test)
        scores_list.append({
            'model': name,
            'amount_events': '3',
            'execution': i,
            'Balanced Accuracy': balanced_accuracy_score(y_test,test_pred),
            'Precision': precision_score(y_test,test_pred),
            'Recall': recall_score(y_test,test_pred),
            'F1-Score': f1_score(y_test,test_pred),
            'auc': roc_auc_score(y_test,model.predict_proba(X_test)[:,1])
        })

    execution_frames.append(pd.DataFrame(scores_list))

# A single concat replaces the per-iteration DataFrame.append, which is
# deprecated since pandas 1.4 and removed in pandas 2.0.
executions = pd.concat(execution_frames)

# Average every metric over the repetitions, one row per model in the
# order the models are declared.
metric_columns = ['Balanced Accuracy', 'Precision', 'Recall', 'F1-Score', 'auc']
avg_scores_list = []

for name in models:
    model_metrics = executions.loc[executions['model'] == name]
    averaged = {'model': name, 'amount_events': '3'}
    for metric_name in metric_columns:
        averaged[metric_name] = model_metrics[metric_name].mean()
    avg_scores_list.append(averaged)

avg_scores = pd.DataFrame(avg_scores_list)

# Rank by mean AUC; the top row names the best model.
ordered_scores = avg_scores.sort_values(by='auc', ascending=False)
best_model = ordered_scores.iloc[0]['model']
ordered_scores

Unnamed: 0,model,amount_events,Balanced Accuracy,Precision,Recall,F1-Score,auc
3,Adaboost,3,0.678614,0.686501,0.710526,0.698241,0.74863
4,Random Forest,3,0.679124,0.68011,0.73429,0.7061,0.748002
5,Gradient Boosting Classifier,3,0.667194,0.654553,0.786124,0.714308,0.742942
0,Logistic Regression,3,0.673647,0.67983,0.712919,0.695885,0.735933
1,Support Vector Machine (RBF Kernel),3,0.6687,0.67183,0.720734,0.695305,0.727608
6,KNN,3,0.592021,0.605395,0.633174,0.618865,0.614039
2,Decission Tree,3,0.586099,0.603916,0.602233,0.602954,0.586099
7,Gaussian NB,3,0.504548,0.523514,0.991388,0.685189,0.528064


In [6]:
# Helper that counts events per side
def contar_eventos(row):
    """Count how many values in ``row`` fall in 1-21 and how many in 22-42.

    Returns a ``(azul, vermelho)`` tuple; values outside both ranges
    (such as 0) are ignored.
    """
    azul = 0
    vermelho = 0
    for evento in row:
        if 1 <= evento <= 21:
            azul += 1
        elif 22 <= evento <= 42:
            vermelho += 1
    return azul, vermelho

def calcular_porcentagem(X, Y):
    """Return ``X`` expressed as a percentage of ``Y``."""
    razao = X / Y
    return razao * 100

# Only the columns whose name starts with 'event'
colunas_event = [coluna for coluna in df_early.columns if coluna.startswith('event')]

# Per-match counts of blue-side and red-side events inside the window
contagens = df_early[colunas_event].apply(contar_eventos, axis=1)
df_early['qtd_eventos_azul'], df_early['qtd_eventos_vermelho'] = zip(*contagens)

# Baseline prediction: 1 when blue has strictly more events, otherwise 0 (ties included)
df_early['vencedor_early'] = (df_early['qtd_eventos_azul'] > df_early['qtd_eventos_vermelho']).astype(int)

# Attach the real outcome. Any `result` column left by a previous run is
# dropped first, so re-running the cell does not create duplicate columns.
df_early = pd.concat([df_early.drop(columns='result', errors='ignore'), y], axis=1)
df_early['result'] = df_early['result'].astype(int)

# Share of matches where the baseline agrees with the real outcome
qtd_iguais = (df_early['vencedor_early'] == df_early['result']).sum()
resultado = calcular_porcentagem(qtd_iguais, df_early.shape[0])

print(f"Em {resultado:.2f}% das partidas, o vencedor do early é igual ao vencedor da partida.")

Em 68.31% de das partidas, o vencedor do early é igual ao vencedor da partida.


In [20]:
# Model under evaluation
model = AdaBoostClassifier()

# Metrics computed on every fold (weighted averages; AUC is scored from predicted probabilities)
scoring = dict([
    ('Balanced Accuracy', make_scorer(balanced_accuracy_score)),
    ('Precision', make_scorer(precision_score, average='weighted')),
    ('Recall', make_scorer(recall_score, average='weighted')),
    ('F1', make_scorer(f1_score, average='weighted')),
    ('AUC', make_scorer(roc_auc_score, needs_proba=True, average='weighted')),
])

# 5-fold cross-validation on the early-game features; scoring failures raise instead of producing NaN
cv_results = cross_validate(model, X_early, y, scoring=scoring, cv=5, error_score='raise')

# Mean and standard deviation of every timing/metric entry across the folds
for metric in cv_results:
    scores = cv_results[metric]
    print(f'{metric}: {scores.mean()} (+/- {scores.std()})')


fit_time: 0.7798592567443847 (+/- 0.05074023625877756)
score_time: 0.11602272987365722 (+/- 0.010056766227367299)
test_Balanced Accuracy: 0.6814074291515972 (+/- 0.004820442593968845)
test_Precision: 0.6826600938875172 (+/- 0.0047136019484948595)
test_Recall: 0.682407126941382 (+/- 0.004940923150315733)
test_F1: 0.6820287825679131 (+/- 0.004941467676553978)
test_AUC: 0.7560156432269945 (+/- 0.00885501676805403)


In [8]:
# Mid-game window, in the same unit as the eventNtime columns (bounds inclusive).
mid_start = 11
mid_end = 20

# Names of the event columns and of their matching time columns
eventos = ['event{}'.format(i) for i in range(1, 201)]
tempos = ['event{}time'.format(i) for i in range(1, 201)]

# Keep each event only for the rows whose timestamp falls inside the window
eventos_mid = []

for evento, tempo in zip(eventos, tempos):
    in_window = df[tempo].between(mid_start, mid_end)
    eventos_mid.append(df.loc[in_window, evento])

# Per-player statistics are kept for every row regardless of the window
eventos_mid.append(df[['blueTopGP','blueTopWR','blueTopKDA','blueJungleGP','blueJungleWR','blueJungleKDA','blueMidGP','blueMidWR','blueMidKDA','blueADCGP','blueADCWR','blueADCKDA','blueSupportGP','blueSupportWR','blueSupportKDA','redTopGP','redTopWR','redTopKDA','redJungleGP','redJungleWR','redJungleKDA','redMidGP','redMidWR','redMidKDA','redAdcGP','redAdcWR','redAdcKDA','redSupportGP','redSupportWR','redSupportKDA']])

# Align everything by row index; events outside the window become 0
df_mid = pd.concat(eventos_mid, axis=1).fillna(0).astype(int)
X_mid = df_mid.copy()

# Repeated hold-out comparison of the candidate classifiers on the mid-game
# features: 10 stratified 80/20 splits, every model refitted on each split.
execution_frames = []

for i in range(10):
    X_train,X_test,y_train,y_test,scaler = preprocess_input(X_mid,y)

    # Estimators are rebuilt on every repetition so each fit starts from scratch.
    # GradientBoostingClassifier keeps its default loss: 'deviance' was the
    # default where it is accepted, but it was later renamed 'log_loss' and
    # removed, so passing it explicitly breaks on newer scikit-learn.
    models = {
        'Logistic Regression': LogisticRegression(max_iter=50000),
        'Support Vector Machine (RBF Kernel)': SVC(C=100,gamma=0.001,kernel='rbf',max_iter=50000,probability=True),
        'Decission Tree': DecisionTreeClassifier(),
        'Adaboost': AdaBoostClassifier(),
        'Random Forest': RandomForestClassifier(max_depth=25,n_estimators=600,min_samples_split=2,min_samples_leaf=1),
        'Gradient Boosting Classifier': GradientBoostingClassifier(criterion='friedman_mse',learning_rate=0.025,max_depth=4,max_features='log2',min_samples_leaf=8,min_samples_split=3, n_estimators=100,subsample=0.5),
        'KNN': KNeighborsClassifier(n_neighbors=3),
        'Gaussian NB': GaussianNB()
    }

    scores_list = []

    for name, model in models.items():
        model.fit(X_train,y_train)

        # Predict once and reuse the labels for every label-based metric.
        test_pred = model.predict(X_test)
        scores_list.append({
            'model': name,
            'amount_events': '3',
            'execution': i,
            'Balanced Accuracy': balanced_accuracy_score(y_test,test_pred),
            'Precision': precision_score(y_test,test_pred),
            'Recall': recall_score(y_test,test_pred),
            'F1-Score': f1_score(y_test,test_pred),
            'auc': roc_auc_score(y_test,model.predict_proba(X_test)[:,1])
        })

    execution_frames.append(pd.DataFrame(scores_list))

# A single concat replaces the per-iteration DataFrame.append, which is
# deprecated since pandas 1.4 and removed in pandas 2.0.
executions = pd.concat(execution_frames)

# Average every metric over the repetitions, one row per model in the
# order the models are declared.
metric_columns = ['Balanced Accuracy', 'Precision', 'Recall', 'F1-Score', 'auc']
avg_scores_list = []

for name in models:
    model_metrics = executions.loc[executions['model'] == name]
    averaged = {'model': name, 'amount_events': '3'}
    for metric_name in metric_columns:
        averaged[metric_name] = model_metrics[metric_name].mean()
    avg_scores_list.append(averaged)

avg_scores = pd.DataFrame(avg_scores_list)

# Rank by mean AUC; the top row names the best model.
ordered_scores = avg_scores.sort_values(by='auc', ascending=False)
best_model = ordered_scores.iloc[0]['model']
ordered_scores

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Unnamed: 0,model,amount_events,Balanced Accuracy,Precision,Recall,F1-Score,auc
4,Random Forest,3,0.770547,0.769812,0.802552,0.785782,0.856468
3,Adaboost,3,0.759015,0.760542,0.788517,0.774224,0.846033
5,Gradient Boosting Classifier,3,0.752174,0.732516,0.837161,0.781278,0.843586
1,Support Vector Machine (RBF Kernel),3,0.7388,0.735762,0.78437,0.759226,0.819975
0,Logistic Regression,3,0.726151,0.723779,0.774003,0.747985,0.800064
6,KNN,3,0.683409,0.677268,0.762998,0.717454,0.729758
7,Gaussian NB,3,0.517403,0.497601,0.071611,0.073152,0.722087
2,Decission Tree,3,0.639249,0.653807,0.658533,0.656036,0.639249


In [9]:
# Helper that counts events per side
def contar_eventos(row):
    """Return ``(azul, vermelho)``: how many values of ``row`` lie in 1-21 and in 22-42.

    Values outside both ranges (such as 0) are not counted.
    """
    eventos_azuis = [evento for evento in row if 1 <= evento <= 21]
    eventos_vermelhos = [evento for evento in row if 22 <= evento <= 42]
    return len(eventos_azuis), len(eventos_vermelhos)

def calcular_porcentagem(X, Y):
    """Return ``X`` expressed as a percentage of ``Y``."""
    return (X / Y) * 100

# Only the columns whose name starts with 'event'
colunas_event = [coluna for coluna in df_mid.columns if coluna.startswith('event')]

# Per-match counts of blue-side and red-side events inside the window
contagens = df_mid[colunas_event].apply(contar_eventos, axis=1)
df_mid['qtd_eventos_azul'], df_mid['qtd_eventos_vermelho'] = zip(*contagens)

# Baseline prediction: 1 when blue has strictly more events, otherwise 0 (ties included)
df_mid['vencedor_mid'] = (df_mid['qtd_eventos_azul'] > df_mid['qtd_eventos_vermelho']).astype(int)

# Attach the real outcome. Any `result` column left by a previous run is
# dropped first, so re-running the cell does not create duplicate columns.
df_mid = pd.concat([df_mid.drop(columns='result', errors='ignore'), y], axis=1)
df_mid['result'] = df_mid['result'].astype(int)

# Share of matches where the baseline agrees with the real outcome
qtd_iguais = (df_mid['vencedor_mid'] == df_mid['result']).sum()
resultado = calcular_porcentagem(qtd_iguais, df_mid.shape[0])

print(f"Em {resultado:.2f}% das partidas, o vencedor do mid é igual ao vencedor da partida.")

Em 77.00% de das partidas, o vencedor do mid é igual ao vencedor da partida.


In [10]:
# Model under evaluation
model = RandomForestClassifier(
    n_estimators=600,
    max_depth=50,
    min_samples_leaf=1,
    min_samples_split=2,
)

# Metrics computed on every fold (weighted averages; AUC is scored from predicted probabilities)
scoring = dict([
    ('Balanced Accuracy', make_scorer(balanced_accuracy_score)),
    ('Precision', make_scorer(precision_score, average='weighted')),
    ('Recall', make_scorer(recall_score, average='weighted')),
    ('F1', make_scorer(f1_score, average='weighted')),
    ('AUC', make_scorer(roc_auc_score, needs_proba=True, average='weighted')),
])

# 5-fold cross-validation on the mid-game features; scoring failures raise instead of producing NaN
cv_results = cross_validate(model, X_mid, y, scoring=scoring, cv=5, error_score='raise')

# Mean and standard deviation of every timing/metric entry across the folds
for metric in cv_results:
    scores = cv_results[metric]
    print(f'{metric}: {scores.mean()} (+/- {scores.std()})')


fit_time: 5.601219034194946 (+/- 0.17579902231888175)
score_time: 0.38207206726074217 (+/- 0.005616662185516731)
test_Balanced Accuracy: 0.7690193717898933 (+/- 0.011708720183918043)
test_Precision: 0.7708039993575884 (+/- 0.012519135877560003)
test_Recall: 0.7703676194981209 (+/- 0.01203457162617423)
test_F1: 0.7699815244381265 (+/- 0.011860201959558577)
test_AUC: 0.8513109048286791 (+/- 0.011467514591578088)


In [11]:
# Late-game window, in the same unit as the eventNtime columns (bounds inclusive).
late_start = 21
late_end = 90

# Names of the event columns and of their matching time columns
eventos = ['event{}'.format(i) for i in range(1, 201)]
tempos = ['event{}time'.format(i) for i in range(1, 201)]

# Keep each event only for the rows whose timestamp falls inside the window
eventos_late = []

for evento, tempo in zip(eventos, tempos):
    in_window = df[tempo].between(late_start, late_end)
    eventos_late.append(df.loc[in_window, evento])

# Per-player statistics are kept for every row regardless of the window
eventos_late.append(df[['blueTopGP','blueTopWR','blueTopKDA','blueJungleGP','blueJungleWR','blueJungleKDA','blueMidGP','blueMidWR','blueMidKDA','blueADCGP','blueADCWR','blueADCKDA','blueSupportGP','blueSupportWR','blueSupportKDA','redTopGP','redTopWR','redTopKDA','redJungleGP','redJungleWR','redJungleKDA','redMidGP','redMidWR','redMidKDA','redAdcGP','redAdcWR','redAdcKDA','redSupportGP','redSupportWR','redSupportKDA']])

# Align everything by row index; events outside the window become 0
df_late = pd.concat(eventos_late, axis=1).fillna(0).astype(int)
X_late = df_late.copy()

# Repeated hold-out comparison of the candidate classifiers on the late-game
# features: 10 stratified 80/20 splits, every model refitted on each split.
execution_frames = []

for i in range(10):
    X_train,X_test,y_train,y_test,scaler = preprocess_input(X_late,y)

    # Estimators are rebuilt on every repetition so each fit starts from scratch.
    # GradientBoostingClassifier keeps its default loss: 'deviance' was the
    # default where it is accepted, but it was later renamed 'log_loss' and
    # removed, so passing it explicitly breaks on newer scikit-learn.
    models = {
        'Logistic Regression': LogisticRegression(max_iter=50000),
        'Support Vector Machine (RBF Kernel)': SVC(C=100,gamma=0.001,kernel='rbf',max_iter=50000,probability=True),
        'Decission Tree': DecisionTreeClassifier(),
        'Adaboost': AdaBoostClassifier(),
        'Random Forest': RandomForestClassifier(max_depth=25,n_estimators=600,min_samples_split=2,min_samples_leaf=1),
        'Gradient Boosting Classifier': GradientBoostingClassifier(criterion='friedman_mse',learning_rate=0.025,max_depth=4,max_features='log2',min_samples_leaf=8,min_samples_split=3, n_estimators=100,subsample=0.5),
        'KNN': KNeighborsClassifier(n_neighbors=3),
        'Gaussian NB': GaussianNB()
    }

    scores_list = []

    for name, model in models.items():
        model.fit(X_train,y_train)

        # Predict once and reuse the labels for every label-based metric.
        test_pred = model.predict(X_test)
        scores_list.append({
            'model': name,
            'model_obj': model,
            'amount_events': '3',
            'execution': i,
            'Balanced Accuracy': balanced_accuracy_score(y_test,test_pred),
            'Precision': precision_score(y_test,test_pred),
            'Recall': recall_score(y_test,test_pred),
            'F1-Score': f1_score(y_test,test_pred),
            'auc': roc_auc_score(y_test,model.predict_proba(X_test)[:,1])
        })

    execution_frames.append(pd.DataFrame(scores_list))

# A single concat replaces the per-iteration DataFrame.append, which is
# deprecated since pandas 1.4 and removed in pandas 2.0.
executions = pd.concat(execution_frames)

# Average every metric over the repetitions, one row per model in the
# order the models are declared. `model_obj` is the estimator fitted on the
# LAST split only, while the metrics are means over all ten splits.
metric_columns = ['Balanced Accuracy', 'Precision', 'Recall', 'F1-Score', 'auc']
avg_scores_list = []

for name, model in models.items():
    model_metrics = executions.loc[executions['model'] == name]
    averaged = {'model': name, 'model_obj': model, 'amount_events': '3'}
    for metric_name in metric_columns:
        averaged[metric_name] = model_metrics[metric_name].mean()
    avg_scores_list.append(averaged)

avg_scores = pd.DataFrame(avg_scores_list)

# Rank by mean AUC; the top row's fitted estimator becomes `best_model`.
ordered_scores = avg_scores.sort_values(by='auc', ascending=False)
best_model = ordered_scores.iloc[0]['model_obj']
ordered_scores

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Unnamed: 0,model,model_obj,amount_events,Balanced Accuracy,Precision,Recall,F1-Score,auc
4,Random Forest,"(DecisionTreeClassifier(max_depth=25, max_feat...",3,0.97481,0.985508,0.965072,0.975168,0.99334
5,Gradient Boosting Classifier,([DecisionTreeRegressor(criterion='friedman_ms...,3,0.957585,0.967276,0.950239,0.958652,0.987427
3,Adaboost,"(DecisionTreeClassifier(max_depth=1, random_st...",3,0.958386,0.959011,0.961563,0.960255,0.985872
1,Support Vector Machine (RBF Kernel),"SVC(C=100, gamma=0.001, max_iter=50000, probab...",3,0.972986,0.976925,0.970973,0.973923,0.984842
6,KNN,KNeighborsClassifier(n_neighbors=3),3,0.952439,0.938582,0.974322,0.9561,0.975293
0,Logistic Regression,LogisticRegression(max_iter=50000),3,0.874804,0.881077,0.878947,0.879893,0.927698
2,Decission Tree,DecisionTreeClassifier(),3,0.920251,0.9242,0.922967,0.923528,0.920251
7,Gaussian NB,GaussianNB(),3,0.516125,0.510502,0.094577,0.078879,0.903152


In [12]:
# Helper that counts events per side
def contar_eventos(row):
    """Tally the values of ``row`` by side: 1-21 count as blue, 22-42 as red.

    Returns the pair ``(azul, vermelho)``; any other value (such as 0) is skipped.
    """
    contagem = {'azul': 0, 'vermelho': 0}
    for evento in row:
        if 1 <= evento <= 21:
            contagem['azul'] += 1
        if 22 <= evento <= 42:
            contagem['vermelho'] += 1
    return contagem['azul'], contagem['vermelho']

def calcular_porcentagem(X, Y):
    """Return ``X`` expressed as a percentage of ``Y``."""
    fracao = X / Y
    porcentagem = fracao * 100
    return porcentagem

# Only the columns whose name starts with 'event'
colunas_event = [coluna for coluna in df_late.columns if coluna.startswith('event')]

# Per-match counts of blue-side and red-side events inside the window
contagens = df_late[colunas_event].apply(contar_eventos, axis=1)
df_late['qtd_eventos_azul'], df_late['qtd_eventos_vermelho'] = zip(*contagens)

# Baseline prediction: 1 when blue has strictly more events, otherwise 0 (ties included)
df_late['vencedor_late'] = (df_late['qtd_eventos_azul'] > df_late['qtd_eventos_vermelho']).astype(int)

# Attach the real outcome. Any `result` column left by a previous run is
# dropped first, so re-running the cell does not create duplicate columns.
df_late = pd.concat([df_late.drop(columns='result', errors='ignore'), y], axis=1)
df_late['result'] = df_late['result'].astype(int)

# Share of matches where the baseline agrees with the real outcome
qtd_iguais = (df_late['vencedor_late'] == df_late['result']).sum()
resultado = calcular_porcentagem(qtd_iguais, df_late.shape[0])

print(f"Em {resultado:.2f}% das partidas, o vencedor do late é igual ao vencedor da partida.")

Em 97.81% de das partidas, o vencedor do late é igual ao vencedor da partida.


In [14]:
# Model under evaluation
model = RandomForestClassifier(
    n_estimators=600,
    max_depth=50,
    min_samples_leaf=1,
    min_samples_split=2,
)

# Metrics computed on every fold (weighted averages; AUC is scored from predicted probabilities)
scoring = dict([
    ('Balanced Accuracy', make_scorer(balanced_accuracy_score)),
    ('Precision', make_scorer(precision_score, average='weighted')),
    ('Recall', make_scorer(recall_score, average='weighted')),
    ('F1', make_scorer(f1_score, average='weighted')),
    ('AUC', make_scorer(roc_auc_score, needs_proba=True, average='weighted')),
])

# 5-fold cross-validation on the late-game features; scoring failures raise instead of producing NaN
cv_results = cross_validate(model, X_late, y, scoring=scoring, cv=5, error_score='raise')

# Mean and standard deviation of every timing/metric entry across the folds
for metric in cv_results:
    scores = cv_results[metric]
    print(f'{metric}: {scores.mean()} (+/- {scores.std()})')


fit_time: 5.015894794464112 (+/- 0.23099545667559607)
score_time: 0.3372201442718506 (+/- 0.03896682310915093)
test_Balanced Accuracy: 0.9766482087875292 (+/- 0.005384925936980041)
test_Precision: 0.9765530133847516 (+/- 0.00540035467899779)
test_Recall: 0.9762209838686701 (+/- 0.005434627920208715)
test_F1: 0.9762284973835884 (+/- 0.005432235453615175)
test_AUC: 0.9953144910581837 (+/- 0.002953649668429816)


In [15]:
# Give the held-out labels a fresh 0..n-1 index, then predict on the held-out
# features with the selected model. Both rely on `X_test`/`y_test` left over
# from the last split of the preceding comparison loop.
y_test = y_test.reset_index(drop=True)
y_pred = best_model.predict(X_test)

In [16]:
# Per-feature importance scores exposed by the selected model
# (only available when `best_model` provides `feature_importances_`)
feature_importances = best_model.feature_importances_

# One row per feature, most important first
importance_df = pd.DataFrame(
    {'Feature': X_late.columns, 'Importance': feature_importances}
).sort_values(by='Importance', ascending=False)

# Show the 20 highest-ranked features
importance_df.head(20)

Unnamed: 0,Feature,Importance
45,event46,0.047115
47,event48,0.044633
44,event45,0.042999
46,event47,0.039119
49,event50,0.03611
48,event49,0.033093
50,event51,0.032792
43,event44,0.032312
55,event56,0.031655
42,event43,0.029614


In [17]:
# Raw (un-encoded) dataset, so the event labels are still readable
df_eventos = pd.read_csv('../../data/crawler/unified-events-statistics-with-kills.csv')

# Event columns to inspect
colunas = ['event45', 'event46', 'event47', 'event48', 'event44']

for coluna in colunas:
    # Five most frequent values in this column
    contagem = df_eventos[coluna].value_counts().iloc[:5]
    print("Cinco valores mais frequentes em " + coluna + ":")
    print(contagem)
    print(30 * "=")

Cinco valores mais frequentes em event45:
BLUE: kill           1448
RED: kill            1410
BLUE: nexus_tower     192
BLUE: dragon          160
RED: dragon           156
Name: event45, dtype: int64
Cinco valores mais frequentes em event46:
BLUE: kill           1487
RED: kill            1326
BLUE: nexus_tower     183
RED: nexus_tower      176
BLUE: dragon          150
Name: event46, dtype: int64
Cinco valores mais frequentes em event47:
BLUE: kill           1449
RED: kill            1304
BLUE: nexus_tower     219
RED: nexus_tower      184
BLUE: dragon          138
Name: event47, dtype: int64
Cinco valores mais frequentes em event48:
BLUE: kill           1348
RED: kill            1320
BLUE: nexus_tower     228
RED: nexus_tower      193
BLUE: dragon          127
Name: event48, dtype: int64
Cinco valores mais frequentes em event44:
BLUE: kill           1462
RED: kill            1425
BLUE: nexus_tower     194
BLUE: dragon          166
RED: dragon           165
Name: event44, dtype: int64


  interactivity=interactivity, compiler=compiler, result=result)


In [18]:
# Raw (un-encoded) dataset
df_eventos = pd.read_csv('../../data/crawler/unified-events-statistics-with-kills.csv')

# Time columns matching the event columns inspected above
colunas = ['event45time', 'event46time', 'event47time', 'event48time', 'event44time']

for coluna in colunas:
    # Five most frequent values in this column
    contagem = df_eventos[coluna].value_counts().iloc[:5]
    print("Cinco valores mais frequentes em " + coluna + ":")
    print(contagem)
    print(30 * "=")

Cinco valores mais frequentes em event45time:
26.0    466
27.0    458
28.0    445
29.0    437
30.0    409
Name: event45time, dtype: int64
Cinco valores mais frequentes em event46time:
29.0    457
28.0    456
27.0    452
26.0    433
30.0    391
Name: event46time, dtype: int64
Cinco valores mais frequentes em event47time:
29.0    463
28.0    456
27.0    450
30.0    389
26.0    383
Name: event47time, dtype: int64
Cinco valores mais frequentes em event48time:
29.0    453
28.0    446
27.0    433
30.0    382
26.0    358
Name: event48time, dtype: int64
Cinco valores mais frequentes em event44time:
27.0    465
26.0    457
29.0    446
28.0    444
25.0    431
Name: event44time, dtype: int64


In [19]:
from sklearn.metrics import accuracy_score

# Hold-out accuracy and AUC of the selected model
y_prob = best_model.predict_proba(X_test)[:, 1]
accuracy = accuracy_score(y_test, y_pred)
auc = roc_auc_score(y_test, y_prob)

# Index labels of the test matches whose prediction differs from the real result
partidas_erradas = y_test[y_test != y_pred].index.tolist()

print('Testes realizados com ', len(y_test), ' partidas.', sep='')
print('Acurácia: ', accuracy, sep='')
print('AUC: ', auc, sep='')

# One line per misclassified match
for index in partidas_erradas:
    print('O resultado era ', y_test[index], ' e o previsto foi ', y_pred[index], sep='')

Testes realizados com 1203 partidas.
Acurácia: 0.970074812967581
AUC: 0.9924726984759878
O resultado era 0 e o previsto foi 1
O resultado era 1 e o previsto foi 0
O resultado era 1 e o previsto foi 0
O resultado era 1 e o previsto foi 0
O resultado era 1 e o previsto foi 0
O resultado era 1 e o previsto foi 0
O resultado era 1 e o previsto foi 0
O resultado era 1 e o previsto foi 0
O resultado era 1 e o previsto foi 0
O resultado era 0 e o previsto foi 1
O resultado era 1 e o previsto foi 0
O resultado era 1 e o previsto foi 0
O resultado era 0 e o previsto foi 1
O resultado era 0 e o previsto foi 1
O resultado era 0 e o previsto foi 1
O resultado era 1 e o previsto foi 0
O resultado era 1 e o previsto foi 0
O resultado era 0 e o previsto foi 1
O resultado era 1 e o previsto foi 0
O resultado era 1 e o previsto foi 0
O resultado era 0 e o previsto foi 1
O resultado era 0 e o previsto foi 1
O resultado era 1 e o previsto foi 0
O resultado era 0 e o previsto foi 1
O resultado era 1 e o p