In [1]:
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import balanced_accuracy_score,precision_score,recall_score,f1_score,roc_auc_score
# Game minute under analysis: only rows whose `time` column equals this value
# are kept, and the value is recorded as `game_minutes` in every result row.
MINUTO_ANALISADO = 14

In [2]:
# Load the crawler CSV and keep only the rows whose `time` equals
# MINUTO_ANALISADO. `filtro` is the boolean row mask used for the selection.
df = pd.read_csv('../../../data/crawler/unified-events-time-statistics.csv')
filtro = df['time'].eq(MINUTO_ANALISADO)
df = df.loc[filtro]
df

Unnamed: 0,golId,time,result,blueTopGP,blueTopWR,blueTopKDA,blueJungleGP,blueJungleWR,blueJungleKDA,blueMidGP,...,RED:third_tower_top,RED:third_tower_mid,RED:third_tower_bot,RED:inhibitor_top,RED:inhibitor_mid,RED:inhibitor_bot,RED:baron,RED:elder_dragon,RED:nexus_tower,RED:nexus
14,35797,14,0,0,0.00,0.00,0,0.00,0.00,0,...,0,0,0,0,0,0,0,0,0,0
55,35798,14,0,0,0.00,0.00,2,0.00,1.18,0,...,0,0,0,0,0,0,0,0,0,0
104,35799,14,1,2,0.00,1.00,5,1.00,5.78,0,...,0,0,0,0,0,0,0,0,0,0
143,35800,14,1,3,0.33,4.25,0,0.00,0.00,0,...,0,0,0,0,0,0,0,0,0,0
178,35801,14,0,0,0.00,0.00,3,0.33,2.08,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
195354,49142,14,0,2,0.50,3.67,8,0.62,5.18,0,...,0,0,0,0,0,0,0,0,0,0
195383,53243,14,0,4,0.25,2.83,10,0.80,8.38,2,...,0,0,0,0,0,0,0,0,0,0
195414,53244,14,1,5,0.80,2.91,3,1.00,3.75,10,...,0,0,0,0,0,0,0,0,0,0
195447,45053,14,0,0,0.00,0.00,2,0.00,4.71,0,...,0,0,0,0,0,0,0,0,0,0


In [3]:
# Target vector: an independent copy of the `result` column.
y = df.loc[:, 'result'].copy()
# Feature matrix: every column except `golId`, the target itself and `time`
# (which is constant after the minute filter applied when loading).
X = df.drop(columns=['golId', 'result', 'time'])
X.head()

Unnamed: 0,blueTopGP,blueTopWR,blueTopKDA,blueJungleGP,blueJungleWR,blueJungleKDA,blueMidGP,blueMidWR,blueMidKDA,blueADCGP,...,RED:third_tower_top,RED:third_tower_mid,RED:third_tower_bot,RED:inhibitor_top,RED:inhibitor_mid,RED:inhibitor_bot,RED:baron,RED:elder_dragon,RED:nexus_tower,RED:nexus
14,0,0.0,0.0,0,0.0,0.0,0,0.0,0.0,0,...,0,0,0,0,0,0,0,0,0,0
55,0,0.0,0.0,2,0.0,1.18,0,0.0,0.0,0,...,0,0,0,0,0,0,0,0,0,0
104,2,0.0,1.0,5,1.0,5.78,0,0.0,0.0,0,...,0,0,0,0,0,0,0,0,0,0
143,3,0.33,4.25,0,0.0,0.0,0,0.0,0.0,0,...,0,0,0,0,0,0,0,0,0,0
178,0,0.0,0.0,3,0.33,2.08,0,0.0,0.0,0,...,0,0,0,0,0,0,0,0,0,0


In [4]:
def preprocess_input(X, y, random_state=42):
    """Split the data into stratified train/test sets and standardise the features.

    The scaler is fitted on the training partition only and then applied to
    both partitions, so test-set statistics never influence the scaling.

    Parameters
    ----------
    X : feature table accepted by ``train_test_split`` and ``StandardScaler``.
    y : labels aligned with ``X``; also used to stratify the split.
    random_state : seed forwarded to ``train_test_split``. Defaults to 42,
        the value that was previously hard-coded, so existing two-argument
        calls behave exactly as before. Pass a different seed per call to
        evaluate on different partitions.

    Returns
    -------
    tuple
        ``(X_train, X_test, y_train, y_test, scaler)`` where ``X_train`` and
        ``X_test`` are the scaled outputs of ``scaler.transform`` and
        ``scaler`` is the fitted ``StandardScaler``.
    """
    # 80/20 split, stratified on y so both partitions keep the class balance.
    # No defensive copy of X is taken: neither the split nor the scaler
    # (default copy=True) writes into the caller's data.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, train_size=0.8, stratify=y, random_state=random_state
    )
    scaler = StandardScaler()
    scaler.fit(X_train)
    X_train = scaler.transform(X_train)
    X_test = scaler.transform(X_test)
    return X_train, X_test, y_train, y_test, scaler

In [5]:
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, AdaBoostClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB

# One record per (execution, model) is collected here and turned into a
# DataFrame once, after the loop. DataFrame.append was removed in pandas 2.0,
# and growing a frame inside a loop re-copies all previous rows every time.
# The resulting frame has a unique 0..N-1 index instead of a 0..7 index
# repeated per execution.
execution_records = []

for i in range(50):
  print('execução número ' + str(i))
  # NOTE: preprocess_input splits with a fixed default seed, so every execution
  # sees the same train/test partition; run-to-run variation comes only from
  # the estimators that are randomised internally.
  X_train,X_test,y_train,y_test,scaler = preprocess_input(X,y)

  # Fresh, unfitted estimators for this execution.
  # NOTE(review): the SVC label says "RBF Kernel" but the estimator is
  # configured with kernel='sigmoid'. The label is left unchanged because it
  # is the model name stored in the results.
  models = {
      'Logistic Regression': LogisticRegression(max_iter=10000),
      'Support Vector Machine (RBF Kernel)': SVC(C=1,gamma=0.01,kernel='sigmoid',max_iter=50000,probability=True),
      'Decission Tree': DecisionTreeClassifier(),
      'Adaboost': AdaBoostClassifier(),
      'Random Forest': RandomForestClassifier(),
      'Gradient Boosting Classifier': GradientBoostingClassifier(),
      'KNN': KNeighborsClassifier(n_neighbors=3),
      'Gaussian NB': GaussianNB()
  }

  for name, model in models.items():
    model.fit(X_train,y_train)

  scores_list = []

  for name, model in models.items():
    # Predict once per model and reuse the result for every label-based metric.
    y_pred = model.predict(X_test)
    # Probability of the positive class, needed for ROC AUC.
    y_score = model.predict_proba(X_test)[:,1]
    scores_list.append({
      'model': name,
      'game_minutes': MINUTO_ANALISADO,
      'execution': i,
      'Balanced Accuracy': balanced_accuracy_score(y_test,y_pred),
      'Precision': precision_score(y_test,y_pred),
      'Recall': recall_score(y_test,y_pred),
      'F1-Score': f1_score(y_test,y_pred),
      'auc': roc_auc_score(y_test,y_score)
    })

  execution_records.extend(scores_list)

executions = pd.DataFrame(execution_records)

execução número 0
execução número 1
execução número 2
execução número 3
execução número 4
execução número 5
execução número 6
execução número 7
execução número 8
execução número 9
execução número 10
execução número 11
execução número 12
execução número 13
execução número 14
execução número 15
execução número 16
execução número 17
execução número 18
execução número 19
execução número 20
execução número 21
execução número 22
execução número 23
execução número 24
execução número 25
execução número 26
execução número 27
execução número 28
execução número 29
execução número 30
execução número 31
execução número 32
execução número 33
execução número 34
execução número 35
execução número 36
execução número 37
execução número 38
execução número 39
execução número 40
execução número 41
execução número 42
execução número 43
execução número 44
execução número 45
execução número 46
execução número 47
execução número 48
execução número 49


In [6]:
# Average every metric over all executions, producing one summary record per
# model. `model_obj` is the estimator instance currently held in `models`,
# i.e. the one fitted during the last execution of the training loop.
avg_scores_list = []

for name, model in models.items():
    # Rows of the executions table that belong to this model.
    model_metrics = executions.loc[executions['model'] == name]

    summary = {
      'model': name,
      'model_obj': model,
      'game_minutes': MINUTO_ANALISADO,
    }
    # Same arithmetic for each metric column: total divided by row count.
    for metric in ('Balanced Accuracy', 'Precision', 'Recall', 'F1-Score', 'auc'):
        summary[metric] = model_metrics[metric].sum() / len(model_metrics[metric])

    avg_scores_list.append(summary)

avg_scores = pd.DataFrame(avg_scores_list)

In [7]:
# Rank the averaged per-model results from highest to lowest AUC.
ordered_scores = avg_scores.sort_values(by='auc', ascending=False)

In [8]:
# Display the AUC-ordered ranking.
ordered_scores

Unnamed: 0,model,model_obj,game_minutes,Balanced Accuracy,Precision,Recall,F1-Score,auc
5,Gradient Boosting Classifier,([DecisionTreeRegressor(criterion='friedman_ms...,14,0.694806,0.699117,0.733014,0.715664,0.774581
3,Adaboost,"(DecisionTreeClassifier(max_depth=1, random_st...",14,0.696384,0.705148,0.720893,0.712934,0.769346
0,Logistic Regression,LogisticRegression(max_iter=10000),14,0.694648,0.702955,0.720893,0.711811,0.766258
1,Support Vector Machine (RBF Kernel),"SVC(C=1, gamma=0.01, kernel='sigmoid', max_ite...",14,0.695163,0.70092,0.728868,0.714621,0.763723
4,Random Forest,"(DecisionTreeClassifier(max_features='auto', r...",14,0.688821,0.698206,0.713301,0.705647,0.760258
7,Gaussian NB,GaussianNB(),14,0.555161,0.551224,0.969697,0.70289,0.753788
6,KNN,KNeighborsClassifier(n_neighbors=3),14,0.615622,0.633712,0.623604,0.628617,0.657198
2,Decission Tree,DecisionTreeClassifier(),14,0.605034,0.621513,0.623158,0.622305,0.605034


In [9]:
# Soft-voting ensemble built from the three best-ranked models.
from sklearn.ensemble import VotingClassifier

# (name, estimator) pairs taken from the first three rows of the AUC ranking.
top_estimators = [
    (ordered_scores.iloc[rank]['model'], ordered_scores.iloc[rank]['model_obj'])
    for rank in range(3)
]
ensemble_classifier = VotingClassifier(estimators=top_estimators, voting='soft')

# X_train / y_train are the split left over from the last execution of the
# training loop.
ensemble_classifier = ensemble_classifier.fit(X_train, y_train)

In [10]:
# Score the fitted ensemble on the held-out split. Predictions are computed
# once and shared by all label-based metrics instead of calling predict()
# again for each metric.
ensemble_pred = ensemble_classifier.predict(X_test)
# Probability of the positive class, needed for ROC AUC.
ensemble_proba = ensemble_classifier.predict_proba(X_test)[:,1]

ensemble_scores_list = []
ensemble_scores_list.append({
    'model': 'Ensemble Classifier',
    'model_obj': ensemble_classifier,
    'game_minutes': MINUTO_ANALISADO,
    'Balanced Accuracy': balanced_accuracy_score(y_test,ensemble_pred),
    'Precision': precision_score(y_test,ensemble_pred),
    'Recall': recall_score(y_test,ensemble_pred),
    'F1-Score': f1_score(y_test,ensemble_pred),
    'auc': roc_auc_score(y_test,ensemble_proba)
})

In [11]:
# Turn the single ensemble record into a one-row DataFrame, then echo the raw
# record list as the cell output.
ensemble_scores = pd.DataFrame(ensemble_scores_list)
ensemble_scores_list

[{'model': 'Ensemble Classifier',
  'model_obj': VotingClassifier(estimators=[('Gradient Boosting Classifier',
                                GradientBoostingClassifier()),
                               ('Adaboost', AdaBoostClassifier()),
                               ('Logistic Regression',
                                LogisticRegression(max_iter=10000))],
                   voting='soft'),
  'game_minutes': 14,
  'Balanced Accuracy': 0.6900958599149388,
  'Precision': 0.6957186544342507,
  'Recall': 0.7256778309409888,
  'F1-Score': 0.7103825136612021,
  'auc': 0.7746973573453836}]

In [12]:
# Display the ensemble's one-row score table.
ensemble_scores

Unnamed: 0,model,model_obj,game_minutes,Balanced Accuracy,Precision,Recall,F1-Score,auc
0,Ensemble Classifier,VotingClassifier(estimators=[('Gradient Boosti...,14,0.690096,0.695719,0.725678,0.710383,0.774697


In [13]:
# Stack the ensemble row on top of the per-model averages and pick the
# estimator with the highest AUC.
# NOTE: this rebinds `models`, which the training cell defined as a dict of
# estimators, to a DataFrame. Re-running the averaging cell after this one
# would iterate over DataFrame columns instead of estimators; restart and run
# all cells in order.
# NOTE: the ensemble row holds a single-run score, while the other rows are
# averages over all executions.
models = pd.concat([ensemble_scores,avg_scores])
ordered_models = models.sort_values(by='auc', ascending=False)
best_model = ordered_models.iloc[0]['model_obj']
best_model

VotingClassifier(estimators=[('Gradient Boosting Classifier',
                              GradientBoostingClassifier()),
                             ('Adaboost', AdaBoostClassifier()),
                             ('Logistic Regression',
                              LogisticRegression(max_iter=10000))],
                 voting='soft')

In [14]:
# Persist only the model name, analysed minute and AUC of every row.
# The file is opened in append mode and no header row is written, so each run
# of this cell adds the same rows again to whatever the CSV already contains.
header = ['model', 'game_minutes', 'auc']
models.to_csv('../../../data/models-analysis/models-time-metric.csv', columns = header, mode='a',index=False,header=False)