In [1]:
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import balanced_accuracy_score,precision_score,recall_score,f1_score,roc_auc_score

In [2]:
# Read the crawler's unified events/time statistics CSV and preview the
# first five rows.
df = pd.read_csv(
    '../../../data/crawler/unified-events-time-statistics.csv'
)
df.head(n=5)

Unnamed: 0,golId,time,result,blueTopGP,blueTopWR,blueTopKDA,blueJungleGP,blueJungleWR,blueJungleKDA,blueMidGP,...,RED:third_tower_top,RED:third_tower_mid,RED:third_tower_bot,RED:inhibitor_top,RED:inhibitor_mid,RED:inhibitor_bot,RED:baron,RED:elder_dragon,RED:nexus_tower,RED:nexus
0,35797,0,0,0,0.0,0.0,0,0.0,0.0,0,...,0,0,0,0,0,0,0,0,0,0
1,35797,1,0,0,0.0,0.0,0,0.0,0.0,0,...,0,0,0,0,0,0,0,0,0,0
2,35797,2,0,0,0.0,0.0,0,0.0,0.0,0,...,0,0,0,0,0,0,0,0,0,0
3,35797,3,0,0,0.0,0.0,0,0.0,0.0,0,...,0,0,0,0,0,0,0,0,0,0
4,35797,4,0,0,0.0,0.0,0,0.0,0.0,0,...,0,0,0,0,0,0,0,0,0,0


In [3]:
# Target: the `result` column. Features: every remaining column except
# `golId` and `time`.
# NOTE(review): the preview above shows several rows sharing one golId (one
# per `time` value) — presumably snapshots of the same match; confirm. If so,
# a row-level random split downstream can place rows of one match in both the
# train and the test partition, which would inflate the reported scores.
y = df.loc[:, 'result'].copy()
X = df.drop(columns=['golId', 'result', 'time'])
X.head(n=5)

Unnamed: 0,blueTopGP,blueTopWR,blueTopKDA,blueJungleGP,blueJungleWR,blueJungleKDA,blueMidGP,blueMidWR,blueMidKDA,blueADCGP,...,RED:third_tower_top,RED:third_tower_mid,RED:third_tower_bot,RED:inhibitor_top,RED:inhibitor_mid,RED:inhibitor_bot,RED:baron,RED:elder_dragon,RED:nexus_tower,RED:nexus
0,0,0.0,0.0,0,0.0,0.0,0,0.0,0.0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0.0,0.0,0,0.0,0.0,0,0.0,0.0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0.0,0.0,0,0.0,0.0,0,0.0,0.0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0.0,0.0,0,0.0,0.0,0,0.0,0.0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0.0,0.0,0,0.0,0.0,0,0.0,0.0,0,...,0,0,0,0,0,0,0,0,0,0


In [4]:
def preprocess_input(X, y, train_size=0.8, random_state=42):
    """Split the data into stratified train/test partitions and standardise them.

    The scaler is fitted on the training partition only and then applied to
    both partitions, so test-set statistics never influence the scaling.

    Parameters
    ----------
    X : feature table accepted by ``train_test_split`` and ``StandardScaler``.
    y : target labels; also used as the stratification key.
    train_size : share of the samples placed in the training partition.
        Defaults to 0.8 (the previously hard-coded value).
    random_state : seed forwarded to ``train_test_split``. The default of 42
        reproduces the previously hard-coded split; pass a different value
        per repetition to evaluate on different splits.

    Returns
    -------
    (X_train, X_test, y_train, y_test, scaler), where the two feature sets
    are the scaled outputs of ``StandardScaler`` and ``scaler`` is the fitted
    instance (needed to transform new data the same way).

    NOTE(review): the split is made row by row. If several rows belong to the
    same underlying entity, consider a group-aware split — confirm against
    the dataset.
    """
    # train_test_split returns new objects, so the caller's X is not modified.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, train_size=train_size, stratify=y, random_state=random_state
    )
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)
    return X_train, X_test, y_train, y_test, scaler

In [5]:
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, AdaBoostClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB

# Repeated-training experiment: fit every candidate classifier ten times on
# the same train/test split and record its test-set metrics for each run.

# preprocess_input is called with its fixed seed, so the split and the scaling
# are identical on every call; compute them once instead of once per run.
# X_train / X_test / y_train / y_test / scaler are reused by later cells.
X_train,X_test,y_train,y_test,scaler = preprocess_input(X,y)

# One dict per (execution, model); turned into a DataFrame once at the end.
# (DataFrame.append was removed in pandas 2.0 and re-copied the frame on
# every call.)
execution_records = []

for i in range(10):
    print('execução número ' + str(i))

    # Fresh, unfitted estimators for each run. None of them receives a
    # random_state, so the stochastic ones differ between runs (and between
    # notebook executions).
    # NOTE(review): the SVC entry is labelled "RBF Kernel" but is configured
    # with kernel='sigmoid' — confirm which one is intended.
    models = {
        'Logistic Regression': LogisticRegression(max_iter=10000),
        'Support Vector Machine (RBF Kernel)': SVC(C=1,gamma=0.01,kernel='sigmoid',max_iter=50000,probability=True),
        'Decission Tree': DecisionTreeClassifier(),
        'Adaboost': AdaBoostClassifier(),
        'Random Forest': RandomForestClassifier(),
        'Gradient Boosting Classifier': GradientBoostingClassifier(),
        'KNN': KNeighborsClassifier(n_neighbors=3),
        'Gaussian NB': GaussianNB()
    }

    for name, model in models.items():
        model.fit(X_train,y_train)

    for name, model in models.items():
        # Predict once and share the result between all label-based metrics.
        y_pred = model.predict(X_test)
        # Probability of the positive class, used for ROC AUC.
        y_score = model.predict_proba(X_test)[:,1]
        execution_records.append({
            'model': name,
            'amount_events': '0',
            'execution': i,
            'Balanced Accuracy': balanced_accuracy_score(y_test,y_pred),
            'Precision': precision_score(y_test,y_pred),
            'Recall': recall_score(y_test,y_pred),
            'F1-Score': f1_score(y_test,y_pred),
            'auc': roc_auc_score(y_test,y_score)
        })

# All runs of all models in one table (one row per execution and model).
executions = pd.DataFrame(execution_records)

execução número 0




execução número 1




execução número 2




execução número 3




execução número 4




execução número 5




execução número 6




execução número 7




execução número 8




execução número 9




In [6]:
# Average every metric of each model over all recorded executions.
metric_columns = ['Balanced Accuracy', 'Precision', 'Recall', 'F1-Score', 'auc']

avg_scores_list = []

# `models` is the name -> estimator dict; the estimator object is stored next
# to its averaged metrics so later cells can pick a model by score.
for name, model in models.items():
    # Rows of this model only, restricted to the metric columns.
    model_metrics = executions.loc[executions['model'] == name, metric_columns]

    avg_scores_list.append({
        'model': name,
        'model_obj': model,
        'amount_events': '0',
        # Column-wise arithmetic mean; keys keep the order of metric_columns.
        **model_metrics.mean().to_dict(),
    })
avg_scores = pd.DataFrame(avg_scores_list)

In [7]:
# Rank the averaged results from highest to lowest AUC.
ordered_scores = avg_scores.sort_values('auc', ascending=False)

In [8]:
# Display the ranked table of averaged metrics.
ordered_scores

Unnamed: 0,model,model_obj,amount_events,Balanced Accuracy,Precision,Recall,F1-Score,auc
4,Random Forest,"(DecisionTreeClassifier(max_features='auto', r...",0,0.996376,0.996264,0.996756,0.99651,0.999907
6,KNN,KNeighborsClassifier(n_neighbors=3),0,0.993971,0.993191,0.995253,0.994221,0.997994
2,Decission Tree,DecisionTreeClassifier(),0,0.970101,0.969985,0.972445,0.971213,0.971861
5,Gradient Boosting Classifier,([DecisionTreeRegressor(criterion='friedman_ms...,0,0.749506,0.750263,0.775674,0.762757,0.839684
3,Adaboost,"(DecisionTreeClassifier(max_depth=1, random_st...",0,0.725083,0.727516,0.751941,0.739527,0.814447
0,Logistic Regression,LogisticRegression(max_iter=10000),0,0.721976,0.722815,0.753622,0.737897,0.81015
7,Gaussian NB,GaussianNB(),0,0.666179,0.631687,0.885735,0.737444,0.773858
1,Support Vector Machine (RBF Kernel),"SVC(C=1, gamma=0.01, kernel='sigmoid', max_ite...",0,0.652478,0.665127,0.6622,0.66366,0.709467


In [9]:
# Soft-voting ensemble built from the three best-ranked models.
from sklearn.ensemble import VotingClassifier

# Build the ensemble from the (name, estimator) pairs found in the first
# three rows of the ranking.
ensemble_classifier = VotingClassifier(
    estimators=[
        (ordered_scores.iloc[rank]['model'], ordered_scores.iloc[rank]['model_obj'])
        for rank in range(3)
    ],
    voting='soft',
)

# Train on the current training partition; fit returns the fitted estimator.
ensemble_classifier = ensemble_classifier.fit(X_train, y_train)

In [10]:
# Evaluate the fitted ensemble on the test partition.
# Predict once and reuse the result for every label-based metric instead of
# re-running the ensemble for each of them.
ensemble_pred = ensemble_classifier.predict(X_test)
# Probability of the positive class, used for ROC AUC.
ensemble_proba = ensemble_classifier.predict_proba(X_test)[:,1]

ensemble_scores_list = [{
    'model': 'Ensemble Classifier',
    'model_obj': ensemble_classifier,
    # Stored as the string '0', like the per-model score records, so the
    # column keeps a single type when the tables are concatenated.
    'amount_events': '0',
    'Balanced Accuracy': balanced_accuracy_score(y_test,ensemble_pred),
    'Precision': precision_score(y_test,ensemble_pred),
    'Recall': recall_score(y_test,ensemble_pred),
    'F1-Score': f1_score(y_test,ensemble_pred),
    'auc': roc_auc_score(y_test,ensemble_proba)
}]

In [11]:
# Wrap the ensemble record(s) in a DataFrame, then echo the raw record list.
ensemble_scores = pd.DataFrame(data=ensemble_scores_list)
ensemble_scores_list

[{'model': 'Ensemble Classifier',
  'model_obj': VotingClassifier(estimators=[('Random Forest', RandomForestClassifier()),
                               ('KNN', KNeighborsClassifier(n_neighbors=3)),
                               ('Decission Tree', DecisionTreeClassifier())],
                   voting='soft'),
  'amount_events': 0,
  'Balanced Accuracy': 0.9963232835894203,
  'Precision': 0.9953628336046569,
  'Recall': 0.9976266996291718,
  'F1-Score': 0.9964934808376136,
  'auc': 0.9998416634058891}]

In [12]:
# Display the ensemble's metrics as a table.
ensemble_scores

Unnamed: 0,model,model_obj,amount_events,Balanced Accuracy,Precision,Recall,F1-Score,auc
0,Ensemble Classifier,"VotingClassifier(estimators=[('Random Forest',...",0,0.996323,0.995363,0.997627,0.996493,0.999842


In [17]:
# Put the ensemble's scores and the per-model averaged scores in one table
# and pick the estimator with the highest AUC.
# The combined table gets its own name: `models` is the name -> estimator
# dict used by the averaging cell, and rebinding it to a DataFrame would make
# a re-run of that cell iterate over DataFrame columns instead of estimators.
# NOTE(review): the ensemble row comes from a single evaluation while the
# other rows are averages over several executions — confirm that comparing
# them directly is intended.
all_scores = pd.concat([ensemble_scores,avg_scores])
ordered_models = all_scores.sort_values(by='auc', ascending=False)
# iloc is positional, so the duplicated index labels produced by concat do
# not matter here.
best_model = ordered_models.iloc[0]['model_obj']
best_model

RandomForestClassifier()

In [1]:
import pickle

# Persist the selected model and the fitted scaler.
# Both objects are looked up here, and each one is serialised in memory,
# before its target file is opened: opening with "wb" truncates the file, so
# a failure after that point (for example a NameError when this cell is run
# on a fresh kernel) would replace a previously saved artifact with an empty
# file.
artifacts = {
    "../../../models/eventos-colunas/model-0.pkl": best_model,
    "../../../scalers/eventos-colunas/scaler-0.pkl": scaler,
}

for path, obj in artifacts.items():
    payload = pickle.dumps(obj)
    with open(path, "wb") as f:
        f.write(payload)

NameError: name 'best_model' is not defined