In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# variável usada para remover warnings do jupyter notebook
import warnings
warnings.filterwarnings('ignore')

# Load the CSV into a DataFrame and prepare it in a single chain:
#   1. drop the identifier columns ('id' and the exported index 'Unnamed: 0'),
#   2. encode the target as 1 when the winner is 'radiant' and 0 otherwise,
#   3. cast every column to int.
# NOTE: de-duplication (dados.drop_duplicates()) is currently left disabled.
dados = (
    pd.read_csv('dadosbinariosliteratura.csv')
    .drop(columns=['id', 'Unnamed: 0'])
    .assign(winner=lambda quadro: quadro['winner'].eq('radiant').astype(int))
    .astype(int)
)
dados

Unnamed: 0,winner,duration,hero1,hero2,hero3,hero4,hero5,hero6,hero7,hero8,...,hero120,hero121,hero123,hero126,hero128,hero129,hero135,hero136,hero137,hero138
0,1,2309,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,1228,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,1051,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,2518,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,1,1563,0,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1382,1,1839,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1383,1,2340,0,0,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0
1384,1,1316,0,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1385,0,1153,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [2]:
import mlflow
import mlflow.sklearn
import os

# helper that configures the MLflow connection and selects the experiment
def criarExperimento():
    """Configure the MLflow tracking connection and activate the experiment.

    The tracking URI and username fall back to this project's DagsHub values
    when the corresponding environment variables are not already set. The
    password/token is never stored in the notebook: it must be supplied through
    the MLFLOW_TRACKING_PASSWORD environment variable before this is called.

    After selecting the 'Resultados Literatura' experiment, the descriptive
    tags are attached to it.

    Raises:
        RuntimeError: if MLFLOW_TRACKING_PASSWORD is missing or empty.
    """
    # SECURITY: an access token used to be hardcoded on this line. Because it
    # was published together with the notebook it must be revoked on DagsHub
    # and a fresh one exported in the environment instead.
    os.environ.setdefault('MLFLOW_TRACKING_URI', "https://dagshub.com/stardotwav/Dota2Predictor.mlflow")
    os.environ.setdefault('MLFLOW_TRACKING_USERNAME', "stardotwav")
    if not os.environ.get('MLFLOW_TRACKING_PASSWORD'):
        raise RuntimeError(
            "MLFLOW_TRACKING_PASSWORD is not set; export your tracking token "
            "in the environment before calling criarExperimento()."
        )

    mlflow.set_tracking_uri(os.environ['MLFLOW_TRACKING_URI'])
    mlflow.set_experiment(experiment_name='Resultados Literatura')

    tags = {
            "Projeto": "Projeto de Engenharia de Aprendizado de Máquina",
            "team": "Estela",
            "dataset": "dota2teamsprofessionals"
           }
    # The tags were previously built but never sent anywhere; attach them to
    # the experiment that was just activated.
    mlflow.set_experiment_tags(tags)

In [3]:
# helper that stores a model's metrics and the fitted model in MLflow
def modelosMLFlow(acuracia, precisao, modelo, nomeModelo):
    """Log one experiment run to MLflow.

    Args:
        acuracia: accuracy value, logged as the metric "Acurácia".
        precisao: precision value, logged as the metric "Precisão".
        modelo: fitted scikit-learn estimator, logged under the artifact path "Modelo".
        nomeModelo: name given to the MLflow run.
    """
    # The context manager ends the run when the block exits, so an explicit
    # mlflow.end_run() inside it is redundant and has been removed.
    with mlflow.start_run(run_name=nomeModelo):
        # metrics
        mlflow.log_metric("Acurácia", acuracia)
        mlflow.log_metric("Precisão", precisao)

        # model artifact
        mlflow.sklearn.log_model(modelo, "Modelo")

In [4]:
# import utilizado para realizar a separação do treino e teste
from sklearn.model_selection import train_test_split

# helper that splits the dataset into train and test partitions
def separacaoModelo(dataset, target, test_size=0.2, random_state=42):
    """Split a DataFrame into train/test features and labels.

    Args:
        dataset: DataFrame holding both the feature columns and the target column.
        target: name of the target column; it is removed from the features.
        test_size: share of the rows held out for testing (default 0.2,
            the value that was previously hard-coded).
        random_state: seed passed to train_test_split (default 42, the
            value that was previously hard-coded).

    Returns:
        Tuple (xTrain, xTest, yTrain, yTest).
    """
    X = dataset.drop(target, axis=1)
    Y = dataset[target]
    xTrain, xTest, yTrain, yTest = train_test_split(
        X, Y, test_size=test_size, random_state=random_state
    )

    return xTrain, xTest, yTrain, yTest

In [5]:
# build the train/test partitions, using 'winner' as the target column
xTrain, xTest, yTrain, yTest = separacaoModelo(dataset=dados, target='winner')

In [6]:
# configure the MLflow connection and activate the experiment
criarExperimento()

In [16]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score

# Naive Bayes experiment: fit on the training split, predict the test split
naive = GaussianNB().fit(xTrain, yTrain)
pred = naive.predict(xTest)

# score the predictions against the held-out labels
acuracia = accuracy_score(y_true=yTest, y_pred=pred)
precision = precision_score(y_true=yTest, y_pred=pred)
print(f'Acuracia: {acuracia} Precisão: {precision}')

# record the metrics and the fitted model as an MLflow run
modelosMLFlow(
    acuracia=acuracia,
    precisao=precision,
    modelo=naive,
    nomeModelo='Naive Bayes Abordagem 1',
)

Acuracia: 0.5719424460431655 Precisão: 0.684


In [8]:
# KNN experiment (100 neighbours): fit on the training split, predict the test split
knn = KNeighborsClassifier(n_neighbors=100).fit(xTrain, yTrain)
pred = knn.predict(xTest)

# score the predictions against the held-out labels
acuracia = accuracy_score(y_true=yTest, y_pred=pred)
precision = precision_score(y_true=yTest, y_pred=pred)
print(f'Acuracia: {acuracia} Precisão: {precision}')

# record the metrics and the fitted model as an MLflow run
modelosMLFlow(
    acuracia=acuracia,
    precisao=precision,
    modelo=knn,
    nomeModelo='KNN Abordagem 1',
)

Acuracia: 0.460431654676259 Precisão: 0.4


In [17]:
# decision tree experiment: fit on the training split, predict the test split
dt = DecisionTreeClassifier(min_impurity_decrease=0.1, random_state=42).fit(xTrain, yTrain)
pred = dt.predict(xTest)

# score the predictions against the held-out labels
acuracia = accuracy_score(y_true=yTest, y_pred=pred)
precision = precision_score(y_true=yTest, y_pred=pred)
print(f'Acuracia: {acuracia} Precisão: {precision}')

# record the metrics and the fitted model as an MLflow run
modelosMLFlow(
    acuracia=acuracia,
    precisao=precision,
    modelo=dt,
    nomeModelo='Decision Tree Abordagem 1',
)

Acuracia: 0.5323741007194245 Precisão: 0.5678


In [10]:
# Drop the 'duration' column before the second round of experiments (the runs
# named 'Abordagem 2'). errors='ignore' keeps this cell idempotent: `dados` is
# overwritten in place, so re-running the cell would otherwise raise KeyError
# because the column is already gone.
dados = dados.drop(columns=['duration'], errors='ignore')
dados

Unnamed: 0,winner,hero1,hero2,hero3,hero4,hero5,hero6,hero7,hero8,hero9,...,hero120,hero121,hero123,hero126,hero128,hero129,hero135,hero136,hero137,hero138
0,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,1,0,0,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1382,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1383,1,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
1384,1,0,0,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1385,0,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [11]:
# rebuild the train/test partitions from the updated `dados`, target column 'winner'
xTrain, xTest, yTrain, yTest = separacaoModelo(dataset=dados, target='winner')

In [19]:
# Naive Bayes experiment: fit on the training split, predict the test split
naive = GaussianNB().fit(xTrain, yTrain)
pred = naive.predict(xTest)

# score the predictions against the held-out labels
acuracia = accuracy_score(y_true=yTest, y_pred=pred)
precision = precision_score(y_true=yTest, y_pred=pred)
print(f'Acuracia: {acuracia} Precisão: {precision}')

# record the metrics and the fitted model as an MLflow run
modelosMLFlow(
    acuracia=acuracia,
    precisao=precision,
    modelo=naive,
    nomeModelo='Naive Bayes Abordagem 2',
)

Acuracia: 0.5719424460431655 Precisão: 0.6042


In [13]:
# KNN experiment (100 neighbours): fit on the training split, predict the test split
knn = KNeighborsClassifier(n_neighbors=100).fit(xTrain, yTrain)
pred = knn.predict(xTest)

# score the predictions against the held-out labels
acuracia = accuracy_score(y_true=yTest, y_pred=pred)
precision = precision_score(y_true=yTest, y_pred=pred)
print(f'Acuracia: {acuracia} Precisão: {precision}')

# record the metrics and the fitted model as an MLflow run
modelosMLFlow(
    acuracia=acuracia,
    precisao=precision,
    modelo=knn,
    nomeModelo='KNN Abordagem 2',
)

Acuracia: 0.5431654676258992 Precisão: 0.5099337748344371


In [18]:
# decision tree experiment: fit on the training split, predict the test split
dt = DecisionTreeClassifier(min_impurity_decrease=0.1, random_state=42).fit(xTrain, yTrain)
pred = dt.predict(xTest)

# score the predictions against the held-out labels
acuracia = accuracy_score(y_true=yTest, y_pred=pred)
precision = precision_score(y_true=yTest, y_pred=pred)
print(f'Acuracia: {acuracia} Precisão: {precision}')

# record the metrics and the fitted model as an MLflow run
modelosMLFlow(
    acuracia=acuracia,
    precisao=precision,
    modelo=dt,
    nomeModelo='Decision Tree Abordagem 2',
)

Acuracia: 0.5323741007194245 Precisão: 0.5678
