In [296]:
import pandas as pd
import numpy as np
import joblib
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

In [297]:
dataset = pd.read_csv('data/dataset.csv')

# encode x-> 2, o-> 1, b-> 0
pd.set_option('future.no_silent_downcasting', True)
encoded_dataset = dataset.replace({'x': 2, 'o': 1, 'b': 0})

# encode Xganha->1, 0ganha->2, Temjogo->3, Empate->4
encoded_dataset = encoded_dataset.replace({'Xganha': 1, 'Oganha': 2, 'Temjogo': 3, 'Empate': 4}) 

print(encoded_dataset.head(10))

# Separa em treino e teste

# Separa as features (X) e o rótulo (y)
X = encoded_dataset.drop('result', axis=1)
y = encoded_dataset['result'] 

X = X.astype(int)
y = y.astype(int)

# 80% treino e 20% temporário
X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.2, random_state=42)

# Divide o conjunto temporário em 50% validação e 50% teste
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)

# Total 80% treino 10% validação e 10% teste

# Exibe o tamanho dos conjuntos
print(f"Tamanho do conjunto de treino: {X_train.shape} - 80%")
print(f"Tamanho do conjunto de validação: {X_val.shape} - 10%")
print(f"Tamanho do conjunto de teste: {X_test.shape} - 10%")

   1  2  3  4  5  6  7  8  9 result
0  0  0  1  1  2  1  2  2  2      1
1  0  0  1  2  1  1  2  2  0      3
2  0  2  2  2  1  1  1  2  1      3
3  2  0  2  1  0  1  0  2  1      3
4  2  1  0  2  0  2  0  0  1      3
5  0  0  1  0  1  2  0  2  0      3
6  0  2  2  0  1  1  1  2  0      3
7  2  0  1  0  0  1  0  2  0      3
8  0  0  2  1  0  1  2  0  0      3
9  0  2  0  1  1  0  2  0  0      3
Tamanho do conjunto de treino: (3898, 9) - 80%
Tamanho do conjunto de validação: (487, 9) - 10%
Tamanho do conjunto de teste: (488, 9) - 10%


In [298]:
# Exibir a contagem de valores para o conjunto de treino e os rótulos

print("\nDistribuição dos rótulos (y_train):")
print("\n1: Xganha, 2: Oganha, 3: Temjogo, 4: Empate")
print(y_train.value_counts())


Distribuição dos rótulos (y_train):

1: Xganha, 2: Oganha, 3: Temjogo, 4: Empate
result
3    2908
1     703
2     276
4      11
Name: count, dtype: int64


In [299]:
# Exibir a contagem de valores para o conjunto de validacao e os rótulos

print("\nDistribuição dos rótulos (y_val):")
print("\n1: Xganha, 2: Oganha, 3: Temjogo, 4: Empate")
print(y_val.value_counts())


Distribuição dos rótulos (y_val):

1: Xganha, 2: Oganha, 3: Temjogo, 4: Empate
result
3    360
1     94
2     32
4      1
Name: count, dtype: int64


In [300]:
# Exibir a contagem de valores para o conjunto de teste e os rótulos
print("\nDistribuição dos rótulos (y_test):")
print("\n1: Xganha, 2: Oganha, 3: Temjogo, 4: Empate")
print(y_test.value_counts())


Distribuição dos rótulos (y_test):

1: Xganha, 2: Oganha, 3: Temjogo, 4: Empate
result
3    383
1     83
2     22
Name: count, dtype: int64


In [301]:
from sklearn.model_selection import cross_val_score
from sklearn.neighbors import KNeighborsClassifier

# calcula a acurácia para cada valor de k
k_values = range(1, 11) 
accuracy_scores = []

for k in k_values:
    knn = KNeighborsClassifier(n_neighbors=k)
    scores = cross_val_score(knn, X_train, y_train, cv=5)  # 5-fold cross-validation
    mean_score = scores.mean()
    accuracy_scores.append(mean_score)
    print(f"k={k}, Accuracy={mean_score:.4f}")

# acha melhor k
best_k = k_values[np.argmax(accuracy_scores)]
best_accuracy = max(accuracy_scores)

print(f"\nBest k value: {best_k}")
print(f"Best accuracy: {best_accuracy:.4f}")


k=1, Accuracy=0.8704
k=2, Accuracy=0.8330
k=3, Accuracy=0.8923
k=4, Accuracy=0.8920
k=5, Accuracy=0.8974
k=6, Accuracy=0.8992
k=7, Accuracy=0.8969
k=8, Accuracy=0.9012
k=9, Accuracy=0.8915
k=10, Accuracy=0.8966

Best k value: 8
Best accuracy: 0.9012


In [302]:
from sklearn.metrics import accuracy_score

# Treina o modelo
knn = KNeighborsClassifier(n_neighbors=8)
knn.fit(X_train, y_train)

# Fazer previsões no conjunto de validação
y_pred = knn.predict(X_val)

# Exibir o relatório de classificação
report = classification_report(y_val, y_pred)
print(report)

val_predictions = knn.predict(X_val)
val_accuracy = accuracy_score(y_val, val_predictions)
print(f"Validation Accuracy: {val_accuracy:.4f}")

test_predictions = knn.predict(X_test)
test_accuracy = accuracy_score(y_test, test_predictions)
print(f"Test Accuracy: {test_accuracy:.4f}")

# Salvando o modelo e a acurácia juntos
model_data = {
    'model': knn,
    'accuracy': test_accuracy
}

joblib.dump(model_data, 'models/knn_model.joblib')


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


              precision    recall  f1-score   support

           1       0.92      0.89      0.91        94
           2       0.78      0.22      0.34        32
           3       0.92      0.99      0.95       360
           4       0.00      0.00      0.00         1

    accuracy                           0.92       487
   macro avg       0.66      0.53      0.55       487
weighted avg       0.91      0.92      0.90       487

Validation Accuracy: 0.9179
Test Accuracy: 0.9303


['models/knn_model.joblib']

In [303]:
from sklearn.neural_network import MLPClassifier

# Treina o modelo
mlp = MLPClassifier(solver='lbfgs', hidden_layer_sizes=(100,), learning_rate_init=0.001, momentum=0.01)
mlp.fit(X_train, y_train)

# Fazer previsões no conjunto de validação
y_pred = mlp.predict(X_val)

# Exibir o relatório de classificação
report = classification_report(y_val, y_pred)
print(report)

val_predictions = mlp.predict(X_val)
val_accuracy = accuracy_score(y_val, val_predictions)
print(f"Validation Accuracy: {val_accuracy:.4f}")

test_predictions = mlp.predict(X_test)
test_accuracy = accuracy_score(y_test, test_predictions)
print(f"Test Accuracy: {test_accuracy:.4f}")

# Salvando o modelo e a acurácia juntos
model_data = {
    'model': mlp,
    'accuracy': test_accuracy
}

# Salvando o modelo
joblib.dump(model_data, 'models/mlp_model.joblib')

              precision    recall  f1-score   support

           1       0.95      0.95      0.95        94
           2       0.67      0.50      0.57        32
           3       0.95      0.97      0.96       360
           4       1.00      1.00      1.00         1

    accuracy                           0.93       487
   macro avg       0.89      0.85      0.87       487
weighted avg       0.93      0.93      0.93       487

Validation Accuracy: 0.9343
Test Accuracy: 0.9426


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


['models/mlp_model.joblib']

In [304]:
from sklearn.tree import DecisionTreeClassifier

# Treina o modelo
dtree = DecisionTreeClassifier(random_state=42)
dtree.fit(X_train, y_train)

# Fazer previsões no conjunto de validação
y_pred = dtree.predict(X_val)

# Exibir o relatório de classificação
report = classification_report(y_val, y_pred)
print(report)

val_predictions = dtree.predict(X_val)
val_accuracy = accuracy_score(y_val, val_predictions)
print(f"Validation Accuracy: {val_accuracy:.4f}")

test_predictions = dtree.predict(X_test)
test_accuracy = accuracy_score(y_test, test_predictions)
print(f"Test Accuracy: {test_accuracy:.4f}")

# Salvando o modelo e a acurácia juntos
model_data = {
    'model': dtree,
    'accuracy': test_accuracy
}

# Salvando o modelo
joblib.dump(model_data, 'models/decision_tree_model.joblib')

              precision    recall  f1-score   support

           1       0.71      0.71      0.71        94
           2       0.17      0.16      0.16        32
           3       0.88      0.89      0.89       360
           4       0.00      0.00      0.00         1

    accuracy                           0.80       487
   macro avg       0.44      0.44      0.44       487
weighted avg       0.80      0.80      0.80       487

Validation Accuracy: 0.8049
Test Accuracy: 0.8381


['models/decision_tree_model.joblib']

In [305]:
from sklearn.ensemble import RandomForestClassifier
rf = RandomForestClassifier(n_estimators=100, random_state=42)  # 100 árvores
rf.fit(X_train, y_train)

# Fazer previsões no conjunto de validação
y_pred = rf.predict(X_val)

# Exibir o relatório de classificação
report = classification_report(y_val, y_pred)
print(report)

val_predictions = rf.predict(X_val)
val_accuracy = accuracy_score(y_val, val_predictions)
print(f"Validation Accuracy: {val_accuracy:.4f}")

test_predictions = rf.predict(X_test)
test_accuracy = accuracy_score(y_test, test_predictions)
print(f"Test Accuracy: {test_accuracy:.4f}")

# Salvando o modelo e a acurácia juntos
model_data = {
    'model': rf,
    'accuracy': test_accuracy
}

# Salvando o modelo
joblib.dump(model_data, 'models/random_forest_model.joblib')

              precision    recall  f1-score   support

           1       0.92      0.86      0.89        94
           2       0.86      0.19      0.31        32
           3       0.92      1.00      0.96       360
           4       0.00      0.00      0.00         1

    accuracy                           0.92       487
   macro avg       0.67      0.51      0.54       487
weighted avg       0.91      0.92      0.90       487

Validation Accuracy: 0.9179
Test Accuracy: 0.9488


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


['models/random_forest_model.joblib']