# 1 - Import das bibliotecas

In [2]:
import numpy              as np
import pandas             as pd
from sklearn.neighbors    import KNeighborsClassifier
from sklearn.tree         import DecisionTreeClassifier
from sklearn.ensemble     import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics      import accuracy_score, precision_score, recall_score, f1_score

# 2 - Import das Bases

In [3]:
# Carregar os conjuntos de dados
X_training = pd.read_csv('X_Training.csv')
y_training = pd.read_csv('y_training.csv')
X_validation = pd.read_csv('X_validation.csv')
y_validation = pd.read_csv('y_validation.csv')
X_test = pd.read_csv('X_test.csv')
y_test = pd.read_csv('y_test.csv')

# 3 - Classificação - Base Teste / Base Treinamento / Base Validação

In [4]:
# Hiperparâmetros com valores especificados
n_estimators_rf = 100
max_depth_rf = 10
n_neighbors_knn = 5
C_lr = 1.0
solver_lr = 'lbfgs'
max_iter_lr = 100
max_depth_dt = 5

# Inicializar modelos
models = [
    ("K-Neighbors Classifier", KNeighborsClassifier(n_neighbors=n_neighbors_knn)),
    ("Decision Tree Classifier", DecisionTreeClassifier(max_depth=max_depth_dt)),
    ("Random Forest Classifier", RandomForestClassifier(n_estimators=n_estimators_rf, max_depth=max_depth_rf)),
    ("Logistic Regression", LogisticRegression(C=C_lr, solver=solver_lr, max_iter=max_iter_lr))
]
 
# Resultados em um DataFrame
results = pd.DataFrame(columns=["Model", "Metric", "Validation Score", "Test Score"])

for model_name, model in models:
    # Treinamento
    model.fit(X_training, y_training)
    
    # Validação
    y_validation_pred = model.predict(X_validation)
    accuracy_validation = accuracy_score(y_validation, y_validation_pred)
    precision_validation = precision_score(y_validation, y_validation_pred)
    recall_validation = recall_score(y_validation, y_validation_pred)
    f1_validation = f1_score(y_validation, y_validation_pred)
    
    # Teste
    y_test_pred = model.predict(X_test)
    accuracy_test = accuracy_score(y_test, y_test_pred)
    precision_test = precision_score(y_test, y_test_pred)
    recall_test = recall_score(y_test, y_test_pred)
    f1_test = f1_score(y_test, y_test_pred)
    
    # Adicionar resultados ao DataFrame
    results = pd.concat([results,
                        pd.DataFrame([[model_name, "Accuracy", accuracy_validation, accuracy_test],
                                      [model_name, "Precision", precision_validation, precision_test],
                                      [model_name, "Recall", recall_validation, recall_test],
                                      [model_name, "F1-Score", f1_validation, f1_test]],
                                     columns=results.columns)],
                        ignore_index=True)

# Exibir os resultados
results


  return self._fit(X, y)
  model.fit(X_training, y_training)
  y = column_or_1d(y, warn=True)
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Unnamed: 0,Model,Metric,Validation Score,Test Score
0,K-Neighbors Classifier,Accuracy,0.675665,0.670529
1,K-Neighbors Classifier,Precision,0.631775,0.630816
2,K-Neighbors Classifier,Recall,0.603163,0.601232
3,K-Neighbors Classifier,F1-Score,0.617138,0.615669
4,Decision Tree Classifier,Accuracy,0.906689,0.906191
5,Decision Tree Classifier,Precision,0.906531,0.907367
6,Decision Tree Classifier,Recall,0.874898,0.875671
7,Decision Tree Classifier,F1-Score,0.890434,0.891237
8,Random Forest Classifier,Accuracy,0.951285,0.950913
9,Random Forest Classifier,Precision,0.954114,0.952402


In [5]:
# Salvar o DataFrame results_regression em um arquivo CSV
results.to_csv("resultados_classificacao.csv", index=False)
