In [2]:
# 1. Importação das bibliotecas necessárias
# Bibliotecas gerais
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Machine Learning
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.tree import DecisionTreeClassifier, plot_tree
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Deep Learning
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Input
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping

In [3]:
def carregar_e_preparar_dados(caminho_csv, coluna_target, test_size=0.2, random_state=42, estratificar=False):
    """
    Load a CSV dataset and split it into train and test sets.

    Args:
    - caminho_csv: path of the CSV file, forwarded to ``pd.read_csv``.
    - coluna_target: name of the column that holds the label.
    - test_size: test-set size forwarded to ``train_test_split``
      (default 0.2, the value that used to be hard-coded).
    - random_state: seed forwarded to ``train_test_split``
      (default 42, the value that used to be hard-coded).
    - estratificar: when True the split is stratified on the label column;
      the default False keeps the original, non-stratified split.

    Returns:
    - X_train, X_test, y_train, y_test: train and test partitions of the
      features (every column except ``coluna_target``) and of the label.

    Raises:
    - KeyError: if ``coluna_target`` is not a column of the loaded CSV.
    """
    df = pd.read_csv(caminho_csv)

    # Fail early with a message that names the file and the columns found,
    # instead of the terser KeyError raised by DataFrame.drop.
    if coluna_target not in df.columns:
        raise KeyError(
            f"Coluna '{coluna_target}' não encontrada em {caminho_csv}; "
            f"colunas disponíveis: {list(df.columns)}"
        )

    # Features are all remaining columns; the label is the target column.
    X = df.drop(columns=[coluna_target])
    y = df[coluna_target]

    X_train, X_test, y_train, y_test = train_test_split(
        X,
        y,
        test_size=test_size,
        random_state=random_state,
        stratify=y if estratificar else None,
    )

    return X_train, X_test, y_train, y_test


In [None]:
def modelo_arvore_decisao(X_train, X_test, y_train, y_test):
    """
    Fit a decision tree pruned with Minimal Cost-Complexity Pruning.

    The candidate ``ccp_alpha`` values come from the pruning path computed on
    the training data; the best one is chosen by a 7-fold ``GridSearchCV``.
    The function prints the chosen alpha, the in-sample error (Ein), the
    out-of-sample error (Eout) and a classification report for the test set,
    then plots the selected tree.

    Args:
    - X_train, y_train: training features and labels.
    - X_test, y_test: test features and labels, used only for evaluation.

    Returns:
    - The best estimator found by the grid search.
    """

    # Pruning path of an unpruned tree gives the effective alphas to try.
    clf = DecisionTreeClassifier(random_state=0)
    path = clf.cost_complexity_pruning_path(X_train, y_train)

    # Round-off can leave an alpha marginally below zero, which the tree
    # rejects as an invalid ccp_alpha; clamp at 0 and drop duplicates so the
    # grid search does not refit identical candidates.
    ccp_alphas = sorted({max(float(alpha), 0.0) for alpha in path.ccp_alphas})

    # Grid search over the candidate alphas.
    param_grid = {'ccp_alpha': ccp_alphas}
    grid_search = GridSearchCV(DecisionTreeClassifier(random_state=0), param_grid=param_grid, cv=7, n_jobs=-1, verbose=0)
    grid_search.fit(X_train, y_train)

    best_clf = grid_search.best_estimator_

    # Evaluation: error = 1 - accuracy on each partition.
    y_pred_train = best_clf.predict(X_train)
    y_pred_test = best_clf.predict(X_test)

    Ein = 1 - accuracy_score(y_train, y_pred_train)
    Eout = 1 - accuracy_score(y_test, y_pred_test)

    print("Árvore de Decisão")
    print(f"Melhor alpha: {grid_search.best_params_['ccp_alpha']}")
    print(f"Ein: {Ein:.4f}")
    print(f"Eout: {Eout:.4f}")
    print("\nRelatório de Classificação:")
    print(classification_report(y_test, y_pred_test))

    # plot_tree wants plain lists of strings: a pandas Index is not accepted
    # by every scikit-learn release, and a NumPy X_train has no column names.
    colunas = getattr(X_train, "columns", None)
    feature_names = [str(c) for c in colunas] if colunas is not None else None
    # classes_ is in the order the fitted tree uses for its outputs.
    class_names = [str(c) for c in best_clf.classes_]

    plt.figure(figsize=(20,10))
    plot_tree(best_clf, filled=True, feature_names=feature_names, class_names=class_names)
    plt.show()

    return best_clf

In [None]:
def modelo_svm(X_train, X_test, y_train, y_test):
    """
    Fit an RBF-kernel SVM, tuning C and gamma with a 5-fold grid search.

    Prints the selected hyper-parameters, the in-sample error (Ein), the
    out-of-sample error (Eout), a classification report for the test set and
    the total number of support vectors of the selected model.

    Args:
    - X_train, y_train: training features and labels.
    - X_test, y_test: test features and labels, used only for evaluation.

    Returns:
    - The best estimator found by the grid search.
    """
    # Candidate values for the two tuned hyper-parameters.
    valores_c = [1, 5, 10, 50]
    valores_gamma = [0.0001, 0.0005, 0.001, 0.005, 0.01, 0.05, 0.1, 0.5]

    busca = GridSearchCV(
        SVC(kernel='rbf'),
        param_grid={'C': valores_c, 'gamma': valores_gamma},
        cv=5,
        n_jobs=-1,
        verbose=0,
    )
    busca.fit(X_train, y_train)
    melhor_svm = busca.best_estimator_

    # Error on each partition is 1 - accuracy.
    pred_treino = melhor_svm.predict(X_train)
    pred_teste = melhor_svm.predict(X_test)
    erro_treino = 1 - accuracy_score(y_train, pred_treino)
    erro_teste = 1 - accuracy_score(y_test, pred_teste)

    # n_support_ holds one count per class; the total is their sum.
    linhas = (
        "SVM",
        f"Melhores parâmetros: {busca.best_params_}",
        f"Ein: {erro_treino:.4f}",
        f"Eout: {erro_teste:.4f}",
        "\nRelatório de Classificação:",
        classification_report(y_test, pred_teste),
        f"Número total de vetores de suporte: {sum(melhor_svm.n_support_)}",
    )
    for linha in linhas:
        print(linha)

    return melhor_svm

In [None]:
def modelo_rede_neural(X_train, X_test, y_train, y_test, input_dim, output_dim):
    """
    Train a small fully connected neural-network classifier.

    Architecture: Input(input_dim) -> Dense(16, relu) -> Dense(8, relu) ->
    Dense(output_dim, softmax), trained with Adam (learning rate 0.001) and
    ``sparse_categorical_crossentropy``. Because of that loss, the labels are
    expected to be integer class indices rather than one-hot vectors.

    Training runs for at most 100 epochs with batch size 32, holding out 20%
    of the training data for validation; early stopping watches ``val_loss``
    with a patience of 10 epochs and restores the best weights.

    The function prints the in-sample error (Ein) and out-of-sample error
    (Eout) and plots the training and validation loss per epoch.

    Args:
    - X_train, y_train: training features and labels.
    - X_test, y_test: test features and labels, used only for evaluation.
    - input_dim: number of input features.
    - output_dim: number of units of the softmax output layer (classes).

    Returns:
    - The trained Keras model.
    """

    # Declare the input shape with an explicit Input layer; passing
    # `input_dim` to the first Dense layer is the legacy form that newer
    # Keras releases warn about.
    model = Sequential([
        Input(shape=(input_dim,)),
        Dense(16, activation='relu'),
        Dense(8, activation='relu'),
        Dense(output_dim, activation='softmax'),  # softmax for multi-class
    ])

    model.compile(optimizer=Adam(learning_rate=0.001), loss='sparse_categorical_crossentropy', metrics=['accuracy'])

    # Stop once validation loss stops improving and keep the best weights.
    early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

    history = model.fit(
        X_train, y_train,
        validation_split=0.2,
        epochs=100,
        batch_size=32,
        callbacks=[early_stopping],
        verbose=0
    )

    # evaluate() returns [loss, accuracy]; error = 1 - accuracy.
    train_loss, train_acc = model.evaluate(X_train, y_train, verbose=0)
    test_loss, test_acc = model.evaluate(X_test, y_test, verbose=0)

    Ein = 1 - train_acc
    Eout = 1 - test_acc

    print("Rede Neural")
    print(f"Ein: {Ein:.4f}")
    print(f"Eout: {Eout:.4f}")

    # Learning curves: training vs. validation loss per epoch.
    plt.figure(figsize=(8,6))
    plt.plot(history.history['loss'], label='Loss treino')
    plt.plot(history.history['val_loss'], label='Loss validação')
    plt.xlabel('Épocas')
    plt.ylabel('Loss')
    plt.legend()
    plt.title('Loss x Épocas')
    plt.show()

    return model