In [1]:
# Importação das bibliotecas necessárias
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.decomposition import PCA
from sklearn.metrics import accuracy_score, roc_auc_score, cohen_kappa_score
from sklearn.preprocessing import StandardScaler

# Fixando a seed para reprodutibilidade
np.random.seed(42)

In [2]:
# Função para calcular o R^2 para classificadores
def pseudo_r2(y_true, y_pred_proba):
    ll_null = np.sum(np.log(np.mean(y_true) * np.ones(len(y_true))))  # log-likelihood nula
    ll_model = np.sum(np.log(y_pred_proba))  # log-likelihood do modelo
    return 1 - (ll_model / ll_null)

In [3]:
# Função para avaliar o modelo
def evaluate_model(y_true, y_pred, y_pred_proba):
    accuracy = accuracy_score(y_true, y_pred)
    
    # AUC para multiclasse, especificando 'ovr' (one-vs-rest)
    try:
        auc_score = roc_auc_score(y_true, y_pred_proba, multi_class='ovr')
    except ValueError:
        auc_score = None  # Se o cálculo do AUC não for possível para algum caso específico
    
    cohen_k = cohen_kappa_score(y_true, y_pred)
    r2 = pseudo_r2(y_true, y_pred_proba)

    return accuracy, auc_score, cohen_k, r2

In [4]:
# Carregar os dados
df = pd.read_csv('../data/weaving_rejection_dataset_updated.csv')

In [5]:
# Separando as variáveis independentes (X) e a variável dependente (y)
X = df.drop('Rejection', axis=1)
y = df['Rejection']

In [6]:
# Dividindo os dados em treino e teste com seed fixa
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

In [7]:
# Escalando as variáveis
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [8]:
### 1. Modelo Decision Tree sem PCA
decision_tree = DecisionTreeClassifier(random_state=42)
decision_tree.fit(X_train_scaled, y_train)

DecisionTreeClassifier(random_state=42)

In [10]:
# Previsão
y_pred = decision_tree.predict(X_test_scaled)
y_pred_proba = decision_tree.predict_proba(X_test_scaled)  # Previsões de probabilidade para todas as classes

In [11]:
# Avaliação do modelo
accuracy, auc_score, cohen_k, r2 = evaluate_model(y_test, y_pred, y_pred_proba)

  ll_model = np.sum(np.log(y_pred_proba))  # log-likelihood do modelo


In [12]:
print(f"### Decision Tree sem PCA ###")
print(f"Accuracy: {accuracy:.4f}")
if auc_score is not None:
    print(f"AUC: {auc_score:.4f}")
else:
    print("AUC: N/A")
print(f"Cohen's Kappa: {cohen_k:.4f}")
print(f"Pseudo R²: {r2:.4f}")

### Decision Tree sem PCA ###
Accuracy: 0.8021
AUC: N/A
Cohen's Kappa: 0.7795
Pseudo R²: inf


In [13]:
### 2. Modelo Decision Tree com PCA
# Aplicar PCA
pca = PCA(n_components=0.95, random_state=42)  # Mantém 95% da variância
X_train_pca = pca.fit_transform(X_train_scaled)
X_test_pca = pca.transform(X_test_scaled)

In [14]:
# Treinar o modelo com PCA
decision_tree_pca = DecisionTreeClassifier(random_state=42)
decision_tree_pca.fit(X_train_pca, y_train)

DecisionTreeClassifier(random_state=42)

In [15]:
# Previsão
y_pred_pca = decision_tree_pca.predict(X_test_pca)
y_pred_proba_pca = decision_tree_pca.predict_proba(X_test_pca)

In [16]:
# Avaliação do modelo com PCA
accuracy_pca, auc_score_pca, cohen_k_pca, r2_pca = evaluate_model(y_test, y_pred_pca, y_pred_proba_pca)

  ll_model = np.sum(np.log(y_pred_proba))  # log-likelihood do modelo


In [17]:
print(f"\n### Decision Tree com PCA ###")
print(f"Accuracy: {accuracy_pca:.4f}")
if auc_score_pca is not None:
    print(f"AUC: {auc_score_pca:.4f}")
else:
    print("AUC: N/A")
print(f"Cohen's Kappa: {cohen_k_pca:.4f}")
print(f"Pseudo R²: {r2_pca:.4f}")


### Decision Tree com PCA ###
Accuracy: 0.7990
AUC: N/A
Cohen's Kappa: 0.7764
Pseudo R²: inf
