In [1]:
# Importação das bibliotecas necessárias
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, Binarizer
from sklearn.decomposition import PCA
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score, accuracy_score, roc_auc_score, cohen_kappa_score
from sklearn.ensemble import GradientBoostingRegressor

# Seed NumPy's global random number generator for reproducibility.
# The train/test split and the scikit-learn estimators used in this notebook
# are additionally given an explicit random_state of their own.
np.random.seed(42)

In [2]:
# Regression metrics plus classification-style metrics on thresholded targets
def evaluate_regression_model(y_true, y_pred, threshold=0.0):
    """Score regression predictions, then score them again as binary labels.

    The continuous targets and predictions are first evaluated with MSE, MAE
    and R². Both are then thresholded into 0/1 labels (1 where the value is
    strictly greater than ``threshold``) so that AUC, Cohen's Kappa and
    accuracy can be computed on the resulting labels.

    Parameters
    ----------
    y_true : array-like
        Ground-truth target values (pandas Series, NumPy array or a plain
        sequence).
    y_pred : array-like
        Predicted target values, same length as ``y_true``.
    threshold : float, default 0.0
        Cut-off used to binarize both inputs. The default reproduces the
        behaviour of scikit-learn's ``Binarizer()`` with its default
        threshold, which this function used previously.

    Returns
    -------
    tuple
        ``(mse, mae, r2, auc_score, cohen_k, accuracy)``. ``auc_score`` and
        ``cohen_k`` are ``None`` when the corresponding scikit-learn function
        raises ``ValueError`` for the binarized labels.
    """
    mse = mean_squared_error(y_true, y_pred)
    mae = mean_absolute_error(y_true, y_pred)
    r2 = r2_score(y_true, y_pred)

    # np.asarray accepts Series, ndarrays and plain sequences alike, so the
    # function no longer requires y_true to expose `.values` or y_pred to
    # expose `.reshape`.
    y_true_bin = (np.asarray(y_true).reshape(-1) > threshold).astype(int)
    y_pred_bin = (np.asarray(y_pred).reshape(-1) > threshold).astype(int)

    # NOTE(review): the score handed to roc_auc_score is already thresholded,
    # so this AUC reflects a single cut-off rather than how well the raw
    # predictions rank the samples. Confirm this is the intended metric.
    try:
        auc_score = roc_auc_score(y_true_bin, y_pred_bin)
    except ValueError:
        auc_score = None  # AUC could not be computed for these labels

    try:
        cohen_k = cohen_kappa_score(y_true_bin, y_pred_bin)
    except ValueError:
        cohen_k = None  # Cohen's Kappa could not be computed for these labels

    # Fraction of samples whose binarized prediction matches the binarized target
    accuracy = accuracy_score(y_true_bin, y_pred_bin)

    return mse, mae, r2, auc_score, cohen_k, accuracy

In [3]:
# Read the weaving rejection dataset (CSV, path relative to the notebook) into a DataFrame
df = pd.read_csv(filepath_or_buffer='../data/weaving_rejection_dataset_updated.csv')

In [4]:
# Target (y) is the 'Rejection' column; every other column is a feature (X)
y = df['Rejection']
X = df.drop(columns='Rejection')

In [5]:
# Hold out 30% of the rows for testing; the fixed random_state keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

In [6]:
# Standardize the features: the scaler is fitted on the training split only,
# then the same fitted transformation is applied to both splits
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [7]:
### 1. Gradient Boosting Regressor without PCA
# Train on the standardized training features
gbr = GradientBoostingRegressor(random_state=42)
gbr.fit(X=X_train_scaled, y=y_train)

GradientBoostingRegressor(random_state=42)

In [8]:
# Predict the target for the standardized test features
y_pred = gbr.predict(X=X_test_scaled)

In [9]:
# Evaluate the model without PCA on the held-out test split
(mse, mae, r2,
 auc_score, cohen_k, accuracy) = evaluate_regression_model(y_true=y_test, y_pred=y_pred)

In [11]:
# Report the metrics of the model trained without PCA.
# AUC and Cohen's Kappa may be None, in which case "N/A" is shown instead.
print("### Gradient Boosting Regressor sem PCA ###")
print(f"Mean Squared Error (MSE): {mse:.4f}")
print(f"Mean Absolute Error (MAE): {mae:.4f}")
print(f"R²: {r2:.4f}")
print("AUC: N/A" if auc_score is None else f"AUC: {auc_score:.4f}")
print("Cohen's Kappa: N/A" if cohen_k is None else f"Cohen's Kappa: {cohen_k:.4f}")
print(f"Accuracy: {accuracy:.4f}")

### Gradient Boosting Regressor sem PCA ###
Mean Squared Error (MSE): 2358.7944
Mean Absolute Error (MAE): 23.1538
R²: 0.9215
AUC: 0.5852
Cohen's Kappa: 0.2184
Accuracy: 0.7385


In [12]:
### 2. Gradient Boosting Regressor with PCA
# Reduce the standardized features with PCA, keeping enough components to
# retain 95% of the variance; the projection is fitted on the training split
# and reused for the test split
pca = PCA(random_state=42, n_components=0.95)
X_train_pca = pca.fit_transform(X=X_train_scaled)
X_test_pca = pca.transform(X=X_test_scaled)

In [13]:
# Train a second regressor on the PCA-reduced training features
gbr_pca = GradientBoostingRegressor(random_state=42)
gbr_pca.fit(X=X_train_pca, y=y_train)

GradientBoostingRegressor(random_state=42)

In [14]:
# Predict the target for the PCA-reduced test features
y_pred_pca = gbr_pca.predict(X=X_test_pca)

In [15]:
# Evaluate the PCA-based model on the held-out test split
(mse_pca, mae_pca, r2_pca,
 auc_score_pca, cohen_k_pca, accuracy_pca) = evaluate_regression_model(y_true=y_test, y_pred=y_pred_pca)

In [16]:
# Report the metrics of the model trained with PCA.
# AUC and Cohen's Kappa may be None, in which case "N/A" is shown instead.
print("\n### Gradient Boosting Regressor com PCA ###")
print(f"Mean Squared Error (MSE): {mse_pca:.4f}")
print(f"Mean Absolute Error (MAE): {mae_pca:.4f}")
print(f"R²: {r2_pca:.4f}")
print("AUC: N/A" if auc_score_pca is None else f"AUC: {auc_score_pca:.4f}")
print("Cohen's Kappa: N/A" if cohen_k_pca is None else f"Cohen's Kappa: {cohen_k_pca:.4f}")
print(f"Accuracy: {accuracy_pca:.4f}")


### Gradient Boosting Regressor com PCA ###
Mean Squared Error (MSE): 2790.6743
Mean Absolute Error (MAE): 26.8845
R²: 0.9071
AUC: 0.5260
Cohen's Kappa: 0.0699
Accuracy: 0.7035
