In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, cohen_kappa_score, classification_report, confusion_matrix, roc_auc_score, r2_score

# Fix the global NumPy seed so the stochastic steps below can be reproduced.
seed = 42
np.random.seed(seed=seed)


In [2]:
# Load the dataset from its relative path into a DataFrame.
df = pd.read_csv(
    filepath_or_buffer='../data/weaving_rejection_dataset_updated.csv',
)

In [3]:
# Separate the target column from the remaining columns, which act as features.
y = df['Rejection']                 # target
X = df.drop(columns='Rejection')    # features: every column except the target

In [4]:
# Hold out 20% of the rows as a test set; the fixed seed keeps the split stable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=seed,
)

In [5]:
# Scale the features: the scaler is fitted on the training split only and the
# same fitted transform is then applied to both splits.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [6]:
# Helper that scores a fitted classifier on held-out data
def evaluate_model(model, X_test, y_test):
    """Print and return evaluation metrics for a fitted classifier.

    Accuracy, Cohen's kappa, the classification report and the confusion
    matrix are always computed from ``model.predict``. AUC and R^2 are
    computed from the second column of ``model.predict_proba`` and are only
    produced when the model exposes ``predict_proba`` and that method returns
    exactly two class columns. Otherwise both are ``None`` and are left out of
    the printed metrics.

    Parameters
    ----------
    model
        Fitted estimator providing ``predict`` and, optionally,
        ``predict_proba``.
    X_test
        Feature data, passed to the model unchanged.
    y_test
        True labels corresponding to ``X_test``.

    Returns
    -------
    tuple
        ``(accuracy, kappa, auc, r2)``; ``auc`` and ``r2`` may be ``None``.
    """
    y_pred = model.predict(X_test)

    # Probability-based metrics stay None unless they can be computed safely.
    auc = None
    r2 = None
    proba_not_binary = False
    if hasattr(model, "predict_proba"):
        proba = model.predict_proba(X_test)
        # Column 1 is "the positive class" only when there are exactly two
        # columns. For any other shape, slicing [:, 1] would either crash in
        # roc_auc_score or score an arbitrary class, so skip these metrics.
        if getattr(proba, "ndim", None) == 2 and proba.shape[1] == 2:
            y_pred_proba = proba[:, 1]  # probability of the positive class
            auc = roc_auc_score(y_test, y_pred_proba)
            # R^2 compares the predicted probabilities with the labels in y_test.
            r2 = r2_score(y_test, y_pred_proba)
        else:
            proba_not_binary = True

    accuracy = accuracy_score(y_test, y_pred)
    kappa = cohen_kappa_score(y_test, y_pred)

    print(f"Accuracy: {accuracy:.4f}")
    print(f"Cohen's Kappa: {kappa:.4f}")
    if auc is not None:
        print(f"AUC: {auc:.4f}")
    if r2 is not None:
        print(f"R^2: {r2:.4f}")
    if proba_not_binary:
        print("AUC and R^2 skipped: predict_proba did not return exactly two class columns.")
    print("Classification Report:")
    print(classification_report(y_test, y_pred))
    print("Confusion Matrix:")
    print(confusion_matrix(y_test, y_pred))

    return accuracy, kappa, auc, r2

In [7]:
# Show the distinct target values present in the training and test splits.
print(y_train.unique(), y_test.unique(), sep="\n")

[  10    3  180    0   58   92   37   59   17   36   30    6   45   26
    4    7    8   13   12   52   94    9    2   22   19  210    5   25
  124   28   27   85   88   73 1180  102   31   68   11   50   66   14
   35   29  185   98   83   15   39  156   48  453  110   71   23   18
  297  198   65   33  301  275   16   44  130  530   20   79  247   51
  188   62   80  125   42  313   21  132  117   40   72   24   49  211
  169  621  284  906   38   32   34    1  146  212  184  203   64   57
   67  324 1760  224   75   47   90  128  194   46  104  350  139  186
   74  325   78  244  287  152  924  222   69  335  264  505  196  141
  105   96   53 1043  254 1469  722  281  166   55  269  134   54  249
  268   91  413   60  167 1071   70  885  237   43   41   99  154  187
   61  108  183 1036  391  285  107  317   63 2062  133   56  106  290
  123  116  140 1087  311  189   86 1414  533 1427  291  200   82  496
   76  213  255  343  267  233  276  129  163  103  138  266  365  220
  547 

In [8]:
# Example binarisation for binary classification: values above 0.5 become 1,
# everything else becomes 0.
y_train = y_train.gt(0.5).astype(int)
y_test = y_test.gt(0.5).astype(int)

In [9]:
# Neural network trained on the scaled features, without PCA
print("Rede Neural Sem PCA:")
mlp_no_pca = MLPClassifier(
    hidden_layer_sizes=(100,),
    activation='relu',
    solver='adam',
    max_iter=300,
    random_state=seed,
).fit(X_train_scaled, y_train)  # fit() returns the estimator itself
evaluate_model(mlp_no_pca, X_test_scaled, y_test)


Rede Neural Sem PCA:
Accuracy: 0.8885
Cohen's Kappa: 0.7392
AUC: 0.9487
R^2: 0.6152
Classification Report:
              precision    recall  f1-score   support

           0       0.83      0.81      0.82      1381
           1       0.91      0.92      0.92      3021

    accuracy                           0.89      4402
   macro avg       0.87      0.87      0.87      4402
weighted avg       0.89      0.89      0.89      4402

Confusion Matrix:
[[1118  263]
 [ 228 2793]]




(0.888459791004089, 0.7391714963503737, 0.9486903286935933, 0.6152242077057686)

In [10]:
# Apply PCA, keeping enough components to explain 95% of the variance.
# The projection is fitted on the training split and reused for the test split.
pca = PCA(n_components=0.95, random_state=seed)
X_train_pca = pca.fit_transform(X_train_scaled)
X_test_pca = pca.transform(X_test_scaled)

In [11]:
# Neural network trained on the PCA-reduced features
print("\nRede Neural Com PCA:")
mlp_with_pca = MLPClassifier(
    hidden_layer_sizes=(100,),
    activation='relu',
    solver='adam',
    max_iter=300,
    random_state=seed,
).fit(X_train_pca, y_train)  # fit() returns the estimator itself
evaluate_model(mlp_with_pca, X_test_pca, y_test)


Rede Neural Com PCA:
Accuracy: 0.8751
Cohen's Kappa: 0.6976
AUC: 0.9429
R^2: 0.5857
Classification Report:
              precision    recall  f1-score   support

           0       0.85      0.73      0.78      1381
           1       0.88      0.94      0.91      3021

    accuracy                           0.88      4402
   macro avg       0.87      0.83      0.85      4402
weighted avg       0.87      0.88      0.87      4402

Confusion Matrix:
[[1003  378]
 [ 172 2849]]




(0.8750567923671059, 0.6975954271222591, 0.9429343377434473, 0.585651653576556)