# Importação dos pacotes

In [13]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report

In [14]:
# ignorar avisos
import warnings
warnings.filterwarnings("ignore")

# Modelos

In [15]:
# Modelos
from sklearn.svm import SVC
from xgboost import XGBClassifier

# Carregamento e Separação da Base de Dados

In [19]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [23]:
# 1. Carregar a base
mushroomsCsvPath = '/content/drive/MyDrive/Classroom/Aprendizado de Máquina - ADS - 2025.1 Turma: 20251.4.ADS.CNT.1N   Diário: SUP.06276 (122721)/mushroom-classification/mushrooms.csv'


df = pd.read_csv(mushroomsCsvPath)

# Separar o rótulo da classe
y = df['class']
X = df.drop('class', axis=1)

X.head()

Unnamed: 0,cap-shape,cap-surface,cap-color,bruises,odor,gill-attachment,gill-spacing,gill-size,gill-color,stalk-shape,...,stalk-surface-below-ring,stalk-color-above-ring,stalk-color-below-ring,veil-type,veil-color,ring-number,ring-type,spore-print-color,population,habitat
0,x,s,n,t,p,f,c,n,k,e,...,s,w,w,p,w,o,p,k,s,u
1,x,s,y,t,a,f,c,b,k,e,...,s,w,w,p,w,o,p,n,n,g
2,b,s,w,t,l,f,c,b,n,e,...,s,w,w,p,w,o,p,n,n,m
3,x,y,w,t,p,f,c,n,n,e,...,s,w,w,p,w,o,p,k,s,u
4,x,s,g,f,n,f,w,b,k,t,...,s,w,w,p,w,o,e,n,a,g


## Mantendo variáveis fracas (baixa correlação)

In [28]:
# Mantendo apenas variáveis fracamente correlacionada com a classe como entrada
weak_features = ['population', 'habitat', 'cap-shape']
X = X[weak_features]

X.head()

Unnamed: 0,population,habitat,cap-shape
0,s,u,x
1,n,g,x
2,n,m,b
3,s,u,x
4,a,g,x


## Aplicando ruído ao alvo

In [30]:
# Adicionar ruído ao Alvo (Classe)
y_noisy = y.copy()
# Inverter a classe de uma porcentagem pequena dos exemplos (ex: 20%).
flip_idx = np.random.choice(len(y), size=int(0.2 * len(y)), replace=False)
y_noisy.iloc[flip_idx] = y_noisy.iloc[flip_idx].map({'e': 'p', 'p': 'e'})

# 🧠 XGBoost com Label Encoding


---


Aplicar Label Encoding em todas as colunas

In [31]:
# -------------------------------
# 🧠 XGBoost com Label Encoding
# -------------------------------
# Aplicar Label Encoding em todas as colunas
X_xgb = X.copy()
le_xgb = LabelEncoder()

for col in X_xgb.columns:
    X_xgb[col] = le_xgb.fit_transform(X_xgb[col])

y_noisy = LabelEncoder().fit_transform(y)

# Treino/teste
X_train_xgb, X_test_xgb, y_train_xgb, y_test_xgb = train_test_split(X_xgb, y_noisy, test_size=0.1, random_state=42)

# Modelo XGBoost
xgb_model = XGBClassifier(max_depth=1, n_estimators=5, use_label_encoder=False, eval_metric='logloss')
xgb_model.fit(X_train_xgb, y_train_xgb)
xgb_preds = xgb_model.predict(X_test_xgb)

acc_xgb = accuracy_score(y_test_xgb, xgb_preds)
print(f"🎯 Acurácia - XGBoost (Label Encoding): {acc_xgb:.4f}")

# Classification report
print("\n📋 Relatório de Classificação - XGBoost:")
print(classification_report(y_test_xgb, xgb_preds, target_names=['Venenoso', 'Comestível']))

🎯 Acurácia - XGBoost (Label Encoding): 0.6470

📋 Relatório de Classificação - XGBoost:
              precision    recall  f1-score   support

    Venenoso       0.81      0.44      0.57       433
  Comestível       0.58      0.88      0.70       380

    accuracy                           0.65       813
   macro avg       0.69      0.66      0.64       813
weighted avg       0.70      0.65      0.63       813



# 🤖 SVM com One-Hot Encoding
---
Aplicar One-Hot Encoding

In [33]:
# -------------------------------
# 🤖 SVM com One-Hot Encoding
# -------------------------------
# Aplicar One-Hot Encoding
X_svm = pd.get_dummies(X)

# Codificar rótulos
y_noisy = LabelEncoder().fit_transform(y)

# Treino/teste
X_train_svm, X_test_svm, y_train_svm, y_test_svm = train_test_split(X_svm, y_noisy, test_size=0.1, random_state=42)

# Modelo SVM (com kernel RBF por padrão)
svm_model = SVC(kernel='poly', degree=5, C=0.001)
svm_model.fit(X_train_svm, y_train_svm)
svm_preds = svm_model.predict(X_test_svm)

acc_svm = accuracy_score(y_test_svm, svm_preds)
print(f"🎯 Acurácia - SVM (One-Hot Encoding): {acc_svm:.4f}")

# Classification report
print("\n📋 Relatório de Classificação - SVM:")
print(classification_report(y_test_svm, svm_preds, target_names=['Venenoso', 'Comestível']))


🎯 Acurácia - SVM (One-Hot Encoding): 0.6790

📋 Relatório de Classificação - SVM:
              precision    recall  f1-score   support

    Venenoso       0.63      0.97      0.76       433
  Comestível       0.90      0.35      0.50       380

    accuracy                           0.68       813
   macro avg       0.77      0.66      0.63       813
weighted avg       0.76      0.68      0.64       813

