In [2]:
from sklearn.svm import SVC
from sklearn.model_selection import StratifiedKFold, cross_val_score
import json
import numpy as np
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score, f1_score, classification_report, confusion_matrix
)

In [3]:
print("="*70)
print("SVM (POLYNOMIAL KERNEL) - 5-FOLD CROSS VALIDATION")
print("="*70)

# Load the pre-split train / validation / test arrays from the preprocessed
# archive. For a .npz file np.load returns an NpzFile that keeps the archive
# open, so it is used as a context manager: each array is read into memory
# when indexed and the file handle is released when the block exits.
# NOTE(review): the *_flat names suggest the features are already flattened to
# 2-D (n_samples, n_features) by the preprocessing step - confirm.
with np.load("brain_mri_preprocessed.npz") as data:
    X_train_flat = data["X_train"]
    y_train      = data["y_train"]

    X_val_flat   = data["X_val"]
    y_val        = data["y_val"]

    X_test_flat  = data["X_test"]
    y_test       = data["y_test"]

SVM (POLYNOMIAL KERNEL) - 5-FOLD CROSS VALIDATION


In [4]:
# ===== CONFIGURE MODEL =====
# Degree-2 polynomial-kernel SVM. The hyperparameters are gathered in a single
# mapping and unpacked into the constructor.
svm_params = {
    "kernel": "poly",
    "degree": 2,
    "C": 1.0,
    "gamma": 0.01,
    "random_state": 42,
}
svm_model = SVC(**svm_params)

In [5]:
# ===== K-FOLD CROSS VALIDATION =====
print(
    "\n[1/3] Running 5-Fold Cross Validation...",
    "(This may take 5-10 minutes - SVM is slower)\n",
    sep="\n",
)

# Stratified, shuffled 5-way split with a fixed seed so folds are repeatable.
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# Accuracy per fold on the training split (this takes longer than LR).
# n_jobs=-1 runs folds in parallel; verbose=1 prints progress.
cv_scores_svm = cross_val_score(
    estimator=svm_model,
    X=X_train_flat,
    y=y_train,
    scoring='accuracy',
    cv=skf,
    n_jobs=-1,
    verbose=1,
)

# Per-fold scores followed by summary statistics over the folds.
print("\n5-Fold CV Results:")
for fold_no, fold_acc in enumerate(cv_scores_svm, start=1):
    print("  Fold {}: {:.4f}".format(fold_no, fold_acc))
print("  " + "-" * 40)

cv_summary = (
    ("Mean:", cv_scores_svm.mean()),
    ("Std Dev:", cv_scores_svm.std()),
    ("Min:", cv_scores_svm.min()),
    ("Max:", cv_scores_svm.max()),
)
for stat_label, stat_value in cv_summary:
    # Labels are left-padded to 8 columns so the values line up.
    print(f"  {stat_label:<8} {stat_value:.4f}")


[1/3] Running 5-Fold Cross Validation...
(This may take 5-10 minutes - SVM is slower)



[Parallel(n_jobs=-1)]: Using backend LokyBackend with 20 concurrent workers.
[Parallel(n_jobs=-1)]: Done   2 out of   5 | elapsed:  1.0min remaining:  1.5min



5-Fold CV Results:
  Fold 1: 0.9776
  Fold 2: 0.9670
  Fold 3: 0.9829
  Fold 4: 0.9787
  Fold 5: 0.9787
  ----------------------------------------
  Mean:    0.9770
  Std Dev: 0.0053
  Min:     0.9670
  Max:     0.9829


[Parallel(n_jobs=-1)]: Done   5 out of   5 | elapsed:  1.1min finished


In [6]:
# ===== TRAIN ON THE FULL TRAINING SET =====
# Fit the estimator on every training sample so the following cells can
# evaluate it on the validation and test splits.
print()
print("[2/3] Training on full training set...")
svm_model.fit(X=X_train_flat, y=y_train)
print("✓ Model trained")


[2/3] Training on full training set...
✓ Model trained


In [7]:
# ===== EVALUATE ON THE VALIDATION SET =====
print()
print("[3/3] Evaluating on validation and test sets...")

# Predict labels for the validation split and score them against the truth.
y_val_pred_svm = svm_model.predict(X_val_flat)
val_acc_svm = accuracy_score(y_true=y_val, y_pred=y_val_pred_svm)

print("Validation Accuracy: {:.4f}".format(val_acc_svm))


[3/3] Evaluating on validation and test sets...
Validation Accuracy: 0.9686


In [8]:
# ===== EVALUATE ON THE TEST SET =====
y_test_pred_svm = svm_model.predict(X_test_flat)

# Apply each metric, with its default settings, to the same
# (truth, prediction) pair.
test_acc_svm, test_prec_svm, test_rec_svm, test_f1_svm = (
    metric(y_test, y_test_pred_svm)
    for metric in (accuracy_score, precision_score, recall_score, f1_score)
)

print("\nTest Set Performance:")
test_report = (
    ("Accuracy:", test_acc_svm),
    ("Precision:", test_prec_svm),
    ("Recall:", test_rec_svm),
    ("F1-score:", test_f1_svm),
)
for metric_label, metric_value in test_report:
    # Labels are left-padded to 10 columns so the values line up.
    print(f"  {metric_label:<10} {metric_value:.4f}")


Test Set Performance:
  Accuracy:  0.9842
  Precision: 0.9989
  Recall:    0.9799
  F1-score:  0.9893


In [9]:
# Confusion matrix of the test-set predictions (rows: true labels,
# columns: predicted labels, per scikit-learn's convention).
cm_svm = confusion_matrix(y_true=y_test, y_pred=y_test_pred_svm)
print("\nConfusion Matrix:", cm_svm, sep="\n")


Confusion Matrix:
[[308   1]
 [ 18 878]]


In [10]:
# Overfitting check: compare accuracy on the data the model was fitted on with
# accuracy on the test split. The gap is train minus test.
train_acc_svm = svm_model.score(X_train_flat, y_train)
gap_svm = train_acc_svm - test_acc_svm

print("\nOverfitting Analysis:")
for row_label, row_value in (
    ("Train Accuracy:", train_acc_svm),
    ("Test Accuracy:", test_acc_svm),
):
    print("  {:<15} {:.4f}".format(row_label, row_value))
# The gap is shown both as a fraction and as a percentage.
print("  {:<15} {:.4f} ({:.2%})".format("Gap:", gap_svm, gap_svm))

Exception ignored in: <function ResourceTracker.__del__ at 0x7af216382c00>
Traceback (most recent call last):
  File "/home/matildxi/anaconda3/lib/python3.13/multiprocessing/resource_tracker.py", line 82, in __del__
  File "/home/matildxi/anaconda3/lib/python3.13/multiprocessing/resource_tracker.py", line 91, in _stop
  File "/home/matildxi/anaconda3/lib/python3.13/multiprocessing/resource_tracker.py", line 116, in _stop_locked
ChildProcessError: [Errno 10] No child processes
Exception ignored in: <function ResourceTracker.__del__ at 0x7392a8b82c00>
Traceback (most recent call last):
  File "/home/matildxi/anaconda3/lib/python3.13/multiprocessing/resource_tracker.py", line 82, in __del__
  File "/home/matildxi/anaconda3/lib/python3.13/multiprocessing/resource_tracker.py", line 91, in _stop
  File "/home/matildxi/anaconda3/lib/python3.13/multiprocessing/resource_tracker.py", line 116, in _stop_locked
ChildProcessError: [Errno 10] No child processes
Exception ignored in: <function Resour


Overfitting Analysis:
  Train Accuracy: 1.0000
  Test Accuracy:  0.9842
  Gap:            0.0158 (1.58%)


In [11]:
# ===== SAVE RESULTS =====
# Collect every metric computed in the cells above into one JSON-serialisable
# dict and write it to disk.
#
# The hyperparameters are read back from the estimator itself rather than
# repeated as literals, so the saved record cannot drift from the values the
# model was actually built with. Key order matches the previous output.
model_params = svm_model.get_params()

svm_results = {
    'model': 'SVM (Polynomial Kernel)',
    'hyperparameters': {
        name: model_params[name]
        for name in ('kernel', 'degree', 'C', 'gamma')
    },
    'cross_validation': {
        'mean_accuracy': float(cv_scores_svm.mean()),
        'std_dev': float(cv_scores_svm.std()),
        'fold_scores': cv_scores_svm.tolist()
    },
    'validation_set': {
        'accuracy': float(val_acc_svm)
    },
    'test_performance': {
        'accuracy': float(test_acc_svm),
        'precision': float(test_prec_svm),
        'recall': float(test_rec_svm),
        'f1_score': float(test_f1_svm)
    },
    'overfitting': {
        'train_accuracy': float(train_acc_svm),
        'test_accuracy': float(test_acc_svm),
        'gap': float(gap_svm)
    }
}

# float() / .tolist() above convert numpy scalars and arrays to plain Python
# types, which json.dump can serialise.
with open('svm_cv_results.json', 'w') as f:
    json.dump(svm_results, f, indent=4)

print("\n✓ Results saved to: svm_cv_results.json")
print("="*70)
print("SVM (POLYNOMIAL) - COMPLETED ✓")
print("="*70)


✓ Results saved to: svm_cv_results.json
SVM (POLYNOMIAL) - COMPLETED ✓
