# Task 3: Hyperparameter Tuning and Cross-Validation

This notebook performs hyperparameter tuning and k-fold cross-validation for SVM and MLP models using the processed WDBC dataset.
Steps include:
- Loading processed data
- Train/test split
- Applying k-fold cross-validation
- Hyperparameter tuning using GridSearchCV
- Evaluating tuned models on the held-out test set
- Saving the tuned models


## 1. Load Processed Data

In [None]:
import numpy as np

# Read the feature matrix and label vector from the .npy files in the
# current working directory.
X_scaled, y = (np.load(path) for path in ('X_scaled.npy', 'y.npy'))

# Quick sanity check on what was loaded.
print(f'Features shape: {X_scaled.shape}')
print(f'Labels shape: {y.shape}')

## 2. Train/Test Split

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the samples for final evaluation. Stratifying on y keeps the
# class proportions the same in both partitions, and the fixed seed makes the
# split repeatable.
# NOTE(review): X_scaled is loaded already scaled; if the scaler was fit on the
# full dataset before this split, test rows influenced the scaling — confirm upstream.
X_train, X_test, y_train, y_test = train_test_split(
    X_scaled,
    y,
    test_size=0.3,
    stratify=y,
    random_state=42,
)
print(f'Train shape: {X_train.shape} Test shape: {X_test.shape}')

## 3. Apply k-Fold Cross-Validation

In [None]:
from sklearn.model_selection import cross_val_score
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier

# Example: 5-fold CV for SVM
# Only the default classifier score (accuracy) is computed here, so probability
# estimates are not requested: probability=True makes every fit run an extra
# internal 5-fold calibration whose output this cell never reads, and it does
# not change what predict() returns.
svm_model = SVC(kernel='linear')

# cross_val_score clones the estimator for each fold, so svm_model itself
# stays unfitted; one score is returned per fold.
cv_scores_svm = cross_val_score(svm_model, X_train, y_train, cv=5)
print('SVM CV Scores:', cv_scores_svm)
print('SVM Mean CV Score:', cv_scores_svm.mean())

## 4. Hyperparameter Tuning using GridSearchCV

In [None]:
from sklearn.model_selection import GridSearchCV

# SVM parameter grid
# NOTE(review): gamma is a kernel coefficient for the rbf/poly/sigmoid kernels,
# so the 'linear' x gamma combinations are duplicate fits. They are kept so the
# candidate ordering (and therefore tie-breaking) stays as it was.
svm_params = {
    'kernel': ['linear', 'rbf'],
    'C': [0.1, 1, 10],
    'gamma': ['scale', 'auto']
}

# probability=True is required because the refit best estimator is asked for
# predict_proba when the ROC curve is drawn. The probability calibration
# shuffles the data internally, so it is seeded (same seed as the split and the
# MLP) to make the probabilities, AUC and saved model repeatable across runs.
svm_grid = GridSearchCV(
    SVC(probability=True, random_state=42),
    svm_params,
    cv=5,
    scoring='accuracy',
)
svm_grid.fit(X_train, y_train)
print('Best SVM Params:', svm_grid.best_params_)
print('Best SVM CV Score:', svm_grid.best_score_)

# MLP parameter grid
# solver and max_iter are single-valued: they are fixed settings passed through
# the grid rather than dimensions being searched.
mlp_params = {
    'hidden_layer_sizes': [(100,), (500,), (500,500)],
    'activation': ['relu', 'tanh'],
    'solver': ['adam'],
    'max_iter': [300]
}

# The MLP is seeded so weight initialisation is repeatable.
mlp_grid = GridSearchCV(
    MLPClassifier(random_state=42),
    mlp_params,
    cv=5,
    scoring='accuracy',
)
mlp_grid.fit(X_train, y_train)
print('Best MLP Params:', mlp_grid.best_params_)
print('Best MLP CV Score:', mlp_grid.best_score_)

## 5. Evaluate Tuned Models on Test Set

In [None]:
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, roc_curve, auc
import matplotlib.pyplot as plt
import seaborn as sns


def evaluate_tuned_model(model, X, y_true, name, cmap):
    """Print test metrics and plot the confusion matrix and ROC curve for one classifier.

    Parameters
    ----------
    model : fitted estimator
        Must expose ``predict`` and ``predict_proba``.
    X : array-like
        Feature matrix to score.
    y_true : array-like
        Ground-truth labels for ``X``.
    name : str
        Short model label used in printed messages, plot titles and the legend
        (e.g. ``'SVM'``).
    cmap : str
        Colormap name for the confusion-matrix heatmap.

    Returns
    -------
    tuple
        ``(y_pred, cm, y_prob, fpr, tpr, roc_auc)`` so callers can keep the
        intermediate results for further analysis.
    """
    # Hard predictions, accuracy and per-class report.
    y_pred = model.predict(X)
    print(f'Tuned {name} Accuracy:', accuracy_score(y_true, y_pred))
    # The newline must be written as an escape sequence: a single-quoted string
    # literal cannot span physical lines.
    print('Classification Report:\n', classification_report(y_true, y_pred))

    # Confusion matrix rendered as an annotated heatmap with integer counts.
    cm = confusion_matrix(y_true, y_pred)
    sns.heatmap(cm, annot=True, fmt='d', cmap=cmap)
    plt.title(f'Tuned {name} Confusion Matrix')
    plt.show()

    # ROC curve built from the score of the second column of predict_proba.
    # NOTE(review): assumes binary labels where that column is the positive
    # class — confirm the label encoding stored in y.npy.
    y_prob = model.predict_proba(X)[:, 1]
    fpr, tpr, _ = roc_curve(y_true, y_prob)
    roc_auc = auc(fpr, tpr)
    plt.plot(fpr, tpr, label=f'{name} (AUC = {roc_auc:.2f})')
    plt.plot([0, 1], [0, 1], '--', color='gray')  # chance-level diagonal
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title(f'{name} ROC Curve')
    plt.legend()
    plt.show()

    return y_pred, cm, y_prob, fpr, tpr, roc_auc


# Evaluate SVM (results are unpacked into the same names the cell always defined)
best_svm = svm_grid.best_estimator_
y_pred_svm, cm_svm, y_prob_svm, fpr, tpr, roc_auc = evaluate_tuned_model(
    best_svm, X_test, y_test, 'SVM', 'Blues'
)

# Evaluate MLP
best_mlp = mlp_grid.best_estimator_
y_pred_mlp, cm_mlp, y_prob_mlp, fpr_mlp, tpr_mlp, roc_auc_mlp = evaluate_tuned_model(
    best_mlp, X_test, y_test, 'MLP', 'Greens'
)

## 6. Save Tuned Models

In [None]:
import joblib

# Persist each tuned estimator to its own pickle file in the working directory.
for target_path, fitted_model in (
    ('best_svm_model.pkl', best_svm),
    ('best_mlp_model.pkl', best_mlp),
):
    joblib.dump(fitted_model, target_path)

print('Tuned models saved: best_svm_model.pkl, best_mlp_model.pkl')