In [1]:
"""
SQQNN for Breast Cancer Classification with K-Fold Cross Validation
This script evaluates a quantum-inspired binary classifier on the Breast Cancer Wisconsin dataset
using the KFolderCV class for cross-validation, printing all results to console.
"""

import numpy as np
import pandas as pd
from ucimlrepo import fetch_ucirepo
from KFolderCV import KFolderCV

# Set random seed for reproducibility
np.random.seed(0)

# Load Breast Cancer Wisconsin dataset from UCI
print("\nLoading Breast Cancer Wisconsin dataset...")
breast_cancer_data = fetch_ucirepo(id=17)
X = breast_cancer_data.data.features.values
y = breast_cancer_data.data.targets.values

print(f"\nDataset dimensions - Features: {X.shape}, Targets: {y.shape}")

# Convert labels to binary (Malignant = 1, Benign = 0)
y = np.where(y == 'M', 1, 0)

# Normalize features
m = np.min(X)
M = np.max(X)
X = 1*(X-m)/(M-m) - 0.5

# Network configuration
neurons_number = 3
n_splits = 10  # Number of folds for cross-validation

# Initialize KFolderCV
print(f"\nInitializing {n_splits}-fold cross validation...")
kfolder = KFolderCV(n_splits=n_splits)

# Run cross-validation
print("\nRunning cross-validation...")
results = kfolder.cross_validate(X, y, neurons_number)

# Unpack results
(mean_acc, std_acc, 
 mean_prec, std_prec, 
 mean_sens, std_sens, 
 mean_spec, std_spec, 
 mean_f1, std_f1) = results

# Print comprehensive results
print("\n" + "="*50)
print("Cross-Validation Results Summary")
print("="*50)
print(f"\nConfiguration:")
print(f"- Number of folds: {n_splits}")
print(f"- Number of neurons: {neurons_number}")
print(f"- Total samples: {len(y)}")
print(f"- Feature dimensions: {X.shape[1]}")

print("\n" + "-"*40)
print("Performance Metrics (Mean ± Std across folds)")
print("-"*40)
print(f"Accuracy:    {mean_acc:.4f} ± {std_acc:.4f}")
print(f"Precision:   {mean_prec:.4f} ± {std_prec:.4f}")
print(f"Sensitivity: {mean_sens:.4f} ± {std_sens:.4f}")
print(f"Specificity: {mean_spec:.4f} ± {std_spec:.4f}")
print(f"F1 Score:    {mean_f1:.4f} ± {std_f1:.4f}")

print("\n" + "="*50)
print("Interpretation Guide")
print("="*50)
print("\nKey Metrics:")
print("- Accuracy: Overall correctness of predictions")
print("- Precision: Proportion of positive identifications that were correct")
print("- Sensitivity (Recall): Proportion of actual positives correctly identified")
print("- Specificity: Proportion of actual negatives correctly identified")
print("- F1 Score: Harmonic mean of precision and sensitivity")

print("\nCross-validation completed successfully.")


Loading Breast Cancer Wisconsin dataset...

Dataset dimensions - Features: (569, 30), Targets: (569, 1)

Initializing 10-fold cross validation...

Running cross-validation...

Cross-Validation Results Summary

Configuration:
- Number of folds: 10
- Number of neurons: 3
- Total samples: 569
- Feature dimensions: 30

----------------------------------------
Performance Metrics (Mean ± Std across folds)
----------------------------------------
Accuracy:    0.9543 ± 0.0212
Precision:   1.0000 ± 0.0000
Sensitivity: 0.8799 ± 0.0502
Specificity: 0.8799 ± 0.0502
F1 Score:    0.9353 ± 0.0285

Interpretation Guide

Key Metrics:
- Accuracy: Overall correctness of predictions
- Precision: Proportion of positive identifications that were correct
- Sensitivity (Recall): Proportion of actual positives correctly identified
- Specificity: Proportion of actual negatives correctly identified
- F1 Score: Harmonic mean of precision and sensitivity

Cross-validation completed successfully.
