# In [None]:
import os                   # for file system operations (creating directories)
import time                 # to measure training duration
import joblib               # for saving/loading Python objects (scaler, model, etc.)
import pandas as pd         # for data manipulation

from sklearn.datasets        import load_breast_cancer
from sklearn.preprocessing   import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics         import (
    accuracy_score,
    roc_auc_score,
    f1_score,
    confusion_matrix
)

from qiskit.circuit.library          import ZFeatureMap
from qiskit_machine_learning.kernels import FidelityStatevectorKernel
from qiskit_machine_learning.algorithms.classifiers import QSVC

# 1) Load & filter data
# ---------------------
# - Select only the four user-defined features from the breast cancer dataset.
features = ['radius error', 'worst radius', 'worst area', 'worst concave points']
data   = load_breast_cancer()
X_raw  = pd.DataFrame(data.data, columns=data.feature_names)[features]
y      = data.target

# 2) Train/test split
# -------------------
# - Hold out 20% of the data for testing, stratified by class label.
# - The split is done BEFORE scaling so that the held-out rows never
#   contribute to the scaler's mean/variance (no train/test leakage).
X_tr_raw, X_te_raw, y_tr, y_te = train_test_split(
    X_raw,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y
)

# 3) Scale
# --------
# - Standardize features to zero mean and unit variance, with the statistics
#   estimated on the training rows only; the test rows are merely transformed.
scaler = StandardScaler()
X_tr   = scaler.fit_transform(X_tr_raw)
X_te   = scaler.transform(X_te_raw)

# Full dataset transformed with the train-fitted scaler. Kept only so that any
# later code referring to `X_scaled` keeps working; do not use it for evaluation.
X_scaled = scaler.transform(X_raw)

# 4) Build the feature map: ZFeatureMap followed by a linear CZ chain
# -------------------------------------------------------------------
# - One qubit per input feature (feature_dimension = number of columns in X_tr).
# - reps=1 -> a single encoding layer; insert_barriers=False -> no barriers.
# - ZFeatureMap encodes each feature on its own qubit (a Hadamard followed by
#   a Z-axis phase rotation) and contains no two-qubit gates by itself.
k = X_tr.shape[1]
fmap = ZFeatureMap(feature_dimension=k, reps=1, insert_barriers=False)

# Append CZ gates between neighbouring qubits (0-1, 1-2, ..., k-2 - k-1) to
# entangle the register in a "linear" pattern.
# NOTE(review): this CZ chain is data-independent and comes after the only
# encoding layer, so it should cancel in a fidelity kernel |<phi(x)|phi(y)>|^2
# and leave the kernel matrix unchanged. If the entanglement is meant to affect
# the classifier, a data-dependent layer must follow it - please confirm intent.
for i, j in zip(range(k - 1), range(1, k)):
    fmap.cz(i, j)

# 5) Kernel & classifier
# ----------------------
# - FidelityStatevectorKernel uses the feature map to compute the kernel matrix.
# - QSVC wraps an SVM over the quantum kernel; probability=True enables
#   predict_proba.
# - random_state is forwarded to the underlying SVC: the probability estimates
#   are calibrated with an internal, randomised cross-validation, so the seed is
#   pinned (matching the split's seed) to make predict_proba/AUC reproducible.
qk   = FidelityStatevectorKernel(feature_map=fmap)
qsvc = QSVC(quantum_kernel=qk, probability=True, random_state=42)

# 6) Train QSVC
# -------------
# - Fit the quantum SVM on the training data and record the training time.
# - perf_counter() is a monotonic, high-resolution clock intended for measuring
#   elapsed time; time.time() can jump if the system clock is adjusted.
t0 = time.perf_counter()
qsvc.fit(X_tr, y_tr)
train_time = time.perf_counter() - t0

# 7) Predictions & metrics
# ------------------------
# - Predict class labels for both the training and the held-out rows.
y_tr_q = qsvc.predict(X_tr)
y_te_q = qsvc.predict(X_te)

# - Accuracy on each split; their difference is used as an overfit indicator.
q_train_acc = accuracy_score(y_tr, y_tr_q)
q_test_acc  = accuracy_score(y_te, y_te_q)
q_overfit   = q_train_acc - q_test_acc

# - AUC is computed from the second probability column (index 1) on the test rows.
te_scores = qsvc.predict_proba(X_te)[:, 1]
q_auc     = roc_auc_score(y_te, te_scores)
q_f1      = f1_score(y_te, y_te_q)

# - Confusion matrix rows are true labels, columns are predicted labels.
(tn, fp), (fn, tp) = confusion_matrix(y_te, y_te_q)

# 8) Print results
# ----------------
report_lines = [
    f"QSVC_Train_Acc:  {q_train_acc:.4f}",
    f"QSVC_Test_Acc:   {q_test_acc:.4f}",
    f"QSVC_Overfit:    {q_overfit:.4f}",
    f"QSVC_AUC:        {q_auc:.4f}",
    f"QSVC_F1:         {q_f1:.4f}",
    f"QSVC_TP / FP / TN / FN: {tp} / {fp} / {tn} / {fn}",
    f"Training duration (s): {train_time:.2f}",
]
print("\n".join(report_lines))

# 9) Save for backend
# -------------------
# - Make sure the output folder exists, then serialize each artifact with joblib.
artifacts = {
    'saved_models/scaler.pkl': scaler,
    'saved_models/feature_map.pkl': fmap,   # feature map including the CZ chain
    'saved_models/qsvc_model.pkl': qsvc,
}
os.makedirs('saved_models', exist_ok=True)
for path, obj in artifacts.items():
    joblib.dump(obj, path)
print("\nScaler, feature map, and QSVC saved in ./saved_models/")
