In [1]:
import torch
import torch.nn as nn
from merlin import QuantumLayer, ComputationSpace, LexGrouping
from merlin.builder import CircuitBuilder

In [2]:
import torch.nn.functional as F
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

In [3]:
import numpy as np
import perceval as pcvl
import torch
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

from merlin.algorithms.kernels import FeatureMap, FidelityKernel, KernelCircuitBuilder

torch.manual_seed(0)
np.random.seed(0)

In [4]:
import pandas as pd
from sklearn.preprocessing import  MinMaxScaler

import pandas as pd



def load_data(file_path: str) -> pd.DataFrame:
    print(f"Loading data from {file_path}")
    return pd.read_csv(file_path)


def preprocess_data(df: pd.DataFrame) -> pd.DataFrame:

    X = df.iloc[:, 1:-1].values
    y = df.iloc[:, -1].values
    print('X = ', X)

    sc_X = MinMaxScaler()
    X_train = sc_X.fit_transform(X)
    print("X train = ", X_train)
            
    print("Preprocessing complete.")

    return X_train,y

def save_cleaned_data(df: pd.DataFrame, output_path: str) -> None:
    print(f"\nSaving cleaned data to {output_path}")
    df= pd.DataFrame(df, columns=['income','credit_utilization','payment_history','num_open_accounts','debt_to_income','loan_amount'])
    
    df.to_csv(output_path, index=False)
    print("Done.")


input_path = "data/credit_train.csv"
output_path = "data/credit_train_cleaned.csv"



df = load_data(input_path)
# inspect_data(df)
df_cleaned,y_train = preprocess_data(df)
# inspect_data(df_cleaned)
save_cleaned_data(df_cleaned, output_path)


test_input_path = "data/credit_test.csv"
test_output_path = "data/credit_test_cleaned.csv"

df_test = load_data(test_input_path)
# inspect_data(df)
df_test_cleaned,y_test = preprocess_data(df_test)
# inspect_data(df_cleaned)
save_cleaned_data(df_test_cleaned, test_output_path)

# ============================================================
# 1. Tensor conversions & DataLoader
# ============================================================
def convert_dataset_to_tensor(x_train, x_test, y_train, y_test):
    x_train = torch.tensor(x_train, dtype=torch.float32)
    x_test = torch.tensor(x_test, dtype=torch.float32)
    y_train = torch.tensor(y_train, dtype=torch.long)
    y_test = torch.tensor(y_test, dtype=torch.long)
    return x_train, x_test, y_train, y_test


def convert_tensor_to_loader(x_train, y_train, batch_size=6):
    train_dataset = torch.utils.data.TensorDataset(x_train, y_train)
    train_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=batch_size, shuffle=True
    )
    return train_loader



x_train_np=df_cleaned
y_train_np=y_train

x_test_np=df_test_cleaned
y_test_np=y_test
# Number of classes inferred from training labels
num_classes = int(len(np.unique(y_train_np)))

# Convert to tensors
x_train, x_test, y_train, y_test = convert_dataset_to_tensor(
    x_train_np, x_test_np, y_train_np, y_test_np
)


# Build DataLoader
#train_loader = convert_tensor_to_loader(x_train, y_train, batch_size=6)


Loading data from data/credit_train.csv
X =  [[ 0.619  0.529  0.418 14.     0.308  0.516]
 [ 0.473  0.434  0.829 17.     0.352  0.945]
 [ 0.389  0.724  0.848  7.     0.604  0.629]
 ...
 [ 0.328  0.94   0.693  2.     0.809  0.826]
 [ 0.452  0.424  0.753 14.     0.383  0.646]
 [ 0.416  0.526  0.503 15.     0.489  0.606]]
X train =  [[0.61158798 0.55075594 0.37852495 0.8125     0.308      0.31054131]
 [0.45493562 0.44816415 0.82429501 1.         0.352      0.92165242]
 [0.36480687 0.76133909 0.84490239 0.375      0.604      0.47150997]
 ...
 [0.29935622 0.99460043 0.67678959 0.0625     0.809      0.75213675]
 [0.43240343 0.43736501 0.74186551 0.8125     0.383      0.4957265 ]
 [0.39377682 0.5475162  0.47071584 0.875      0.489      0.43874644]]
Preprocessing complete.

Saving cleaned data to data/credit_train_cleaned.csv
Done.
Loading data from data/credit_test.csv
X =  [[ 0.629  0.465  0.489 10.     0.419  0.764]
 [ 0.676  0.178  0.616  9.     0.181  0.641]
 [ 0.678  0.466  0.8   13.    

In [5]:
print(len(x_test_np))

240


In [6]:
kernel = FidelityKernel.simple(
    input_size=6,
    n_modes=6,
    shots=0,  # exact probabilities
    no_bunching=False,
    dtype=torch.float32,
    device=torch.device("cpu"),
)

K_train = kernel(x_train)
K_test = kernel(x_test, x_train)

print("Train Gram shape:", K_train.shape)
print("Test Gram shape:", K_test.shape)

Train Gram shape: torch.Size([600, 600])
Test Gram shape: torch.Size([240, 600])


In [7]:
svc = SVC(kernel="precomputed")
svc.fit(K_train.detach().numpy(), y_train)
train_accuracy = svc.score(K_train.detach().numpy(), y_train)
print(f"SVM train accuracy (precomputed kernel): {train_accuracy:.3f}")
test_accuracy = svc.score(K_test.detach().numpy(), y_test)
print(f"SVM accuracy (precomputed kernel): {test_accuracy:.3f}")

SVM train accuracy (precomputed kernel): 0.932
SVM accuracy (precomputed kernel): 0.942


In [8]:
from sklearn.metrics import (
    confusion_matrix,
    classification_report,
    roc_auc_score,
    average_precision_score,
    balanced_accuracy_score
)

K_train_np = K_train.detach().numpy()
K_test_np = K_test.detach().numpy()
y_train_np = y_train.numpy()
y_test_np = y_test.numpy()

# Predictions
y_pred_train = svc.predict(K_train_np)
y_pred_test = svc.predict(K_test_np)

print("\n=== Confusion matrix (test) ===")
print(confusion_matrix(y_test_np, y_pred_test))

print("\n=== Classification report (test) ===")
print(classification_report(y_test_np, y_pred_test, digits=3))

print("\nBalanced accuracy (test):",
      balanced_accuracy_score(y_test_np, y_pred_test))

# Probabilities / scores for AUC
scores_test = svc.decision_function(K_test_np)

print("ROC-AUC (test):",
      roc_auc_score(y_test_np, scores_test))

print("PR-AUC (test):",
      average_precision_score(y_test_np, scores_test))



=== Confusion matrix (test) ===
[[180   7]
 [  7  46]]

=== Classification report (test) ===
              precision    recall  f1-score   support

           0      0.963     0.963     0.963       187
           1      0.868     0.868     0.868        53

    accuracy                          0.942       240
   macro avg      0.915     0.915     0.915       240
weighted avg      0.942     0.942     0.942       240


Balanced accuracy (test): 0.9152456866108365
ROC-AUC (test): 0.9704368883059228
PR-AUC (test): 0.9239170118418321
