In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.cluster import KMeans
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.metrics import roc_auc_score, roc_curve, accuracy_score
import matplotlib.pyplot as plt
from qiskit import QuantumCircuit, Aer, execute
from sklearn.linear_model import LinearRegression

import warnings

# Silence every warning category for the remainder of the run
warnings.filterwarnings('ignore')


# Read the raw table from disk
df = pd.read_csv('yourdataset.csv')

# The label is the 'target' column; the features are every other column
y = df['target']
X = df.drop(['target'], axis="columns")

# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.20, random_state=42
)

# Columns with exactly one distinct value in the training split are removed
# from both splits, so train and test keep the same set of columns
constant_columns = X_train.columns[X_train.nunique() == 1]
X_train = X_train.drop(constant_columns, axis="columns")
X_test = X_test.drop(constant_columns, axis="columns")

# Correlation of every remaining training feature with the label
correlations = X_train.corrwith(y_train)

# Number of feature clusters, which is also the number of LDA outputs built below
num_dimensions = 8

# KMeans needs a 2D input: one row per feature, a single column holding its correlation
correlations_reshaped = correlations.values.reshape(-1, 1)

# Cluster the features by their correlation value
kmeans = KMeans(n_clusters=num_dimensions, random_state=0)
kmeans.fit(correlations_reshaped)

# Cluster label of each feature, in column order
clusters = kmeans.labels_

# For every cluster, the positional indices of the columns assigned to it
groups = [np.flatnonzero(clusters == i) for i in range(num_dimensions)]

# One output column per cluster; each column is filled in by the loop below
features_lda = np.empty((len(X_train), num_dimensions))
features_lda_test = np.empty((len(X_test), num_dimensions))

# Collapse each group of columns into a single LDA component.
# The LDA is fitted on the training split only and then applied to both splits.
for i, group in enumerate(groups):
    lda = LDA(n_components=1)
    lda.fit(X_train.iloc[:, group], y_train)
    features_lda[:, i] = lda.transform(X_train.iloc[:, group]).ravel()
    features_lda_test[:, i] = lda.transform(X_test.iloc[:, group]).ravel()

# Standardize the LDA-transformed features; the scaler is fitted on the
# training features only and then applied to both splits.
# NOTE: despite its name, `minmax_scaler` holds a StandardScaler, not a MinMaxScaler.
minmax_scaler = StandardScaler()
minmax_scaler.fit(features_lda)
X_train_qsvc, X_test_qsvc = (
    minmax_scaler.transform(split) for split in (features_lda, features_lda_test)
)

# Quantum Reservoir Functions
def create_quantum_reservoir(num_qubits, depth):
    """
    Build a quantum reservoir of random rotations and entanglement.

    Each of the ``depth`` layers applies an RX gate with an angle drawn
    uniformly from [0, 2*pi) to every qubit, followed by a chain of CNOT
    gates linking qubit ``q`` to qubit ``q + 1``.

    The angles come from a private generator seeded with 42, so every call
    with the same arguments returns an identical circuit and the process-wide
    NumPy random state is left untouched.

    :param num_qubits: Number of qubits in the reservoir.
    :param depth: Number of layers in the reservoir circuit.
    :return: QuantumCircuit object.
    """
    qc = QuantumCircuit(num_qubits)
    # A local RandomState(42) yields the same draws as np.random.seed(42)
    # followed by np.random.uniform, without reseeding the global generator
    # on every call.
    rng = np.random.RandomState(42)

    for _ in range(depth):
        for qubit in range(num_qubits):
            qc.rx(rng.uniform(0, 2 * np.pi), qubit)  # Random X-rotation
            # Random Z- and Y-rotations could be added here as optional extras;
            # doing so changes how many random draws each layer consumes.
        for qubit in range(num_qubits - 1):
            qc.cx(qubit, qubit + 1)  # Entangle neighbouring qubits
    return qc

def encode_input(qc, data, num_qubits):
    """
    Append one Y-rotation per input value to the circuit, in place.

    Values are assigned to qubits round-robin: the value at position ``p``
    rotates qubit ``p % num_qubits``, so inputs longer than the register wrap
    around and stack further rotations on the same qubits.

    :param qc: QuantumCircuit object (modified in place).
    :param data: List of input values, used as rotation angles.
    :param num_qubits: Number of qubits in the reservoir.
    """
    for position, angle in enumerate(data):
        target_qubit = position % num_qubits
        qc.ry(angle, target_qubit)

def run_reservoir(qc, num_qubits, shots=200):
    """
    Simulate the quantum reservoir and extract features from measurements.

    Measurement gates are appended to ``qc`` in place, the circuit is run on
    the ``qasm_simulator`` backend, and the observed counts are converted to
    relative frequencies.

    :param qc: QuantumCircuit object (modified in place by ``measure_all``).
    :param num_qubits: Number of qubits.
    :param shots: Number of simulator shots. The counts are divided by this
        same value, so the returned frequencies sum to 1.
    :return: List of ``2 ** num_qubits`` relative frequencies. Entry ``i`` is
        the frequency of the bitstring that is the zero-padded binary form of
        ``i``; bitstrings that were never observed get 0.
    """
    backend = Aer.get_backend('qasm_simulator')
    qc.measure_all()  # Add measurement gates
    result = execute(qc, backend, shots=shots).result()
    counts = result.get_counts()

    # Normalize by the number of shots actually requested, not a separate
    # hard-coded constant, so the values are true relative frequencies.
    return [
        counts.get(format(i, 'b').zfill(num_qubits), 0) / shots
        for i in range(2 ** num_qubits)
    ]

# Parameters for Quantum Reservoir
# One qubit per clustered LDA feature; run_reservoir returns 2 ** num_qubits
# values per sample, so this also fixes the width of the quantum feature matrix.
num_qubits = num_dimensions
# Number of rotation + entanglement layers in the reservoir circuit
depth = 8

# Extract features from the quantum reservoir
def extract_quantum_features(data, num_qubits, depth):
    """
    Turn every sample in ``data`` into a reservoir feature vector.

    For each sample a fresh reservoir circuit is created, the sample is
    encoded onto it, and the circuit is simulated; samples are processed in
    the order given.

    :param data: Iterable of samples, each an iterable of rotation angles.
    :param num_qubits: Number of qubits in the reservoir.
    :param depth: Number of layers in the reservoir circuit.
    :return: NumPy array with one row of reservoir features per sample.
    """
    def _features_for(sample):
        circuit = create_quantum_reservoir(num_qubits, depth)
        encode_input(circuit, sample, num_qubits)
        return run_reservoir(circuit, num_qubits)

    return np.array([_features_for(sample) for sample in data])

# Push the scaled LDA features through the quantum reservoir (train first, then test)
X_train_quantum = extract_quantum_features(X_train_qsvc, num_qubits, depth)
X_test_quantum = extract_quantum_features(X_test_qsvc, num_qubits, depth)

# Fit an ordinary linear regression on the reservoir features
model = LinearRegression().fit(X_train_quantum, y_train)

# Continuous scores for the test set; scores above 0.5 are labelled 1, the rest 0
y_pred = model.predict(X_test_quantum)
y_pred_binary = (y_pred > 0.5).astype(int)
accuracy = accuracy_score(y_test, y_pred_binary)
print(f"Test Accuracy: {accuracy * 100:.2f}%")

# AUC is computed from the continuous scores, not the thresholded labels
auc = roc_auc_score(y_test, y_pred)
print(f"Test AUC: {auc}")

# KS statistic: the largest gap between TPR and FPR along the ROC curve
fpr, tpr, _ = roc_curve(y_test, y_pred)
ks_stat = (tpr - fpr).max()
print(f"Test KS Statistic: {ks_stat}")


Test Accuracy: 75.00%
Test AUC: 0.7860696517412936
Test KS Statistic: 0.4568068747173225
