In [30]:
import numpy as np
import seaborn as sns
import time

from matplotlib import pyplot as plt
from IPython.display import clear_output
from multiprocessing import Pool, cpu_count
from joblib import Parallel, delayed

from sklearn.svm import SVC
from sklearn.multiclass import OneVsRestClassifier, OneVsOneClassifier
from sklearn.decomposition import PCA
from sklearn.cluster import SpectralClustering
from sklearn.datasets import fetch_covtype
from sklearn.model_selection import train_test_split
from sklearn.metrics import normalized_mutual_info_score
from sklearn.metrics import classification_report, accuracy_score
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.ensemble import BaggingClassifier

from qiskit import transpile
from qiskit_aer import Aer
from qiskit_aer import AerSimulator
from qiskit.circuit import (
    Parameter, QuantumCircuit, ClassicalRegister, QuantumRegister
)
from qiskit.primitives import StatevectorSampler
from qiskit_algorithms.state_fidelities import ComputeUncompute
from qiskit_algorithms.optimizers import COBYLA, SPSA, SLSQP
from qiskit_machine_learning.kernels import FidelityQuantumKernel, FidelityStatevectorKernel
from qiskit_machine_learning.algorithms import QSVC
from qiskit.circuit.library import ZFeatureMap, ZZFeatureMap
from qiskit.visualization import plot_histogram
from qiskit.primitives import Sampler

In [31]:
# Step 1: Fetch and preprocess the dataset
print("Fetching and preprocessing the dataset...")
# shuffle=True draws a random row order. Pinning random_state keeps that order,
# and therefore every positional slice taken from X / y afterwards, identical
# across kernel restarts.
data = fetch_covtype(shuffle=True, random_state=42, as_frame=True)
X, y = data.data, data.target

Fetching and preprocessing the dataset...


In [32]:
# Rescale every feature column into the [0, 1] range.
# NOTE(review): the scaler is fitted on all of X; if a train/test split is made
# afterwards, held-out rows influence the min/max. Consider fitting on the
# training portion only.
scaler = MinMaxScaler().fit(X)
X = scaler.transform(X)

In [33]:
# Hold out 20% of the rows for testing; the fixed seed keeps the partition repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [76]:
import numpy as np
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.preprocessing import StandardScaler

class BaggingQSVM(BaseEstimator, ClassifierMixin):
    """Ensemble of quantum-kernel SVMs, each trained on a disjoint chunk of the data.

    ``fit`` projects the training data with a single PCA, splits the projected
    rows into ``n_estimators`` contiguous chunks and trains one model per chunk
    (chunks that contain fewer than two classes are skipped). The prediction
    methods reuse the PCA fitted during ``fit`` and combine the per-model
    outputs by majority vote or by averaging.

    Parameters
    ----------
    base_model : estimator or None, default=None
        Template classifier. It is cloned for every chunk and the clone
        receives a fresh ``quantum_kernel`` via ``set_params``. ``None`` means
        a default ``QSVC``.
    n_estimators : int, default=10
        Number of chunks, i.e. the maximum number of trained models.
    classes_, n_features_in_, is_fitted_, models
        Fitted-state attributes that are also accepted by the constructor for
        backward compatibility. ``fit`` overwrites all of them.
    n_components : int, default=2
        Number of PCA components (and therefore feature-map dimension).
    random_state : int or None, default=None
        Seed forwarded to the PCA. The global NumPy RNG is not touched.
    n_jobs : int, default=-1
        Passed to ``joblib.Parallel`` when training the per-chunk models.

    Attributes
    ----------
    pca_ : PCA
        Projection fitted on the training data in ``fit``.
    models : list
        The trained per-chunk models (skipped chunks are not represented).
    """

    def __init__(
        self,
        base_model=None,
        n_estimators=10,
        classes_=None,
        n_features_in_=None,
        is_fitted_=False,
        models=None,
        n_components=2,
        random_state=None,
        n_jobs=-1,
    ):
        # Constructor arguments are stored verbatim (no copies, no defaults
        # substituted) so that get_params()/clone() round-trip correctly.
        self.base_model = base_model  # Template classifier (None -> QSVC)
        self.n_estimators = n_estimators  # Number of chunks / models
        self.n_components = n_components  # Number of PCA components
        self.random_state = random_state  # Seed used for the PCA
        self.n_jobs = n_jobs  # Number of jobs to run in parallel
        self.models = models  # Trained models (None until fitted)

        # Explicit attributes for scikit-learn compatibility
        self.classes_ = classes_  # Unique class labels
        self.n_features_in_ = n_features_in_  # Number of features in training data
        self.is_fitted_ = is_fitted_  # Flag to indicate if the model is fitted

    def _train_model(self, X_chunk, y_chunk):
        """Train one model on a chunk of already PCA-projected data.

        Returns the fitted model, or ``None`` when the chunk does not contain
        at least two classes.
        """
        # Local import: the surrounding notebook imports only BaseEstimator and
        # ClassifierMixin from sklearn.base.
        from sklearn.base import clone

        if len(np.unique(y_chunk)) < 2:
            print("Skipping chunk due to single class in the chunk.")
            return None

        # Define the quantum feature map and quantum kernel
        feature_map = ZZFeatureMap(feature_dimension=X_chunk.shape[1], reps=2)
        qkernel = FidelityQuantumKernel(feature_map=feature_map)

        # Every chunk gets its own estimator instance. Calling set_params on
        # self.base_model directly would reconfigure one shared object.
        if self.base_model is None:
            model = QSVC(quantum_kernel=qkernel)
        else:
            model = clone(self.base_model).set_params(quantum_kernel=qkernel)

        model.fit(X_chunk, y_chunk)
        return model

    def fit(self, X, y):
        """Fit the PCA and one model per chunk of ``(X, y)``.

        Raises
        ------
        ValueError
            If the inputs are empty or inconsistent, if ``n_estimators < 1``,
            or if no chunk contained at least two classes.
        """
        X = np.asarray(X)
        y = np.asarray(y)
        if X.ndim != 2:
            raise ValueError("X must be a 2D array of shape (n_samples, n_features).")
        n_samples = X.shape[0]
        if n_samples == 0:
            raise ValueError("Cannot fit BaggingQSVM on an empty dataset.")
        if y.shape[0] != n_samples:
            raise ValueError("X and y must contain the same number of samples.")
        if self.n_estimators < 1:
            raise ValueError("n_estimators must be at least 1.")

        # Invalidate any previous fit until this one completes.
        self.is_fitted_ = False
        self.classes_ = np.unique(y)
        self.n_features_in_ = X.shape[1]

        # One projection for the whole ensemble, reused verbatim at predict time.
        self.pca_ = PCA(n_components=self.n_components, random_state=self.random_state)
        X_pca = self.pca_.fit_transform(X)

        # Disjoint contiguous chunks covering every row (no remainder dropped,
        # no empty chunks when there are fewer rows than estimators).
        n_chunks = min(self.n_estimators, n_samples)
        index_chunks = np.array_split(np.arange(n_samples), n_chunks)

        # Parallelize the training of the per-chunk models
        trained = Parallel(n_jobs=self.n_jobs)(
            delayed(self._train_model)(X_pca[idx], y[idx]) for idx in index_chunks
        )

        # Skipped chunks come back as None and must not take part in prediction.
        self.models = [model for model in trained if model is not None]
        if not self.models:
            raise ValueError(
                "No model could be trained: every chunk contained fewer than two classes."
            )

        self.is_fitted_ = True
        return self

    def _project(self, X):
        """Check the fitted state and apply the training-time PCA to ``X``."""
        if (
            not self.is_fitted_
            or self.models is None
            or len(self.models) == 0
            or not hasattr(self, "pca_")
        ):
            raise ValueError("This BaggingQSVM instance is not fitted yet.")
        return self.pca_.transform(np.asarray(X))

    def predict(self, X):
        """Predict labels by majority vote over the trained models.

        Ties are resolved in favour of the class that comes first in
        ``classes_``.
        """
        X_pca = self._project(X)
        classes = np.asarray(self.classes_)

        # votes: (n_models, n_samples)
        votes = np.stack([np.asarray(model.predict(X_pca)) for model in self.models])

        # counts: (n_samples, n_classes). Comparing against classes_ works for
        # any label values, including negative or non-integer ones.
        counts = (votes[:, :, None] == classes[None, None, :]).sum(axis=0)
        return classes[np.argmax(counts, axis=1)]

    def decision_function(self, X):
        """Average the per-model decision scores.

        All models must return scores of the same shape, which holds for the
        binary case because single-class chunks are never trained.
        """
        X_pca = self._project(X)
        scores = np.stack([model.decision_function(X_pca) for model in self.models])
        return np.mean(scores, axis=0)

    def predict_proba(self, X):
        """Average the per-model class probabilities.

        Requires models that expose ``predict_proba`` and ``classes_``.
        Returns an array of shape ``(n_samples, len(classes_))``; a model that
        did not see some class contributes zero probability for it.
        """
        X_pca = self._project(X)
        classes = np.asarray(self.classes_)

        total = np.zeros((X_pca.shape[0], len(classes)))
        for model in self.models:
            # Align this model's columns with the ensemble-wide class order.
            columns = np.searchsorted(classes, model.classes_)
            total[:, columns] += model.predict_proba(X_pca)

        return total / len(self.models)


In [77]:
print("Training QSVM models in One-vs-All configuration...")
n_estimators = 10

# Base ensemble that is handed to the one-vs-rest wrapper.
bagged_qsvm = BaggingQSVM(
    n_estimators=n_estimators,
    n_components=5,
    random_state=42,
    n_jobs=-1,
)
svm_clf = OneVsRestClassifier(bagged_qsvm, verbose=10)

# Only the first 1000 training rows are used for fitting.
svm_clf.fit(X_train[:1000], y_train[:1000])
print("Done")


Training QSVM models in One-vs-All configuration...


[Parallel(n_jobs=1)]: Done   1 tasks      | elapsed:   37.2s
[Parallel(n_jobs=1)]: Done   4 tasks      | elapsed:  2.4min


Done


[Parallel(n_jobs=1)]: Done   7 tasks      | elapsed:  4.2min
[Parallel(n_jobs=1)]: Done   7 tasks      | elapsed:  4.2min


In [None]:
n_jobs = 1


# Function to predict on a chunk of data
def predict_chunk(chunk):
    """Return the predictions of ``svm_clf`` for one slice of test rows."""
    return svm_clf.predict(chunk)


# Split the test data into chunks, one per worker.
# Adjust n_jobs based on available memory and CPU cores; a non-positive value
# falls back to the machine's core count so np.array_split gets a valid count.
n_chunks = n_jobs if n_jobs > 0 else cpu_count()
X_test_chunks = np.array_split(X_test[:50], n_chunks)

print(f"Making predictions in parallel on {n_jobs} cores...")
# Pass the configured worker count so that changing `n_jobs` actually changes
# the parallelism, not just the number of chunks.
y_pred_chunks = Parallel(n_jobs=n_jobs)(delayed(predict_chunk)(chunk) for chunk in X_test_chunks)

# Combine results
y_pred = np.concatenate(y_pred_chunks)

print("Done")

Making predictions in parallel on 1 cores...


In [None]:
# Score the predictions against the true labels
print("Evaluating the model...")
y_true = y_test[:50]  # must cover the same rows that y_pred was computed for
accuracy = accuracy_score(y_true, y_pred)
print(f"Accuracy: {accuracy:.4f}")
print("Classification Report:")
print(classification_report(y_true, y_pred))
print("Done")
