In [1]:
import numpy as np
import seaborn as sns
import time

from matplotlib import pyplot as plt
from IPython.display import clear_output
from multiprocessing import Pool, cpu_count
from joblib import Parallel, delayed

from sklearn.svm import LinearSVC
from sklearn.multiclass import OneVsRestClassifier
from sklearn.decomposition import PCA
from sklearn.cluster import SpectralClustering
from sklearn.datasets import fetch_covtype
from sklearn.model_selection import train_test_split
from sklearn.metrics import normalized_mutual_info_score
from sklearn.metrics import classification_report, accuracy_score
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.ensemble import BaggingClassifier

from qiskit import transpile
from qiskit_aer import Aer
from qiskit_aer import AerSimulator
from qiskit.circuit import (
    Parameter, QuantumCircuit, ClassicalRegister, QuantumRegister
)
from qiskit.primitives import StatevectorSampler
from qiskit_algorithms.state_fidelities import ComputeUncompute
from qiskit_algorithms.optimizers import COBYLA, SPSA, SLSQP
from qiskit_machine_learning.kernels import FidelityQuantumKernel
from qiskit_machine_learning.algorithms import QSVC
from qiskit.circuit.library import ZFeatureMap, ZZFeatureMap
from qiskit.visualization import plot_histogram
from qiskit.primitives import Sampler

In [2]:
# Step 1: Fetch and preprocess the dataset
# shuffle=True without a seed reorders the rows differently on every run, so the
# fixed-seed train/test split further down would still pick different samples
# each time. Pinning random_state makes the row order, and therefore every
# downstream result, reproducible.
print("Fetching and preprocessing the dataset...")
data = fetch_covtype(shuffle=True, random_state=42, as_frame=True)
X, y = data.data, data.target

Fetching and preprocessing the dataset...


In [3]:
# Train/test split comes first so the scaler never sees the held-out rows.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Normalize features to [0, 1] using min/max statistics of the training rows
# only; fitting on the full matrix would leak test-set ranges into training.
# Test values beyond the training range may land slightly outside [0, 1].
scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)
# X itself is left unscaled; call scaler.transform(X) where a scaled copy of
# the full feature matrix is needed.

In [13]:
# Step 4: One-vs-All training
# One bagged ensemble of linear SVMs is trained per class. Each base learner is
# fitted on a 1/n_estimators fraction of the training rows, which keeps the
# individual SVM fits small.
print("Training CSVM models in One-vs-All configuration...")
n_estimators = 100
bagged_svm = BaggingClassifier(
    estimator=LinearSVC(),
    max_samples=1.0 / n_estimators,
    n_estimators=n_estimators,
    n_jobs=-1,
    # Seed the bootstrap draws; without it every run trains a different
    # ensemble and the reported metrics cannot be reproduced.
    random_state=42,
)
svm_clf = OneVsRestClassifier(bagged_svm, verbose=10)
svm_clf.fit(X_train, y_train)
print("Done")


Training CSVM models in One-vs-All configuration...


TypeError: LinearSVC.__init__() got an unexpected keyword argument 'cache_size'

In [6]:
# Worker used by the parallel prediction step below.
def predict_chunk(chunk):
    """Return the labels the module-level ``svm_clf`` predicts for ``chunk``.

    ``chunk`` is passed straight to ``svm_clf.predict``; the model is read from
    the notebook's global namespace rather than passed in.
    """
    chunk_predictions = svm_clf.predict(chunk)
    return chunk_predictions

n_jobs = cpu_count()

# Split the test data into chunks: one per core, but never more chunks than
# rows. np.array_split pads with empty chunks when asked for more pieces than
# there are rows, and those empty chunks would be handed to predict().
n_chunks = max(1, min(n_jobs, len(X_test)))
X_test_chunks = np.array_split(X_test, n_chunks)

print(f"Making predictions in parallel on {n_jobs} cores...")
y_pred_chunks = Parallel(n_jobs=n_jobs)(
    delayed(predict_chunk)(chunk) for chunk in X_test_chunks
)

# Combine results; array_split keeps row order, so y_pred lines up with y_test.
y_pred = np.concatenate(y_pred_chunks)

print("Done")

Making predictions in parallel on 128 cores...
Done


In [7]:
# Score the predictions: overall accuracy first, then the per-class breakdown.
print("Evaluating the model...")
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.4f}")
print("Classification Report:")
per_class_report = classification_report(y_test, y_pred)
print(per_class_report)
print("Done")


Evaluating the model...
Accuracy: 0.7091
Classification Report:
              precision    recall  f1-score   support

           1       0.70      0.67      0.69     42469
           2       0.73      0.80      0.76     56750
           3       0.61      0.88      0.72      7020
           4       0.64      0.16      0.25       553
           5       0.43      0.00      0.01      1889
           6       0.47      0.04      0.08      3410
           7       0.64      0.50      0.57      4112

    accuracy                           0.71    116203
   macro avg       0.60      0.44      0.44    116203
weighted avg       0.70      0.71      0.69    116203

Done
