# Quantum Support Vector Machine

A support vector machine (SVM) that uses quantum kernels in place of classical ones.

## Part I - Regression

### Generic Imports

In [1]:
import warnings
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from scipy.stats import pearsonr
from sklearn import linear_model, model_selection
from sklearn.metrics import r2_score
from sklearn.mixture import GaussianMixture
from sklearn.model_selection import train_test_split, RepeatedKFold
from sklearn.preprocessing import MinMaxScaler, StandardScaler #StandardScaler is sensitive to outlier

### Verify File Content

In [2]:
# Load the dataset for a quick preview; the first CSV column is used as the row index.
df = pd.read_csv("qml_training-validation-data.csv", index_col = 0)
# Show the first six rows.
df.head(6)

Unnamed: 0_level_0,el_neg,B/GPa,Volume/A^3,SFE/mJm^-3
Element,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Be,1.57,130.0,8.09,23.48
Sc,1.36,57.0,25.0,16.16
Ti,1.54,110.0,17.6,24.44
Co,1.88,180.0,11.0,37.64
Zn,1.65,70.0,15.2,20.98
Y,1.22,41.0,33.0,8.52


### Qiskit Imports

In [3]:
from IPython.display import clear_output

from qiskit_aer import Aer
from qiskit.circuit.library import PauliFeatureMap, RealAmplitudes, ZZFeatureMap
from qiskit_machine_learning.algorithms import VQR
from qiskit_machine_learning.datasets import ad_hoc_data
from qiskit_machine_learning.algorithms import QSVR,QSVC
from qiskit_machine_learning.kernels import FidelityQuantumKernel

### Data Processing

In [4]:
def prepare_dataset(y_scaler, X, y, test_ratio, random_state=None):
    """Randomly split the data and rescale the feature columns to [-1, 1].

    Column 0 of ``X`` is carried along as a sample label and is not used as a
    feature; every remaining column is scaled.

    Args:
        y_scaler: Not used by this function; kept so existing callers keep working.
        X: 2-D array whose column 0 holds the sample label and whose other
            columns hold the features.
        y: Targets aligned with the rows of ``X``.
        test_ratio: Passed to ``train_test_split`` as ``test_size``.
        random_state: Optional seed forwarded to ``train_test_split`` so a
            split can be reproduced. ``None`` (the default) keeps the previous
            unseeded behaviour.

    Returns:
        Tuple ``(X_train, y_train, X_test, y_test, element_test)`` where
        ``element_test`` is column 0 of the test rows.
    """
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_ratio, shuffle=True, random_state=random_state
    )
    element_test = X_test[:, 0]

    # Divide every feature by its maximum over the whole dataset before min-max scaling.
    max_arr = np.max(X[:, 1:], axis=0)
    sample_train = X_train[:, 1:] / max_arr
    sample_test = X_test[:, 1:] / max_arr

    # NOTE(review): the scaler is fit on the train and test rows together, so
    # the test-set value range influences the preprocessing of the training set.
    samples = np.append(sample_train, sample_test, axis=0)
    minmax_scale = MinMaxScaler((-1, 1)).fit(samples)
    X_train = minmax_scale.transform(sample_train)
    X_test = minmax_scale.transform(sample_test)

    return X_train, y_train, X_test, y_test, element_test

In [5]:
def prepare_dataset_k_fold(X, y, train_indices, test_indices):
    """Build one cross-validation fold and rescale its feature columns to [-1, 1].

    Column 0 of ``X`` is carried along as a sample label and is not used as a
    feature; every remaining column is scaled.

    Args:
        X: 2-D array whose column 0 holds the sample label and whose other
            columns hold the features.
        y: Targets aligned with the rows of ``X``.
        train_indices: Row indices that form the training split.
        test_indices: Row indices that form the test split.

    Returns:
        Tuple ``(X_train, y_train, X_test, y_test, element_test)`` where
        ``element_test`` is column 0 of the test rows.
    """
    X_train, X_test = X[train_indices], X[test_indices]
    y_train, y_test = y[train_indices], y[test_indices]
    element_test = X_test[:, 0]

    # Divide every feature by its maximum over the whole dataset before min-max scaling.
    max_arr = np.max(X[:, 1:], axis=0)
    sample_train = X_train[:, 1:] / max_arr
    sample_test = X_test[:, 1:] / max_arr

    # NOTE(review): the scaler is fit on the train and test rows together, so
    # the test-set value range influences the preprocessing of the training set.
    samples = np.append(sample_train, sample_test, axis=0)
    minmax_scale = MinMaxScaler((-1, 1)).fit(samples)
    X_train = minmax_scale.transform(sample_train)
    X_test = minmax_scale.transform(sample_test)

    return X_train, y_train, X_test, y_test, element_test

### Feature Map

In [6]:
def config_feature_map(feature_dimension, reps, entanglement):
    """Create the ``PauliFeatureMap`` used to encode the features.

    The three arguments are forwarded unchanged as the keyword arguments of
    the same names. The original note on this function said to "assume
    ZZFeatureMap"; that alternative call is left commented out below.
    """
    # return ZZFeatureMap(feature_dimension=feature_dimension, reps=reps, entanglement=entanglement, insert_barriers=True)
    feature_map_options = {
        "feature_dimension": feature_dimension,
        "reps": reps,
        "entanglement": entanglement,
    }
    return PauliFeatureMap(**feature_map_options)

### Kernel

In [7]:
def reconfig_quantum_kernel_qsvr(feature_map, C, epsilon):
    """Return a new ``QSVR`` whose kernel is a ``FidelityQuantumKernel`` on ``feature_map``.

    ``C`` and ``epsilon`` are passed straight through to the ``QSVR`` constructor.
    """
    return QSVR(
        C=C,
        epsilon=epsilon,
        quantum_kernel=FidelityQuantumKernel(feature_map=feature_map),
    )

### Training

In [8]:
def train(qsv_, X_train, y_train, X_test):
    """Fit ``qsv_`` on the training split and return its predictions for ``X_test``."""
    qsv_.fit(X_train, y_train)
    return qsv_.predict(X_test)

### Inverse Transform

In [9]:
def inverse_transform(y_scaler, y_test, y_hat):
    """Undo the target scaling on both the true and the predicted values.

    Each input is reshaped to a single column before being handed to
    ``y_scaler.inverse_transform``.

    Returns:
        Tuple ``(y_test, y_hat)`` after inverse scaling.
    """
    def _as_column(values):
        return values.reshape(-1, 1)

    restored_hat = y_scaler.inverse_transform(_as_column(y_hat))
    restored_test = y_scaler.inverse_transform(_as_column(y_test))
    return restored_test, restored_hat

### Graphing

In [10]:
# def graph(y_scaler, y_hat, y_test, message):
#     plt.plot(elementtest, y_hat, 'o')
#     plt.plot(elementtest, y_test, 'o')
    
#     plt.xlabel("Element")
#     plt.ylabel("SFE")
#     plt.legend(["Predicted", "Actual"], bbox_to_anchor=(1, 0.9), loc='upper left')
#     plt.savefig(message,dpi=300)
#     # plt.show()
#     plt.clf()

In [11]:
def graph_facet_qsvr(df, date):
    """Save a faceted point plot of the QSVR tuning results.

    ``'r^2'`` is plotted against ``'reps'``, coloured by ``'entanglement'``,
    with one column of panels per ``'C'`` value and one row per ``'epsilon'``
    value. The figure is written to ``QSVR/zz/figure/{date}_averaged.png``.

    Args:
        df: DataFrame with the columns ``'reps'``, ``'C'``, ``'epsilon'``,
            ``'entanglement'`` and ``'r^2'``. It is not modified.
        date: Tag used in the output file name.
    """
    # astype returns a new frame, so the caller's DataFrame is left untouched.
    plot_df = df.astype({
        'reps': 'category',
        'C': 'category',
        'epsilon': 'category',
        'entanglement': 'category',
    })

    g = sns.catplot(
        data=plot_df,
        x='reps',
        y='r^2',
        hue='entanglement',
        col='C',
        row='epsilon',
        kind='point',
        height=3,
        aspect=1,
        legend_out=True
    )

    g.set(ylim=(0, 1))
    # Raw string: "\e" is not a valid escape sequence and triggers a warning otherwise.
    g.fig.suptitle(r"Parameter Tuning Results Faceted by $C$ and $\epsilon$", y=1.03)
    g.set_axis_labels("reps", r"$R^2$")
    for ax in g.axes.flat:
        title = ax.get_title()
        # Render the facet labels in math notation.
        title = title.replace('C =', r'$C=$')
        title = title.replace('epsilon =', r'$\epsilon=$')
        ax.set_title(title)

    g.fig.subplots_adjust(right=0.9)  # Slightly less space on the right
    g.fig.savefig(f'QSVR/zz/figure/{date}_averaged.png', dpi=300, bbox_inches='tight')
    plt.close(g.fig)

  g.fig.suptitle("Parameter Tuning Results Faceted by $C$ and $\epsilon$", y=1.03)


In [12]:
def load_data_and_graph(file_path, date):
    """Reload results from the CSV at ``file_path`` and plot them with ``graph_facet_qsvr``."""
    graph_facet_qsvr(pd.read_csv(file_path), date)

# load_data_and_graph('QSVR/zz/result/4_17_25_1_all.csv', '4_17_25')

In [13]:
def save_each_run(y_test, y_hat, element_test, message):
    """Plot predicted and actual values per test element and save the figure to ``message``."""
    # Predicted first, then actual, so the series order matches the legend entries.
    for series in (y_hat, y_test):
        plt.plot(element_test, series, 'o')

    plt.xlabel("Element")
    plt.ylabel("SFE")
    plt.legend(["Predicted", "Actual"], bbox_to_anchor=(1, 0.9), loc='upper left')
    plt.savefig(message, dpi=300)
    plt.close()

### Accuracy

In [14]:
def accuracy_qsvr(y_test, y_hat):
    """Score the regression as ``r2_score(y_test, y_hat)``."""
    # Earlier alternative, kept for reference:
    # r, _ = pearsonr(y_test, y_hat)
    # return r[0]**2
    score = r2_score(y_test, y_hat)
    return score

### Main Logic

In [17]:
def main_qsvr(date):
    """Grid-search QSVR hyper-parameters with repeated K-fold CV and save every prediction.

    For each combination of ``C``, ``reps``, ``epsilon`` and entanglement, a
    fresh QSVR is fit on every fold produced by ``RepeatedKFold`` and the
    inverse-scaled actual and predicted targets of the test fold are recorded.
    One row per fold is written to ``QSVR/zz/result/{date}_all.csv``.

    Args:
        date: Tag used in the output file name.
    """
    from pathlib import Path  # only needed to prepare the output folder

    data = pd.read_csv("qml_training-validation-data.csv")
    X = data[['Element', 'el_neg', 'B/GPa', 'Volume/A^3']].values
    y = data['SFE/mJm^-3'].values

    # The target is scaled for training; predictions are mapped back with the same scaler.
    y_scaler = MinMaxScaler()
    y = y_scaler.fit_transform(y.reshape(-1,1))

    reps_range = [1,2,5]
    C_range = [0.1,1,10,100]
    epsilon_range = [1,0.1,0.01,0.001]
    entanglement_options = ["full", "linear", "circular"]

    warnings.filterwarnings('ignore')

    # Make sure the output folder exists before the long grid search starts,
    # so the run cannot fail at the final write.
    result_path = Path(f'QSVR/zz/result/{date}_all.csv')
    result_path.parent.mkdir(parents=True, exist_ok=True)

    # n_splits equals the number of samples, so every test fold holds one sample.
    # NOTE(review): each repeat then presumably yields the same leave-one-out
    # folds in a different order, so n_repeats=3 may only triple the runtime — confirm.
    rkf = RepeatedKFold(n_splits=X.shape[0], n_repeats=3, random_state=42)

    # Collect plain records and build the DataFrame once at the end instead of
    # growing it row by row.
    records = []

    for C in C_range:
        print(f'current C is: {C}')
        for reps in reps_range:
            for eps in epsilon_range:
                for entanglement in entanglement_options:
                    for i, (train_indices, test_indices) in enumerate(rkf.split(X)):
                        X_train, y_train, X_test, y_test, element_test = prepare_dataset_k_fold(X, y, train_indices, test_indices)

                        feature_map = config_feature_map(3, reps, entanglement)
                        qsvr = reconfig_quantum_kernel_qsvr(feature_map, C, eps)
                        y_hat = train(qsvr, X_train, y_train, X_test)
                        y_test, y_hat = inverse_transform(y_scaler, y_test, y_hat)

                        records.append({'reps': reps,
                                        'C': C,
                                        'epsilon': eps,
                                        'entanglement': entanglement,
                                        'element test': element_test,
                                        'iteration': i,
                                        'actual': y_test,
                                        'predicted': y_hat})
                        # Progress is reported 1-based; the stored 'iteration' is 0-based.
                        print(f"{C}_{reps}_{eps}_{entanglement}_prediction_{i + 1}")

    results = pd.DataFrame(
        records,
        columns=['reps', 'C', 'epsilon', 'entanglement', 'element test', 'iteration', 'actual', 'predicted'],
    )
    results.to_csv(result_path, index=False)
    # df_avg.to_csv(f'QSVR/zz/result/{date}_averaged.csv', index=False)

### Run `main_qsvr()` to Start

In [18]:
date = '4_23_25_0'
! mkdir QSVR/zz/figure/{date}
main_qsvr(date)

1_2_1_linear_prediction_42
1_2_1_linear_prediction_43
1_2_1_linear_prediction_44
1_2_1_linear_prediction_45
1_2_1_linear_prediction_46
1_2_1_linear_prediction_47
1_2_1_linear_prediction_48
1_2_1_linear_prediction_49
1_2_1_linear_prediction_50
1_2_1_linear_prediction_51
1_2_1_linear_prediction_52
1_2_1_linear_prediction_53
1_2_1_linear_prediction_54
1_2_1_linear_prediction_55
1_2_1_linear_prediction_56
1_2_1_linear_prediction_57
1_2_1_linear_prediction_58
1_2_1_linear_prediction_59
1_2_1_linear_prediction_60
1_2_1_linear_prediction_61
1_2_1_linear_prediction_62
1_2_1_linear_prediction_63
1_2_1_circular_prediction_1
1_2_1_circular_prediction_2
1_2_1_circular_prediction_3
1_2_1_circular_prediction_4
1_2_1_circular_prediction_5
1_2_1_circular_prediction_6
1_2_1_circular_prediction_7
1_2_1_circular_prediction_8
1_2_1_circular_prediction_9
1_2_1_circular_prediction_10
1_2_1_circular_prediction_11
1_2_1_circular_prediction_12
1_2_1_circular_prediction_13
1_2_1_circular_prediction_14
1_2_1_cir

In [29]:
# seems like C can't be too small, epsilon preferably 0.1 to 0.001, 
# https://www.ibm.com/think/topics/hyperparameter-tuning
# lower C allows for higher tolerance and a smoother boundary
# reps is the number of repetitions of the feature-map circuit; its effect on the result is still unclear

In [None]:
# Over two runs, I observed that the following reps, C, and epsilon values give a strong correlation (>0.70):
# reps = 1 or 2, preferably 1;  C = 1, 10, or 100; epsilon = 0.01 or 0.001 (makes sense, as a smaller epsilon narrows the error-insensitive tube and forces a closer fit)

## Part II - Classification

In [15]:
def reconfig_quantum_kernel_qsvc(feature_map, C, gamma):
    """Return a new ``QSVC`` whose kernel is a ``FidelityQuantumKernel`` on ``feature_map``.

    ``C`` and ``gamma`` are passed straight through to the ``QSVC`` constructor.
    NOTE(review): with a quantum kernel supplied, ``gamma`` presumably has no
    effect on the fit — confirm against the QSVC documentation.
    """
    return QSVC(
        C=C,
        gamma=gamma,
        quantum_kernel=FidelityQuantumKernel(feature_map=feature_map),
    )

In [16]:
def accuracy_qsvc(y_test, y_hat):
    """Return the fraction of positions where label and prediction agree.

    Two values agree when their difference, rounded to one decimal place, is zero.
    """
    total = len(y_test)
    matches = sum(
        1
        for idx in range(total)
        if np.round(y_test[idx] - y_hat[idx], 1) == 0
    )
    return matches / total

In [17]:
def process(y):
    """Binarise ``y`` in place: values above 19 become 0, all others become 1.

    Returns the same (mutated) object that was passed in.
    """
    for idx, value in enumerate(y):
        y[idx] = 0 if value > 19 else 1
    return y

### Graph

In [18]:
def graph_facet_qsvc(df, date):
    """Save a faceted point plot of the QSVC tuning results.

    The score is plotted against ``'reps'``, coloured by ``'entanglement'``,
    with one column of panels per ``'C'`` value and one row per ``'gamma'``
    value. The figure is written to ``QSVC/zz/figure/{date}_averaged.png``.

    Args:
        df: DataFrame with the columns ``'reps'``, ``'C'``, ``'gamma'``,
            ``'entanglement'`` and a score column. The score column is
            ``'accuracy'`` (what ``main_qsvc`` writes) when present, otherwise
            the older ``'$R^2$'`` name. ``df`` is not modified.
        date: Tag used in the output file name.
    """
    # astype returns a new frame, so the caller's DataFrame is left untouched.
    plot_df = df.astype({
        'reps': 'category',
        'C': 'category',
        'gamma': 'category',
        'entanglement': 'category',
    })

    # Prefer the column main_qsvc produces; fall back to the previous name.
    if 'accuracy' in plot_df.columns:
        score_column, score_label = 'accuracy', "Accuracy"
    else:
        score_column, score_label = '$R^2$', r"$R^2$"

    g = sns.catplot(
        data=plot_df,
        x='reps',
        y=score_column,
        hue='entanglement',
        col='C',
        row='gamma',
        kind='point',
        height=3,
        aspect=1,
        legend_out=True
    )

    g.set(ylim=(0, 1))
    # Raw string: "\g" is not a valid escape sequence and triggers a warning otherwise.
    g.fig.suptitle(r"Parameter Tuning Results Faceted by $C$ and $\gamma$", y=1.03)
    g.set_axis_labels("reps", score_label)
    for ax in g.axes.flat:
        title = ax.get_title()
        # Render the facet labels in math notation.
        title = title.replace('C =', r'$C=$')
        title = title.replace('gamma =', r'$\gamma=$')
        ax.set_title(title)
    g.fig.subplots_adjust(right=0.9)  # Slightly less space on the right
    # Saved next to the other QSVC figures so it cannot overwrite the QSVR plot of the same date.
    g.fig.savefig(f'QSVC/zz/figure/{date}_averaged.png', dpi=300, bbox_inches='tight')
    plt.close(g.fig)

  g.fig.suptitle("Parameter Tuning Results Faceted by $C$ and $\gamma$", y=1.03)


In [19]:
def box_plot_for_c(vertical_values, date, c_values=(0.1, 1, 10, 100)):
    """Save a box plot with one box per ``C`` value.

    The figure is written to ``QSVC/zz/figure/box_plot_for_each_c_{date}.png``.

    Args:
        vertical_values: Data passed to ``plt.boxplot``; it must yield one
            box per entry of ``c_values``.
        date: Tag used in the output file name.
        c_values: ``C`` values used as x tick labels, in box order. Defaults
            to the grid used by the tuning functions in this notebook.
    """
    plt.figure(figsize=(12,6))
    # Boxes are placed at evenly spaced positions so that C values spanning
    # several orders of magnitude do not overlap on the linear axis.
    positions = np.arange(len(c_values))
    plt.boxplot(vertical_values, positions=positions, patch_artist=True)
    plt.xticks(positions, [str(c) for c in c_values])
    plt.xlabel('C')
    plt.ylabel('Accuracy')
    plt.title('Box Plot for Each C Value')
    plt.savefig(f"QSVC/zz/figure/box_plot_for_each_c_{date}.png",dpi=300)
    plt.close()

In [20]:
def main_qsvc(date):
    """Grid-search QSVC hyper-parameters over repeated random splits and save the accuracies.

    The target column is binarised with ``process``. For each combination of
    ``C``, ``reps``, ``gamma`` and entanglement, 20 random train/test splits
    are evaluated. Per-run accuracies go to ``QSVC/zz/result/{date}_all.csv``
    and per-combination means to ``QSVC/zz/result/{date}_averaged.csv``.

    Args:
        date: Tag used in the output file names.
    """
    from pathlib import Path  # only needed to prepare the output folder

    data = pd.read_csv("qml_training-validation-data.csv")
    X = data[['Element', 'el_neg', 'B/GPa', 'Volume/A^3']].values
    # process() rewrites its argument in place, so hand it a private copy.
    y = process(data['SFE/mJm^-3'].to_numpy(copy=True))

    test_ratio = 0.2 #0.1

    reps_range = [1,2,5]
    C_range = [0.1, 1, 10, 100]
    gamma_range = [1, 0.1, 0.01, 0.001]
    entanglement_options = ["full", "linear", "circular"]
    n_iterations = 20

    warnings.filterwarnings('ignore')

    # Make sure the output folder exists before the long grid search starts.
    result_dir = Path('QSVC/zz/result')
    result_dir.mkdir(parents=True, exist_ok=True)

    # Collect plain records and build the DataFrames once at the end.
    run_records = []
    avg_records = []

    for C in C_range:
        print(f"C is: {C}")
        for reps in reps_range:
            # NOTE(review): the QSVC is built with a quantum kernel, so gamma
            # presumably does not change the model — confirm before interpreting.
            for gamma in gamma_range:
                for entanglement in entanglement_options:
                    accuracies = []
                    for i in range(n_iterations):
                        # prepare_dataset does not use its first (y_scaler) argument.
                        X_train, y_train, X_test, y_test, element_test = prepare_dataset(None, X, y, test_ratio)

                        # Use the current entanglement option, not the whole list of options.
                        feature_map = config_feature_map(3, reps, entanglement)
                        qsvc = reconfig_quantum_kernel_qsvc(feature_map, C, gamma)

                        # Labels are already 0/1 and were never scaled, so no
                        # inverse transform is applied to the predictions.
                        y_hat = train(qsvc, X_train, y_train, X_test)

                        acc = accuracy_qsvc(y_test, y_hat)
                        accuracies.append(acc)
                        run_records.append({'reps': reps,
                                            'C': C,
                                            'gamma': gamma,
                                            'entanglement': entanglement,
                                            'iteration': i,
                                            'accuracy': acc})

                    avg_records.append({'reps': reps,
                                        'C': C,
                                        'gamma': gamma,
                                        'entanglement': entanglement,
                                        'accuracy': sum(accuracies)/n_iterations})

    all_runs = pd.DataFrame(
        run_records,
        columns=['reps', 'C', 'gamma', 'entanglement', 'iteration', 'accuracy'],
    )
    averaged = pd.DataFrame(
        avg_records,
        columns=['reps', 'C', 'gamma', 'entanglement', 'accuracy'],
    )
    all_runs.to_csv(result_dir / f'{date}_all.csv', index=False)
    averaged.to_csv(result_dir / f'{date}_averaged.csv', index=False)

In [None]:
date = '4_20_25_0'
! mkdir QSVC/zz/figure/{date}
main_qsvc(date)
date = '4_20_25_1'
! mkdir QSVC/zz/figure/{date}
main_qsvc(date)

mkdir: cannot create directory 'QSVC/zz/figure/4_17_25_0': File exists
C is: 0.1


In [None]:
# https://www.datasciencebase.com/supervised-ml/optimization-tuning/tuning-for-decision-trees-svms-other-algorithms/