In [62]:
import pennylane as qml
from pennylane import numpy as np
from pennylane import ApproxTimeEvolution

import pandas as pd
import matplotlib.pyplot as plt

from sklearn import model_selection
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score,f1_score,precision_score,recall_score,roc_auc_score
from sklearn.preprocessing import MinMaxScaler,StandardScaler
from sklearn.model_selection import GridSearchCV, KFold,StratifiedKFold

import random
from ast import literal_eval
from itertools import product
from imblearn.under_sampling import RandomUnderSampler
from typing import Dict, Any

In [122]:
# Each HemoPI split ships as a (train, validation) pair of CSV files.
train_file_1, test_file_1 = (
    pd.read_csv('./data/Hemo56D/data1/HemoPI_train_model.csv'),
    pd.read_csv('./data/Hemo56D/data1/HemoPI_validation.csv'),
)
train_file_2, test_file_2 = (
    pd.read_csv('./data/Hemo56D/data2/HemoPI_train_model.csv'),
    pd.read_csv('./data/Hemo56D/data2/HemoPI_validation.csv'),
)
train_file_3, test_file_3 = (
    pd.read_csv('./data/Hemo56D/data3/HemoPI_train_model.csv'),
    pd.read_csv('./data/Hemo56D/data3/HemoPI_validation.csv'),
)

# Row counts of every frame, in (train, validation) order per dataset.
tuple(
    len(frame)
    for frame in (
        train_file_1, test_file_1,
        train_file_2, test_file_2,
        train_file_3, test_file_3,
    )
)

(884, 220, 812, 202, 1298, 325)

In [123]:
def pre_process(train_file,test_file):
    """Standardise the descriptor columns and balance the training classes.

    The descriptor columns are every column of ``train_file`` except the
    first two and the last one, minus the entry at position 47 of that list.
    The scaler is fitted on the training frame only and then applied to the
    test frame. Only the training set is under-sampled; the test set keeps
    its original class distribution.

    Unlike the previous version, the column list is derived from the
    ``train_file`` argument (not from the global ``train_file_1``), and the
    input frames are not modified, so the cell can be re-run safely.

    Parameters
    ----------
    train_file : pandas.DataFrame
        Training data; must contain the descriptor columns and a 'Label' column.
    test_file : pandas.DataFrame
        Held-out data with the same columns as ``train_file``.

    Returns
    -------
    tuple
        ``(X_train, y_train, X_test, y_test)`` where ``X_train``/``y_train``
        are the scaled, under-sampled training features/labels and
        ``X_test``/``y_test`` are the scaled test features and test labels.
    """
    # Use the frame that was passed in, so the function does not depend on a
    # notebook global being defined (and being the right dataset).
    col_names = train_file.columns[2:-1].tolist()
    col_names.pop(47)   # remove the second 'Sequence' column

    scaler       = StandardScaler()  # mean 0 and std 1
    undersampler = RandomUnderSampler(random_state=42)

    # Scale into fresh arrays instead of writing back into the caller's frames.
    X_train = scaler.fit_transform(train_file[col_names])
    X_test  = scaler.transform(test_file[col_names])

    y_train = train_file['Label'].to_numpy()
    y_test  = test_file['Label'].to_numpy()

    X_train, y_train = undersampler.fit_resample(X_train, y_train)

    return X_train,y_train,X_test,y_test

In [124]:
x_hemo1,y_hemo1,x_hemo1_test,y_hemo1_test = pre_process(train_file_1,test_file_1)
x_hemo2,y_hemo2,x_hemo2_test,y_hemo2_test = pre_process(train_file_2,test_file_2)
x_hemo3,y_hemo3,x_hemo3_test,y_hemo3_test = pre_process(train_file_3,test_file_3)

In [125]:
# Function to create one random Hamiltonian operator
def one_operator(num_qubits):
    """Build one random tensor product of single-qubit operators.

    For each wire ``0 .. num_qubits - 1`` one of PauliX, PauliY, PauliZ or
    Identity is drawn with ``random.choice`` and applied to that wire; the
    factors are combined into a single ``qml.operation.Tensor``.

    The result depends on the state of Python's ``random`` module.
    """
    # NOTE(review): qml.operation.Tensor appears to be deprecated in recent
    # PennyLane releases -- confirm against the installed version.
    candidates = [qml.PauliX, qml.PauliY, qml.PauliZ, qml.Identity]

    factors = []
    for wire in range(num_qubits):
        gate_cls = random.choice(candidates)
        factors.append(gate_cls(wire))

    return qml.operation.Tensor(*factors)

# Function to create multiple random Hamiltonian operators
def hamiltonian_operators(num_qubits, num_ops,num_samples=1):
    """Draw ``num_samples`` independent sets of random operators.

    Returns a list with ``num_samples`` entries; each entry is a list of
    ``num_ops`` operators produced by ``one_operator(num_qubits)``.
    """
    return [
        [one_operator(num_qubits) for _ in range(num_ops)]
        for _ in range(num_samples)
    ]

# Function to create the kernel matrix
def kernel_matrix(A,B):
    """Return the matrix of squared overlap magnitudes between rows of A and B.

    ``A`` and ``B`` are converted to arrays whose rows are (complex) vectors.
    Entry ``[i, j]`` of the result is ``|sum_k conj(A[i, k]) * B[j, k]|**2``,
    so the output has one row per row of ``A`` and one column per row of ``B``.
    """
    left = np.array(A)
    right = np.array(B)

    # Inner products <A_i | B_j> for every pair of rows.
    overlaps = np.conjugate(left) @ right.T

    return np.absolute(overlaps ** 2)


#####################################################

# one_operator draws with random.choice, so seed Python's RNG before sampling.
random.seed(42)

ops_dict   = {}                 # '<n>_qubits' -> list of sampled operator sets
n_sample   = 3                  # operator sets drawn per qubit count
L          = len(x_hemo1[0])    # one operator per input feature
num_qubits = [6]                # qubit counts to prepare operators for

for n in num_qubits:
    ops_dict[f'{n}_qubits'] = hamiltonian_operators(n, L, num_samples=n_sample)



In [148]:
# Function to train and test quantum kernels
def testing(num_qubits,time,step,ops_index,xdata,ydata,x_test,y_test):
    """Train a precomputed-kernel SVM on quantum states and score it on a test set.

    Every feature vector is used as the coefficient list of a Hamiltonian
    built from a pre-sampled operator set, the Hamiltonian is applied with
    ``ApproxTimeEvolution`` and the resulting state vector is returned. The
    kernel between two samples comes from ``kernel_matrix`` on those states.

    Parameters
    ----------
    num_qubits : int
        Number of wires of the simulator; also selects the
        ``'<num_qubits>_qubits'`` entry of the global ``ops_dict``.
    time, step
        Evolution time and number of Trotter steps forwarded to
        ``ApproxTimeEvolution``.
    ops_index : int
        Which sampled operator set of that ``ops_dict`` entry to use.
    xdata, ydata
        Training feature vectors and labels.
    x_test, y_test
        Test feature vectors and labels.

    Returns
    -------
    list
        ``[accuracy, precision, recall, f1, roc_auc]`` on the test set.
        The same five values are also printed.
    """
    device = qml.device("lightning.qubit", wires=num_qubits)

    @qml.qnode(device, interface="autograd")
    def evolved_state(x, ops, time=1, steps=1):
        # The sample's features are the weights of the Hamiltonian terms.
        ApproxTimeEvolution(qml.Hamiltonian(x, ops), time, steps)
        return qml.state()

    np.random.seed(42)

    operators = ops_dict[str(num_qubits)+'_qubits'][ops_index]

    train_states = [evolved_state(sample, operators, time, step) for sample in xdata]
    test_states  = [evolved_state(sample, operators, time, step) for sample in x_test]

    # Gram matrix of the training set, then fit the SVM on it.
    gram_train = kernel_matrix(train_states, train_states)
    classifier = SVC(kernel='precomputed').fit(gram_train, ydata)

    # kernel_matrix gives (train x test); the SVM expects (test x train).
    gram_test = kernel_matrix(train_states, test_states).T
    test_pred  = classifier.predict(gram_test)
    test_score = classifier.decision_function(gram_test)

    test_acc  = accuracy_score(y_test, test_pred)
    precision = precision_score(y_test, test_pred, pos_label=1)
    recall    = recall_score(y_test, test_pred, pos_label=1)
    f1_       = f1_score(y_test, test_pred, pos_label=1)
    roc_auc_  = roc_auc_score(y_test, test_score)

    print(f"Testing accuracy: {test_acc:.5f}")
    print(f"Precision: {precision:.5f}")
    print(f"Recall: {recall:.5f}")
    print(f"F1: {f1_:.5f}")
    print(f"ROC AUC: {roc_auc_:.5f}")

    return [test_acc, precision, recall, f1_, roc_auc_]

## HemoPI with 56 descriptors

#### Quantum kernel

In [149]:
# Quantum kernel on HemoPI-1: 6 qubits, evolution time 0.3, 10 steps, operator set 1.
hemo1_q = testing(
    num_qubits=6,
    time=0.3,
    step=10,
    ops_index=1,
    xdata=x_hemo1,
    ydata=y_hemo1,
    x_test=x_hemo1_test,
    y_test=y_hemo1_test,
)

Testing accuracy: 0.95909
Precision: 0.97196
Recall: 0.94545
F1: 0.95853
ROC AUC: 0.98595


In [128]:
# NOTE(review): this cell makes the same call as the previous cell and
# recomputes the same result; one of the two is probably a leftover.
hemo1_q = testing(
    num_qubits=6,
    time=0.3,
    step=10,
    ops_index=1,
    xdata=x_hemo1,
    ydata=y_hemo1,
    x_test=x_hemo1_test,
    y_test=y_hemo1_test,
)

Testing accuracy: 0.95909
Precision: 0.97196
Recall: 0.94545
F1: 0.95853
ROC AUC: 0.98595


In [129]:
# Quantum kernel on HemoPI-2: 6 qubits, evolution time 0.15, 10 steps, operator set 2.
hemo2_q = testing(
    num_qubits=6,
    time=0.15,
    step=10,
    ops_index=2,
    xdata=x_hemo2,
    ydata=y_hemo2,
    x_test=x_hemo2_test,
    y_test=y_hemo2_test,
)

Testing accuracy: 0.73267
Precision: 0.76415
Recall: 0.73636
F1: 0.75000
ROC AUC: 0.82381


In [130]:
# Quantum kernel on HemoPI-3: 6 qubits, evolution time 0.15, 10 steps, operator set 2.
hemo3_q = testing(
    num_qubits=6,
    time=0.15,
    step=10,
    ops_index=2,
    xdata=x_hemo3,
    ydata=y_hemo3,
    x_test=x_hemo3_test,
    y_test=y_hemo3_test,
)

Testing accuracy: 0.78154
Precision: 0.75728
Recall: 0.88136
F1: 0.81462
ROC AUC: 0.83402


#### Classical kernels: helper function

In [131]:
def test_metric(param,x,y,x_test,y_test):
    """Fit a classical SVC and report its metrics on the test set.

    Parameters
    ----------
    param : dict
        Keyword arguments forwarded to ``SVC``.
    x, y
        Training features and labels.
    x_test, y_test
        Test features and labels.

    Returns
    -------
    list
        ``[accuracy, precision, recall, f1, roc_auc]`` on the test set.
        The same five values are also printed.
    """
    classifier = SVC(**param)
    classifier.fit(x, y)

    predictions = classifier.predict(x_test)
    # Signed decision values (not probabilities); used to rank samples for the AUC.
    decision_values = classifier.decision_function(x_test)
    test_score = classifier.score(x_test, y_test)

    precision = precision_score(y_test, predictions, pos_label=1)
    recall    = recall_score(y_test, predictions, pos_label=1)
    f1_       = f1_score(y_test, predictions, pos_label=1)
    roc_auc_  = roc_auc_score(y_test, decision_values)

    print(f"Testing accuracy: {test_score:.5f}")
    print(f"Precision: {precision:.5f}")
    print(f"Recall: {recall:.5f}")
    print(f"F1: {f1_:.5f}")
    print(f"ROC AUC: {roc_auc_:.5f}")

    return [test_score, precision, recall, f1_, roc_auc_]

#### Classical: Linear

In [132]:
# SVC settings for the linear kernel, one dict per HemoPI dataset.
linear_params1 = dict(C=1, kernel='linear')
linear_params2 = dict(C=0.1, kernel='linear')
linear_params3 = dict(C=1, kernel='linear')

In [133]:
print('HemoPI1')
hemo1_linear = test_metric(
    param=linear_params1,
    x=x_hemo1,
    y=y_hemo1,
    x_test=x_hemo1_test,
    y_test=y_hemo1_test,
)

HemoPI1
Testing accuracy: 0.94091
Precision: 0.94495
Recall: 0.93636
F1: 0.94064
ROC AUC: 0.98727


In [134]:
print('HemoPI2')
hemo2_linear = test_metric(
    param=linear_params2,
    x=x_hemo2,
    y=y_hemo2,
    x_test=x_hemo2_test,
    y_test=y_hemo2_test,
)

HemoPI2
Testing accuracy: 0.70297
Precision: 0.70492
Recall: 0.78182
F1: 0.74138
ROC AUC: 0.76294


In [135]:
print('HemoPI3')
hemo3_linear = test_metric(
    param=linear_params3,
    x=x_hemo3,
    y=y_hemo3,
    x_test=x_hemo3_test,
    y_test=y_hemo3_test,
)

HemoPI3
Testing accuracy: 0.74462
Precision: 0.72381
Recall: 0.85876
F1: 0.78553
ROC AUC: 0.79188


#### Classical: RBF

In [136]:
# SVC settings for the RBF kernel, one dict per HemoPI dataset.
rbf_params1 = dict(C=1000, gamma=0.001, kernel='rbf')
rbf_params2 = dict(C=1000, gamma=0.0001, kernel='rbf')
rbf_params3 = dict(C=10, gamma=0.001, kernel='rbf')

In [137]:
print('HemoPI1')
hemo1_rbf = test_metric(
    param=rbf_params1,
    x=x_hemo1,
    y=y_hemo1,
    x_test=x_hemo1_test,
    y_test=y_hemo1_test,
)

HemoPI1
Testing accuracy: 0.95455
Precision: 0.99020
Recall: 0.91818
F1: 0.95283
ROC AUC: 0.98934


In [138]:
print('HemoPI2')
hemo2_rbf = test_metric(
    param=rbf_params2,
    x=x_hemo2,
    y=y_hemo2,
    x_test=x_hemo2_test,
    y_test=y_hemo2_test,
)

HemoPI2
Testing accuracy: 0.70792
Precision: 0.71795
Recall: 0.76364
F1: 0.74009
ROC AUC: 0.77994


In [139]:
print('HemoPI3')
hemo3_rbf = test_metric(
    param=rbf_params3,
    x=x_hemo3,
    y=y_hemo3,
    x_test=x_hemo3_test,
    y_test=y_hemo3_test,
)

HemoPI3
Testing accuracy: 0.75077
Precision: 0.73077
Recall: 0.85876
F1: 0.78961
ROC AUC: 0.80654


#### Classical: Poly

In [140]:
# SVC settings for the degree-3 polynomial kernel, one dict per HemoPI dataset.
poly_params1 = dict(C=10, degree=3, kernel='poly')
poly_params2 = dict(C=0.1, degree=3, kernel='poly')
poly_params3 = dict(C=1, degree=3, kernel='poly')

In [141]:
print('HemoPI1')
hemo1_poly = test_metric(
    param=poly_params1,
    x=x_hemo1,
    y=y_hemo1,
    x_test=x_hemo1_test,
    y_test=y_hemo1_test,
)

HemoPI1
Testing accuracy: 0.94545
Precision: 0.96226
Recall: 0.92727
F1: 0.94444
ROC AUC: 0.97050


In [142]:
print('HemoPI2')
hemo2_poly = test_metric(
    param=poly_params2,
    x=x_hemo2,
    y=y_hemo2,
    x_test=x_hemo2_test,
    y_test=y_hemo2_test,
)

HemoPI2
Testing accuracy: 0.66832
Precision: 0.75904
Recall: 0.57273
F1: 0.65285
ROC AUC: 0.79150


In [143]:
print('HemoPI3')
hemo3_poly = test_metric(
    param=poly_params3,
    x=x_hemo3,
    y=y_hemo3,
    x_test=x_hemo3_test,
    y_test=y_hemo3_test,
)

HemoPI3
Testing accuracy: 0.75077
Precision: 0.72222
Recall: 0.88136
F1: 0.79389
ROC AUC: 0.79657


In [144]:
# One row per kernel, one column per metric; the row order matches the
# order in which the per-kernel metric lists are stacked below.
kernel_labels = ['Quantum', 'Linear', 'RBF', 'Poly']
metric_labels = ['Acc', 'Precision', 'Recall', 'F1', 'AUC']

data_hemo1 = [hemo1_q, hemo1_linear, hemo1_rbf, hemo1_poly]
data_hemo2 = [hemo2_q, hemo2_linear, hemo2_rbf, hemo2_poly]
data_hemo3 = [hemo3_q, hemo3_linear, hemo3_rbf, hemo3_poly]

df_hemo1 = pd.DataFrame(data_hemo1, index=kernel_labels, columns=metric_labels)
df_hemo2 = pd.DataFrame(data_hemo2, index=kernel_labels, columns=metric_labels)
df_hemo3 = pd.DataFrame(data_hemo3, index=kernel_labels, columns=metric_labels)


In [145]:
# Highlight the best value of each metric column.
(
    df_hemo1.style
    .set_caption('HemoPI1 - 56 Descriptors')
    .highlight_max(color='blue', axis=0)
)

Unnamed: 0,Acc,Precision,Recall,F1,AUC
Quantum,0.959091,0.971963,0.945455,0.958525,0.98595
Linear,0.940909,0.944954,0.936364,0.940639,0.987273
RBF,0.954545,0.990196,0.918182,0.95283,0.989339
Poly,0.945455,0.962264,0.927273,0.944444,0.970496


In [146]:
# Highlight the best value of each metric column.
(
    df_hemo2.style
    .set_caption('HemoPI2 - 56 Descriptors')
    .highlight_max(color='blue', axis=0)
)

Unnamed: 0,Acc,Precision,Recall,F1,AUC
Quantum,0.732673,0.764151,0.736364,0.75,0.823814
Linear,0.70297,0.704918,0.781818,0.741379,0.762945
RBF,0.707921,0.717949,0.763636,0.740088,0.779941
Poly,0.668317,0.759036,0.572727,0.65285,0.791502


In [147]:
# Highlight the best value of each metric column.
(
    df_hemo3.style
    .set_caption('HemoPI3 - 56 Descriptors')
    .highlight_max(color='blue', axis=0)
)

Unnamed: 0,Acc,Precision,Recall,F1,AUC
Quantum,0.781538,0.757282,0.881356,0.814621,0.83402
Linear,0.744615,0.72381,0.858757,0.78553,0.791877
RBF,0.750769,0.730769,0.858757,0.78961,0.806535
Poly,0.750769,0.722222,0.881356,0.793893,0.796572
