# Diabetes analysis using QNN
This has been collected using direct questionnaires from the patients of Sylhet Diabetes
Hospital in Sylhet, Bangladesh and approved by a doctor. The dataset has the following features:
- Age [continuous] 1.20-65
- Sex [categorical] 1. Male, 2.Female
- Polyuria [categorical] 1.Yes, 2.No.
- Polydipsia [categorical] 1.Yes, 2.No.
- sudden weight loss [categorical] 1.Yes, 2.No.
- weakness [categorical] 1.Yes, 2.No.
- Polyphagia [categorical] 1.Yes, 2.No.
- Genital thrush [categorical] 1.Yes, 2.No.
- visual blurring [categorical] 1.Yes, 2.No.
- Itching [categorical] 1.Yes, 2.No.
- Irritability [categorical] 1.Yes, 2.No.
- delayed healing [categorical] 1.Yes, 2.No.
- partial paresis [categorical] 1.Yes, 2.No.
- muscle stifness [categorical] 1.Yes, 2.No.
- Alopecia [categorical] 1.Yes, 2.No.
- Obesity [categorical] 1.Yes, 2.No.

- Class 1.Positive, 2.Negative.

In [1]:
from qiskit import QuantumCircuit
from qiskit.utils import QuantumInstance, algorithm_globals
from qiskit.algorithms.optimizers import COBYLA, ADAM
from qiskit.circuit.library import TwoLocal, ZZFeatureMap
from qiskit_machine_learning.algorithms import VQC
from qiskit.providers.aer import QasmSimulator

import numpy as np
import matplotlib.pyplot as plt
from time import time
from tqdm import tqdm
import numpy as np 
import pandas as pd

seed = 1376
algorithm_globals.random_seed = seed



## Importing the dataset

Most of the dataset is categorical. We need to map it to an integer notation to actually use it on the quantum circuits. We choose to map the first feature (`Yes`, `Male`) to $1$, and the second to $-1$. We then apply a normalization to the continuous variable, the Age.

Finally, for the class we use the one hot encoding.

In [42]:
data = pd.read_csv('data/diabetes/data.csv')

# Features
data.replace('Yes', 1, inplace=True)
data.replace('No', -1, inplace=True)
data.replace('Male', 1, inplace=True)
data.replace('Female', -1, inplace=True)
data['Age'] = (data['Age']-data['Age'].mean())/data['Age'].std()

# Class
data.replace('Positive', 1, inplace=True)
data.replace('Negative', 0, inplace=True)

In [43]:
data.head()

Unnamed: 0,Age,Gender,Polyuria,Polydipsia,sudden weight loss,weakness,Polyphagia,Genital thrush,visual blurring,Itching,Irritability,delayed healing,partial paresis,muscle stiffness,Alopecia,Obesity,class
0,-0.660731,1,-1,1,-1,1,-1,-1,-1,1,-1,1,-1,1,1,1,1
1,0.820572,1,-1,-1,-1,1,-1,-1,1,-1,-1,-1,1,-1,1,-1,1
2,-0.578436,1,1,-1,-1,1,1,-1,-1,1,-1,1,-1,1,1,-1,1
3,-0.249258,1,-1,-1,1,1,1,1,-1,1,-1,1,-1,-1,-1,-1,1
4,0.985161,1,1,1,1,1,1,-1,1,1,1,1,1,1,1,1,1


In [44]:
# Number of features, i.e. of qubits in the circuit
features = data.iloc[:, :-1].to_numpy()
labels = data.iloc[:, -1].to_numpy()
labels = np.array([ [0, 1] if l==0 else [1, 0] for l in labels ])

feature_dim = features.shape[1]

## Circuit preparation

We use a ZZFeature map, with linear entanglement (i.e. only local gates), to map the data points to the quantum circuit, and then a TwoLocal ansazt as neural network, having $32$ trainable parameters.

In [45]:
feature_map = ZZFeatureMap(feature_dimension=feature_dim, reps=4, entanglement='linear', insert_barriers=True)
ansatz = TwoLocal(feature_dim, ['ry'], 'cx', reps=1, entanglement='linear')

In [27]:
from sklearn.metrics import log_loss

algorithm_globals.random_seed = seed
np.random.seed(123)

loss_sv = []
n_epochs = 1
start = time()
vqc_sv = VQC(feature_map=feature_map,
          ansatz=ansatz,
          optimizer=ADAM(lr=1e-1, maxiter=1, snapshot_dir='data/diabetes/'),
          quantum_instance=QuantumInstance(QasmSimulator(method='statevector'),
                                           shots=1024,
                                           seed_simulator=seed,
                                           seed_transpiler=seed),
          warm_start=True
          )

In [46]:
starting_params = np.random.uniform(-1, 1, len(ansatz.parameters))
vqc_sv._fit_result = [starting_params ]
 
for i in tqdm(range(n_epochs) ):
    idx = np.random.randint(0, features.shape[0], 64)
    training_features = features[idx,:]
    training_labels = labels[idx]
    #print(vqc_sv._fit_result)
    vqc_sv.fit(training_features, training_labels)
    loss_sv.append( log_loss(  training_labels, vqc_sv.predict(training_features) ) )
    #vqc_sv._optimizer.load_params('data/MPS-STATEVECT_IRIS')

score_sv = vqc_sv.score(training_features, training_labels)
print('Score: {:0.2f}'.format(score_sv))
print('Time: {:0.2f}'.format(time()-start))

100%|██████████████████████████████████████████████████████████████████████████████████| 1/1 [47:36<00:00, 2856.11s/it]


Score: 0.47
Time: 4514.45


(64, 16)

In [50]:
from sklearn.metrics import log_loss

algorithm_globals.random_seed = seed
np.random.seed(123)

loss_mps = []
n_epochs = 1
start = time()
vqc_mps = VQC(feature_map=feature_map,
          ansatz=ansatz,
          optimizer=ADAM(lr=1e-1, maxiter=1, snapshot_dir='data/diabetes/'),
          quantum_instance=QuantumInstance(QasmSimulator(method='matrix_product_state', 
                                                         matrix_product_state_max_bond_dimension=4),
                                           shots=1024,
                                           seed_simulator=seed,
                                           seed_transpiler=seed),
          warm_start=True
          )

In [None]:
starting_params = np.random.uniform(-1, 1, len(ansatz.parameters))
vqc_mps._fit_result = [starting_params ]
 
for i in tqdm(range(n_epochs) ):
    idx = np.random.randint(0, features.shape[0], 64)
    training_features = features[idx,:]
    training_labels = labels[idx]
    #print(vqc_sv._fit_result)
    vqc_mps.fit(training_features, training_labels)
    loss_mps.append( log_loss(  training_labels, vqc_sv.predict(training_features) ) )
    #vqc_sv._optimizer.load_params('data/MPS-STATEVECT_IRIS')

score_mps = vqc_mps.score(training_features, training_labels)
print('Score: {:0.2f}'.format(score_mps))
print('Time: {:0.2f}'.format(time()-start))

  0%|                                                                                            | 0/1 [00:00<?, ?it/s]