In [93]:
import time

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

import sklearn
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_selection import SelectFromModel
from sklearn.model_selection import train_test_split

from qiskit import BasicAer
from qiskit import IBMQ
from qiskit.aqua import QuantumInstance, aqua_globals
from qiskit.aqua.algorithms import QSVM
from qiskit.aqua.algorithms import SklearnSVM
from qiskit.aqua.components.multiclass_extensions import AllPairs, OneAgainstRest
from qiskit.aqua.utils import split_dataset_to_data_and_labels, map_label_to_class_name
from qiskit.circuit.library import ZZFeatureMap
from qiskit.tools.monitor import job_monitor

# One seed reused for Aqua's RNG and for the simulator/transpiler seeds below.
seed = 10599
aqua_globals.random_seed = seed
# scikit-learn falls back to NumPy's global RNG whenever random_state is not
# given, so seed it too; otherwise a fresh kernel yields different results.
np.random.seed(seed)

In [95]:
### Import, process data
df = pd.read_csv("data/stem_processed_all.csv")

### Subset
df = df.iloc[0:50]

LABEL_COL = 'TECH3'

x = df.drop(columns=[LABEL_COL])
y = df[LABEL_COL]

# Fix random_state so the partition (and everything derived from it) is the
# same on every run; without it each execution draws a different split.
xtrain_raw, xtest_raw, ytrain, ytest = train_test_split(x, y, test_size=0.2, random_state=seed)

# Keep at most 5 columns, chosen by a seeded random forest that is fitted on
# the training split only.
sel = SelectFromModel(RandomForestClassifier(n_estimators=100, random_state=seed),
                      max_features=5)
sel.fit(xtrain_raw, ytrain)
support = sel.get_support()
# `support` is a boolean mask over the feature columns, in column order.
newdf_columns = list(x.columns[support])
xtrain = xtrain_raw[newdf_columns].copy()
xtest = xtest_raw[newdf_columns].copy()

feature_dim = len(newdf_columns)
assert feature_dim > 0, "feature selection kept no columns"

# Per-class float arrays keyed by the LABEL_COL values 0, 1 and 2.
train_inp_3 = {label: xtrain[ytrain == label].values.astype(float) for label in range(3)}
test_inp_3 = {label: xtest[ytest == label].values.astype(float) for label in range(3)}
# Left empty: the TECH6 variant is not populated in this cell.
train_inp_6 = {}
test_inp_6 = {}

# Every test row, stacked in class order (0, then 1, then 2).
total_array_3 = np.concatenate(list(test_inp_3.values()))

In [99]:
### Classical SVM
#################
# Number of repeated fits used to average the timing.
N_RUNS = 10

accs = []
times = []
for _ in range(N_RUNS):
    # perf_counter is monotonic and high-resolution, which matters for runs
    # that finish in a few milliseconds.
    start = time.perf_counter()
    result = SklearnSVM(train_inp_3, test_inp_3, total_array_3, multiclass_extension=AllPairs()).run()
    times.append(time.perf_counter() - start)
    accs.append(result['testing_accuracy'])

print(np.mean(accs))
print(np.mean(times))

# for k,v in result.items():
#     print(f'{k} : {v}')

# print(f'Testing success ratio: {result["testing_accuracy"]}')
# print()
# print('Prediction from datapoints set:')
# print(f'  ground truth: {map_label_to_class_name(datapoints[1], qsvm.label_to_class)}')
# print(f'  prediction:   {result["predicted_classes"]}')
# predicted_labels = result["predicted_labels"]
# print(f'  success rate: {100*np.count_nonzero(predicted_labels == datapoints[1])/len(predicted_labels)}%')

# kernel_matrix = result['kernel_matrix_training']
# plt.imshow(np.asmatrix(kernel_matrix), interpolation='nearest', origin='upper', cmap='bone_r');

# def f_importances(coef, names):
#     imp = coef
#     imp,names = zip(*sorted(zip(imp,names)))
#     plt.barh(range(len(names)), imp, align='center')
#     plt.yticks(range(len(names)), names)
#     plt.show()

# features_names = [df.columns]
# svm = svm.SVC(kernel='linear')
# svm.fit(X, Y)
# f_importances(svm.coef_, features_names)

0.5
0.0029744625091552733


In [55]:
### View available backends

# One-time setup: store the IBM Quantum API token on disk, reading it from the
# environment instead of pasting it into the notebook, e.g.
#   IBMQ.save_account(os.environ["IBMQ_API_TOKEN"], overwrite=True)
# SECURITY: never commit the raw token here. Any token that has appeared in a
# saved notebook must be treated as leaked and regenerated.
IBMQ.load_account()

provider = IBMQ.get_provider(group='open')
# List backends that report more than 5 qubits and are not simulators.
provider.backends(filters=lambda x: x.configuration().n_qubits > 5 and not x.configuration().simulator)

In [44]:
### Quantum SVM
###############

# NOTE on the UFuncTypeError recorded for this cell (complex128 -> float64):
# the workaround noted by the author is to edit the installed scikit-learn file
#   ~/anaconda3/envs/qis/lib/python3.8/site-packages/sklearn/utils/multiclass.py
# and insert
#   sum_of_confidences = np.real(sum_of_confidences)
# immediately before
#   sum_of_confidences[:, i] -= confidences[:, k]

class_labels = [0, 1, 2]

# Local simulator; the commented line selects a backend from `provider` instead.
backend = BasicAer.get_backend('qasm_simulator')
# backend = provider.get_backend("ibmq_16_melbourne")
quantum_instance = QuantumInstance(
    backend,
    shots=256,
    seed_simulator=seed,
    seed_transpiler=seed,
)

# Multiclass extensions that can be swapped in for AllPairs():
# OneAgainstRest(), ErrorCorrectingCode(code_size=5)
feature_map = ZZFeatureMap(feature_dimension=feature_dim, reps=2, entanglement='linear')
qsvm = QSVM(
    feature_map,
    train_inp_3,
    test_inp_3,
    total_array_3,
    multiclass_extension=AllPairs(),
)

result = qsvm.run(quantum_instance)

# Dump every entry of the result dictionary, one per line.
for k, v in result.items():
    print(f'{k} : {v}')

  


UFuncTypeError: Cannot cast ufunc 'add' output from dtype('complex128') to dtype('float64') with casting rule 'same_kind'

In [None]:
### Visualize results
# `result` is whatever the most recently executed SVM cell produced (both the
# classical and the quantum cell assign it), so run the one to be plotted first.
# A plain ndarray is used here; NumPy discourages the np.matrix type.
kernel_matrix = np.asarray(result['kernel_matrix_training'])

fig, ax = plt.subplots()
im = ax.imshow(kernel_matrix, interpolation='nearest', origin='upper', cmap='bone_r')
ax.set(title='Training kernel matrix', xlabel='sample index', ylabel='sample index')
fig.colorbar(im, ax=ax);

In [None]:
# df = df.drop(columns=[''])
# label_col = 'TECH3'

# labels = set(df[label_col])

# data_dict = {}
# for label in labels:
#     data_dict.update({label:df[df[label_col] == label].drop(columns=[label_col]).to_numpy()})
    
# data_dict

In [None]:
# from qiskit.ml.datasets import iris, ad_hoc_data, sample_ad_hoc_data

# feature_dim = 4
# training_size = 110
# test_size = 40

# sample_total, training_input, test_input, class_labels = iris(
#     training_size=training_size,
#     test_size=test_size,
#     n=feature_dim)

# print(test_input)