In [1]:
from sklearn import svm  
from sklearn import metrics  
import pandas as pd   
import numpy as np

In [2]:
x = np.loadtxt('z_runs.txt')
y = np.loadtxt('ground_truth.txt')
print(np.shape(x))
print(np.shape(y))

(8320, 20)
(8320,)


In [3]:
def dataset_split(x,y,ratio=0.7):
    N,D = np.shape(x)
    ind_cut = int(ratio * N)
    ind = np.random.permutation(N)
    train_data = x[ind[:ind_cut]]
    val_data = x[ind[ind_cut:]]
    train_label = y[ind[:ind_cut]]
    val_label = y[ind[ind_cut:]]
    return train_data, train_label, val_data, val_label

In [4]:
train_data, train_label, val_data, val_label = dataset_split(x,y)

In [5]:
clf = svm.SVC(probability=True)
clf.fit(train_data, train_label)



SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto_deprecated',
  kernel='rbf', max_iter=-1, probability=True, random_state=None,
  shrinking=True, tol=0.001, verbose=False)

In [6]:
y_predict = clf.predict(val_data)

In [7]:
def Get_Accuracy(y_true, y_pred): #Accuracy 准确率：分类器正确分类的样本数与总样本数之比 
    accuracy = metrics.accuracy_score(y_true,y_pred)  
    return accuracy
 
def Get_Precision_score(y_true, y_pred): #Precision：精准率 正确被预测的正样本(TP)占所有被预测为正样本(TP+FP)的比例. 
    precision = metrics.precision_score(y_true,y_pred)  
    return precision
 
def Get_Recall(y_true, y_pred): #Recall 召回率 正确被预测的正样本(TP)占所有真正 正样本(TP+FN)的比例.  
    Recall = metrics.recall_score(y_true,y_pred)  
    return Recall 
 
def Get_f1_score(y_true, y_pred): #F1-score: 精确率(precision)和召回率(Recall)的调和平均数  
    f1_score = metrics.f1_score(y_true,y_pred)  
    return f1_score
 
def Get_Auc_value(y_true,y_proba):  
    #fpr, tpr, thresholds = metrics.roc_curve(samples_test_y, proba_pred_y, pos_label=2)    
    auc = metrics.roc_auc_score(y_true, y_proba)  
    return auc

In [8]:
y_pred_probability = clf.predict_proba(val_data)  
print(y_pred_probability) 

[[0.16427826 0.83572174]
 [0.12831059 0.87168941]
 [0.13890595 0.86109405]
 ...
 [0.19590688 0.80409312]
 [0.43377627 0.56622373]
 [0.11345228 0.88654772]]


In [9]:
df2 = pd.DataFrame(y_pred_probability)  
proba_pred_y = np.array(df2[1])  #截取样本点预测为正样本的预测概率
# df2.to_csv("pred_probability.csv")  
# print(proba_pred_y)

In [10]:
score = clf.score(val_data,val_label)  
print("SVM 模型打分: Score = %f"%score)  
accuracy = Get_Accuracy(val_label,y_predict)
print("SVM Accuracy_Score = %f"%accuracy)  
precision = Get_Precision_score(val_label,y_predict)
print("SVM Precision = %f"%precision)
recall = Get_Recall(val_label,y_predict)
print("SVM Recall = %f"%recall) 
f1_score = Get_f1_score(val_label,y_predict)
print("SVM F1-Score  = %f"%f1_score) 
auc = Get_Auc_value(val_label, proba_pred_y)
print("SVM AUC value: AUC = %f"%auc)  

SVM 模型打分: Score = 0.817708
SVM Accuracy_Score = 0.817708
SVM Precision = 0.749030
SVM Recall = 0.998523
SVM F1-Score  = 0.855967
SVM AUC value: AUC = 0.866618


In [5]:
from qiskit import BasicAer
from qiskit.aqua.utils import split_dataset_to_data_and_labels, map_label_to_class_name
from qiskit.aqua.input import ClassificationInput
from qiskit.aqua import run_algorithm, QuantumInstance
from qiskit.aqua.algorithms import QSVM
from qiskit.aqua.components.feature_maps import SecondOrderExpansion

# setup aqua logging
import logging
from qiskit.aqua import set_qiskit_aqua_logging
# set_qiskit_aqua_logging(logging.DEBUG)  # choose INFO, DEBUG to see the log

In [6]:
def distribute_normal_abnormal(train_data,train_label):
    N = len(train_data)
    data_normal = []
    data_abnormal = []
    for k in range(N):
        if train_label[k] == 1:
            tmp = train_data[k]
            data_normal.append(tmp)
        elif train_label[k] == 0:
            tpm = train_data[k]
            data_abnormal.append(tpm)    
        else:
            pass
    return np.array(data_normal), np.array(data_abnormal)

In [15]:
#train_data0 = train_data[:3][:2]
#train_label0 = train_label[:3]
#val_data0 = val_data[:3][:2]
#val_label0 = val_label[:400]
train_data0 = train_data[:20,:3]*10.0
train_label0 = train_label[:20]
val_data0 = val_data[:20,:3]*10.0
val_label0 = val_label[:20]

train_data_normal,train_data_abnormal = distribute_normal_abnormal(train_data0,train_label0)
training_input = {'Normal':train_data_normal,'Abnormal':train_data_abnormal}
test_data_normal,test_data_abnormal = distribute_normal_abnormal(val_data0,val_label0)
test_input = {'Normal':test_data_normal,'Abnormal':test_data_abnormal}
feature_dim = 3
(training_input['Abnormal'])

array([[-2.19901949e-01,  1.06259361e-01, -1.06205232e-03],
       [-2.25525051e-01,  1.48062073e-01,  2.51522288e-04],
       [ 7.05533288e-02,  2.93040983e-01, -6.80042803e-01],
       [ 6.15588017e-02, -1.17207728e-01,  3.75536419e-01],
       [-2.26389505e-01,  1.48956813e-01,  1.81373209e-03],
       [ 6.19836524e-02, -1.17642581e-01,  3.76462303e-01],
       [ 6.29310496e-02, -1.19337086e-01,  3.78983542e-01],
       [ 6.21868856e-02, -1.17961876e-01,  3.76976170e-01],
       [-2.21952647e-01, -4.99630347e-03, -1.02837924e-01],
       [-1.43354833e-01,  5.48967160e-02, -1.22782178e-01],
       [-1.65550746e-01, -1.82347372e-03, -1.27152428e-01],
       [-2.22004317e-01,  1.15851611e-01,  3.42745334e-04]])

In [10]:
# from qiskit import IBMQ
# IBMQ.load_accounts()

In [17]:
seed = 1088

feature_map = SecondOrderExpansion(feature_dimension=feature_dim, depth=3, entanglement='linear')
qsvm = QSVM(feature_map, training_input, test_input)

backend = BasicAer.get_backend('qasm_simulator')
quantum_instance = QuantumInstance(backend, shots=1024, seed=seed, seed_transpiler=seed)

result = qsvm.run(quantum_instance)

"""declarative approach
params = {
    'problem': {'name': 'classification', 'random_seed': 10598},
    'algorithm': {
        'name': 'QSVM'
    },
    'backend': {'provider': 'qiskit.BasicAer', 'name': 'qasm_simulator', 'shots': 1024},
    'feature_map': {'name': 'SecondOrderExpansion', 'depth': 2, 'entanglement': 'linear'}
}
algo_input = ClassificationInput(training_input, test_input, datapoints[0])
result = run_algorithm(params, algo_input)
"""

print("testing success ratio: {}".format(result['testing_accuracy']))
print("preduction of datapoints:")
#print("ground truth: {}".format(map_label_to_class_name(datapoints[1], qsvm.label_to_class)))
#print("prediction:   {}".format(result['predicted_classes']))

Due to the limited entangler_map, ZIZ is skipped.


testing success ratio: 0.6
preduction of datapoints:
