In [1]:
#Load USPS Dataset
import h5py
path="usps.h5"
with h5py.File(path, 'r') as usps_data:
    train = usps_data.get('train')
    data_train = train.get('data')[:]
    target_train = train.get('target')[:]
    test = usps_data.get('test')
    data_test = test.get('data')[:]
    target_test = test.get('target')[:]

#Import libraries 
import numpy as np
import math 
from sklearn.svm import SVC
import matplotlib.pyplot as plt 
import time
from sklearn.model_selection import train_test_split
%matplotlib inline

#Split data into training set proper, calibration set and test set
data_train_p, data_cal, target_train_p, target_cal = train_test_split(data_train, target_train, random_state=0)

print("Training Proper Size:", len(data_train_p))
print("Calibration Set Size:",len(data_cal))
print("Test Set Size:",len(data_test))

Training Proper Size: 5468
Calibration Set Size: 1823
Test Set Size: 2007


In [2]:
def set_variables(data_train,target_train,data_test,target_test,kernel):
    SVM=SVC(kernel=kernel)
    score=np.zeros(len(data_train)+1)
    p_values=np.zeros((len(data_test),len(set(target_train))))
    prediction=np.zeros(len(data_test))
    confidence=np.zeros(len(data_test))
    credibility=np.zeros(len(data_test))
    sum_p=0
    return SVM, score, p_values, prediction, confidence, credibility, sum_p

In [3]:
def calculate_score(extend_train_x,extend_train_y,SVM):
    SVM.fit(extend_train_x,extend_train_y)
    d_func=SVM.decision_function(extend_train_x)
    row=np.arange(0,len(d_func))
    result_score=-1*d_func[row,extend_train_y]
    return result_score,SVM

In [4]:
def svm_conformal_predictor(data_train,target_train,data_test,target_test,kernel):
    #Set variables
    SVM, score, p_values, prediction, confidence, credibility, sum_p = \
    set_variables(data_train,target_train,data_test,target_test,kernel)
    #Loop through test samples
    for i in range(len(data_test)):
        #Create new dataset which is the training set + the test sample
        extend_train_x=np.row_stack((data_train,data_test[i]))
        for j in set(target_train): #Test all possible labels
            #Extend labels with test sample
            extend_train_y=np.append(target_train,j)
            #Calculate conformity scores for each sample in the extended set
            result_score,SVM=\
                calculate_score(extend_train_x,extend_train_y,SVM)
            #Calculate p-values of test sample
            p_values[i][j]=np.mean(result_score>=result_score[-1])
        #Use p-values of test sample to calculate various measures
        prediction[i]=int(np.argmax(p_values[i]))
        confidence[i]=1- p_values[i][np.argsort(p_values[i])[-2]]
        credibility[i]=np.max(p_values[i])
        sum_p = sum_p + np.sum(p_values[i]) - p_values[i][target_test[i]]
    false_p_value=sum_p/(2*len(data_test))
    return prediction, confidence, credibility, false_p_value

In [5]:
start_time = time.time()
prediction, confidence, credibility, false_p_value = \
svm_conformal_predictor(data_train,target_train,data_test[0:10],target_test[0:10],"linear" )
print("--- %s seconds ---" % (time.time() - start_time))

--- 168.38916659355164 seconds ---


In [7]:
print("Linear SVM Accuracy: ", np.mean(prediction==target_test[0:10]))
print("Linear SVM Avg False p-value: ", false_p_value)
print("Linear Avg Confidence: ",np.mean(confidence))
print("Linear Avg Credibility: ",np.mean(credibility))

Linear SVM Accuracy:  0.7
Linear SVM Avg False p-value:  0.17447202413603946
Linear Avg Confidence:  0.8196379594075699
Linear Avg Credibility:  0.6471475589687328
