In [1]:
import math
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.svm import SVC
import pickle
import sklearn.multiclass as sm
from sklearn.decomposition import PCA
from sklearn.metrics import confusion_matrix 
from sklearn.metrics import accuracy_score 
from sklearn.metrics import classification_report 
from sklearn.preprocessing import label_binarize
from sklearn.metrics import roc_curve
import warnings
warnings.filterwarnings("ignore")

In [2]:
def unpickle(file):
    """Load and return the object stored in the pickle file at path ``file``.

    The file is read with ``encoding='bytes'``, which is why callers in this
    notebook index the result with byte-string keys such as ``b'data'``.
    Unpickling can execute arbitrary code, so only pass trusted files.
    """
    with open(file, 'rb') as handle:
        return pickle.load(handle, encoding='bytes')
def normalize(x):
    """Min-max scale ``x`` into the range [0, 1].

    Args:
        x: array-like of numbers (must be non-empty).

    Returns:
        Float array with the same shape as ``x`` in which the global minimum
        maps to 0 and the global maximum maps to 1. A constant input
        (max == min) yields all zeros instead of the NaNs a 0/0 division
        would produce.
    """
    x = np.asarray(x)
    min_val = np.min(x)
    span = np.max(x) - min_val
    if span == 0:
        # Every value is identical, so there is no range to scale by.
        return np.zeros_like(x, dtype='float64')
    return (x - min_val) / span

def convert(data):
    """Turn one raw image row into a min-max scaled 1-D feature vector.

    Args:
        data: 1-D array of pixel values for a single image (``extract_data``
            passes in the rows of a batch's ``b'data'`` matrix).

    Returns:
        1-D float array: ``normalize(data)`` flattened.
    """
    # NOTE: no grayscale conversion is applied here. All raw channel values
    # are kept, so the 32x32 R/G/B planes do not need to be rebuilt per image.
    return normalize(data).flatten()

In [3]:
def extract_data(data_dir='./cifar-10-batches-py', batches=range(1, 2)):
    """Load CIFAR-10 training batches and the class-name list from disk.

    Args:
        data_dir: directory holding the ``data_batch_<n>`` files and
            ``batches.meta``. Defaults to ``./cifar-10-batches-py``.
        batches: iterable of batch numbers to load. The default loads only
            ``data_batch_1``.

    Returns:
        ``(train_data, labels)`` where

        * ``train_data`` is a list with one ``[features, targets]`` entry per
          loaded batch. ``features`` is a float64 array with one row per
          image (each row produced by ``convert``) and ``targets`` is the
          batch's label array reshaped to a single column.
        * ``labels`` is the list of class names decoded to ``str``.
    """
    train_data = []
    for batch_no in batches:
        batch = unpickle(data_dir + '/data_batch_' + str(batch_no))
        raw_rows = batch[b'data']
        features = np.asarray([convert(row) for row in raw_rows]).astype('float64')
        # Column vector so the labels can later be stacked with np.vstack.
        targets = np.asarray(batch[b'labels']).reshape(raw_rows.shape[0], 1)
        train_data.append([features, targets])

    meta = unpickle(data_dir + '/batches.meta')
    # Keys and names are bytes because the files are read with encoding='bytes'.
    labels = [name.decode("utf-8") for name in meta[b'label_names']]
    return train_data, labels



In [4]:
# Scratch check of NumPy column slicing: print column 1 of a 2x2 array.
kert = np.array([[0, 1], [1, 2]])
print(kert[:, 1])

[1 2]


In [15]:
def normalize123(data):
    """Mean-centre ``data`` along axis 0.

    Despite the name, no scaling is applied: the input is cast to float64
    and the mean over axis 0 is subtracted. The cast makes a copy, so the
    caller's array is left unchanged.

    Args:
        data: NumPy array (rows are samples in this notebook).

    Returns:
        float64 array of the same shape with zero mean along axis 0.
    """
    as_float = data.astype('float64')
    axis0_mean = as_float.mean(axis=0)
    return as_float - axis0_mean

def roc(ovo,X_test,Y_test):
    """Plot one ROC curve per class (labels 0-9) for a fitted classifier.

    Args:
        ovo: fitted estimator exposing ``decision_function``; column ``k`` of
            its output is used as the score for class ``k``.
        X_test: test feature matrix.
        Y_test: true class labels for ``X_test``.

    The figure also contains a dashed diagonal from (0, 0) to (1, 1) and is
    shown immediately; nothing is returned.
    """
    class_ids = list(range(10))
    truth = label_binarize(Y_test, classes=class_ids)
    scores = ovo.decision_function(X_test)
    # Keep only (fpr, tpr); the thresholds returned by roc_curve are not needed.
    curves = [roc_curve(truth[:, k], scores[:, k])[:2] for k in class_ids]

    plt.title('Receiver Operating Characteristic')
    for k, (false_pos, true_pos) in enumerate(curves):
        plt.plot(false_pos, true_pos, label=('Class for ' + str(k)))
    plt.plot([0, 1], [0, 1], linestyle='--')
    plt.legend(loc='lower right')
    plt.show()
    
def _fit_and_report(kernel, coef_attr, X_train, Y_train, X_test, Y_test, model):
    """Fit a multiclass SVM wrapper and print its evaluation on the test set.

    Args:
        kernel: kernel name passed to ``SVC``.
        coef_attr: attribute of the first fitted binary estimator to print
            (``'coef_'`` or ``'dual_coef_'``).
        X_train, Y_train: training features and labels.
        X_test, Y_test: test features and labels.
        model: ``'ovo'`` selects ``OneVsOneClassifier``; any other value
            selects ``OneVsRestClassifier``.

    Prints the accuracy, the confusion matrix and the requested coefficient
    array, in that order.
    """
    base = SVC(kernel=kernel)
    if model == 'ovo':
        mod = sm.OneVsOneClassifier(base)
    else:
        mod = sm.OneVsRestClassifier(base)

    # Labels are stored as column vectors; scikit-learn expects 1-D targets.
    y_true = Y_test.ravel()
    mod.fit(X_train, Y_train.ravel())
    y_pred = mod.predict(X_test)

    print("Model accuracy is: ", accuracy_score(y_true, y_pred))
    print("Confusion Matrix: ", confusion_matrix(y_true, y_pred))
    print(getattr(mod.estimators_[0], coef_attr))


def svm_linear(X,Y,x,y,model):
    """Evaluate a linear-kernel SVM and print the first estimator's ``coef_``.

    Args:
        X, Y: training features and labels.
        x, y: test features and labels.
        model: ``'ovo'`` for one-vs-one, anything else for one-vs-rest.
    """
    _fit_and_report('linear', 'coef_', X, Y, x, y, model)


def svm_rbf(X,Y,x,y,model):
    """Evaluate an RBF-kernel SVM and print the first estimator's ``dual_coef_``.

    Args:
        X, Y: training features and labels.
        x, y: test features and labels.
        model: ``'ovo'`` for one-vs-one, anything else for one-vs-rest.
    """
    # Primal weights (coef_) exist only for the linear kernel, so the dual
    # coefficients are reported here.
    _fit_and_report('rbf', 'dual_coef_', X, Y, x, y, model)


def svm_poly(X,Y,x,y,model):
    """Evaluate a polynomial-kernel SVM and print the first estimator's ``dual_coef_``.

    Args:
        X, Y: training features and labels.
        x, y: test features and labels.
        model: ``'ovo'`` for one-vs-one, anything else for one-vs-rest.
    """
    _fit_and_report('poly', 'dual_coef_', X, Y, x, y, model)

In [16]:
def pca(train):
    """Reduce ``train`` to 100 principal components.

    The PCA (randomized SVD solver, ``random_state=0``) is fitted on
    ``train`` itself and the transformed matrix is returned.
    """
    reducer = PCA(n_components=100, random_state=0, svd_solver='randomized')
    return reducer.fit(train).transform(train)
    

In [17]:
def pca_proc(i):
    """Thin wrapper: apply ``pca`` to the matrix ``i`` and return the result."""
    reduced = pca(i)
    return reduced
    
   

In [18]:
def split_train(data):
    """Cut a ``[features, targets]`` pair into five consecutive folds.

    The first four folds hold rows 0-1999, 2000-3999, 4000-5999 and
    6000-7999; the fifth holds everything from row 8000 onwards.

    Args:
        data: two-element sequence; ``data[0]`` and ``data[1]`` are sliced
            with the same row ranges.

    Returns:
        List of five ``[features_slice, targets_slice]`` lists.
    """
    features, targets = data[0], data[1]
    # None as the final edge reproduces the open-ended last slice.
    edges = [0, 2000, 4000, 6000, 8000, None]
    return [
        [features[start:stop], targets[start:stop]]
        for start, stop in zip(edges[:-1], edges[1:])
    ]

In [19]:
# Load the data, then mean-centre and PCA-reduce each batch's feature matrix.
# NOTE(review): centring and PCA are fitted on the whole batch before it is
# split into folds, so test-fold rows influence the projection.
train_data, labels = extract_data()
for batch in train_data:
    centred = normalize123(batch[0])
    batch[0] = pca_proc(centred)
    


In [20]:
# Sanity check: dimensions of the first batch's feature matrix after centring and PCA.
print(train_data[0][0].shape)

(10000, 100)


In [21]:
# Build the five train/test combinations for 5-fold cross-validation.
k_cross_split = split_train(train_data[0])
trainx = []
trainy = []

for held_out in range(5):
    # Training folds are stacked in the order 1, 2, 3, 4, with fold 0 taking
    # the slot of the held-out fold (for held_out == 0 nothing is swapped).
    order = [0 if fold == held_out else fold for fold in range(1, 5)]
    trainx.append(np.vstack([k_cross_split[fold][0] for fold in order]))
    trainy.append(np.vstack([k_cross_split[fold][1] for fold in order]))

# Test set k is simply fold k.
testx = [fold_pair[0] for fold_pair in k_cross_split]
testy = [fold_pair[1] for fold_pair in k_cross_split]


In [24]:
def call_svm(X,Y,x,y):
    """Run the SVM experiment for one train/test fold.

    At present this trains per-class linear-kernel SVMs through
    ``one_vs_all``. The multiclass helpers ``svm_linear``, ``svm_rbf`` and
    ``svm_poly`` (each taking ``'ovo'`` or ``'ovr'``) are the alternative
    experiments that can be called here instead.

    Args:
        X, Y: training features and labels.
        x, y: test features and labels.
    """
    one_vs_all(ker='linear', TrainX=X, TrainY=Y, TestX=x, TestY=y)

In [25]:
def change_label(index,input):
    """Binarise class labels for a one-vs-rest problem.

    Args:
        index: the class label to treat as positive.
        input: array-like of class labels (the parameter name shadows the
            builtin but is kept for existing callers).

    Returns:
        Integer array with the same shape as ``input``: 1 where the label
        equals ``index``, 0 elsewhere.
    """
    # Compare against the ``index`` argument rather than any global loop
    # variable, so the result depends only on what the caller passed in.
    return np.where(np.asarray(input) == index, 1, 0)
def one_vs_all(ker,TrainX,TrainY,TestX,TestY,n_classes=10):
    """Train one binary SVM per class and plot every class's ROC curve.

    For each class ``c`` in ``range(n_classes)`` the labels are binarised
    (1 for ``c``, 0 otherwise), an ``SVC`` with kernel ``ker`` is fitted on
    the training data, its test accuracy on the binarised labels is printed,
    and its ROC curve is computed from ``decision_function`` scores.

    Args:
        ker: kernel name passed to ``SVC``.
        TrainX, TrainY: training features and class labels.
        TestX, TestY: test features and class labels.
        n_classes: number of classes; labels are taken to be
            ``0 .. n_classes - 1``. Defaults to 10.

    The combined ROC figure is shown; nothing is returned.
    """
    # Flatten once: labels arrive as column vectors, while SVC and roc_curve
    # expect 1-D targets.
    train_labels = np.asarray(TrainY).ravel()
    test_labels = np.asarray(TestY).ravel()

    fpr_li = []
    tpr_li = []
    for cls in range(n_classes):
        # Binarise against this iteration's class id directly, so each
        # classifier really is "class cls versus the rest".
        train_is_cls = np.where(train_labels == cls, 1, 0)
        test_is_cls = np.where(test_labels == cls, 1, 0)

        mod = SVC(kernel=ker)
        mod.fit(TrainX, train_is_cls)
        scores = mod.decision_function(TestX)
        fpr, tpr, _ = roc_curve(test_is_cls, scores)
        fpr_li.append(fpr)
        tpr_li.append(tpr)
        print(mod.score(TestX, test_is_cls))

    for cls, (fpr, tpr) in enumerate(zip(fpr_li, tpr_li)):
        plt.plot(fpr, tpr, label=('Class for '+str(cls)))
    plt.title('Receiver Operating Characteristic')
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.legend(loc='lower right')
    plt.show()

In [26]:
# Five folds are prepared, but only the first one is evaluated here; the
# slice limits the loop to fold index 0.
for i in range(5)[:1]:
    print("svm for "+str(i+1)+" started")
    call_svm(trainx[i], trainy[i], testx[i], testy[i])

svm for 1 started
linear svm ovo start
Model accuracy is:  0.369
Confusion Matrix:  [[ 84  11  19   5   6   6   4  11  39  17]
 [  6  99   6   4   2   5  12  10  14  33]
 [ 16   9  52  17  28  17  28  25  10   1]
 [ 10  20  19  45   8  30  32  10   9  12]
 [  5   8  35  18  57  14  34  29   8   6]
 [  4   6  23  32  16  51  15  19  11   6]
 [  1   5  20  41  19  19  84   9   2   7]
 [ 16  12  15  12  28  11   6  81   2  16]
 [ 44  10   5   8   4   4   4   2 100  22]
 [ 13  42   4   7   1   5  10  14  22  85]]
[[-8.04451090e-02 -4.20348125e-02 -1.65654421e-01 -1.17281850e-01
  -5.88239300e-03  3.59984079e-02 -1.18840860e-01  8.09594040e-03
  -3.43595695e-01  1.56087154e-02 -7.85654719e-02  1.28885974e-01
  -5.67034790e-02 -2.15150856e-01  5.60131160e-02  2.32401281e-02
   5.03274261e-02  4.70766092e-02 -3.03385084e-01 -1.00752935e-01
  -1.64685823e-01 -7.02048932e-02 -5.88053261e-02 -1.51600869e-01
   1.23051320e-02  1.56858130e-01  1.09998645e-01  3.64291540e-03
   2.03679453e-02 -1.43