In [46]:
# Load modules

import pickle
import numpy as np
from sklearn.svm import LinearSVC
from sklearn.decomposition import PCA
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA

In [47]:
# Load data

with open('./cifar-10-batches-py/data_batch_1', 'rb') as f:
    dict = pickle.load(f, encoding='bytes')

b1_train_data = dict[b'data']
b1_train_labels = dict[b'labels']

with open('./cifar-10-batches-py/test_batch', 'rb') as f:
    dict = pickle.load(f, encoding='bytes')

b_test_data = dict[b'data']
b_test_labels = dict[b'labels']

In [48]:
def splitDataPairwise(label1,label2):
    
    # Train set, test set for label1-label2 classification
    train_data = []
    train_labels = []
    test_data = []
    test_labels = []
    
    # Populate train set with label1, label2 samples from train batch 1
    for i,label in enumerate(b1_train_labels):
        if(label == label1):
            train_data.append(b1_train_data[i])
            train_labels.append(label1)
            
        elif(label == label2):
            train_data.append(b1_train_data[i])
            train_labels.append(label2)
            
    # Populate test set with label1, label2 samples from test batch
    for i,label in enumerate(b_test_labels):
        if(label == label1):
            test_data.append(b_test_data[i])
            test_labels.append(label1)
            
        elif(label == label2):
            test_data.append(b_test_data[i])
            test_labels.append(label2)
    
    return train_data, train_labels, test_data, test_labels

In [61]:
# Linear SVM

# dog vs truck (easy)
train_data, train_labels, test_data, test_labels = splitDataPairwise(5,9)
clf = LinearSVC()
clf.fit(train_data, train_labels)
print(clf.score(test_data, test_labels))

# deer vs horse (hard)
train_data, train_labels, test_data, test_labels = splitDataPairwise(4,7)
clf = LinearSVC()
clf.fit(train_data, train_labels)
print(clf.score(test_data, test_labels))

# dog vs cat (harder)
train_data, train_labels, test_data, test_labels = splitDataPairwise(5,3)
clf = LinearSVC()
clf.fit(train_data, train_labels)
print(clf.score(test_data, test_labels))



0.791




0.5425
0.651




In [63]:
# Linear SVM with PCA

# dog vs truck
train_data, train_labels, test_data, test_labels = splitDataPairwise(5,9)
reduced_train_data = pca.fit_transform(train_data)
reduced_test_data = pca.fit_transform(test_data)

clf = LinearSVC()
clf.fit(reduced_train_data, train_labels)
print(clf.score(reduced_test_data, test_labels))

# deer vs horse
train_data, train_labels, test_data, test_labels = splitDataPairwise(4,7)
reduced_train_data = pca.fit_transform(train_data)
reduced_test_data = pca.fit_transform(test_data)

clf = LinearSVC()
clf.fit(reduced_train_data, train_labels)
print(clf.score(reduced_test_data, test_labels))

# dog vs cat
train_data, train_labels, test_data, test_labels = splitDataPairwise(5,3)
pca = PCA(n_components=32)
reduced_train_data = pca.fit_transform(train_data)
reduced_test_data = pca.fit_transform(test_data)

clf = LinearSVC()
clf.fit(reduced_train_data, train_labels)
print(clf.score(reduced_test_data, test_labels))



0.475




0.6035
0.5105




In [64]:
# Linear SVM with LDA

# dog vs truck
train_data, train_labels, test_data, test_labels = splitDataPairwise(5,9)
reduced_train_data = lda.fit_transform(train_data, train_labels)
reduced_test_data = lda.fit_transform(test_data, test_labels)

clf = LinearSVC()
clf.fit(reduced_train_data, train_labels)
print(clf.score(reduced_test_data, test_labels))

# deer vs horse
train_data, train_labels, test_data, test_labels = splitDataPairwise(4,7)
reduced_train_data = lda.fit_transform(train_data, train_labels)
reduced_test_data = lda.fit_transform(test_data, test_labels)

clf = LinearSVC()
clf.fit(reduced_train_data, train_labels)
print(clf.score(reduced_test_data, test_labels))

# dog vs cat
train_data, train_labels, test_data, test_labels = splitDataPairwise(5,3)
lda = LDA(n_components=32)
reduced_train_data = lda.fit_transform(train_data, train_labels)
reduced_test_data = lda.fit_transform(test_data, test_labels)

clf = LinearSVC()
clf.fit(reduced_train_data, train_labels)
print(clf.score(reduced_test_data, test_labels))



1.0




0.993




0.986




In [65]:
# Linear SVM with PCA followed by LDA

# dog vs truck
train_data, train_labels, test_data, test_labels = splitDataPairwise(5,9)
reduced_train_data = pca.fit_transform(train_data)
double_reduced_train_data = lda.fit_transform(reduced_train_data, train_labels)
reduced_test_data = pca.fit_transform(test_data)
double_reduced_test_data = lda.fit_transform(reduced_test_data, test_labels)

clf = LinearSVC()
clf.fit(double_reduced_train_data, train_labels)
print(clf.score(double_reduced_test_data, test_labels))

# deer vs horse
train_data, train_labels, test_data, test_labels = splitDataPairwise(4,7)
reduced_train_data = pca.fit_transform(train_data)
double_reduced_train_data = lda.fit_transform(reduced_train_data, train_labels)
reduced_test_data = pca.fit_transform(test_data)
double_reduced_test_data = lda.fit_transform(reduced_test_data, test_labels)

clf = LinearSVC()
clf.fit(double_reduced_train_data, train_labels)
print(clf.score(double_reduced_test_data, test_labels))

# dog vs cat
train_data, train_labels, test_data, test_labels = splitDataPairwise(5,3)
reduced_train_data = pca.fit_transform(train_data)
double_reduced_train_data = lda.fit_transform(reduced_train_data, train_labels)
reduced_test_data = pca.fit_transform(test_data)
double_reduced_test_data = lda.fit_transform(reduced_test_data, test_labels)

clf = LinearSVC()
clf.fit(double_reduced_train_data, train_labels)
print(clf.score(double_reduced_test_data, test_labels))

0.86
0.7025
0.617
