In [22]:
import os
import _pickle as pickle
import numpy as np
import matplotlib.pyplot as plt

In [14]:
def unpickle(file):
    with open(file, 'rb') as foo:
        dict = pickle.load(foo, encoding='bytes')
    return dict

In [27]:
data = unpickle('./cifar-10-batches-py/data_batch_1')
X = np.zeros((data[b'data'].shape[0], int(data[b'data'].shape[1]/3)))
X += 0.3 * data[b'data'][:, :1024] 
X += 0.59 * data[b'data'][:, 1024:2048]
X += 0.11 * data[b'data'][:, 2048:3072]
X.shape

(10000, 1024)

In [38]:
Y = np.array(data[b'labels'])
Y.shape

(10000,)

In [40]:
# PCA
from sklearn.decomposition import PCA
pca = PCA(n_components=100)
X_pca = pca.fit_transform(X)
X_pca.shape

(10000, 100)

In [46]:
# LDA
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
lda = LDA()
X_lda = lda.fit_transform(X, Y)
X_lda.shape

(10000, 9)

In [47]:
# LDA after PCA
X_lp = lda.fit_transform(X_pca, Y)
X_lp.shape

(10000, 9)

In [175]:
# Pairwise classification
class PairWiseClassification:
    def __init__(self):
        pass

    def fit(self, X, Y):
        self.labels = set(Y)
        self.labelled_X = {}
        for label in self.labels:
            self.labelled_X[label] = []
        for i in range(Y.shape[0]):
            self.labelled_X[Y[i]].append(X[i])
        for label in self.labels:
            self.labelled_X[label] = np.array(self.labelled_X[label])

        from sklearn.linear_model import LogisticRegression as LR
        self.classifiers = {}
        for label_1 in self.labels:
            for label_2 in self.labels:
                if label_1 != label_2:
                    n1 = self.labelled_X[label_1].shape[0]
                    n2 = self.labelled_X[label_2].shape[0]
                    _X = np.zeros(((n1 + n2), X.shape[1]))
                    _Y = np.ones(((n1 + n2), ))
                    _X[:n1, :] = self.labelled_X[label_1]
                    _X[n1:, :] = self.labelled_X[label_2]
                    _Y[:n1] *= label_1
                    _Y[n1:] *= label_2
                    self.classifiers[(label_1, label_2)] = LR(solver='liblinear').fit(_X, _Y)
                
    def predict(self, X):
        X = np.array(X)
        _Y = []
        for clf in self.classifiers.values():
            _Y.append(clf.predict(X))
        _Y = np.array(_Y).T
        
        Y = []
        from collections import Counter
        for i in range(_Y.shape[0]):
            c = Counter(_Y[i])
            Y.append(int(c.most_common(1)[0][0]))
        return np.array(Y)

In [176]:
test_data = unpickle('./cifar-10-batches-py/test_batch')
test_X = np.zeros((test_data[b'data'].shape[0], int(test_data[b'data'].shape[1]/3)))
test_X += 0.3 * test_data[b'data'][:, :1024] 
test_X += 0.59 * test_data[b'data'][:, 1024:2048]
test_X += 0.11 * test_data[b'data'][:, 2048:3072]
test_Y = np.array(test_data[b'labels'])

test_X.shape, test_Y.shape

((10000, 1024), (10000,))

In [177]:
test_X_pca = pca.fit_transform(test_X)
test_X_lda = lda.fit_transform(test_X, test_Y)
test_X_lp = lda.fit_transform(test_X_pca, test_Y)

In [178]:
pwc = PairWiseClassification()

In [179]:
pwc.fit(X_pca, Y)
test_Y_pca = pwc.predict(test_X_pca)

pwc.fit(X_lda, Y)
test_Y_lda = pwc.predict(test_X_lda)

pwc.fit(X_lp, Y)
test_Y_lp = pwc.predict(test_X_lp)

In [183]:
def find_accuracy(Y, _Y):
    score = 0
    for i in range(Y.shape[0]):
        if Y[i] == _Y[i]:
            score += 1
    return (score / Y.shape[0]) * 100

In [192]:
find_accuracy(test_Y_pca, test_Y)

11.83

In [191]:
find_accuracy(test_Y_lda, test_Y)

19.37

In [190]:
find_accuracy(test_Y_lp, test_Y)

25.7