In [1]:
import os
import sys
import pickle
import numpy as np
import pdb
from collections import defaultdict
import random 
import time

from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.decomposition import PCA
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.linear_model import LogisticRegression
from sklearn import svm
from sklearn.metrics import f1_score, accuracy_score
from sklearn.model_selection import train_test_split
from tqdm import *

from functools import wraps
from time import time as _timenow 
from sys import stderr

scaler = StandardScaler()

## Load CIFAR-10 Data

In [2]:
def unpickle(file):
    with open(file, 'rb') as fo:
        data = pickle.load(fo, encoding='bytes')
    return data

def load_cifar():
    trn_data, trn_labels, tst_data, tst_labels = [], [], [], []
    
    for i in trange(1):
        batchName = './data/data_batch_{0}'.format(i + 1)
        unpickled = unpickle(batchName)
        trn_data.extend(unpickled[b'data'])
        trn_labels.extend(unpickled[b'labels'])
    
    unpickled = unpickle('./data/test_batch')
    tst_data.extend(unpickled[b'data'])
    tst_labels.extend(unpickled[b'labels'])
    return trn_data, trn_labels, tst_data, tst_labels

## Image preprocessing

In [3]:
def image_prep(image):
    scaler.fit(image)
    processed_image = scaler.transform(image)
    return processed_image

## Dimensionality reduction using PCA

In [4]:
def reduce_dim(images, labels, method):
    if method == 'pca':
        pca = PCA(n_components=30)
        imgs = pca.fit_transform(images)
        print("The Dimensions of the images after PCA are", imgs[0].shape)
        return imgs
    
    if method == 'lda':
        lda = LinearDiscriminantAnalysis(n_components=200)
        imgs = lda.fit(images, labels).transform(images)
        print("The Dimensions of the images after LDA are", imgs[0].shape)
        return imgs
    
    if method == 'raw':
        print("The Dimensions of the raw images are", images[0].shape)
        return images        

## Classification

In [22]:
def classify(X, Y, method):    
    if method == 'SVM':
        clf = svm.SVC(kernel='linear')
        print("Started Kernel Training")
        clf.fit(X, Y)
        print("Softmargin linear SVM Model is Prepared")

    if method == 'RBF':
        clf = svm.SVC(kernel='rbf', gamma='scale')
        clf.fit(X, Y)
        print("RBF kernel SVM Model is Prepared")

    if method == 'logistic':
        clf = LogisticRegression(multi_class='auto', solver='lbfgs', max_iter=1000)
        clf.fit(X, Y)
        print("Logistic Regression Model is Prepared")

    if method == 'MLP':
        clf = MLPClassifier(max_iter=1000)
        clf.fit(X, Y)
        print("MLP model is prepared")

    if method == 'CART':
        clf = DecisionTreeClassifier()
        clf.fit(X, Y)
        print("Decision Tree Trained")

    return clf   

## Evaluation 

In [8]:
def test(model, test_data, test_labels):
    predictions = model.predict(test_data)
    print("Accuracy:",accuracy_score(test_labels, predictions))
    print("F1:", f1_score(test_labels, predictions, average='weighted'))

## Dimensionality Reduction

In [7]:
dim_reduce = ["pca", "lda", "raw"]

X, y, _X, _y = load_cifar()

N = 8000

train_imgs = {}
test_imgs = {}

for _ in dim_reduce:
    print(_)
    train_imgs[_] = reduce_dim(X[:N], y[:N], method=_)
    test_imgs[_] = reduce_dim(_X, _y, method=_)

100%|██████████| 1/1 [00:00<00:00, 15.69it/s]


pca
The Dimensions of the images after PCA are (30,)
The Dimensions of the images after PCA are (30,)
lda
The Dimensions of the images after LDA are (9,)
The Dimensions of the images after LDA are (9,)
raw
The Dimensions of the raw images are (3072,)
The Dimensions of the raw images are (3072,)


## Logistic Regression

In [18]:
for _ in dim_reduce:
    print(_)
    model = classify(train_imgs[_], y[:N], method='logistic')
    test(model, test_imgs[_], _y)

pca
Logistic Regression Model is Prepared
Accuracy: 0.0961
F1: 0.09267867665949577
lda
Logistic Regression Model is Prepared
Accuracy: 0.4153
F1: 0.4139908084640867
raw




Logistic Regression Model is Prepared
Accuracy: 0.2787
F1: 0.2780054554707241


## RBF Kernel

In [None]:
for _ in dim_reduce:
    print(_)
    model = classify(train_imgs[_], y[:N], method='RBF')
    test(model, test_imgs[_], _y)

pca
RBF kernel SVM Model is Prepared
Accuracy: 0.1
F1: 0.01818181818181818
lda


  'precision', 'predicted', average, warn_for)


RBF kernel SVM Model is Prepared
Accuracy: 0.4199
F1: 0.4179649579184184
raw
RBF kernel SVM Model is Prepared


## CART

In [14]:
for _ in dim_reduce:
    print(_)
    model = classify(train_imgs[_], y[:N], method='CART')
    test(model, test_imgs[_], _y)

pca
Decision Tree Trained
Accuracy: 0.1075
F1: 0.10701763059782572
lda
Decision Tree Trained
Accuracy: 0.3157
F1: 0.3128037755685489
raw
Decision Tree Trained
Accuracy: 0.2359
F1: 0.23577901598834647


## MLP

In [21]:
for _ in dim_reduce:
    print(_)
    model = classify(train_imgs[_], y[:N], method='MLP')
    test(model, test_imgs[_], _y)

pca
MLP model is prepared
Accuracy: 0.107
F1: 0.09898344564383543
lda
MLP model is prepared
Accuracy: 0.3986
F1: 0.3954523858932417
raw
MLP model is prepared
Accuracy: 0.1
F1: 0.018560050274619155


  'precision', 'predicted', average, warn_for)
