# 1 Importing libraries and loading data

In [3]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from scipy.stats import norm 

# useful function to load CIFAR-10 datasets
def unpickle(file):
    import pickle
    with open(file, 'rb') as fo:
        dict = pickle.load(fo, encoding='bytes')
    return dict

In [4]:
# loading datasets

# fashion MNIST
fashion_mnist_train = pd.read_csv("./datasets/fashion-mnist_train.csv") 
fashion_mnist_test = pd.read_csv("./datasets/fashion-mnist_test.csv")

# CIFAR-10 train
baseName = "./datasets/cifar-10-batches-py/data_batch_"
X_train = np.array([row for batch in [unpickle(baseName + str(i))[b'data'] for i in range(1, 6)] for row in batch])
y_train = np.array([label for batch in [unpickle(baseName + str(i))[b'labels'] for i in range(1, 6)] for label in batch])
cifar10_train = pd.DataFrame(X_train)
cifar10_train["class"] = y_train

# CIFAR-10 test
X_test = np.array([row for batch in [unpickle("./datasets/cifar-10-batches-py/test_batch")[b'data'] for i in range(1, 6)] for row in batch])
y_test = np.array([label for batch in [unpickle("./datasets/cifar-10-batches-py/test_batch")[b'labels'] for i in range(1, 6)] for label in batch])
cifar10_test = pd.DataFrame(X_test)
cifar10_test["class"] = y_test

# 2 Data analysis

# 3 Classification

## 3.1 Naive Bayes implementation

In [198]:
class NaiveBayes:
    def __init__(self):
        pass
    
    def fit(self, df, target_name):
        self.means = df.groupby(target_name).mean().as_matrix()
        self.stddevs = df.groupby(target_name).std().as_matrix()
    
    def predict(self, features_df):
        return np.apply_along_axis(self.predict_class_by_row, 1, features_df.as_matrix())
        
    def predict_class_by_row(self, row):
        R = np.repeat(row[None, :], len(self.means), axis=0)
        P = np.prod(np.exp(-(np.power(np.subtract(R, self.means), 2)) / (2*np.power(self.stddevs, 2))), axis=1)
        return np.argmax(P)

## 3.2 Naive Bayes on CIFAR-10

In [199]:
model = NaiveBayes()
model.fit(cifar10_train, "class")

In [201]:
import time

start = time.time()
model.predict(cifar10_train.drop(columns="class"))
end = time.time()
print(end - start)

131.45602893829346


## 3.3 Naive Bayes on fashion MNIST