In [2]:
import pickle
import numpy as np
import random
import skimage.transform as ski
from scipy.stats import multivariate_normal
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten
import keras


def class_acc(pred, gt):
    """Return the fraction of predictions that equal the ground-truth labels.

    Parameters
    ----------
    pred : sequence
        Predicted labels; must be indexable and at least as long as ``gt``.
    gt : sequence
        Ground-truth labels.

    Returns
    -------
    float
        Value in [0, 1] (a fraction, not a percentage). Returns 0.0 when
        ``gt`` is empty instead of dividing by zero.
    """
    total = len(gt)
    if total == 0:
        return 0.0
    # Compare every position, including the last one (the previous loop
    # stopped at len(gt) - 1 and never looked at the final label).
    matching_values = sum(1 for index in range(total) if pred[index] == gt[index])
    return matching_values / total

def cifar10_classifier_random(X):
    """Baseline classifier: draw one uniformly random label in 0..9 per item of X.

    The contents of ``X`` are ignored; only the number of items matters.
    Returns a NumPy array with one guessed label per item.
    """
    guesses = [random.randint(0, 9) for _ in X]
    return np.array(guesses)

def cifar10_classifier_1nn(x, trdata, trlabels):
    """Classify one sample with a 1-nearest-neighbour rule under L1 distance.

    Parameters
    ----------
    x : array_like
        The sample to classify; must broadcast against one training sample.
    trdata : array_like
        Training samples stacked along the first axis. Samples may be flat
        vectors or multi-dimensional (e.g. images).
    trlabels : sequence
        Label of each training sample, indexed like ``trdata``.

    Returns
    -------
    The entry of ``trlabels`` belonging to the closest training sample.
    """
    train = np.asarray(trdata)
    query = np.asarray(x)
    # Unsigned (and bool) arrays wrap around on subtraction, which would turn
    # small negative differences into huge distances; widen them first.
    if train.dtype.kind in "ub":
        train = train.astype(np.int64)
    if query.dtype.kind in "ub":
        query = query.astype(np.int64)

    # Collapse every non-sample axis so there is exactly one distance per
    # training sample, whatever the sample dimensionality.
    abs_diff = np.abs(train - query).reshape(len(train), -1)
    distances = abs_diff.sum(axis=1)
    return trlabels[np.argmin(distances)]

def divide_to_classes(Y):
    """Locate the positions of every class label 0..9 inside Y.

    Returns a 10-tuple ordered by label (airplane, automobile, bird, cat,
    deer, dog, frog, horse, ship, truck). Each entry is the raw result of
    ``np.where(Y == label)``, i.e. a tuple of index arrays.
    """
    return tuple(np.where(Y == label) for label in range(10))


def cifar10_color(X):
    """Resize every image in X to 1x1 pixels with ``skimage.transform.resize``.

    Each resized image is squeezed, and the stacked result is squeezed again,
    so singleton axes (including a batch axis of length one) are dropped.
    """
    shrunk = [np.squeeze(ski.resize(img, (1, 1))) for img in X]
    return np.squeeze(shrunk)

def cifar10_2x2_color(X):
    """Resize every image in X to 2x2 pixels with ``skimage.transform.resize``.

    Each resized image is squeezed, and the stacked result is squeezed again,
    so singleton axes (including a batch axis of length one) are dropped.

    The function no longer prints the output shape on every call; inspect
    ``result.shape`` at the call site when that information is needed.
    """
    Xp = [np.squeeze(ski.resize(image, (2, 2))) for image in X]
    return np.squeeze(Xp)

def cifar10_NxN_color(X, N=4):
    """Resize every image in X to NxN pixels with ``skimage.transform.resize``.

    Parameters
    ----------
    X : iterable of images
        Images to downscale.
    N : int, optional
        Side length of the resized images. Defaults to 4, the value that was
        previously hard-coded, so existing calls behave as before.

    Returns
    -------
    numpy.ndarray
        The stacked resized images with singleton axes squeezed away.
    """
    Xp = [np.squeeze(ski.resize(image, (N, N))) for image in X]
    return np.squeeze(Xp)

def normal_dist(x, mu, sigma):
    """Evaluate a univariate Gaussian density at x (element-wise, broadcasting).

    Note that ``sigma`` enters the formula as the *variance*: the density is
    ``1 / sqrt(2*pi*sigma) * exp(-(x - mu)**2 / (2*sigma))``.
    """
    scale = 1 / np.sqrt(2 * np.pi * sigma)
    exponent = -1 / (2 * sigma) * (x - mu) ** 2
    return scale * np.exp(exponent)


def cifar_10_naivebayes_learn(Xp, Y):
    """Estimate per-class naive Bayes parameters from features Xp and labels Y.

    For each of the ten label groups returned by ``divide_to_classes(Y)``,
    computes the per-feature mean, the per-feature variance (``np.var``) and
    the class prior (class sample count divided by the total sample count).

    Returns
    -------
    tuple of numpy.ndarray
        ``(means, variances, priors)``, each stacked in class order.
    """
    means, variances, priors = [], [], []
    total = len(Xp)
    for members in divide_to_classes(Y):
        samples = Xp[members]
        means.append(np.squeeze(np.mean(samples, axis=0)))
        variances.append(np.squeeze(np.var(samples, axis=0)))
        priors.append(len(samples) / total)

    return np.array(means), np.array(variances), np.array(priors)

def cifar10_classifier_naivebayes(x, mu, sigma, p):  # one image at a time
    """Classify one feature vector with a Gaussian naive Bayes model.

    Parameters
    ----------
    x : array_like
        Features of a single sample; must broadcast against one row of ``mu``.
    mu : array_like
        Per-class feature means, classes along the first axis.
    sigma : array_like
        Per-class feature *variances* (as produced by ``np.var``), same shape
        as ``mu``.
    p : array_like
        Prior probability of each class.

    Returns
    -------
    Index of the class with the largest posterior.

    Notes
    -----
    All features are used (the earlier version multiplied only the first
    three). The posterior is compared in the log domain: the evidence term is
    the same for every class and therefore dropped, and summing
    log-likelihoods avoids the underflow that turned every class score into
    0/0 for samples far from all class means.
    """
    mu = np.asarray(mu, dtype=float)
    variance = np.asarray(sigma, dtype=float)
    priors = np.asarray(p, dtype=float)

    # Log of the univariate Gaussian density, per class and per feature.
    log_pdf = -0.5 * (np.log(2 * np.pi * variance) + (x - mu) ** 2 / variance)
    # Naive independence assumption: sum the feature log-likelihoods per class.
    log_likelihood = log_pdf.reshape(len(mu), -1).sum(axis=1)

    with np.errstate(divide="ignore"):  # log(0) -> -inf for empty classes
        log_posterior = log_likelihood + np.log(priors)

    return np.argmax(log_posterior)

def cifar_10_bayes_learn(Xf, Y):
    """Estimate per-class full-covariance Gaussian parameters from Xf and Y.

    For each of the ten label groups returned by ``divide_to_classes(Y)``,
    computes the feature mean, the feature covariance matrix (``np.cov`` on
    the transposed class samples) and the class prior (class sample count
    divided by the total sample count).

    Returns
    -------
    tuple of numpy.ndarray
        ``(means, covariances, priors)``, each stacked in class order.
    """
    means, covariances, priors = [], [], []
    total = len(Xf)
    for members in divide_to_classes(Y):
        samples = Xf[members]
        means.append(np.squeeze(np.mean(samples, axis=0)))
        covariances.append(np.squeeze(np.cov(samples.T)))
        priors.append(len(samples) / total)

    return np.array(means), np.array(covariances), np.array(priors)


def cifar10_classifier_bayes(x, mu, sigma, p):
    """Classify one feature vector with a full-covariance Gaussian Bayes model.

    Parameters
    ----------
    x : array_like
        Features of a single sample.
    mu : sequence
        Mean vector of each class.
    sigma : sequence
        Covariance matrix of each class, indexed like ``mu``.
    p : sequence
        Prior probability of each class, indexed like ``mu``.

    Returns
    -------
    Index of the class with the largest posterior.

    Notes
    -----
    The number of classes is taken from ``len(mu)`` instead of being fixed at
    ten. Scores are compared as log-posteriors: the evidence term is identical
    for all classes and is dropped, and ``logpdf`` avoids the underflow that
    made every density zero (and the posterior NaN) for distant samples.
    """
    with np.errstate(divide="ignore"):  # log(0) -> -inf for empty classes
        log_priors = np.log(np.asarray(p, dtype=float))

    log_posteriors = [
        multivariate_normal.logpdf(x, mu[index], sigma[index]) + log_priors[index]
        for index in range(len(mu))
    ]
    return np.argmax(log_posteriors)

def unpickle(file):
    """Load and return the pickled object stored at path ``file``.

    The file is opened in binary mode and decoded with ``encoding="latin1"``.
    NOTE(review): ``pickle.load`` can execute arbitrary code; only use this on
    files from a trusted source.
    """
    with open(file, 'rb') as handle:
        return pickle.load(handle, encoding="latin1")


def load_training_data(databatches,
                       data_dir='/Users/Simon/machine_learning/Week3/ex2/cifar-10-batches-py'):
    """Load and concatenate the first ``databatches`` CIFAR-10 training batches.

    Parameters
    ----------
    databatches : int
        Number of batch files to read; files ``data_batch_1`` through
        ``data_batch_<databatches>`` are loaded in order.
    data_dir : str, optional
        Directory containing the batch files. Defaults to the location that
        was previously hard-coded, so existing calls behave as before.

    Returns
    -------
    tuple of numpy.ndarray
        ``(X, Y)``: the concatenated ``"data"`` and ``"labels"`` entries of
        the loaded batches.

    Raises
    ------
    ValueError
        If ``databatches`` is less than 1 (nothing to concatenate).
    """
    if databatches < 1:
        raise ValueError("databatches must be at least 1")

    X = []
    Y = []
    for batch in range(1, databatches + 1):
        datadict = unpickle('{}/data_batch_{}'.format(data_dir, batch))
        X.append(datadict["data"])
        Y.append(datadict["labels"])
    return np.concatenate(X), np.concatenate(Y)

# Training data
databatches = 5  # Select from 1-5

X_train, Y_train = load_training_data(databatches)
# Each row is reshaped to (3, 32, 32) and the channel axis is then moved
# last, giving images of shape (32, 32, 3).
X_train = (
    np.array(X_train)
    .astype("float32")
    .reshape(databatches*10000, 3, 32, 32)
    .transpose(0, 2, 3, 1)
    .astype("float32")
)
Y_train = np.array(Y_train)  # class label of every image in X_train


# Test data: same layout conversion as the training images.
datadict = unpickle('/Users/Simon/machine_learning/Week3/ex2/cifar-10-batches-py/test_batch')
test_X = np.array(datadict["data"]).astype("float32")
test_X = test_X.reshape(10000, 3, 32, 32).transpose(0, 2, 3, 1).astype("float32")
test_Y = np.array(datadict["labels"])

In [None]:
def one_hot_vectors(Y):
    """Encode integer class labels 0..9 as one-hot rows.

    Parameters
    ----------
    Y : array_like
        One-dimensional collection of integer labels.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(len(Y), 10)`` with a single 1 per row at the
        label's column, squeezed (so a single label yields shape ``(10,)``).
        A label outside 0..9 produces an all-zero row; previously such rows
        were left as uninitialised memory.
    """
    labels = np.asarray(Y)
    # Broadcasting the labels against 0..9 builds the whole matrix in one
    # pass instead of rescanning the labels once per class.
    Y_one_hot = (labels[:, None] == np.arange(10)).astype(float)
    return np.squeeze(Y_one_hot)
                     
def get_argmax(Y):
    """Return, for each row of Y, the index of its largest entry.

    Used to turn per-class probability rows into predicted class labels.
    """
    return np.array([np.argmax(row) for row in Y])
    
    
    
Y_train_vector = one_hot_vectors(Y_train)

# NOTE(review): keras.utils.normalize rescales along axis 1 (image rows) by
# their norm rather than mapping pixels to [0, 1]; confirm this is intended.
X_train = keras.utils.normalize(X_train, axis=1)
test_X = keras.utils.normalize(test_X, axis=1)

# The input shape belongs on the first layer; it was previously passed to the
# Dense layers, which sit behind Flatten.
model = Sequential()
model.add(Flatten(input_shape=(32, 32, 3)))
model.add(Dense(100, activation='relu'))
model.add(Dense(10, activation=tf.nn.softmax))

# Pass the configured optimizer object to compile(). Previously the SGD
# instance was created and discarded, and optimizer='SGD' silently trained
# with the default learning rate.
sgd = tf.keras.optimizers.SGD(learning_rate=0.9)
model.compile(optimizer=sgd, loss='mse', metrics=['accuracy'])
model.fit(X_train, Y_train_vector, epochs=100)

Y_train_pred = np.squeeze(model.predict(X_train))
Y_test_pred = np.squeeze(model.predict(test_X))

# "model.predict" returns probabilities for every class,
# but we only need the class with the largest probability

Y_train_pred = get_argmax(Y_train_pred)
Y_test_pred = get_argmax(Y_test_pred)

# class_acc returns a fraction in [0, 1]; the ".2%" format converts it to a
# percentage so the printed number matches its unit.
print(f'Classication accuracy (train data): {class_acc(Y_train_pred, Y_train):.2%}')
print(f'Classication accuracy (test data): {class_acc(Y_test_pred, test_Y):.2%}')