In [None]:
import numpy as np
import pandas as pd
import keras
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
import tensorflow as tf
from tensorflow.python import keras
from tensorflow.python.keras.models import Sequential
from tensorflow.python.keras.layers import Dense, Flatten, Conv2D, Dropout, MaxPooling2D
from tensorflow.keras.utils import to_categorical
from IPython.display import SVG
from keras.utils.vis_utils import model_to_dot
from keras.layers import wrappers
#from tensorflow.python.keras.utils.vis_utils import plot_model
from keras.utils.vis_utils import plot_model
import seaborn as sns
%matplotlib inline 
import plotly.graph_objs as go
import plotly.figure_factory as ff
from plotly import tools
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
import pydot_ng as pydot
init_notebook_mode(connected=True)

In [None]:
# Load Fashion-MNIST through the Keras dataset helper; load_data() is unpacked
# below as a (train, test) pair of (images, labels) tuples.
fashion_mnist = tf.keras.datasets.fashion_mnist
(train_images, train_labels), (test_images, test_labels) = fashion_mnist.load_data()

# Display name for every class; label values are used as indices into this list.
class_names = [
    'T-shirt/top', 'Trouser', 'Pullover', 'Dress', 'Coat',
    'Sandal', 'Shirt', 'Sneaker', 'Bag', 'Ankle boot',
]

In [None]:
# Rescale both splits by the same constant factor of 1/255.
# NOTE: this cell is not idempotent; running it twice divides by 255 twice.
train_images, test_images = train_images / 255.0, test_images / 255.0

In [None]:
# First training image with a colour bar showing its value range.
fig, ax = plt.subplots()
im = ax.imshow(train_images[0])
fig.colorbar(im, ax=ax)
ax.grid(False)
plt.show()

In [None]:
# 5x5 gallery of the first 25 training images, each captioned with its class name.
fig, axes = plt.subplots(5, 5, figsize=(10, 10))
for ax, image, label in zip(axes.flat, train_images, train_labels):
    ax.set_xticks([])
    ax.set_yticks([])
    ax.grid(False)
    ax.imshow(image)
    ax.set_xlabel(class_names[label])
plt.show()

In [None]:
#Mean Subtraction
# `mean` has one value per (image, row): the average over axis 2 of that row.
# Broadcasting subtracts it from every pixel of the row in a single vectorized
# step, giving the same values as a Python loop over all images and rows.
mean = np.mean(train_images, axis = 2)
train_images_mean = train_images - mean[:, :, np.newaxis]
train_images_mean.shape

In [None]:
# First training image after mean subtraction, with a colour bar.
fig, ax = plt.subplots()
im = ax.imshow(train_images_mean[0])
fig.colorbar(im, ax=ax)
ax.grid(False)
plt.show()

In [None]:
# 5x5 gallery of the first 25 mean-subtracted training images with class captions.
fig, axes = plt.subplots(5, 5, figsize=(10, 10))
for ax, image, label in zip(axes.flat, train_images_mean, train_labels):
    ax.set_xticks([])
    ax.set_yticks([])
    ax.grid(False)
    ax.imshow(image)
    ax.set_xlabel(class_names[label])
plt.show()

In [None]:
#Normalization
# `std` has one value per (image, row). Rows whose std is 0 must be left
# unscaled, so their divisor is replaced by 1 before the broadcast division;
# every other row is divided by its own std. Values match the per-row loop.
std = np.std(train_images, axis = 2)
safe_std = np.where(std == 0, 1.0, std)
train_images_normal = train_images_mean / safe_std[:, :, np.newaxis]
train_images_normal.shape

In [None]:
# First training image after normalization, with a colour bar.
fig, ax = plt.subplots()
im = ax.imshow(train_images_normal[0])
fig.colorbar(im, ax=ax)
ax.grid(False)
plt.show()

In [None]:
# 5x5 gallery of the first 25 *normalized* training images.
# (This cell previously re-plotted train_images_mean, duplicating the earlier
# mean-subtraction gallery instead of showing the normalization result.)
plt.figure(figsize=(10,10))
for i in range(25):
    plt.subplot(5,5,i+1)
    plt.xticks([])
    plt.yticks([])
    plt.grid(False)
    plt.imshow(train_images_normal[i])
    plt.xlabel(class_names[train_labels[i]])
plt.show()

In [None]:
# Flatten every preprocessed training image into a single feature row.
train_size, train_rows, train_cols = train_images.shape
train_flat_shape = (train_size, train_rows * train_cols)
train_images_mean = train_images_mean.reshape(train_flat_shape)
train_images_normal = train_images_normal.reshape(train_flat_shape)

In [None]:
# Second-moment matrix of the flattened training features.
# NOTE(review): this is a covariance only if every column is zero-mean; the
# centring applied earlier is per image row — confirm that is intended.
n_train_samples = train_images_mean.shape[0]
cov = (train_images_mean.T @ train_images_mean) / n_train_samples

# Rotate the data onto the singular vectors; keep the leading 100 for the reduced view.
U,S,V = np.linalg.svd(cov)
Xrot_train = train_images_mean @ U
Xrot_train_reduced = train_images_mean @ U[:, :100]

# Back to image-like arrays for plotting (100 components -> 10x10).
Xrot_t = Xrot_train.reshape(train_size, train_rows, train_cols)
Xrot_train_r = Xrot_train_reduced.reshape(train_size, 10, 10)

In [None]:
# First training sample in the reduced (10x10) rotated representation.
fig, ax = plt.subplots()
im = ax.imshow(Xrot_train_r[0])
fig.colorbar(im, ax=ax)
ax.grid(False)
plt.show()

In [None]:
# 5x5 gallery of the first 25 samples in the reduced rotated representation.
fig, axes = plt.subplots(5, 5, figsize=(10, 10))
for ax, image, label in zip(axes.flat, Xrot_train_r, train_labels):
    ax.set_xticks([])
    ax.set_yticks([])
    ax.grid(False)
    ax.imshow(image)
    ax.set_xlabel(class_names[label])
plt.show()

In [None]:
#Mean Subtraction
# Same per-(image, row) mean subtraction as for the training split, done with
# one broadcast instead of a Python loop over every image and row.
mean = np.mean(test_images, axis = 2)
test_images_mean = test_images - mean[:, :, np.newaxis]
test_images_mean.shape

In [None]:
#Normalization
# Divide each mean-subtracted row by its own std. Rows with std == 0 get a
# divisor of 1 so they stay unchanged, matching the explicit zero check of
# the loop-based version.
std = np.std(test_images, axis = 2)
safe_std = np.where(std == 0, 1.0, std)
test_images_normal = test_images_mean / safe_std[:, :, np.newaxis]
test_images_normal.shape

In [None]:
# Flatten every preprocessed test image into a single feature row.
test_size, test_rows, test_cols = test_images.shape
test_flat_shape = (test_size, test_rows * test_cols)
test_images_mean = test_images_mean.reshape(test_flat_shape)
test_images_normal = test_images_normal.reshape(test_flat_shape)

In [None]:
# Project the test split onto the basis `U` that was fitted on the TRAINING
# split in the earlier PCA cell. Re-fitting the SVD on the test data (as this
# cell used to do) yields a different basis, so component k of a test sample
# would not correspond to component k of a training sample, and it lets
# test-set statistics leak into the preprocessing.
Xrot_test = np.dot(test_images_mean, U)
Xrot_test_reduced = np.dot(test_images_mean, U[:, :100])

In [None]:
class GradientDescent:
    """Plain gradient descent over a list of parameter arrays.

    Parameters
    ----------
    learning_rate : float
        Step size applied to every gradient.
    max_iters : int or float
        Upper bound on the number of update steps.
    epsilon : float
        Iteration stops once the norm of every gradient is <= epsilon.
    """

    def __init__(self, learning_rate=.001, max_iters=1e4, epsilon=1e-5):
        self.learning_rate = learning_rate
        self.max_iters = max_iters
        self.epsilon = epsilon

    def run(self, gradient_fn, x, y, params, biases):
        """Minimize by repeatedly stepping `params` against their gradients.

        Parameters
        ----------
        gradient_fn : callable
            Called as ``gradient_fn(x, y, params)``; must return a pair
            ``(grads, bias_grads)`` where ``grads[i]`` matches ``params[i]``.
        x, y :
            Passed through to ``gradient_fn`` unchanged.
        params : list of ndarray
            Updated IN PLACE (each array is modified with ``-=``).
        biases :
            Returned untouched; bias gradients are currently not applied.

        Returns
        -------
        (params, biases)
        """
        norms = np.array([np.inf])
        # Count completed steps from 0 so that exactly `max_iters` updates are
        # allowed (starting at 1 silently dropped the last step).
        t = 0
        while np.any(norms > self.epsilon) and t < self.max_iters:
            grad, bias = gradient_fn(x, y, params)
            for p in range(len(params)):
                params[p] -= self.learning_rate * grad[p]

            t += 1
            norms = np.array([np.linalg.norm(g) for g in grad])
        return params, biases


class Layer:
    """A single dense layer: a weight matrix followed by an activation.

    The bias value is stored on the instance but `forward` does not add it.
    """

    def __init__(self, activation, input_units, output_units, bias):
        # Weight matrix of shape (input_units, output_units), drawn uniformly
        # between -0.1 and 0.1.
        self.weights = np.random.uniform(-0.1, 0.1, size=(input_units, output_units))
        self.activation = activation      # callable applied to the linear output
        self.input_units = input_units    # number of incoming features
        self.output_units = output_units  # number of units in this layer
        self.bias = bias                  # kept for reference only (see class docstring)

    def update_weights(self, new_weights):
        """Replace the weight matrix with `new_weights`."""
        self.weights = new_weights

    def update_biases(self, new_biases):
        """Replace the stored bias with `new_biases`."""
        self.bias = new_biases

    def forward(self, input):
        """Return ``activation(input . weights)`` for the current weights."""
        linear_output = np.dot(input, self.weights)
        return self.activation(linear_output)


class MLP:
    """Bias-free multilayer perceptron with a softmax output layer.

    Parameters
    ----------
    activation : callable
        Element-wise activation used by every hidden layer.
    activation_prime : callable
        Derivative of `activation`, evaluated on the layer's PRE-activation.
    hidden : int
        Number of hidden layers; 0 gives a single softmax layer.
    num_units : int
        Width of every hidden layer.
    bias : float
        Stored and handed to each `Layer`, but not used in the forward or
        backward pass (bias terms are disabled in this implementation).
    """

    def __init__(self, activation, activation_prime, hidden, num_units, bias):
        self.activation = activation  #activation function
        self.hidden = hidden  #number of hidden layers
        self.num_units = num_units  #number of units in the hidden layer
        self.bias = bias #bias terms
        self.activation_prime = activation_prime

    def softmax(self, x):
        """Softmax along the last axis (one distribution per sample row).

        The row maximum is subtracted before exponentiating so large logits
        do not overflow.
        """
        x = np.asarray(x, dtype=float)
        e_x = np.exp(x - np.max(x, axis=-1, keepdims=True))
        return e_x / np.sum(e_x, axis=-1, keepdims=True)

    #Create a network by creating an array of layers, all of which are connected with each other
    def initialize_network(self, input_length, num_classes):
        """Build `self.network`: `hidden` hidden layers plus a softmax output.

        Layer widths are ``[input_length] + [num_units] * hidden + [num_classes]``,
        so ``hidden=0`` maps the input straight to the output layer.
        """
        sizes = [input_length] + [self.num_units] * self.hidden + [num_classes]
        last = len(sizes) - 2
        self.network = []
        for i, (n_in, n_out) in enumerate(zip(sizes[:-1], sizes[1:])):
            layer_activation = self.softmax if i == last else self.activation
            self.network.append(Layer(layer_activation, n_in, n_out, self.bias))

    def _backprop(self, x, y, params):
        """Gradients of the mean cross-entropy loss w.r.t. every weight matrix.

        Parameters
        ----------
        x : ndarray, shape (n_samples, n_features)
        y : ndarray, shape (n_samples, n_classes), one-hot targets
        params : list of ndarray
            Weight matrices, first layer first.

        Returns
        -------
        (dparams, dbiases) : two lists aligned with `params`. `dbiases` holds
        the summed error signal of each layer; it is returned because the
        optimizer expects a pair, but biases are not part of the model.
        """
        n_samples = x.shape[0]
        last = len(params) - 1

        # Forward pass: keep pre-activations for the derivative and the
        # activations feeding each layer for the weight gradients.
        layer_inputs = [x]
        pre_activations = []
        for i, weights in enumerate(params):
            z = np.dot(layer_inputs[-1], weights)
            pre_activations.append(z)
            layer_inputs.append(self.softmax(z) if i == last else self.activation(z))

        # For softmax + cross-entropy the error at the output logits is
        # (prediction - target); dividing by n_samples averages over the batch.
        delta = (layer_inputs[-1] - y) / n_samples

        dparams = [None] * len(params)
        dbiases = [None] * len(params)
        for i in range(last, -1, -1):
            dparams[i] = np.dot(layer_inputs[i].T, delta)
            dbiases[i] = np.sum(delta, axis=0, keepdims=True)
            if i > 0:
                # Push the error through the weights, then through the
                # activation of the layer below.
                delta = np.dot(delta, params[i].T) * self.activation_prime(pre_activations[i - 1])
        return dparams, dbiases

    def fit(self, X, y, optimizer, num_classes=None):
        """Initialise the network and train its weights with `optimizer`.

        Parameters
        ----------
        X : ndarray, shape (n_samples, n_features)
        y : ndarray, shape (n_samples, n_classes), one-hot targets
        optimizer : object with ``run(gradient_fn, x, y, params, biases)``
            returning ``(params, biases)``.
        num_classes : int, optional
            Output width; defaults to the number of columns of `y`.

        Returns
        -------
        self

        Raises
        ------
        ValueError
            If `X` or `y` is not 2-D.
        """
        X = np.asarray(X)
        y = np.asarray(y)
        if X.ndim != 2:
            raise ValueError("X must be 2-D (n_samples, n_features), got shape %s" % (X.shape,))
        if y.ndim != 2:
            raise ValueError("y must be one-hot encoded with shape (n_samples, n_classes), got shape %s" % (y.shape,))
        if num_classes is None:
            num_classes = y.shape[1]

        #Initialize the network
        N, D = X.shape
        self.initialize_network(D, num_classes)

        def gradient(x, y, params):
            return self._backprop(x, y, params)

        params0 = [layer.weights for layer in self.network]

        params, biases = optimizer.run(gradient, X, y, params0, biases=[self.bias] * len(params0))

        for layer, weights in zip(self.network, params):
            layer.update_weights(weights)

        return self


    #Predict the output of the network given input by running the input through all the layers of the network sequentially
    def predict(self, X):
        """Return the output-layer activations (class probabilities) for `X`."""
        output = X
        for layer in self.network:
            output = layer.forward(output)
        return output


In [None]:
# Activation functions and their derivatives, passed to MLP as plain callables.
relu = lambda v: np.maximum(0, v)
relu_leaky = lambda v: np.where(v > 0, v, v*0.01)
relu_leaky_prime = lambda v: np.where(v > 0, 1, 0.01)
relu_prime = lambda v: np.where(v > 0, 1, 0)
# np.tanh stays finite for large |x|; the explicit (e^x - e^-x)/(e^x + e^-x)
# ratio overflows to inf/inf = nan there.
tanh = lambda x: np.tanh(x)
tanh_prime = lambda x: 1 - np.square(np.tanh(x))
logistic = lambda z: 1./ (1 + np.exp(-z))

# One-hot encode the integer training labels: one row per sample, one column
# per label value from 0 to train_labels.max().
one_hot_encode_train_labels = np.zeros((train_labels.size, train_labels.max()+1))
one_hot_encode_train_labels[np.arange(train_labels.size),train_labels] = 1

# Quick smoke run: two hidden layers of 128 units, 20 gradient-descent iterations.
model = MLP(activation = relu, activation_prime=relu_prime, hidden = 2, num_units = 128, bias = .1)
optimizer = GradientDescent(learning_rate=.1, max_iters=20)
yh = model.fit(train_images_normal, one_hot_encode_train_labels, optimizer).predict(test_images_normal)


In [None]:
def evaluate_acc(y_true, y_pred):
    """Return the fraction of positions where `y_pred` equals `y_true`.

    Parameters
    ----------
    y_true, y_pred : array-like
        Label sequences of identical shape.

    Returns
    -------
    float
        Number of matching entries divided by the number of entries.

    Raises
    ------
    ValueError
        If the inputs differ in shape or are empty. (A length mismatch used
        to produce either an IndexError or a silently wrong ratio.)
    """
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    if y_true.shape != y_pred.shape:
        raise ValueError(
            "y_true and y_pred must have the same shape, got %s and %s"
            % (y_true.shape, y_pred.shape)
        )
    if y_true.size == 0:
        raise ValueError("cannot compute accuracy of empty inputs")
    return int(np.count_nonzero(y_true == y_pred)) / y_true.size

In [None]:
# Compare true labels with the arg-max class of every prediction row.
predicted_labels = [np.argmax(row) for row in yh]
print(test_labels)
print(predicted_labels)
print(evaluate_acc(test_labels, predicted_labels))

In [None]:
# Scratch check of how np.where builds a 0/1 mask from a sign test.
p = np.array([
    [-2, -2],
    [-2, -2],
    [2, -1],
    [3, 5],
])
print(p.shape)
positive_mask = np.where(p > 0, 1, 0)
print(positive_mask)


In [None]:
# Debug dump of targets and predictions.
print(one_hot_encode_train_labels)
print(yh)
# Labels are 0-based (they index class_names directly), so the arg-max index
# is already the predicted label; adding 1 shifted every prediction off the
# labels printed on the next line.
print([np.argmax(t) for t in yh])
print(test_labels)
print(yh[0])
print(np.argmax(yh[0]))
print(np.where(yh[0] == np.amax(yh[0])))

In [None]:
# Debug dump: predictions, integer training labels, one-hot training labels.
for dumped in (yh, train_labels, one_hot_encode_train_labels):
    print(dumped)


In [None]:
# Debug dump: mean-subtracted training inputs, training labels, mean-subtracted test inputs.
for dumped in (train_images_mean, train_labels, test_images_mean):
    print(dumped)

In [None]:
optimizer = GradientDescent(learning_rate=.01, max_iters=2000)

#Task 3: Experiment 1

#hidden layer = 0
mlp_no_hidden = MLP(activation = relu, activation_prime=relu_prime, hidden = 0, num_units = 64, bias = .1)

# Train ONCE and reuse the predictions for every metric below. The model used
# to be fitted a second time from a fresh random initialisation, which doubled
# the runtime and made the accuracy and the report describe different fits.
# evaluate_acc comes from the earlier cell; the duplicate definition that
# lived in this cell was removed.
yh_1_hidden0 = mlp_no_hidden.fit(train_images_mean, one_hot_encode_train_labels, optimizer).predict(test_images_mean)
yh = yh_1_hidden0

predicted_classes = np.argmax(yh, axis=1)
print(evaluate_acc(test_labels, predicted_classes))

p = predicted_classes
y = test_labels
correct = np.nonzero(p==y)[0]
incorrect = np.nonzero(p!=y)[0]
print("Correct predicted classes:",correct.shape[0])
print("Incorrect predicted classes:",incorrect.shape[0])

target_names = ["Class {} ({}) :".format(i,class_names[i]) for i in range(10)]
print(classification_report(test_labels, predicted_classes, target_names=target_names))

#hidden layer = 1
mlp_single_layer = MLP(activation = relu, activation_prime=relu_prime, hidden = 1, num_units = 128, bias = .1)

# Train once and reuse the predictions; a second fit from a new random
# initialisation only doubled the runtime and mixed metrics from two runs.
yh_1_hidden1 = mlp_single_layer.fit(train_images_mean, one_hot_encode_train_labels, optimizer).predict(test_images_mean)
yh = yh_1_hidden1

predicted_classes = np.argmax(yh, axis=1)
print(evaluate_acc(test_labels, predicted_classes))

p = predicted_classes
y = test_labels
correct = np.nonzero(p==y)[0]
incorrect = np.nonzero(p!=y)[0]
print("Correct predicted classes:",correct.shape[0])
print("Incorrect predicted classes:",incorrect.shape[0])

target_names = ["Class {} ({}) :".format(i,class_names[i]) for i in range(10)]
print(classification_report(test_labels, predicted_classes, target_names=target_names))

#hidden layer = 2
mlp_double_layer = MLP(activation = relu, activation_prime=relu_prime, hidden = 2, num_units = 128, bias = .1)

# Train once and reuse the predictions; a second fit from a new random
# initialisation only doubled the runtime and mixed metrics from two runs.
yh_1_hidden2 = mlp_double_layer.fit(train_images_mean, one_hot_encode_train_labels, optimizer).predict(test_images_mean)
yh = yh_1_hidden2

predicted_classes = np.argmax(yh, axis=1)
print(evaluate_acc(test_labels, predicted_classes))

p = predicted_classes
y = test_labels
correct = np.nonzero(p==y)[0]
incorrect = np.nonzero(p!=y)[0]
print("Correct predicted classes:",correct.shape[0])
print("Incorrect predicted classes:",incorrect.shape[0])

print("Experiment 1: hidden layers = 2")
target_names = ["Class {} ({}) :".format(i,class_names[i]) for i in range(10)]
print(classification_report(test_labels, predicted_classes, target_names=target_names))

#Task 3: Experiment 2
print("starting experiment 2")
mlp_tanh_copy = MLP(activation = tanh, activation_prime=tanh_prime, hidden = 2, num_units = 128, bias = .1)

# Train once and reuse the predictions; a second fit from a new random
# initialisation only doubled the runtime and mixed metrics from two runs.
yh_2_tanh = mlp_tanh_copy.fit(train_images_mean, one_hot_encode_train_labels, optimizer).predict(test_images_mean)
yh = yh_2_tanh

predicted_classes = np.argmax(yh, axis=1)
print(evaluate_acc(test_labels, predicted_classes))

p = predicted_classes
y = test_labels
correct = np.nonzero(p==y)[0]
incorrect = np.nonzero(p!=y)[0]
print("Correct predicted classes:",correct.shape[0])
print("Incorrect predicted classes:",incorrect.shape[0])

target_names = ["Class {} ({}) :".format(i,class_names[i]) for i in range(10)]
print(classification_report(test_labels, predicted_classes, target_names=target_names))

# Experiment 2, second variant: leaky ReLU activation.
mlp_relu_leaky_copy = MLP(activation = relu_leaky, activation_prime=relu_leaky_prime, hidden = 2, num_units = 128, bias = .1)

# Train once and reuse the predictions; a second fit from a new random
# initialisation only doubled the runtime and mixed metrics from two runs.
yh_2_relu_leaky = mlp_relu_leaky_copy.fit(train_images_mean, one_hot_encode_train_labels, optimizer).predict(test_images_mean)
yh = yh_2_relu_leaky

predicted_classes = np.argmax(yh, axis=1)
print(evaluate_acc(test_labels, predicted_classes))

p = predicted_classes
y = test_labels
correct = np.nonzero(p==y)[0]
incorrect = np.nonzero(p!=y)[0]
print("Correct predicted classes:",correct.shape[0])
print("Incorrect predicted classes:",incorrect.shape[0])

target_names = ["Class {} ({}) :".format(i,class_names[i]) for i in range(10)]
print(classification_report(test_labels, predicted_classes, target_names=target_names))


#Task 3: Experiment 4
mlp_relu_unnormalized = MLP(activation = relu, hidden = 2, activation_prime=relu_prime, num_units = 128, bias = .1)

# ndarray.reshape returns a NEW array; the bare `train_images.reshape(...)`
# call that used to be here discarded its result, so fit() received the 3-D
# image stack and failed when unpacking `N, D = X.shape`. Flatten both splits
# into explicit variables and use those.
train_images_flat = train_images.reshape(train_images.shape[0], -1)
test_images_flat = test_images.reshape(test_images.shape[0], -1)

# Train once and reuse the predictions for every metric below.
yh_4 = mlp_relu_unnormalized.fit(train_images_flat, one_hot_encode_train_labels, optimizer).predict(test_images_flat)
yh = yh_4

predicted_classes = np.argmax(yh, axis=1)
print(evaluate_acc(test_labels, predicted_classes))

p = predicted_classes
y = test_labels
correct = np.nonzero(p==y)[0]
incorrect = np.nonzero(p!=y)[0]
print("Correct predicted classes:",correct.shape[0])
print("Incorrect predicted classes:",incorrect.shape[0])

target_names = ["Class {} ({}) :".format(i,class_names[i]) for i in range(10)]
print(classification_report(test_labels, predicted_classes, target_names=target_names))

# Experiment 4, second variant: same architecture on the normalized inputs.
mlp_relu_normalized = MLP(activation = relu, hidden = 2, activation_prime=relu_prime, num_units = 128, bias = .1)

# Train once and reuse the predictions; a second fit from a new random
# initialisation only doubled the runtime and mixed metrics from two runs.
yh_4_normal = mlp_relu_normalized.fit(train_images_normal, one_hot_encode_train_labels, optimizer).predict(test_images_normal)
yh = yh_4_normal

predicted_classes = np.argmax(yh, axis=1)
print(evaluate_acc(test_labels, predicted_classes))

p = predicted_classes
y = test_labels
correct = np.nonzero(p==y)[0]
incorrect = np.nonzero(p!=y)[0]
print("Correct predicted classes:",correct.shape[0])
print("Incorrect predicted classes:",incorrect.shape[0])

target_names = ["Class {} ({}) :".format(i,class_names[i]) for i in range(10)]
print(classification_report(test_labels, predicted_classes, target_names=target_names))

#Task 3: Experiment 5

# Convolutional classifier: three 3x3 convolution stages (the first two each
# followed by 2x2 max-pooling), flattened into a 128-unit dense layer and a
# 10-way softmax output.
# NOTE: this rebinds `model`, which earlier held the NumPy MLP.
model = Sequential([
    Conv2D(32, kernel_size=(3, 3),
           activation='relu',
           kernel_initializer='he_normal',
           input_shape=(28, 28, 1)),
    MaxPooling2D((2, 2)),
    Conv2D(64, kernel_size=(3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Conv2D(128, (3, 3), activation='relu'),
    Flatten(),
    Dense(128, activation='relu'),
    Dense(10, activation='softmax'),
])

# Sparse categorical cross-entropy, so labels are passed as integers.
model.compile(
    loss=keras.losses.sparse_categorical_crossentropy,
    optimizer='adam',
    metrics=['accuracy'],
)

model.summary()

In [None]:
# Reload the raw dataset for the CNN.
# NOTE: this overwrites train_images / train_labels / test_images, so the
# earlier MLP cells cannot be re-run after this one without reloading.
fashion_mnist = tf.keras.datasets.fashion_mnist
(train_images, train_labels), (test_images, test_labels) = fashion_mnist.load_data()

assert train_images.shape == (60000, 28, 28)

# Add a trailing channel axis (the model's input_shape is (28, 28, 1)) and
# rescale the pixel values by 1/255.
train_images = train_images.reshape(train_images.shape[0], 28, 28, 1) / 255.0
test_images = test_images.reshape(test_images.shape[0], 28, 28, 1) / 255.0

# Hold out 20% of the training split for validation (fixed seed).
train_images, validation_images, train_labels, validation_labels = train_test_split(
    train_images,
    train_labels,
    test_size=0.2,
    random_state=2018,
)

print("Fashion MNIST train -  rows:",train_images.shape[0]," columns:", train_images.shape[1:4])
print("Fashion MNIST valid -  rows:",validation_images.shape[0]," columns:", validation_images.shape[1:4])
print("Fashion MNIST test -  rows:",test_images.shape[0]," columns:", test_images.shape[1:4])

# NOTE(review): batch_size=1 means one optimizer step per sample — confirm
# this is intentional, as it makes each epoch very slow.
train_model = model.fit(
    train_images,
    train_labels,
    batch_size=1,
    epochs=3,
    validation_data=(validation_images, validation_labels),
)

score = model.evaluate(test_images, test_labels, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])