# Assignment a04: Neural Networks
A. Thieshanthan, 180641N

In [20]:
import tensorflow as tf 
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.datasets import cifar10
import numpy as np 
import matplotlib.pyplot as plt 
import time

In [74]:
def to_tensor(ar):
    """Return `ar` converted to a TensorFlow tensor of dtype float32."""
    return tf.convert_to_tensor(ar, dtype=tf.float32)

In [67]:
# Report how many GPU devices TensorFlow can see.
gpu_devices = tf.config.list_physical_devices('GPU')
print("Num GPUs Available: ", len(gpu_devices))

Num GPUs Available:  1


## 1. Linear Classifier


In [75]:
# Load CIFAR-10 and report the raw array shapes.
(x_train, y_train), (x_test, y_test) = cifar10.load_data()
print('x_train shape', x_train.shape)
print('y_train shape', y_train.shape)

# Dataset dimensions reused by the cells below.
classes = np.unique(y_train).size
training_samples = len(x_train)
tesing_samples = len(x_test)
input_shape = 32*32*3 # Cifar 10: flattened length of one image

# Scale pixel values by 1/255, then subtract the per-pixel mean of the
# training images from both splits.
x_train = x_train / 255.
x_test = x_test / 255.
mean_image = x_train.mean(axis=0)
x_train -= mean_image
x_test -= mean_image

# One-hot encode the labels; the transpose puts one sample per column.
y_train = to_categorical(y_train, num_classes=classes).T
y_test = to_categorical(y_test, num_classes=classes).T

# Flatten every image to a vector and transpose so samples are columns.
x_train = x_train.reshape(training_samples, input_shape).T #shape = 3072*50000
x_test = x_test.reshape(tesing_samples, input_shape).T #shape = 3072*10000
print('x_train new shape', x_train.shape)
print('y_train new shape', y_train.shape)

x_train shape (50000, 32, 32, 3)
y_train shape (50000, 1)
x_train new shape (3072, 50000)
y_train new shape (10, 50000)


In [76]:
# Convert the prepared arrays to float32 tensors, one split at a time.
x_train, y_train = to_tensor(x_train), to_tensor(y_train)
x_test, y_test = to_tensor(x_test), to_tensor(y_test)

In [99]:
# Parameters and initial state for the linear classifier.
# The seeded generator is created first and used for the weight
# initialisation, so the starting point is reproducible after a kernel restart.
seed = 0
rng = np.random.default_rng(seed=seed)

std = 1e-5  # scale applied to the standard-normal initial weights
w1 = to_tensor(rng.standard_normal((classes, input_shape)) * std)
b1 = to_tensor(np.zeros((classes, 1)))
print('w1 shape: ', w1.shape)
print('b1 shape: ', b1.shape)

batch_size = training_samples  # every sample is used in each update
iterations = 300
learning_rate = 0.015
learning_rate_decay = 0  # not referenced by the training cell visible in this notebook
reg = 0  # not referenced by the training cell visible in this notebook

# Histories filled in by the training cell (one entry per iteration).
loss_history = []
train_acc_history = []
val_acc_hisory = []

w1 shape:  (10, 3072)
b1 shape:  (10, 1)


In [100]:
# Training: gradient descent on the mean squared error of y_hat = w1 @ x + b1.
# Every update uses the whole of x_train, so batch_size is expected to equal
# the number of training samples; the per-iteration shuffle that used to be
# here was removed because its result was never read.
t0 = time.time()
# Integer class of every training sample (y_train is one-hot, one sample per column).
train_labels = tf.argmax(y_train, axis=0)
grad_scale = 2 / batch_size
for t in range(iterations):
    # forward pass
    y_hat = tf.matmul(w1, x_train) + b1
    residual = y_hat - y_train

    # mean squared error loss (summed over classes, averaged over samples)
    loss = float(tf.reduce_sum(residual ** 2).numpy()) / batch_size
    loss_history.append(loss)

    # training accuracy: fraction of samples whose highest score is the true class
    hits = tf.equal(tf.argmax(y_hat, axis=0), train_labels)
    accuracy = float(tf.reduce_mean(tf.cast(hits, tf.float32)).numpy())
    train_acc_history.append(accuracy)

    # backward pass
    dw1 = tf.matmul(residual, x_train, transpose_b=True) * grad_scale
    # Row sum over samples; same result as multiplying by a column of ones.
    db1 = tf.reduce_sum(residual, axis=1, keepdims=True) * grad_scale

    # gradient descent
    w1 = w1 - learning_rate * dw1
    b1 = b1 - learning_rate * db1
    if t % 50 == 0:
        print('epoch: '+ str(t + 1))
        # Print the current loss directly so a re-run of this cell does not
        # show stale entries from an earlier run of loss_history.
        print('loss: ' + str(loss))
print('final loss after %d epochs : %f' % (iterations , loss_history[-1]))
t1 = time.time()
print('time taken: ' + str(t1-t0))

epoch: 1
loss: 0.99999140625
epoch: 51
loss: 0.802895390625
epoch: 101
loss: 0.79119015625
epoch: 151
loss: 0.787059609375
epoch: 201
loss: 0.784476484375
epoch: 251
loss: 0.7826096875
final loss after 300 epochs : 0.781185
time taken: 6.162921905517578


## 2. Two layer fully connected network
### Specifications
* 200 hidden units
* Sigmoid activation for the hidden layer
* No output-layer activation

In [84]:
def sigmoid(z):
    """Return the element-wise logistic function 1 / (1 + exp(-z)) of `z`.

    Delegates to TensorFlow's built-in op instead of composing it by hand.
    """
    return tf.math.sigmoid(z)

In [95]:
# Parameters and initial state for the two-layer network.
# The seeded generator is created first and used for both weight matrices,
# so the starting point is reproducible after a kernel restart.
seed = 0
rng = np.random.default_rng(seed=seed)

H = 200  # number of hidden units
# NOTE(review): the recorded run of the training cell plateaus at a loss of 0.9;
# this very small initial scale may be contributing — TODO confirm and consider
# a larger std.
std = 1e-4
w1 = to_tensor(rng.standard_normal((H, input_shape)) * std)
b1 = to_tensor(np.zeros((H, 1)))
w2 = to_tensor(rng.standard_normal((classes, H)) * std)
b2 = to_tensor(np.zeros((classes, 1)))
print('w1 shape: ', w1.shape)
print('b1 shape: ', b1.shape)
print('w2 shape: ', w2.shape)
print('b2 shape: ', b2.shape)

batch_size_2 = training_samples  # every sample is used in each update
iteration_2 = 300
learning_rate_2 = 0.2
learning_rate_decay_2 = 0  # not referenced by the training cell visible in this notebook
reg_2 = 0  # not referenced by the training cell visible in this notebook

# Histories filled in by the training cell (one entry per iteration).
loss_history_2 = []
train_acc_history_2 = []
val_acc_hisory_2 = []

w1 shape:  (200, 3072)
b1 shape:  (200, 1)
w2 shape:  (10, 200)
b2 shape:  (10, 1)


In [96]:
# Training: gradient descent on the mean squared error of the two-layer network
#   a1 = sigmoid(w1 @ x + b1),  y_hat = w2 @ a1 + b2.
# Uses this section's own settings (iteration_2, batch_size_2) rather than the
# linear classifier's. Every update uses the whole of x_train, so batch_size_2
# is expected to equal the number of training samples; the per-iteration shuffle
# that used to be here was removed because its result was never read.
t0 = time.time()
# Integer class of every training sample (y_train is one-hot, one sample per column).
train_labels = tf.argmax(y_train, axis=0)
for t in range(iteration_2):
    # forward pass
    z1 = tf.matmul(w1, x_train) + b1
    a1 = sigmoid(z1)
    z2 = tf.matmul(w2, a1) + b2
    y_hat = z2  # no activation
    residual = y_hat - y_train

    # mean squared error loss (summed over classes, averaged over samples)
    loss = float(tf.reduce_sum(residual ** 2).numpy()) / batch_size_2
    loss_history_2.append(loss)

    # training accuracy: fraction of samples whose highest score is the true class
    hits = tf.equal(tf.argmax(y_hat, axis=0), train_labels)
    accuracy_2 = float(tf.reduce_mean(tf.cast(hits, tf.float32)).numpy())
    train_acc_history_2.append(accuracy_2)

    # backward pass (gradients stay as tensors; no NumPy round trip)
    dz2 = (2 / batch_size_2) * residual
    dw2 = tf.matmul(dz2, a1, transpose_b=True)
    db2 = tf.reduce_sum(dz2, axis=1, keepdims=True)
    da1 = tf.matmul(w2, dz2, transpose_a=True)
    dz1 = da1 * a1 * (1 - a1)  # derivative of the sigmoid is a1 * (1 - a1)
    dw1 = tf.matmul(dz1, x_train, transpose_b=True)
    db1 = tf.reduce_sum(dz1, axis=1, keepdims=True)

    # gradient descent
    w2 = w2 - learning_rate_2 * dw2
    b2 = b2 - learning_rate_2 * db2
    w1 = w1 - learning_rate_2 * dw1
    b1 = b1 - learning_rate_2 * db1
    if t % 50 == 0:
        print('epoch: '+ str(t + 1))
        # Print the current loss directly so a re-run of this cell does not
        # show stale entries from an earlier run of loss_history_2.
        print('loss: ' + str(loss))
print('final loss after %d epochs : %f' % (iteration_2 , loss_history_2[-1]))
t1 = time.time()
print('time taken: ' + str(t1-t0))

epoch: 1
loss: 0.999518828125
epoch: 51
loss: 0.900000078125
epoch: 101
loss: 0.900000078125
epoch: 151
loss: 0.900000078125
epoch: 201
loss: 0.900000078125
epoch: 251
loss: 0.9
final loss after 300 epochs : 0.900000
time taken: 80.33636474609375
