# Assignment a04: Neural Networks
A. Thieshanthan, 180641N

In [5]:
import tensorflow as tf 
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.datasets import cifar10
import numpy as np 
import matplotlib.pyplot as plt 
import time

In [6]:
def to_tensor(ar):
    ar = tf.convert_to_tensor(ar, dtype=tf.float32)
    return ar

In [7]:
print(
    tf.config.list_physical_devices('GPU'))

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


## 1. Linear Classifier


In [8]:
# Loading and Preparing the dataset
(x_train, y_train), (x_test, y_test) = cifar10.load_data()
print('x_train shape', x_train.shape)
print('y_train shape', y_train.shape)

classes = len(np.unique(y_train))
training_samples = x_train.shape[0]
tesing_samples = x_test.shape[0]
input_shape = 32*32*3 # Cifar 10

# Normalizing
x_train, x_test = x_train/255. , x_test/255.
mean_image = np.mean(x_train, axis = 0)
x_train -= mean_image
x_test -= mean_image

# one hot encoding
y_train = to_categorical(y_train, num_classes=classes).T
y_test = to_categorical(y_test, num_classes= classes).T

#reshaping
x_train = np.reshape(x_train, (training_samples, input_shape)).T #shape = 3072*50000
x_test = np.reshape(x_test, (tesing_samples, input_shape)).T #shape = 3072*10000
print('x_train new shape', x_train.shape)
print('y_train new shape', y_train.shape)

x_train shape (50000, 32, 32, 3)
y_train shape (50000, 1)
x_train new shape (3072, 50000)
y_train new shape (10, 50000)


In [9]:
# converting to tensors
x_train = to_tensor(x_train)
y_train = to_tensor(y_train)
x_test = to_tensor(x_test)
y_test = to_tensor(y_test)

In [10]:
#parameters
std = 1e-5
w1 = to_tensor(np.random.randn(classes, input_shape).astype(np.float32) * std)
b1 = to_tensor(np.zeros((classes, 1)).astype(np.float32))
print('w1 shape: ', w1.shape)
print('b1 shape: ', b1.shape)
batch_size = training_samples
iterations = 300
learning_rate = 0.015
learning_rate_decay = 0
reg = 0
loss_history = []
train_acc_history = []
test_loss = []
val_acc_hisory = []
seed = 0
rng = np.random.default_rng(seed=seed)

w1 shape:  (10, 3072)
b1 shape:  (10, 1)


In [11]:
# Training
t0 = time.time()
for t in range(iterations):
    indices = np.arange(training_samples)
    rng.shuffle(indices)

    #forward pass
    y_hat = tf.matmul(w1, x_train) + b1
    #mean squared error loss
    loss = tf.math.reduce_sum((y_train - y_hat)**2).numpy() / batch_size
    loss = float(np.squeeze(loss))
    loss_history.append(loss)
    # accuracy = 1 - loss
    # train_acc_history.append(accuracy)
    
    # backward pass
    dw1 = tf.matmul(y_hat - y_train, x_train, transpose_b=True) * (2/batch_size)
    I = tf.ones((batch_size, 1))
    db1 = tf.matmul(y_hat - y_train, I) * (2/batch_size)
    
    # gradient descent
    w1 = w1 - learning_rate * dw1
    b1 = b1 - learning_rate * db1

    # testing
    y_hat_test = tf.matmul(w1, x_test) + b1
    loss_test = tf.math.reduce_sum((y_test - y_hat_test)**2).numpy() / x_test.shape[1]
    loss_test = np.squeeze(loss_test)
    test_loss.append(loss_test)

    if t % 50 == 0:
        print('epoch: '+ str(t + 1), end = '      ')
        print('loss: %.4f' % (loss_history[t]), end = '     ')
        print('Test loss: %.4f' % (test_loss[t]))
print('After %d epochs: ' % (iterations))
print('Train loss: %f, Test loss: %f' % (loss_history[-1], test_loss[-1]))
t1 = time.time()
print('time taken: %.2f seconds'  % (t1-t0))

epoch: 1      loss: 1.0000     Test loss: 0.9560
epoch: 51      loss: 0.8029     Test loss: 0.8031
epoch: 101      loss: 0.7912     Test loss: 0.7930
epoch: 151      loss: 0.7871     Test loss: 0.7901
epoch: 201      loss: 0.7845     Test loss: 0.7885
epoch: 251      loss: 0.7826     Test loss: 0.7875
After 300 epochs: 
Train loss: 0.781186, Test loss: 0.786779
time taken: 9.66 seconds


In [17]:
# Displaying W as 10 images
w_image = w1.numpy().T
w_image = np.reshape(w_image, (32,32,3,10))
for i in range(10):
    fig, ax = plt.subplots(2,5)
    ax[i//5, i%5].imshow(w_image[:,:,:,i], cmap = 'gray')
plt.imshow()

Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).
Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).
Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).
Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).
Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).
Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).
Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).
Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).
Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).
Clipping i

TypeError: imshow() missing 1 required positional argument: 'X'

## 2. Two layer fully connected network
### Specifications
    * 200 hidden units
    * Sigmoid activation for hidden layer
    * No output layer activation

In [12]:
def sigmoid(z):
    return 1 / (1 + tf.math.exp(-z))

In [20]:
# Parameters
H = 10
std = 1e-4
w1 = to_tensor(np.random.randn(H, input_shape).astype(np.float32) * std)
b1 = to_tensor(np.zeros((H, 1)))
w2 = to_tensor(np.random.randn(classes, H).astype(np.float32) * std)
b2 = to_tensor(np.zeros((classes, 1)))
print('w1 shape: ', w1.shape)
print('b1 shape: ', b1.shape)
print('w2 shape: ', w2.shape)
print('b2 shape: ', b2.shape)

batch_size_2 = training_samples
iteration_2 = 300
learning_rate_2 = 0.01
learning_rate_decay_2 = 0
reg_2 = 0
loss_history_2 = []
train_acc_history_2 = []
val_acc_hisory_2 = []
seed = 0
rng = np.random.default_rng(seed=seed)

w1 shape:  (10, 3072)
b1 shape:  (10, 1)
w2 shape:  (10, 10)
b2 shape:  (10, 1)


In [21]:
# Training
t0 = time.time()
for t in range(iterations):
    indices = np.arange(training_samples)
    rng.shuffle(indices)

    #forward pass
    z1 = tf.matmul(w1, x_train) + b1
    a1 = sigmoid(z1)
    z2 = tf.matmul(w2, a1) + b2
    y_hat = z2 #no activation
    #mean squared error loss
    loss = tf.math.reduce_sum((y_train - y_hat)**2).numpy() / batch_size
    loss = float(np.squeeze(loss))
    loss_history_2.append(loss)
    

    # backward pass
    # dz2 = (2/batch_size) * (y_hat-y_train)
    # dw2 = tf.matmul(dz2, a1, transpose_b= True)
    # db2 = np.sum(dz2.numpy(), axis = 1, keepdims=True)
    # da1 = tf.matmul(w2, dz2, transpose_a=True)
    # dz1 = da1 * a1*(1-a1)
    # dw1 = tf.matmul(dz1, x_train, transpose_b= True)
    # db1 = np.sum(dz1.numpy(), axis = 1, keepdims=True)
    dz2 =  (y_hat-y_train)
    dw2 = (2/batch_size) * tf.matmul(dz2, a1, transpose_b= True) 
    db2 = (2/batch_size) * np.sum(dz2.numpy(), axis = 1, keepdims=True)
    da1 = tf.matmul(w2, dz2, transpose_a=True)
    dz1 = da1 * a1*(1-a1)
    dw1 = (2/batch_size) *tf.matmul(dz1, x_train, transpose_b= True)
    db1 = (2/batch_size) *np.sum(dz1.numpy(), axis = 1, keepdims=True)
    # gradient descent
    w2 = w2 - learning_rate_2 * dw2
    b2 = b2 - learning_rate_2 * db2
    w1 = w1 - learning_rate_2 * dw1
    b1 = b1 - learning_rate_2 * db1
    if t % 50 == 0:
        print('epoch: '+ str(t + 1))
        print('loss: ' + str(loss_history_2[t]))
print('final loss after %d epochs : %f' % (iterations , loss_history_2[-1]))
t1 = time.time()
print('time taken: ' + str(t1-t0))

epoch: 1
loss: 1.00011265625
epoch: 51
loss: 0.9000703125
epoch: 101
loss: 0.9
epoch: 151
loss: 0.89999984375
epoch: 201
loss: 0.899999765625
epoch: 251
loss: 0.89999953125
final loss after 300 epochs : 0.899999
time taken: 7.97094464302063


## 3. Stochastic Gradient Descent

## 4. Convolutional Neural Network

In [2]:
from tensorflow.keras.models import Sequential