# Part III - Two layer FC network Sigmoid function included with Stochastic Gradient Descend

In [2]:
import tensorflow as tf
from tensorflow import keras
import numpy as np
import matplotlib.pyplot as plt

(x_train, y_train), (x_test, y_test) = keras.datasets.cifar10.load_data()
print('x_train: ', x_train.shape)


K = len(np.unique(y_train)) # Classes
Ntr = x_train.shape[0]
Nte = x_test.shape[0]
Din = 3072 # CIFAR10
# Din = 784 # MINIST

# Normalize pixel values
x_train, x_test = x_train / 255.0, x_test / 255.0
mean_image = np.mean(x_train, axis=0)
x_train = x_train - mean_image
x_test = x_test - mean_image

y_train = tf.keras.utils.to_categorical(y_train, num_classes=K)
y_test = tf.keras.utils.to_categorical(y_test, num_classes=K)

x_train = np.reshape(x_train,(Ntr,Din))
x_test = np.reshape(x_test,(Nte,Din))
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')

H = 200  #Hidden layers
std=1e-5
w1 = std*np.random.randn(Din, H)
b1 = np.zeros(H)
w2 = std*np.random.randn(H, K)
b2 = np.zeros(K)
print("w1:", w1.shape)
print("b1:", b1.shape)
print("w2:", w2.shape)
print("b2:", b2.shape)
batch_size = 500

iterations = round(Ntr/batch_size)*300
lr = 0.001
lr_decay= 0.999
reg = 5e-6
loss_history = []
train_acc_history = []
val_acc_history = []
seed = 0
rng = np.random.default_rng(seed=seed)
for t in range(iterations):
    indices = np.random.choice(Ntr,batch_size)
    rng.shuffle(indices)
    # Forward pass
    x = x_train[indices]
    y = y_train[indices]
    a = x.dot(w1)+b1
    h = 1.0/(1+np.exp(-a))
    y_pred = h.dot(w2)+b2

    loss = 1./batch_size*np.square(y_pred-y).sum()+reg*(np.sum(w2*w2)+np.sum(w1*w1))
    loss_history.append(loss)

    # Backward pass
    dy_pred = 1./batch_size*2.0*(y_pred-y)
    dw2 = h.T.dot(dy_pred)+reg*w2
    db2 = dy_pred.sum(axis=0)
    dh = dy_pred.dot(w2.T)
    dw1 = x.T.dot(dh*h*(1-h))+reg*w1
    db1 = (dh*h*(1-h)).sum(axis=0)
    w2-=lr*dw2
    b2-=lr*db2
    w1-=lr*dw1
    b1-=lr*db1
    lr*=lr_decay

    # Printing accuracies and displaying w as images
    if t%10==0:
        print('iteration %d / %d : loss %f'%(t,iterations,loss))







iteration 25000 / 30000 : loss 0.900013
iteration 25010 / 30000 : loss 0.900022
iteration 25020 / 30000 : loss 0.900021
iteration 25030 / 30000 : loss 0.899971
iteration 25040 / 30000 : loss 0.899962
iteration 25050 / 30000 : loss 0.899972
iteration 25060 / 30000 : loss 0.899968
iteration 25070 / 30000 : loss 0.900005
iteration 25080 / 30000 : loss 0.900012
iteration 25090 / 30000 : loss 0.900042
iteration 25100 / 30000 : loss 0.900029
iteration 25110 / 30000 : loss 0.900040
iteration 25120 / 30000 : loss 0.899977
iteration 25130 / 30000 : loss 0.900004
iteration 25140 / 30000 : loss 0.899995
iteration 25150 / 30000 : loss 0.899983
iteration 25160 / 30000 : loss 0.899993
iteration 25170 / 30000 : loss 0.900003
iteration 25180 / 30000 : loss 0.899995
iteration 25190 / 30000 : loss 0.899982
iteration 25200 / 30000 : loss 0.900026
iteration 25210 / 30000 : loss 0.899992
iteration 25220 / 30000 : loss 0.899992
iteration 25230 / 30000 : loss 0.900033
iteration 25240 / 30000 : loss 0.899988
