<a href="https://colab.research.google.com/github/kotharisanjana/CMPE258_DeepLearning_Spring2023/blob/main/Assignment_1/DL_Codelab.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### **MNIST handwritten digit classification**

In [1]:
# Training hyperparameters

BATCH_SIZE = 128             # examples per training batch
EPOCHS = 10                  # number of epochs passed to model.fit
NUM_TRAIN_EXAMPLES = 60000   # number of examples in the training split

# The training dataset repeats indefinitely, so fit() has to be told how many
# batches make up one epoch. Integer division drops the final partial batch.
steps_per_epoch = NUM_TRAIN_EXAMPLES // BATCH_SIZE

In [2]:
# Cloud Storage locations of the raw data files, one (images, labels) pair
# per split. The validation split is read from the "t10k" files.

train_images_path, train_labels_path = (
    'gs://mnist-public/train-images-idx3-ubyte',
    'gs://mnist-public/train-labels-idx1-ubyte',
)
val_images_path, val_labels_path = (
    'gs://mnist-public/t10k-images-idx3-ubyte',
    'gs://mnist-public/t10k-labels-idx1-ubyte',
)

In [3]:
# Imports

import math
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf

# Sentinel that asks tf.data to tune a setting (e.g. the prefetch buffer size)
# automatically instead of using a fixed value.
AUTO = tf.data.experimental.AUTOTUNE

In [4]:
# Helper functions for reading data

def read_image(image_bytes):
    """Decode one fixed-length image record into a flat float32 vector.

    The raw bytes are interpreted as uint8 values, cast to float32, divided
    by 256.0 and reshaped to a vector of 28*28 elements.
    """
    pixels = tf.io.decode_raw(image_bytes, tf.uint8)
    scaled = tf.cast(pixels, tf.float32) / 256.0
    return tf.reshape(scaled, [28*28])

def read_label(label_bytes):
    """Decode one label record into a one-hot vector of depth 10.

    The raw bytes are interpreted as uint8, reshaped to a scalar, and then
    one-hot encoded.
    """
    class_index = tf.reshape(tf.io.decode_raw(label_bytes, tf.uint8), [])
    return tf.one_hot(class_index, 10)

def load_dataset(image_path, label_path):
    """Build a dataset of (image, label) pairs from two fixed-record files.

    Args:
        image_path: Path of the image file; records are 28*28 bytes long and
            follow a 16-byte header.
        label_path: Path of the label file; records are 1 byte long and
            follow an 8-byte header.

    Returns:
        A `tf.data.Dataset` that zips the decoded images (see `read_image`)
        with the decoded labels (see `read_label`).
    """
    # Let tf.data pick the map parallelism (AUTO) instead of a hard-coded
    # thread count, matching how prefetching is configured in this notebook.
    # Parallel `map` keeps element order by default, so images and labels
    # stay aligned when zipped.
    image = tf.data.FixedLengthRecordDataset(image_path, 28*28, header_bytes=16)
    image = image.map(read_image, num_parallel_calls=AUTO)
    label = tf.data.FixedLengthRecordDataset(label_path, 1, header_bytes=8)
    label = label.map(read_label, num_parallel_calls=AUTO)
    dataset = tf.data.Dataset.zip((image, label))
    return dataset

def get_training_dataset(image_path, label_path, batch_size):
    """Return the endlessly repeating, shuffled and batched training dataset.

    Pipeline order: load -> cache -> shuffle (buffer of 1000, reshuffled on
    every iteration) -> repeat -> batch (partial batches dropped) -> prefetch.
    """
    return (
        load_dataset(image_path, label_path)
        .cache()
        .shuffle(1000, reshuffle_each_iteration=True)
        .repeat()
        .batch(batch_size, drop_remainder=True)
        .prefetch(AUTO)
    )

def get_validation_dataset(image_path, label_path):
    """Return the validation dataset as repeating batches of 10000 examples.

    Pipeline order: load -> cache -> batch (10000 per batch, partial batches
    dropped) -> repeat, so the same batch can be drawn again on every pass.
    """
    return (
        load_dataset(image_path, label_path)
        .cache()
        .batch(10000, drop_remainder=True)
        .repeat()
    )

In [5]:
# Instantiate the two input pipelines used by every model in this notebook

training_dataset = get_training_dataset(
    image_path=train_images_path,
    label_path=train_labels_path,
    batch_size=BATCH_SIZE,
)
validation_dataset = get_validation_dataset(
    image_path=val_images_path,
    label_path=val_labels_path,
)

#### Simple one-layer neural network

In [6]:
# Build model: a single 10-unit softmax layer applied to the flat 28*28 input
model = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(28*28,)),
    tf.keras.layers.Dense(10, activation='softmax')
  ])

# Configure training: SGD optimizer, categorical cross-entropy loss
# (labels are one-hot encoded by read_label) and accuracy as the metric
model.compile(optimizer='sgd',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# Print the layer / parameter-count table
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 10)                7850      
                                                                 
Total params: 7,850
Trainable params: 7,850
Non-trainable params: 0
_________________________________________________________________


In [7]:
# Model training: the training dataset repeats forever, so steps_per_epoch
# bounds each epoch; one validation batch is evaluated per epoch.
history = model.fit(
    training_dataset,
    steps_per_epoch=steps_per_epoch,
    epochs=EPOCHS,
    validation_data=validation_dataset,
    validation_steps=1,
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [8]:
# Predict on validation dataset

# steps=1 draws a single batch from the repeating validation dataset
probabilities = model.predict(validation_dataset, steps=1)
# index of the highest-probability class in each row
predicted_labels = np.argmax(probabilities, axis=1)

print(probabilities.shape)
print(probabilities)

(10000, 10)
[[7.8748626e-04 8.1971093e-06 3.1598753e-04 ... 9.8955309e-01
  3.6115607e-04 6.0557751e-03]
 [2.3060651e-02 8.3497004e-04 7.6156962e-01 ... 1.3204278e-05
  2.7070902e-02 5.9919650e-05]
 [7.4396067e-04 9.3207967e-01 1.8221354e-02 ... 7.5208955e-03
  1.4428624e-02 3.2247140e-03]
 ...
 [2.7504033e-05 1.6964381e-04 4.0811874e-04 ... 1.7022252e-02
  2.6384143e-02 7.6570340e-02]
 [1.2521682e-02 1.7651666e-02 4.4961357e-03 ... 6.9619492e-03
  3.9220786e-01 4.4354559e-03]
 [1.6698792e-03 3.6459807e-07 2.6247371e-03 ... 1.0788053e-06
  3.7591813e-05 7.9703286e-06]]


#### Adding dense layers to existing network

In [9]:
# Adding more layers to the neural network:
# two sigmoid hidden layers (200 and 60 units) in front of the softmax output

model_ = tf.keras.Sequential([
      tf.keras.layers.Input(shape=(28*28,)),
      tf.keras.layers.Dense(200, activation='sigmoid'),
      tf.keras.layers.Dense(60, activation='sigmoid'),
      tf.keras.layers.Dense(10, activation='softmax')
  ])

# Configure training (optimizer, loss, metric)
model_.compile(optimizer='sgd',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

model_.summary()

# Fit model (rebinds `history`, replacing the result of the previous run)
history = model_.fit(training_dataset, steps_per_epoch=steps_per_epoch, epochs=EPOCHS, validation_data=validation_dataset, validation_steps=1)

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_1 (Dense)             (None, 200)               157000    
                                                                 
 dense_2 (Dense)             (None, 60)                12060     
                                                                 
 dense_3 (Dense)             (None, 10)                610       
                                                                 
Total params: 169,670
Trainable params: 169,670
Non-trainable params: 0
_________________________________________________________________
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


#### Altering activation function and optimizer

In [10]:
# Replacing sigmoid with relu activation function and sgd with adam optimizer

# Same 200-60-10 layout as the previous network; `model_` is rebound to a
# freshly initialised model.
model_ = tf.keras.Sequential([
      tf.keras.layers.Input(shape=(28*28,)),
      tf.keras.layers.Dense(200, activation='relu'),
      tf.keras.layers.Dense(60, activation='relu'),
      tf.keras.layers.Dense(10, activation='softmax')
  ])

# Configure training (optimizer, loss, metric)
model_.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

model_.summary()

# Fit model
history = model_.fit(training_dataset, steps_per_epoch=steps_per_epoch, epochs=EPOCHS, validation_data=validation_dataset, validation_steps=1)

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_4 (Dense)             (None, 200)               157000    
                                                                 
 dense_5 (Dense)             (None, 60)                12060     
                                                                 
 dense_6 (Dense)             (None, 10)                610       
                                                                 
Total params: 169,670
Trainable params: 169,670
Non-trainable params: 0
_________________________________________________________________
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


#### Using decaying learning rate

In [11]:
# lr decay function
def lr_decay(epoch):
  """Return the learning rate for a 0-based epoch index: 0.01 * 0.6**epoch.

  Keep this a one-argument function: it is handed to
  tf.keras.callbacks.LearningRateScheduler, and changing the signature may
  change how the callback invokes it (NOTE(review): confirm against the
  Keras documentation before adding parameters).
  """
  initial_rate, decay_factor = 0.01, 0.6
  return initial_rate * math.pow(decay_factor, epoch)

# lr schedule callback: calls lr_decay to set the learning rate for each epoch;
# verbose=True makes it log a "setting learning rate to ..." line per epoch
lr_decay_callback = tf.keras.callbacks.LearningRateScheduler(lr_decay, verbose=True)

In [12]:
# Using decaying learning rate

# Same relu 200-60-10 network as before, rebuilt so training starts from
# fresh weights.
model_ = tf.keras.Sequential([
      tf.keras.layers.Input(shape=(28*28,)),
      tf.keras.layers.Dense(200, activation='relu'),
      tf.keras.layers.Dense(60, activation='relu'),
      tf.keras.layers.Dense(10, activation='softmax')
  ])

# Configure training (optimizer, loss, metric)
model_.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

model_.summary()

# Fit model; lr_decay_callback sets the optimizer's learning rate every epoch
history = model_.fit(training_dataset, steps_per_epoch=steps_per_epoch, epochs=EPOCHS, validation_data=validation_dataset, validation_steps=1, callbacks=[lr_decay_callback])

Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_7 (Dense)             (None, 200)               157000    
                                                                 
 dense_8 (Dense)             (None, 60)                12060     
                                                                 
 dense_9 (Dense)             (None, 10)                610       
                                                                 
Total params: 169,670
Trainable params: 169,670
Non-trainable params: 0
_________________________________________________________________

Epoch 1: LearningRateScheduler setting learning rate to 0.01.
Epoch 1/10

Epoch 2: LearningRateScheduler setting learning rate to 0.006.
Epoch 2/10

Epoch 3: LearningRateScheduler setting learning rate to 0.0036.
Epoch 3/10

Epoch 4: LearningRateScheduler setting learning rate to 0.0021599999999999996.
Epoch 4

#### Making the neural network with 4 layers

In [13]:
# Adding more layers to the neural network:
# a 100-unit relu layer is inserted between the 200- and 60-unit layers

model_ = tf.keras.Sequential([
      tf.keras.layers.Input(shape=(28*28,)),
      tf.keras.layers.Dense(200, activation='relu'),
      tf.keras.layers.Dense(100, activation='relu'),
      tf.keras.layers.Dense(60, activation='relu'),
      tf.keras.layers.Dense(10, activation='softmax')
  ])

# Configure training (optimizer, loss, metric)
model_.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

model_.summary()

# Fit model with the decaying learning-rate schedule
history = model_.fit(training_dataset, steps_per_epoch=steps_per_epoch, epochs=EPOCHS, validation_data=validation_dataset, validation_steps=1, callbacks=[lr_decay_callback])

Model: "sequential_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_10 (Dense)            (None, 200)               157000    
                                                                 
 dense_11 (Dense)            (None, 100)               20100     
                                                                 
 dense_12 (Dense)            (None, 60)                6060      
                                                                 
 dense_13 (Dense)            (None, 10)                610       
                                                                 
Total params: 183,770
Trainable params: 183,770
Non-trainable params: 0
_________________________________________________________________

Epoch 1: LearningRateScheduler setting learning rate to 0.01.
Epoch 1/10

Epoch 2: LearningRateScheduler setting learning rate to 0.006.
Epoch 2/10

Epoch 3: LearningRateScheduler

#### Adding dropout to control overfitting

In [14]:
# 200-100-60-10 dense network with a Dropout(0.25) layer after each hidden
# layer. Note that this cell rebinds `model` (not `model_`).
model = tf.keras.Sequential(
  [
      tf.keras.layers.Input(shape=(28*28,)),
      tf.keras.layers.Dense(200, activation='relu'),
      tf.keras.layers.Dropout(0.25),
      tf.keras.layers.Dense(100, activation='relu'),
      tf.keras.layers.Dropout(0.25),
      tf.keras.layers.Dense(60, activation='relu'),
      tf.keras.layers.Dropout(0.25),
      tf.keras.layers.Dense(10, activation='softmax')
  ])

# Configure training (optimizer, loss, metric)
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# print model layers
model.summary()

Model: "sequential_5"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_14 (Dense)            (None, 200)               157000    
                                                                 
 dropout (Dropout)           (None, 200)               0         
                                                                 
 dense_15 (Dense)            (None, 100)               20100     
                                                                 
 dropout_1 (Dropout)         (None, 100)               0         
                                                                 
 dense_16 (Dense)            (None, 60)                6060      
                                                                 
 dropout_2 (Dropout)         (None, 60)                0         
                                                                 
 dense_17 (Dense)            (None, 10)               

In [15]:
# Fit model. Train `model`, the dropout network defined in the preceding
# cell; `model_` still refers to the 4-layer network without dropout.
history = model.fit(
    training_dataset,
    steps_per_epoch=steps_per_epoch,
    epochs=EPOCHS,
    validation_data=validation_dataset,
    validation_steps=1,
    callbacks=[lr_decay_callback],
)


Epoch 1: LearningRateScheduler setting learning rate to 0.01.
Epoch 1/10

Epoch 2: LearningRateScheduler setting learning rate to 0.006.
Epoch 2/10

Epoch 3: LearningRateScheduler setting learning rate to 0.0036.
Epoch 3/10

Epoch 4: LearningRateScheduler setting learning rate to 0.0021599999999999996.
Epoch 4/10

Epoch 5: LearningRateScheduler setting learning rate to 0.001296.
Epoch 5/10

Epoch 6: LearningRateScheduler setting learning rate to 0.0007775999999999998.
Epoch 6/10

Epoch 7: LearningRateScheduler setting learning rate to 0.0004665599999999999.
Epoch 7/10

Epoch 8: LearningRateScheduler setting learning rate to 0.00027993599999999994.
Epoch 8/10

Epoch 9: LearningRateScheduler setting learning rate to 0.00016796159999999993.
Epoch 9/10

Epoch 10: LearningRateScheduler setting learning rate to 0.00010077695999999997.
Epoch 10/10


#### Convolutional Neural Network

In [16]:
# Build CNN

# Reshape turns each flat 28*28 vector into a 28x28x1 image for the Conv2D
# layers; Flatten + Dense(10, softmax) form the classification head.
model = tf.keras.Sequential([
    tf.keras.layers.Reshape(input_shape=(28*28,), target_shape=(28, 28, 1)),
    tf.keras.layers.Conv2D(kernel_size=3, filters=12, activation='relu'),
    tf.keras.layers.Conv2D(kernel_size=6, filters=24, strides=2, activation='relu'),
    tf.keras.layers.Conv2D(kernel_size=6, filters=32, strides=2, activation='relu'),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(10, activation='softmax')
])

# Configure training (optimizer, loss, metric)
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# print model layers
model.summary()

# Fit model. Train `model`, the CNN built in this cell; `model_` still
# refers to a dense network from an earlier section.
history = model.fit(
    training_dataset,
    steps_per_epoch=steps_per_epoch,
    epochs=EPOCHS,
    validation_data=validation_dataset,
    validation_steps=1,
    callbacks=[lr_decay_callback],
)

Model: "sequential_6"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 reshape (Reshape)           (None, 28, 28, 1)         0         
                                                                 
 conv2d (Conv2D)             (None, 26, 26, 12)        120       
                                                                 
 conv2d_1 (Conv2D)           (None, 11, 11, 24)        10392     
                                                                 
 conv2d_2 (Conv2D)           (None, 3, 3, 32)          27680     
                                                                 
 flatten (Flatten)           (None, 288)               0         
                                                                 
 dense_18 (Dense)            (None, 10)                2890      
                                                                 
Total params: 41,082
Trainable params: 41,082
Non-trai

#### CNN with padding and more dense layers

In [17]:
# Build model

# Same three-convolution stack as the plain CNN, now with padding='same',
# plus a 200-unit relu dense layer in front of the softmax output.
model = tf.keras.Sequential([
      tf.keras.layers.Reshape(input_shape=(28*28,), target_shape=(28, 28, 1)),
      tf.keras.layers.Conv2D(kernel_size=3, filters=12, activation='relu', padding='same'),
      tf.keras.layers.Conv2D(kernel_size=6, filters=24, activation='relu', padding='same', strides=2),
      tf.keras.layers.Conv2D(kernel_size=6, filters=32, activation='relu', padding='same', strides=2),
      tf.keras.layers.Flatten(),
      tf.keras.layers.Dense(200, activation='relu'),
      tf.keras.layers.Dense(10, activation='softmax')
  ])

# Configure training (optimizer, loss, metric)
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

model.summary()

# Fit model. Train `model`, the padded CNN built in this cell; `model_`
# still refers to a dense network from an earlier section.
history = model.fit(
    training_dataset,
    steps_per_epoch=steps_per_epoch,
    epochs=EPOCHS,
    validation_data=validation_dataset,
    validation_steps=1,
    callbacks=[lr_decay_callback],
)

Model: "sequential_7"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 reshape_1 (Reshape)         (None, 28, 28, 1)         0         
                                                                 
 conv2d_3 (Conv2D)           (None, 28, 28, 12)        120       
                                                                 
 conv2d_4 (Conv2D)           (None, 14, 14, 24)        10392     
                                                                 
 conv2d_5 (Conv2D)           (None, 7, 7, 32)          27680     
                                                                 
 flatten_1 (Flatten)         (None, 1568)              0         
                                                                 
 dense_19 (Dense)            (None, 200)               313800    
                                                                 
 dense_20 (Dense)            (None, 10)               

#### Adding a dropout layer to the above architecture to reduce overfitting, since the validation loss is shooting up

In [18]:
# Build model

# Padded CNN with a Dropout(0.4) layer between the 200-unit dense layer and
# the softmax output.
model = tf.keras.Sequential([
      tf.keras.layers.Reshape(input_shape=(28*28,), target_shape=(28, 28, 1)),
      tf.keras.layers.Conv2D(kernel_size=3, filters=12, activation='relu', padding='same'),
      tf.keras.layers.Conv2D(kernel_size=6, filters=24, activation='relu', padding='same', strides=2),
      tf.keras.layers.Conv2D(kernel_size=6, filters=32, activation='relu', padding='same', strides=2),
      tf.keras.layers.Flatten(),
      tf.keras.layers.Dense(200, activation='relu'),
      tf.keras.layers.Dropout(0.4),
      tf.keras.layers.Dense(10, activation='softmax')
  ])

# Configure training (optimizer, loss, metric)

model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

model.summary()

# Fit model. Train `model`, the CNN with dropout built in this cell;
# `model_` still refers to a dense network from an earlier section.

history = model.fit(
    training_dataset,
    steps_per_epoch=steps_per_epoch,
    epochs=EPOCHS,
    validation_data=validation_dataset,
    validation_steps=1,
    callbacks=[lr_decay_callback],
)

Model: "sequential_8"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 reshape_2 (Reshape)         (None, 28, 28, 1)         0         
                                                                 
 conv2d_6 (Conv2D)           (None, 28, 28, 12)        120       
                                                                 
 conv2d_7 (Conv2D)           (None, 14, 14, 24)        10392     
                                                                 
 conv2d_8 (Conv2D)           (None, 7, 7, 32)          27680     
                                                                 
 flatten_2 (Flatten)         (None, 1568)              0         
                                                                 
 dense_21 (Dense)            (None, 200)               313800    
                                                                 
 dropout_3 (Dropout)         (None, 200)              

#### Batch normalization

In [19]:
# CNN with batch normalization. Each Conv2D / hidden Dense layer is created
# without a bias and without an activation, and is followed by
# BatchNormalization(center=True, scale=False) and then a separate relu
# Activation layer.
# NOTE(review): the bias is presumably omitted because center=True already
# provides a learned offset — confirm against the Keras documentation.
model = tf.keras.Sequential([
      tf.keras.layers.Reshape(input_shape=(28*28,), target_shape=(28, 28, 1)),
      tf.keras.layers.Conv2D(kernel_size=3, filters=12, use_bias=False, padding='same'),
      tf.keras.layers.BatchNormalization(center=True, scale=False),
      tf.keras.layers.Activation('relu'),
      tf.keras.layers.Conv2D(kernel_size=6, filters=24, use_bias=False, padding='same', strides=2),
      tf.keras.layers.BatchNormalization(center=True, scale=False),
      tf.keras.layers.Activation('relu'),
      tf.keras.layers.Conv2D(kernel_size=6, filters=32, use_bias=False, padding='same', strides=2),
      tf.keras.layers.BatchNormalization(center=True, scale=False),
      tf.keras.layers.Activation('relu'),
      tf.keras.layers.Flatten(),
      tf.keras.layers.Dense(200, use_bias=False),
      tf.keras.layers.BatchNormalization(center=True, scale=False),
      tf.keras.layers.Activation('relu'),
      tf.keras.layers.Dropout(0.3),
      tf.keras.layers.Dense(10, activation='softmax')
  ])

# Configure training (optimizer, loss, metric)
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# print model layers
model.summary()

Model: "sequential_9"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 reshape_3 (Reshape)         (None, 28, 28, 1)         0         
                                                                 
 conv2d_9 (Conv2D)           (None, 28, 28, 12)        108       
                                                                 
 batch_normalization (BatchN  (None, 28, 28, 12)       36        
 ormalization)                                                   
                                                                 
 activation (Activation)     (None, 28, 28, 12)        0         
                                                                 
 conv2d_10 (Conv2D)          (None, 14, 14, 24)        10368     
                                                                 
 batch_normalization_1 (Batc  (None, 14, 14, 24)       72        
 hNormalization)                                      

In [20]:
# Train model: fits `model` with the decaying learning-rate schedule
history = model.fit(
    training_dataset,
    steps_per_epoch=steps_per_epoch,
    epochs=EPOCHS,
    validation_data=validation_dataset,
    validation_steps=1,
    callbacks=[lr_decay_callback],
)


Epoch 1: LearningRateScheduler setting learning rate to 0.01.
Epoch 1/10

Epoch 2: LearningRateScheduler setting learning rate to 0.006.
Epoch 2/10

Epoch 3: LearningRateScheduler setting learning rate to 0.0036.
Epoch 3/10

Epoch 4: LearningRateScheduler setting learning rate to 0.0021599999999999996.
Epoch 4/10

Epoch 5: LearningRateScheduler setting learning rate to 0.001296.
Epoch 5/10

Epoch 6: LearningRateScheduler setting learning rate to 0.0007775999999999998.
Epoch 6/10

Epoch 7: LearningRateScheduler setting learning rate to 0.0004665599999999999.
Epoch 7/10

Epoch 8: LearningRateScheduler setting learning rate to 0.00027993599999999994.
Epoch 8/10

Epoch 9: LearningRateScheduler setting learning rate to 0.00016796159999999993.
Epoch 9/10

Epoch 10: LearningRateScheduler setting learning rate to 0.00010077695999999997.
Epoch 10/10
