# HW2-3

## Load and process data

In [1]:
from tensorflow.keras.datasets import mnist
# Load the MNIST train/test splits (images plus integer class labels).
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()

# Flatten every 28x28 image into a 784-element vector. The number of samples
# is read from the arrays themselves rather than hard-coded, so this cell keeps
# working if a subset of the data is loaded.
train_images = train_images.reshape((len(train_images), 28 * 28))
test_images = test_images.reshape((len(test_images), 28 * 28))

# Convert to float32 and divide by 255 to rescale the pixel intensities.
train_images = train_images.astype("float32") / 255
test_images = test_images.astype("float32") / 255

## Model from Keras

In [2]:
from tensorflow import keras
from tensorflow.keras import layers

# Keras baseline: a 512-unit ReLU hidden layer followed by a 10-way softmax output.
model = keras.Sequential(
    [
        layers.Dense(512, activation="relu"),
        layers.Dense(10, activation="softmax"),
    ]
)

Metal device set to: Apple M1


2022-02-14 08:49:42.714021: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2022-02-14 08:49:42.714701: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


In [3]:
# Configure training: RMSprop optimizer, sparse categorical cross-entropy loss
# (labels are passed as integers), and accuracy as the reported metric.
model.compile(
    optimizer="rmsprop",
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"],
)

In [4]:
# Train the Keras baseline for 5 epochs with mini-batches of 128 samples.
model.fit(
    train_images,
    train_labels,
    epochs=5,
    batch_size=128,
)

# Score the trained model on the test split and report its accuracy.
test_loss, test_acc = model.evaluate(test_images, test_labels)
print(f"test_acc: {test_acc}")

2022-02-14 08:49:44.265104: W tensorflow/core/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz


Epoch 1/5


2022-02-14 08:49:44.501787: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.


Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
 34/313 [==>...........................] - ETA: 0s - loss: 0.0706 - accuracy: 0.9798

2022-02-14 08:49:56.042844: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.


test_acc: 0.9800000190734863


## Naive Model

In [5]:
import tensorflow as tf
class NaiveDense:
    """Fully connected layer computing ``activation(inputs @ W + b)``.

    Attributes are created as ``tf.Variable`` objects:
    ``W`` has shape ``(input_size, output_size)`` and is initialised from a
    uniform distribution between 0 and 0.1; ``b`` has shape ``(output_size,)``
    and is initialised to zeros.
    """

    def __init__(self, input_size, output_size, activation):
        self.activation = activation

        # Kernel: small random starting values.
        kernel_init = tf.random.uniform(
            (input_size, output_size), minval=0, maxval=1e-1
        )
        self.W = tf.Variable(kernel_init)

        # Bias: starts at zero.
        self.b = tf.Variable(tf.zeros((output_size,)))

    def __call__(self, inputs):
        """Apply the affine transform followed by the activation function."""
        pre_activation = tf.matmul(inputs, self.W) + self.b
        return self.activation(pre_activation)

    @property
    def weights(self):
        """The layer's variables as the list ``[W, b]``."""
        return [self.W, self.b]


In [6]:
class NaiveSequential:
    """Chain of layers applied one after another.

    Each element of ``layers`` must be callable on the previous layer's output
    and expose a ``weights`` iterable.
    """

    def __init__(self, layers):
        self.layers = layers

    def __call__(self, inputs):
        """Feed ``inputs`` through every layer in order and return the result."""
        outputs = inputs
        for current_layer in self.layers:
            outputs = current_layer(outputs)
        return outputs

    @property
    def weights(self):
        """Flat list of every layer's weights, in layer order."""
        return [
            weight
            for current_layer in self.layers
            for weight in current_layer.weights
        ]

In [8]:
import math
class BatchGenerator:
    """Hand out consecutive slices of paired ``images`` and ``labels``.

    A cursor (``index``) starts at 0 and moves forward by ``batch_size`` on
    every ``next()`` call. ``num_batches`` is the number of calls needed to
    walk through the data once; the last slice may be shorter than
    ``batch_size``.
    """

    def __init__(self, images, labels, batch_size=128):
        # Images and labels are sliced with the same bounds, so they must align.
        assert len(images) == len(labels)
        self.images, self.labels = images, labels
        self.batch_size = batch_size
        self.index = 0
        self.num_batches = math.ceil(len(images) / batch_size)

    def next(self):
        """Return the next ``(images, labels)`` slice and advance the cursor."""
        window = slice(self.index, self.index + self.batch_size)
        batch = (self.images[window], self.labels[window])
        self.index += self.batch_size
        return batch

In [9]:
def one_training_step(model, images_batch, labels_batch):
    """Run one optimisation step on a single batch and return the batch loss.

    The forward pass is recorded on a gradient tape, the mean sparse
    categorical cross-entropy over the batch is differentiated with respect to
    ``model.weights``, and the gradients are handed to ``update_weights``.
    """
    with tf.GradientTape() as tape:
        batch_predictions = model(images_batch)
        average_loss = tf.reduce_mean(
            tf.keras.losses.sparse_categorical_crossentropy(
                labels_batch, batch_predictions
            )
        )

    trainable = model.weights
    update_weights(tape.gradient(average_loss, trainable), trainable)
    return average_loss

In [10]:
from tensorflow.keras import optimizers

# Use the RMSprop optimizer, as was used for the Keras model, so the two
# training runs are comparable. To try plain SGD instead, replace this with
# optimizers.SGD(learning_rate=1e-3).
optimizer = optimizers.RMSprop()


def update_weights(gradients, weights):
    """Apply one step of the module-level ``optimizer`` to ``weights``.

    ``gradients`` and ``weights`` are paired up positionally.
    """
    grads_and_vars = zip(gradients, weights)
    optimizer.apply_gradients(grads_and_vars)

In [11]:
def fit(model, images, labels, epochs, batch_size=128):
    """Train ``model`` with mini-batch updates for a number of epochs.

    Args:
        model: callable model exposing ``weights`` (passed on to
            ``one_training_step``).
        images: training inputs, sliceable along the first axis.
        labels: training targets, same length as ``images``.
        epochs: number of full passes over the data.
        batch_size: number of samples per mini-batch.

    Each epoch walks the data in its stored order through a fresh
    ``BatchGenerator`` and prints the loss every 100 batches.
    """
    for epoch_counter in range(epochs):
        print(f"Epoch {epoch_counter}")
        # Forward batch_size explicitly: previously the generator was built
        # with its own default (128), silently ignoring the caller's value.
        batch_generator = BatchGenerator(images, labels, batch_size=batch_size)
        for batch_counter in range(batch_generator.num_batches):
            images_batch, labels_batch = batch_generator.next()
            loss = one_training_step(model, images_batch, labels_batch)
            if batch_counter % 100 == 0:
                print(f"loss at batch {batch_counter}: {loss:.2f} ")

Define an `evaluate` function which computes the accuracy and loss of a model on the given testing data.

Inputs:

* `model` naive neural network model, defined in terms of `NaiveDense` and `NaiveSequential`
* `images` image data
* `labels` corresponding labels for image data

Outputs:

* `accuracy` fraction (between 0 and 1) of testing images correctly classified by `model`
* `loss` mean sparse categorical cross-entropy loss of `model` on the testing data


In [13]:
def evaluate(model, images, labels):
    """Compute classification accuracy and mean loss of ``model`` on a data set.

    Args:
        model: callable returning a tensor of per-class scores with one row
            per sample (the result must support ``.numpy()``).
        images: inputs fed to ``model`` in a single forward pass.
        labels: integer class label for each row of ``images``.

    Returns:
        ``(accuracy, losses)`` where ``accuracy`` is the fraction of samples
        whose highest-scoring class equals the label (a Python float), and
        ``losses`` is the mean sparse categorical cross-entropy over all
        samples (a scalar tensor).
    """
    # Local import: numpy is not imported at the top of this notebook.
    import numpy as np

    predictions = model(images).numpy()

    # Compare all predicted classes against the labels in one vectorised
    # operation instead of looping over the samples in Python.
    predicted_classes = predictions.argmax(axis=-1)
    true_classes = np.asarray(labels).reshape(-1)
    count = int((predicted_classes == true_classes).sum())

    accuracy = count / len(labels)
    losses = tf.reduce_mean(
        tf.keras.losses.sparse_categorical_crossentropy(labels, predictions)
    )
    return accuracy, losses
   

**Initialize model**

In [16]:
# Naive counterpart of the Keras model: 784 inputs -> 512 ReLU units -> 10 softmax outputs.
model = NaiveSequential(
    [
        NaiveDense(input_size=28 * 28, output_size=512, activation=tf.nn.relu),
        NaiveDense(input_size=512, output_size=10, activation=tf.nn.softmax),
    ]
)

**Fit model**

In [17]:
# Same training budget as the Keras run: 5 epochs, batches of 128.
fit(
    model,
    train_images,
    train_labels,
    epochs=5,
    batch_size=128,
)

Epoch 0
loss at batch 0: 5.72 
loss at batch 100: 0.92 
loss at batch 200: 0.45 
loss at batch 300: 0.33 
loss at batch 400: 0.54 
Epoch 1
loss at batch 0: 0.25 
loss at batch 100: 0.41 
loss at batch 200: 0.22 
loss at batch 300: 0.30 
loss at batch 400: 0.42 
Epoch 2
loss at batch 0: 0.18 
loss at batch 100: 0.19 
loss at batch 200: 0.21 
loss at batch 300: 0.24 
loss at batch 400: 0.28 
Epoch 3
loss at batch 0: 0.13 
loss at batch 100: 0.12 
loss at batch 200: 0.16 
loss at batch 300: 0.14 
loss at batch 400: 0.19 
Epoch 4
loss at batch 0: 0.09 
loss at batch 100: 0.09 
loss at batch 200: 0.09 
loss at batch 300: 0.11 
loss at batch 400: 0.15 


**Evaluate model performance**

In [18]:
# Score the naive model on the held-out test split.
accuracy, losses = evaluate(model, test_images, test_labels)

In [19]:
# Report the test-set metrics returned by evaluate() for the naive model.
print(f"test accuracy: {accuracy} test losses: {losses}")

test accuracy: 0.9665 test losses: 0.11370152235031128
