## Part 1 - Obtain the data
- Download MNIST dataset 
- Examines images and labels 

In [2]:
import tensorflow as tf 
import matplotlib.pyplot as plt 

In [3]:
mnist = tf.keras.datasets.mnist #get mnist from the keras datasets package 
(x_train, y_train), (x_test, y_test) = mnist.load_data() #set into training(60k) and testing images(10k)

In [12]:
#check you have the correct datasets
print(len(x_train))
print(len(x_test))

60000
10000


In [13]:
#examine how the images look (x=image(the arrays/bytes it's made out of), y=label)
#each number is from 0-255, to represent the color (remember:255=black, 0=white)
x_train[0]

array([[  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0],
       [  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0],
       [  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0],
       [  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0],
       [  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0],
       [  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   3,
         18,  18,  18, 126, 136, 175,  26, 166, 255, 247, 127,   0,   0,
          0,   0],
       [  

In [None]:
#show what the image looks, pictorially
plt.imshow(x_train[0], cmap='gray')

In [None]:
#print the label name (indicates what picture is supposed to be)
print(y_train[0])

In [None]:
#check the training data to ensure testing and training images are not the same
plt.imshow(x_test[0], cmap='gray')

## Part 2 - Layers
- Convolution layer
- Flatten layer
- Dense layer

In [4]:
#Add the imports (layers and model)
from tensorflow.keras.layers import Conv2D, Flatten, Dense 
from tensorflow.keras import Model 

In [None]:
#The convolutional layer (convolutional layers apply filters to make images smaller). 
#Filters: # of output images. Kernel: # of weights applied to input.  
#Conv2D(filters, kernel_size=, activation='relu')

#Flatten the layer
#Flatten()

#Dense layer - neurons: number of outputs, activation: typically 'relu' or 'softmax'
#Dense(neurons, activation=)

In [5]:
class MNISTModel(Model):
    def __init__(self):
        #initializer for the model 
        super(MNISTModel, self).__init__()
        
        #add in the layers
        self.conv1 = Conv2D(32, 3, activation='relu')
        self.flatten = Flatten()
        self.dense1 = Dense(128, activation='relu')
        
        #condense the outputs into smaller ones. 
        self.dense2 = Dense(10, activation='softmax')
        
    #use call notation bc this way, we can simpy write 'Model[]' to use the function 
    def call(self,x): 
        #store the input into convolutional 1 and get input of that. feed the output as input into next layer. 
        x1 = self.conv1(x)
        x2 = self.flatten(x1)
        x3 = self.dense1(x2)
        return self.dense2(x3)
    
model = MNISTModel()

## Part 3 - Loss and Optimizer
- Add a loss function
- Add an optimizer function 
- Add a way to measure loss and accuracy 

In [6]:
#Create a loss and optimizer function 
loss_function = tf.keras.losses.SparseCategoricalCrossentropy() #will calculate total loss depending on which category current output creates. remember, we want to maximize the possibility that the output is only in one category, so we use sparsecategory which does so
optimizer = tf.keras.optimizers.Adam() #optimizer: simply modifies learning rate. We use Adam - modifies learning rate based on how well the training is going (decreases learning rate if it's struggling, increases it if it's easier)

In [7]:
#Specify loss and accuracy metrics for training 
train_loss = tf.keras.metrics.Mean(name='train_loss') #gets avg of all training loss 
train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy') 

In [8]:
#Specify loss and accuracy metrics for testing 
test_loss = tf.keras.metrics.Mean(name='test_loss') #gets avg of all testing loss 
test_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='test_accuracy') #sees if output falls in one category as desired

## Part 4 - Train and Test Step 
- Add the function to run when training the model
- Add the function to run when testing  the model (diff is that we don't modify the weights here)

In [9]:
#training step
@tf.function
#input = images, outputs = labels (outputs are correct answers for inputs) 
def train_step(inputs, outputs):
    with tf.GradientTape() as tape: #used to apply gradient and change weights/biases  
        predictions = model(inputs) #get model's outputs(predictions) based on current weights/biases 
        loss = loss_function(outputs, predictions) #get the loss based on what the model is outputting and what the actual output is, so that there's always a difference between what the model thinks is a correct answer and the actual correct answer 
    gradients = tape.gradient(loss, model.trainable_variables) #getting changes we need to make - trainable_variables are all the possible weights/biases of each layer 
    optimizer.apply_gradients(zip(gradients, model.trainable_variables)) #making those changes to apply those weights to the trainable_variables so the loss is lower 
    
    #keep track of current loss and accuracy
    train_loss(loss) #should decrease over time 
    train_accuracy(outputs, predictions) #get the actual correct answer and the output's answer to see if model is correct 

In [10]:
#testing tape
@tf.function
def test_step(inputs, outputs):
    with tf.GradientTape() as tape: 
        predictions = model(inputs) 
        loss = loss_function(outputs, predictions) 
  
    test_loss(loss) 
    test_accuracy(outputs, predictions) 

## Part 5 - Formatting Data
- Format our inputs 
- Format our outputs 

In [11]:
x_train, x_test = x_train / 255.0, x_test / 255.0

x_train = x_train[..., tf.newaxis] #turns all columns in each row to be its own array. the '...' grabs all values. makes it easier for model to compute  
x_test = x_test[..., tf.newaxis] 

In [None]:
x_train[0] 

In [12]:
#take the training data, slice them up, shuffle them 10,000 at a time (helps to eliminate false positives) and have them as input batches of 32 
train_data = tf.data.Dataset.from_tensor_slices((x_train, y_train)).shuffle(10000).batch(32)

#divide test data into batches as well  
test_data = tf.data.Dataset.from_tensor_slices((x_test, y_test)).batch(32)

## Part 6 - Training 
- Write a train loop 
- Train and evaluate the model 

In [13]:
epochs = 5 
for epoch in range(epochs):
    
    #run train step on train data
    for train_inputs, train_labels in train_data:
        train_step(train_inputs, train_labels)
        
    #run test step on test data 
    for test_inputs, test_labels in test_data:
        test_step(test_inputs, test_labels)
        
    #print results for each epoch 
    template = 'Epochs: {}, Train loss: {}, Train accuracy: {}, Test loss: {} Test accuracy: {}'
    print(template.format(
        epoch + 1, 
        train_loss.result(), 
        train_accuracy.result(), 
        test_loss.result(), 
        test_accuracy.result()
    ))
    
    #reset training and test loss and accuracy bc we don't want the previous states to skew results
    train_loss.reset_states()
    train_accuracy.reset_states()
    test_loss.reset_states()
    test_accuracy.reset_states()
    



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

Epochs: 1, Train loss: 0.13236308097839355, Train accuracy: 0.9598666429519653, Test loss: 0.05895889922976494 Test accuracy: 0.9811000227928162
Epochs: 2, Train loss: 0.04054347053170204, Train accuracy: 0.9876999855041504, Test loss: 0.05095290020108223 Test accuracy: 0.9828000068664551
Epochs: 3, Train loss: 0.021493345499038696, Train accuracy: 0.9929999709129333, Test loss: 0.058491699397563934 Test accuracy: 0.9818000197410583
Epochs: 4, Train loss: 0.013767946511507034, Train accuracy: 0.9955333471298218, Test loss: 0.05577279254794121 Test accuracy: 0.9843000173568726
Epochs: 5, Train loss: 0.01016322337090969, Train accuracy: 0.9965000152587891, Test loss: 0.05706515908241272 Test 

### Part 7 - Analyze results
We see that after five epochs, our accuracy is 99.7%, which is pretty good. Test loss increases a bit, which is not a big concern. Our accuracy also increased, from 98.2% to 98.4%, which indicate that our model was successful in its training.