In [1]:
# https://github.com/aladdinpersson/Machine-Learning-Collection/blob/master/ML/TensorFlow/Basics/tutorial3-neuralnetwork.py

import os

# TF_CPP_MIN_LOG_LEVEL controls how chatty TensorFlow's native (C++) runtime is
# ("2" hides INFO and WARNING messages). it has to be set BEFORE tensorflow is
# imported: the runtime picks the value up while the package is loading, so
# assigning it after the import is too late to silence the start-up messages.
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"

import tensorflow as tf  # noqa: E402  (must come after the env var above)
from tensorflow import keras  # noqa: E402
from tensorflow.keras import layers  # noqa: E402
from tensorflow.keras.datasets import mnist  # noqa: E402

In [2]:
# mnist.load_data() hands back two (images, labels) pairs:
# the train pair is what the model is fitted on, the test pair is held out
# to measure accuracy afterwards.
# the image arrays are 3-d, (num_images, 28, 28): 60000 training images and
# 10000 test images, each a 28x28 grid of pixels.
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# a dense network takes one flat vector per sample, so every 28x28 grid is
# flattened into a vector of 28 * 28 = 784 values.
# -1 asks numpy to work that dimension out by itself; here it ends up being
# the number of images, so the first dimension is preserved.
# the pixels are cast to float32 first so that the division yields float32 values.
# dividing by 255 normalizes the data: raw pixel values range from 0 to 255,
# so the result lies between 0 and 1.
# both splits go through exactly the same transformation.
x_train, x_test = (
    images.reshape(-1, 28 * 28).astype("float32") / 255.0
    for images in (x_train, x_test)
)

In [3]:
# the sequential api is very convenient, but not very flexible:
# it describes a plain stack of layers, where every layer has exactly
# one input and one output.
model = keras.Sequential(
    [
        # every sample is a flattened 28 * 28 image, i.e. a vector of 784 values.
        # shape has to be a tuple, hence the trailing comma: (28 * 28) without it
        # is just the int 784, which not every keras version accepts as a shape.
        # declaring the input up front lets keras build the model right away,
        # which is what allows model.summary() to be printed below.
        keras.Input(shape=(28 * 28,)),
        # a dense (fully connected) layer is a linear operation on the layer's
        # input vector, followed by the activation function.
        # relu(x) = max(0, x): negative values become 0, positive values pass
        # through unchanged.
        # 512 and 256 are the widths of the hidden layers; they are tunable
        # hyperparameters, not values dictated by the data.
        layers.Dense(512, activation="relu"),
        layers.Dense(256, activation="relu"),
        # 10 units because there are 10 digit classes (0-9) to tell apart.
        # no activation is applied here, so the layer outputs raw scores (logits).
        layers.Dense(10),
    ]
)
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 512)               401920    
                                                                 
 dense_1 (Dense)             (None, 256)               131328    
                                                                 
 dense_2 (Dense)             (None, 10)                2570      
                                                                 
Total params: 535,818
Trainable params: 535,818
Non-trainable params: 0
_________________________________________________________________


In [4]:
# NOTE: demonstration only. this cell builds a separate model (model1) to show
# the functional api; the compile/fit cells work on `model`, not on `model1`,
# so this cell can be skipped.
# the functional api is a bit more flexible than its sequential counterpart:
# layers are called like functions on the output of the previous layer.
# shape has to be a tuple, hence the trailing comma: (784) without it is just
# the int 784, which not every keras version accepts as a shape.
inputs = keras.Input(shape=(784,))
y = layers.Dense(512, activation="relu", name="first_layer")(inputs)
y = layers.Dense(256, activation="relu", name="second_layer")(y)
# because softmax is applied here, this model outputs probabilities rather than
# logits, so a loss used with it should have from_logits set to False.
outputs = layers.Dense(10, activation="softmax")(y)
model1 = keras.Model(inputs=inputs, outputs=outputs)
model1.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_2 (InputLayer)        [(None, 784)]             0         
                                                                 
 first_layer (Dense)         (None, 512)               401920    
                                                                 
 second_layer (Dense)        (None, 256)               131328    
                                                                 
 dense_3 (Dense)             (None, 10)                2570      
                                                                 
Total params: 535,818
Trainable params: 535,818
Non-trainable params: 0
_________________________________________________________________


In [5]:
model.compile(
    # the optimizer is the algorithm that adjusts the weights of the network
    # during training; learning_rate sets how large each adjustment is.
    optimizer=keras.optimizers.Adam(learning_rate=1e-3),
    # the loss function measures how far the predictions are from the actual
    # labels; training tries to make this number small.
    # sparse categorical cross entropy is the variant to use when each label
    # is a single integer class id rather than a one-hot vector.
    # from_logits=True because the last dense layer of `model` has no softmax,
    # so its outputs are raw scores and not probabilities.
    loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    # metrics are reported next to the loss while training and evaluating.
    # see https://www.tensorflow.org/api_docs/python/tf/keras/metrics/Accuracy
    # for more metrics.
    metrics=["accuracy"],
)

In [6]:
# train the model on the training split.
# batch_size: how many samples are processed per training step; the data is
#   fed in small batches instead of all at once.
# epochs: how many full passes over the training data are made.
# verbose=2 prints one summary line per epoch (see the output below);
#   verbose=1 would show a progress bar instead.
model.fit(
    x=x_train,
    y=y_train,
    batch_size=32,
    epochs=5,
    verbose=2,
)
# check how the trained model does on the test split, i.e. on data it has
# never "seen" during training.
# returns [loss, accuracy]; kept as the last expression so the notebook shows it.
model.evaluate(
    x=x_test,
    y=y_test,
    batch_size=32,
    verbose=2,
)

Epoch 1/5
1875/1875 - 13s - loss: 0.1889 - accuracy: 0.9425 - 13s/epoch - 7ms/step
Epoch 2/5
1875/1875 - 12s - loss: 0.0786 - accuracy: 0.9749 - 12s/epoch - 7ms/step
Epoch 3/5
1875/1875 - 12s - loss: 0.0535 - accuracy: 0.9829 - 12s/epoch - 6ms/step
Epoch 4/5
1875/1875 - 11s - loss: 0.0429 - accuracy: 0.9863 - 11s/epoch - 6ms/step
Epoch 5/5
1875/1875 - 9s - loss: 0.0326 - accuracy: 0.9891 - 9s/epoch - 5ms/step
313/313 - 1s - loss: 0.0788 - accuracy: 0.9796 - 1s/epoch - 4ms/step


[0.07875820994377136, 0.9796000123023987]