# **Simple Neural Network**

In [4]:
 
import os
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"  # for ignoring information messages from tensorflow
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers   # add hidden layers
from tensorflow.keras.datasets import mnist  # inbuilt datasets
import warnings
warnings.filterwarnings('ignore')

# Ask TensorFlow to grow GPU memory on demand instead of reserving it all up
# front. The list is empty on CPU-only machines, so iterate over it rather
# than indexing [0] (which would raise IndexError). Memory growth must be
# configured identically on every visible GPU, hence the loop over all of them.
physical_devices = tf.config.list_physical_devices('GPU')
for gpu in physical_devices:
    tf.config.experimental.set_memory_growth(gpu, True)

# Load MNIST, which ships already split into training and test sets.
(x_train, y_train), (x_test, y_test) = mnist.load_data()
# print(x_train.shape)  -> (60000, 28, 28)
# print(y_train.shape)  -> (60000,)

# Dense layers take one flat feature vector per sample, so for both splits:
# 1. reshape(-1, 28 * 28) flattens every 28x28 image into 784 values; the -1
#    lets NumPy infer the number of samples (60000 for the training set).
# 2. The integer pixel values are cast to float32 and divided by 255.0 so the
#    inputs are real numbers scaled to the range [0, 1].
# NOTE: the divisor must be the real number 255.0 for BOTH splits; a stray
# "j" suffix (255.0j) is a complex literal and would turn the data complex.
x_train = x_train.reshape(-1, 28 * 28).astype("float32") / 255.0
x_test = x_test.reshape(-1, 28 * 28).astype("float32") / 255.0
print(x_train.shape)
print(x_test.shape)

# Sequential API (very convenient, not very flexible).
# The model is described as a plain list of layers applied in order:
#   1. input of 784 features (a flattened 28x28 image)
#   2. first fully connected layer, 512 units, ReLU activation
#   3. second fully connected layer, 256 units, ReLU activation
#   4. output layer, 10 units (one per digit class), no activation
model = keras.Sequential(
    [
        # shape must be a tuple: (28 * 28) without the trailing comma is just
        # the integer 784, which newer Keras versions reject as a shape.
        keras.Input(shape=(28 * 28,)),
        layers.Dense(512, activation="relu"),
        layers.Dense(256, activation="relu"),
        layers.Dense(10),
    ]
)
# summary() prints the table itself and returns None, so it is not wrapped in
# print() (that would emit an extra "None" line).
model.summary()

# Same architecture, built incrementally with add(). Adding layers one at a
# time makes it easy to inspect the model between steps; this rebinds `model`.
model = keras.Sequential()
# shape is a 1-tuple: (784) without the trailing comma is just the int 784.
model.add(keras.Input(shape=(784,)))
model.add(layers.Dense(512, activation="relu"))
model.add(layers.Dense(256, activation="relu", name="my_layer"))
model.add(layers.Dense(10))

# Functional API (a bit more flexible).
# Each layer is called on the previous tensor, and the model is defined by its
# input and output tensors. This rebinds `model`; it is the one that gets
# compiled and trained below.
# shape is a 1-tuple: (784) without the trailing comma is just the int 784.
inputs = keras.Input(shape=(784,))
x = layers.Dense(512, activation="relu", name="first_layer")(inputs)
x = layers.Dense(256, activation="relu", name="second_layer")(x)
# Softmax turns the 10 raw scores into class probabilities.
outputs = layers.Dense(10, activation="softmax")(x)
model = keras.Model(inputs=inputs, outputs=outputs)

model.compile(
    # Tell Keras how to configure the training of the network.
    # 1. Loss: SparseCategoricalCrossentropy (labels are integer class ids).
    #    from_logits=False because the functional model's output layer already
    #    applies softmax, so the loss receives probabilities, not raw logits.
    # 2. Optimizer: Adam with a learning rate of 0.001. The keyword is
    #    `learning_rate`; the old `lr` alias is deprecated and rejected by
    #    newer Keras versions.
    # 3. Metric: accuracy, reported during training and evaluation.
    loss=keras.losses.SparseCategoricalCrossentropy(from_logits=False),
    optimizer=keras.optimizers.Adam(learning_rate=0.001),
    metrics=["accuracy"],
)

# Train for 5 epochs in mini-batches of 32; verbose=2 logs one line per epoch.
model.fit(
    x=x_train,
    y=y_train,
    batch_size=32,
    epochs=5,
    verbose=2,
)
# Measure loss and accuracy on the held-out test split.
model.evaluate(
    x=x_test,
    y=y_test,
    batch_size=32,
    verbose=2,
)

# Suggestions to improve further:
* Try and see what accuracy you can get by increasing the model size, training for longer, et cetera. You should be able to get over 98.2% on the test set!
* Try using different optimizers than Adam, for example gradient descent with momentum, Adagrad, and RMSprop.
* Is there any difference if you remove the normalization of the data?