<a href="https://colab.research.google.com/github/kjmobile/lb/blob/main/14_Neural_Network_2_Q.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Neural Network 2: Deep Learning

In [None]:
# Import TensorFlow and make the notebook reproducible
import tensorflow as tf

# Seed the random number generators (Python, NumPy and TensorFlow) with 42
tf.keras.utils.set_random_seed(42)
# Ask TensorFlow to run its ops deterministically so repeated runs give the same numbers
tf.config.experimental.enable_op_determinism()

In [None]:
from tensorflow import keras

# Load Fashion MNIST as (training images, training labels) and (test images, test labels)
(train_input, train_target), (test_input, test_target) = keras.datasets.fashion_mnist.load_data()

In [None]:
# Print the raw pixel values of the first training image
print(train_input[0])

In [None]:
# Same image, shown through the notebook's cell output instead of print()
train_input[0]

In [None]:
import matplotlib.pyplot as plt

# Show the first 30 training images on a 3 x 10 grid, filled row by row
fig, ax = plt.subplots(3, 10, figsize=(10, 3))

# ax.flat walks the grid in row-major order, matching (i // 10, i % 10)
for panel, image in zip(ax.flat, train_input[:30]):
    panel.imshow(image, cmap='gray_r')
    panel.axis('off')
plt.show()

### Preprocessing: normalization and splitting off a validation set

In [None]:
from sklearn.model_selection import train_test_split

# Scale the pixel values by 255.0 and flatten each 28x28 image into a 784-element row.
# The reshape() can be replaced with a keras.layers.Flatten layer, as shown further below.
train_scaled = (train_input / 255.0).reshape(-1, 28 * 28)

# Hold out 20% of the training data as a validation set (fixed random_state for repeatability)
train_scaled, val_scaled, train_target, val_target = train_test_split(
    train_scaled,
    train_target,
    test_size=0.2,
    random_state=42,
)

## Two layers

## Design a Deep Neural Network

In [None]:
# Hidden layer: 100 sigmoid units fed by the 784 flattened pixel values
dense1 = keras.layers.Dense(100, activation='sigmoid', input_shape=(784,))
# Output layer: 10 softmax units, one per class
dense2 = keras.layers.Dense(10, activation='softmax')

In [None]:
# Stack the layers in order: the input goes through dense1, then dense2
model = keras.Sequential([dense1, dense2])

In [None]:
model.summary()

# The model has two dense layers: one hidden layer and one output layer. You may experiment by adding more hidden layers.
# The number of units ('nodes' or 'neurons') in the first dense layer is set to 100.
# A rule of thumb is that a hidden layer should have more units than the output layer.
# The second layer has 10 units with softmax activation because this is a 10-class classification task.
# Param # 78500 <= 784 (inputs) * 100 (units) + 100 (bias terms)
# Then why 1010?
# Why does the output shape show None for the sample count? The batch dimension is left flexible in the model: fit() trains on mini-batches (32 samples per batch by default), and the model can accept any batch size.

## cf. Alternative syntax equivalent to the above

In [None]:
# Build the same two-layer network in a single expression, giving the layers and the
# model explicit names so they are easier to identify in the summary
model_1 = keras.Sequential([
    keras.layers.Dense(100, activation='sigmoid', input_shape=(784,), name='hidden'),
    keras.layers.Dense(10, activation='softmax', name='output')
], name='Fashion MNIST Model')

model_1.summary()

In [None]:
# Another equivalent syntax: start from an empty Sequential model and add layers one at a time
model = keras.Sequential()
model.add(keras.layers.Dense(100, activation='sigmoid', input_shape=(784,)))
model.add(keras.layers.Dense(10, activation='softmax'))


In [None]:
# Print the layers, output shapes and parameter counts of the current model
model.summary()

In [None]:
import numpy as np
# Count how many training samples carry each class label
np.unique(train_target, return_counts=True)

In [None]:
# The loss is 'sparse_categorical_crossentropy' because the targets are integer class
# labels rather than one-hot vectors; Keras uses the integer labels directly, so no
# one-hot encoding step is needed.
# No optimizer is given, so the default is used: RMSprop (Root Mean Square Propagation)
# with a learning rate of 0.001.
# metrics is passed as a list: Keras 3 (TensorFlow 2.16+) raises a ValueError for a bare string.
model.compile(loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model.fit(train_scaled, train_target, epochs=10)

# How many times does backpropagation occur in this model setting?

In [None]:
# Measure loss and accuracy on the held-out validation set
model.evaluate(val_scaled, val_target)

## Using ReLU, instead of sigmoid, as the activation function

In [None]:
model = keras.Sequential()
model.add(keras.layers.Flatten(input_shape=(28, 28)))
model.add(keras.layers.Dense(100, activation='relu'))
model.add(keras.layers.Dense(10, activation='softmax'))

# ReLU is favored over sigmoid in image classification models because it is computationally
# cheaper (a simpler calculation) and because it mitigates the vanishing gradient problem,
# so gradients remain large enough to be effective during backpropagation.

# Here we use Keras's Flatten layer to reshape the input data instead of calling train_scaled.reshape(-1, 28*28) as shown above.
# Since the Flatten layer only reshapes the input and does not contribute to learning, the model is still a neural net of depth 2, not 3.


In [None]:
# Print the layers, output shapes and parameter counts of the current model
model.summary()

In [None]:
# Reload the data and redo the split: the earlier preprocessing overwrote train_target with
# its split subset and flattened train_scaled into rows of 784 values.
(train_input, train_target), (test_input, test_target) = keras.datasets.fashion_mnist.load_data()

# Scale only; no reshape() here because the model's Flatten layer takes the 28x28 images
train_scaled = train_input / 255.0

# Same split settings as before: 20% validation, random_state=42
train_scaled, val_scaled, train_target, val_target = train_test_split(
    train_scaled, train_target, test_size=0.2, random_state=42)

In [None]:
# Compile with the default optimizer and train for 10 epochs.
# metrics is passed as a list: Keras 3 (TensorFlow 2.16+) raises a ValueError for a bare string.
model.compile(loss='sparse_categorical_crossentropy', metrics=['accuracy'])

model.fit(train_scaled, train_target, epochs=10)

In [None]:
# Measure loss and accuracy on the held-out validation set
model.evaluate(val_scaled, val_target)

## Optimizers: see the slides to compare them

In [None]:
# optimizer='sgd' is shorthand for passing a keras.optimizers.SGD() instance created with
# its default arguments; the two forms are equivalent.
# metrics is passed as a list: Keras 3 (TensorFlow 2.16+) raises a ValueError for a bare string.
model.compile(optimizer='sgd', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

In [None]:
# Explicit form: build the SGD optimizer object with default arguments and pass it to compile().
# metrics is passed as a list: Keras 3 (TensorFlow 2.16+) raises a ValueError for a bare string.
sgd = keras.optimizers.SGD()
model.compile(optimizer=sgd, loss='sparse_categorical_crossentropy', metrics=['accuracy'])

In [None]:
# To change the default learning rate (0.01), explicitly instantiate an SGD object and pass the relevant argument.
sgd = keras.optimizers.SGD(learning_rate=0.1)

In [None]:
sgd = keras.optimizers.SGD(momentum=0.9, nesterov=True)
# momentum=0.9 turns plain SGD into momentum optimization; nesterov=True then makes it Nesterov momentum optimization.
# In most cases, Nesterov momentum performs better than the default SGD.

In [None]:
# Adagrad optimizer with its default arguments.
# metrics is passed as a list: Keras 3 (TensorFlow 2.16+) raises a ValueError for a bare string.
adagrad = keras.optimizers.Adagrad()
model.compile(optimizer=adagrad, loss='sparse_categorical_crossentropy', metrics=['accuracy'])

In [None]:
# RMSprop optimizer with its default arguments.
# metrics is passed as a list: Keras 3 (TensorFlow 2.16+) raises a ValueError for a bare string.
rmsprop = keras.optimizers.RMSprop()
model.compile(optimizer=rmsprop, loss='sparse_categorical_crossentropy', metrics=['accuracy'])

In [None]:
# Build a fresh model so that the next training run starts from newly created layers
# rather than from the weights already trained above
model = keras.Sequential()
model.add(keras.layers.Flatten(input_shape=(28, 28)))
model.add(keras.layers.Dense(100, activation='relu'))
model.add(keras.layers.Dense(10, activation='softmax'))

In [None]:
# Train with the Adam optimizer (selected by name, default arguments) for 10 epochs.
# metrics is passed as a list: Keras 3 (TensorFlow 2.16+) raises a ValueError for a bare string.
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

model.fit(train_scaled, train_target, epochs=10)

In [None]:
# Measure loss and accuracy on the held-out validation set
model.evaluate(val_scaled, val_target)

#### In this model, what are the hyperparameters that a human researcher must determine?
- Number of Hidden Layers
- Number of neurons (units) in each hidden layer
- Choice of Activation Function
- Mini-batch size (32 by default in `fit()`, regardless of which optimizer is chosen)