In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import tensorflow as tf
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.datasets.mnist import load_data
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Flatten, Dense, Dropout

import plotly.express as px
from plotly.subplots import make_subplots
import plotly.graph_objects as go

# Configure numpy printing: 8 decimals, no scientific notation, 120-character lines
np.set_printoptions(linewidth=120, suppress=True, precision=8)
print(tf.__version__)

In [None]:
# print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))

In [None]:
# tf.debugging.set_log_device_placement(True)

# # Create some tensors
# a = tf.constant([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]])
# b = tf.constant([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]])
# c = tf.matmul(a, b)

# print(c)

In [None]:
# Load the MNIST train/test split and report the shape of each array
(X_train, y_train), (X_test, y_test) = load_data()

print(
    f'X_train shape: {X_train.shape}',
    f'y_train shape: {y_train.shape}',
    f'X_test shape: {X_test.shape}',
    f'y_test shape: {y_test.shape}',
    sep='\n',
)

In [None]:
# Show the pixel matrix of the training image at index 1
second_image = X_train[1]
print(second_image)

In [None]:
def _scale_to_unit_range(images):
    """Return ``images`` divided by 255, unless they have already been scaled.

    ``load_data()`` is documented to return integer (uint8) pixel arrays, while the
    result of the division is floating point. Using the dtype as the guard makes this
    cell idempotent: re-running it no longer divides the data by 255 a second time.

    Parameters
    ----------
    images : numpy.ndarray
        Pixel array, either still integer-typed (raw) or already floating point.

    Returns
    -------
    numpy.ndarray
        ``images / 255`` for integer input, otherwise ``images`` unchanged.
    """
    if np.issubdtype(images.dtype, np.integer):
        return images / 255
    return images


X_train = _scale_to_unit_range(X_train)
X_test = _scale_to_unit_range(X_test)

In [None]:
# Draw the training image at index 1; 'gray_r' renders high values dark on a light background
digit_image = X_train[1]
plt.imshow(digit_image, cmap='gray_r')
# plt.axis('off')

In [None]:
# for i in range(1,11):
#     plt.subplot(1,10,i)
#     plt.axis('off')
#     plt.imshow(X_train[i-1], cmap='gray_r')
#     plt.title(y_train[i-1], color='black', fontsize=10)
# plt.show()

In [None]:
# OUR MODELS

# SMALL MODEL: a single hidden layer with 8 units
model_small = Sequential([
    Flatten(input_shape=(28, 28)),          # 28x28 image -> flat vector
    Dense(units=8, activation='relu'),
    Dropout(0.2),                           # randomly zero 20% of the hidden activations while training
    Dense(units=10, activation='softmax'),  # output layer (10 units)
])

model_small.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

model_small.summary()



In [None]:
# Train the small model; 20% of the training data is used for validation,
# and verbose=0 silences the per-epoch log
history_small = model_small.fit(
    X_train,
    y_train,
    batch_size=50,
    epochs=30,
    validation_split=0.2,
    verbose=0,
)

In [None]:
# Per-epoch metrics recorded while fitting the small model, as a table
metrics_small = pd.DataFrame(data=history_small.history)
metrics_small.head()

In [None]:
# OUR MODELS

# BASIC MODEL: a single hidden layer with 128 units
model_basic = Sequential([
    Flatten(input_shape=(28, 28)),          # 28x28 image -> flat vector
    Dense(units=128, activation='relu'),
    Dropout(0.2),                           # randomly zero 20% of the hidden activations while training
    Dense(units=10, activation='softmax'),  # output layer (10 units)
])

model_basic.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

model_basic.summary()

In [None]:
# Train the basic model; 20% of the training data is used for validation
history_basic = model_basic.fit(
    X_train,
    y_train,
    batch_size=50,
    epochs=30,
    validation_split=0.2,
)

In [None]:
# Per-epoch metrics recorded while fitting the basic model, as a table
metrics_basic = pd.DataFrame(data=history_basic.history)
metrics_basic.head()

In [None]:
# loss_test_basic, accuracy_test_basic =  model_basic.evaluate(X_test, y_test)

In [None]:
# OUR MODELS

# BIG MODEL: two hidden layers with 256 and 512 units
model_big = Sequential([
    Flatten(input_shape=(28, 28)),          # 28x28 image -> flat vector
    Dense(units=256, activation='relu'),
    Dense(units=512, activation='relu'),
    Dropout(0.2),                           # regularisation: randomly zero 20% of the activations while training
    Dense(units=10, activation='softmax'),  # output layer (10 units)
])

model_big.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

model_big.summary()


In [None]:
# Train the big model; 20% of the training data is used for validation
history_big = model_big.fit(
    X_train,
    y_train,
    batch_size=50,
    epochs=30,
    validation_split=0.2,
)

In [None]:
# Per-epoch metrics recorded while fitting the big model, as a table
metrics_big = pd.DataFrame(data=history_big.history)
metrics_big.head()

In [None]:
## Loss and accuracy curves (training and validation) of the three models.
## Looking at the accuracy, the big model does only about as well as the basic one,
## which means that we don't need that kind of complex architecture.
## The loss and accuracy of the small model may be a little too weak for us.
## The big model overfits very quickly.

## SMALL MODEL - one hidden layer with 8 units
## BASIC MODEL - one hidden layer with 128 units
## BIG MODEL - two hidden layers with 256 and 512 units

fig = make_subplots(rows=2, cols=1)

# History column -> subplot row: losses in the top panel, accuracies in the bottom one.
# The column order (loss, accuracy, val_loss, val_accuracy) is kept so the traces are
# added in the same sequence as before and keep their colours and legend order.
metric_rows = (('loss', 1), ('accuracy', 2), ('val_loss', 1), ('val_accuracy', 2))
model_metrics = (('small', metrics_small), ('basic', metrics_basic), ('big', metrics_big))

for model_label, metrics in model_metrics:
    for metric_name, subplot_row in metric_rows:
        fig.add_trace(
            go.Scatter(y=metrics[metric_name], name=f'{metric_name}_{model_label}'),
            row=subplot_row,
            col=1,
        )

# Label the axes so the figure can be read on its own (x is the 0-based epoch index)
fig.update_yaxes(title_text='loss', row=1, col=1)
fig.update_yaxes(title_text='accuracy', row=2, col=1)
fig.update_xaxes(title_text='epoch', row=2, col=1)

# Last expression of the cell: returns the figure so the notebook renders it
fig.update_layout(width=1000)

In [None]:
## We can try to find "the best models" using checkpointing and early stopping, to see
## how many epochs are enough to get the best version of the basic and the big model.
## val_accuracy is monitored, training stops after 5 epochs without improvement,
## and the best weights are saved to a file.
## NOTE(review): model_basic has already been trained in an earlier cell, so the fit that
## uses these callbacks continues from the trained weights instead of starting from
## scratch - confirm this is intended.

# basic model
filepath = 'best_basic_model_weights.hdf5'
# save_weights_only=True: in this notebook the file is read back with load_weights(), so
# store just the weights rather than the full model
checkpoint = ModelCheckpoint(
    filepath=filepath,
    monitor='val_accuracy',
    mode='max',
    save_best_only=True,
    save_weights_only=True,
    verbose=1,
)
es = EarlyStopping(monitor='val_accuracy', mode='max', patience=5, verbose=1)

In [None]:
# Fit the basic model again, this time checkpointing the best epoch and stopping early
history__best_basic = model_basic.fit(
    X_train,
    y_train,
    batch_size=50,
    epochs=30,
    validation_split=0.2,
    callbacks=[checkpoint, es],
)

In [None]:
# BEST BASIC MODEL - same layers as the basic model, weights loaded from the checkpoint file

best_model_basic = Sequential([
    Flatten(input_shape=(28, 28)),          # 28x28 image -> flat vector
    Dense(units=128, activation='relu'),
    Dropout(0.2),                           # randomly zero 20% of the hidden activations while training
    Dense(units=10, activation='softmax'),  # output layer (10 units)
])

best_model_basic.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
best_model_basic.load_weights('best_basic_model_weights.hdf5')

best_model_basic.summary()
# Persist the restored model (architecture + weights) to its own file
best_model_basic.save('my_best_basic_model.h5')

In [None]:
# Test-set loss and accuracy of the basic model in its current state
loss_basic, accuracy_basic = model_basic.evaluate(x=X_test, y=y_test)

In [None]:
# NOTE(review): an accuracy of about 0.0983 was observed here, i.e. roughly chance level
# for 10 classes - TODO investigate (e.g. confirm that the weights were loaded before this
# cell ran and that X_test was divided by 255 exactly once).

loss_best_model_basic, accuracy_best_model_basic = best_model_basic.evaluate(x=X_test, y=y_test)

In [None]:
# big model

# Dedicated names are used here instead of rebinding filepath/checkpoint/es from the
# basic-model cell: with shared names, re-running the basic-model fit after this cell
# would write the basic model's checkpoint into the big model's file.
filepath_big = 'best_big_model_weights.hdf5'

# save_weights_only=True: in this notebook the file is read back with load_weights(), so
# store just the weights rather than the full model
checkpoint_big = ModelCheckpoint(
    filepath=filepath_big,
    monitor='val_accuracy',
    mode='max',
    save_best_only=True,
    save_weights_only=True,
    verbose=1,
)
es_big = EarlyStopping(monitor='val_accuracy', mode='max', patience=5, verbose=1)

# NOTE(review): model_big has already been trained in an earlier cell, so this fit
# continues from the trained weights - confirm this is intended.
history__best_big = model_big.fit(
    X_train,
    y_train,
    batch_size=50,
    epochs=30,
    validation_split=0.2,
    callbacks=[checkpoint_big, es_big],
)

In [None]:
# BEST BIG MODEL - same layers as the big model, weights loaded from the checkpoint file

best_model_big = Sequential([
    Flatten(input_shape=(28, 28)),          # 28x28 image -> flat vector
    Dense(units=256, activation='relu'),
    Dense(units=512, activation='relu'),
    Dropout(0.2),                           # regularisation: randomly zero 20% of the activations while training
    Dense(units=10, activation='softmax'),  # output layer (10 units)
])

best_model_big.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
best_model_big.load_weights('best_big_model_weights.hdf5')

# Persist the restored model (architecture + weights) to its own file, then describe it
best_model_big.save('my_best_big_model.h5')
best_model_big.summary()