# Part 1: Underfitting and Overfitting in a classification task 

## Setup

In [2]:
import tensorflow as tf
import numpy as np 
from tensorflow import keras
from tensorflow.keras import layers

## Prepare the data 


In [22]:
# MNIST: 60,000 training images and 10,000 test images, 28x28 grayscale, of the 10 digits.

# Number of target classes (digits 0-9). It is defined in this cell because the
# one-hot encoding below and the output layers of later models refer to it;
# without this assignment a fresh kernel fails with a NameError.
num_classes = 10

# The data, split between train and test sets
(x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()

# Scale images to the [0, 1] range
x_train = x_train.astype("float32") / 255
x_test = x_test.astype("float32") / 255

print('Input data (images) shape as imported from the MNIST dataset ', x_train[0].shape)

# Append a trailing channel axis so every image has shape (28, 28, 1)
x_train = np.expand_dims(x_train, -1)
x_test = np.expand_dims(x_test, -1)

print("Input data (images) after adding extra dimension to indicate the number of channels:", x_train[0].shape)

print(x_train.shape[0], "train samples")
print(x_test.shape[0], "test samples")

# Show the first training label while it is still a plain integer (0 to 9)
print('Randomly picked label (from the training set) before one hot encoding: ', y_train[0])

# Convert the integer class labels to one-hot (binary) vectors of length num_classes
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

# Show the same label again, now as a one-hot vector
print('Same randomly picked label (from the training set) after one hot encoding: ',y_train[0])

Input data (images) shape as imported from the MNIST dataset  (28, 28)
Input data (images) after adding extra dimension to indicate the number of channels: (28, 28, 1)
60000 train samples
10000 test samples
Randomly picked label (from the training set) before one hot encoding:  5
Same randomly picked label (from the training set) after one hot encoding:  [0. 0. 0. 0. 0. 1. 0. 0. 0. 0.]


![](imgs/one_hot.png)

# Added for image data visualization

In [31]:
import matplotlib.pyplot as plt #
%matplotlib notebook

n=-1 #visualize random image in the training set. So 'n' goes from 0 to 59999
plt.imshow(x_train[n,:,:,0]) #* default colormap 'viridis'. Check cmap="" options. grayscale= gray

<IPython.core.display.Javascript object>

<matplotlib.image.AxesImage at 0x18e02375c48>

# Visualization of the architecture

![](imgs/cnn.png)


## Valid padding:
![SegmentLocal](imgs/valid.gif "segment")

## Same padding:
![SegmentLocal](imgs/Same_padding.gif "segment")


# Build the model

In [36]:
# Baseline CNN classifier, assembled layer by layer:
# two (5x5 convolution + ReLU -> 2x2 max pooling) stages, then flatten and a
# 10-way softmax output.
model = keras.Sequential()
model.add(keras.Input(shape=(28, 28, 1)))
model.add(layers.Conv2D(32, kernel_size=(5, 5), activation="relu"))
model.add(layers.MaxPooling2D(pool_size=(2, 2)))  # downsamples along the spatial dimensions
model.add(layers.Conv2D(64, kernel_size=(5, 5), activation="relu"))
model.add(layers.MaxPooling2D(pool_size=(2, 2)))
model.add(layers.Flatten())
model.add(layers.Dense(10, activation="softmax"))  # one probability per digit class

model.summary()

# Trainable parameters per layer (weights + biases), as reported by summary():
#   first Conv2D :   832 = 5x5x1x32  + 32
#   second Conv2D: 51264 = 5x5x32x64 + 64
#   Dense        : 10250 = 1024x10   + 10

Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_6 (Conv2D)           (None, 24, 24, 32)        832       
                                                                 
 max_pooling2d_6 (MaxPooling  (None, 12, 12, 32)       0         
 2D)                                                             
                                                                 
 conv2d_7 (Conv2D)           (None, 8, 8, 64)          51264     
                                                                 
 max_pooling2d_7 (MaxPooling  (None, 4, 4, 64)         0         
 2D)                                                             
                                                                 
 flatten_3 (Flatten)         (None, 1024)              0         
                                                                 
 dense_3 (Dense)             (None, 10)               

## Softmax activation 
<img src="imgs/Softmax.png">


<img src="imgs/cce_loss.png">


##  Categorical cross entropy 
<img src="imgs/CategoricalCrossentropy.png">



# Compile the model

In [None]:
# Configure training: Adam optimizer, categorical cross-entropy loss (the labels
# are one-hot encoded) and accuracy reported as an additional metric.
model.compile(
    optimizer="adam",
    loss="categorical_crossentropy",
    metrics=["accuracy"],
)


# Training the model

In [37]:

# Train for 15 epochs with mini-batches of 128 samples; 20% of the training
# arrays are held out for validation. The returned History object keeps the
# per-epoch metrics that the plotting cells read later.
history = model.fit(
    x_train,
    y_train,
    batch_size=128,
    epochs=15,
    validation_split=0.2,
)

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


# Evaluate the model

In [9]:

# Evaluate on the held-out test set. With one metric compiled in, evaluate()
# returns [loss, accuracy].
score = model.evaluate(x_test, y_test, verbose=0)
test_loss, test_accuracy = score
print("Test loss:", test_loss)
print("Test accuracy:", test_accuracy)

Test loss: 0.019479156361361674
Test accuracy: 0.9931


# Visualization of the loss evolution during training

In [40]:
# 
# Plot training vs. validation loss per epoch on a dedicated figure, so the
# curves never land on whatever figure happened to be active before this cell.
fig, ax = plt.subplots()
ax.plot(history.history['loss'], label='Training set loss')
ax.plot(history.history['val_loss'], label='Validation set loss')
ax.set_title('Model loss evolution')
ax.set_ylabel('Loss')
ax.set_xlabel('Epoch')
ax.legend(loc='upper left')

# Save through the figure handle BEFORE plt.show(): on backends where show()
# releases the figure, a later plt.savefig() writes an empty image.
fig.savefig('Loss vs Epochs Plot.png')
plt.show()

<IPython.core.display.Javascript object>

#  Visualization of the classification accuracy during training

In [41]:
# List the metric names recorded in the training history
print(history.history.keys())

# Plot training vs. validation accuracy per epoch on a dedicated figure, so the
# curves never land on whatever figure happened to be active before this cell.
fig, ax = plt.subplots()
ax.plot(history.history['accuracy'], label='Training accuracy')
ax.plot(history.history['val_accuracy'], label='Validation accuracy')
ax.set_title('Model accuracy')
ax.set_ylabel('Accuracy')
ax.set_xlabel('Epoch')
ax.legend(loc='upper left')

# Save through the figure handle BEFORE plt.show(): on backends where show()
# releases the figure, a later plt.savefig() writes an empty image.
fig.savefig('Accuracy vs Epochs Plot.png')
plt.show()

dict_keys(['loss', 'accuracy', 'val_loss', 'val_accuracy'])


<IPython.core.display.Javascript object>

# Save and load weights 

In [42]:
# Write the weights of the freshly trained model to a file (relative path, so
# it lands in the current working directory) and read them straight back.
weights_path = 'Example2.h5'
model.save_weights(weights_path)
model.load_weights(weights_path)

# Visualize individual predictions

In [57]:
# Inspect a single test sample: show the image and compare its one-hot label
# with the class probabilities predicted by the baseline model.
index = 10  # any index into the test set (0 to 9999)

# Predictions are computed for the whole test set; only row `index` is shown.
y_pred = model.predict(x_test, verbose=0)
y_test_i = y_test[index]
y_pred_i = y_pred[index]

plt.imshow(x_test[index, :, :, 0])
print("\nGround-truth: ", y_test_i)
print("\n   Predicted: ", np.round(y_pred_i, decimals=3))


<IPython.core.display.Javascript object>


Ground-truth:  [1. 0. 0. 0. 0. 0. 0. 0. 0. 0.]

   Predicted:  [1. 0. 0. 0. 0. 0. 0. 0. 0. 0.]


# Induce underfitting 


In [52]:
# Deliberately tiny CNN used to provoke underfitting: same layout as the
# baseline model, but each convolution has a single filter.
model2 = keras.Sequential()
model2.add(keras.Input(shape=(28, 28, 1)))
model2.add(layers.Conv2D(1, kernel_size=(5, 5), activation="relu"))
model2.add(layers.MaxPooling2D(pool_size=(2, 2)))
model2.add(layers.Conv2D(1, kernel_size=(5, 5), activation="relu"))
model2.add(layers.MaxPooling2D(pool_size=(2, 2)))
model2.add(layers.Flatten())
model2.add(layers.Dense(num_classes, activation="softmax"))

model2.summary()

Model: "sequential_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_8 (Conv2D)           (None, 24, 24, 1)         26        
                                                                 
 max_pooling2d_8 (MaxPooling  (None, 12, 12, 1)        0         
 2D)                                                             
                                                                 
 conv2d_9 (Conv2D)           (None, 8, 8, 1)           26        
                                                                 
 max_pooling2d_9 (MaxPooling  (None, 4, 4, 1)          0         
 2D)                                                             
                                                                 
 flatten_4 (Flatten)         (None, 16)                0         
                                                                 
 dense_4 (Dense)             (None, 10)               

In [53]:

# Same training configuration as the baseline model, so the loss curves of the
# two runs can be compared directly.
model2.compile(
    optimizer="adam",
    loss="categorical_crossentropy",
    metrics=["accuracy"],
)

# 15 epochs, mini-batches of 128, 20% of the training arrays held out for validation
history2 = model2.fit(
    x_train,
    y_train,
    batch_size=128,
    epochs=15,
    validation_split=0.2,
)


Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


In [54]:

# Overlay training/validation loss of the baseline run (history) and of the
# reduced model (history2) to assess underfitting. Curves are drawn in the
# order expected by the legend: original train, original val, new train, new val.
for run in (history, history2):
    for key in ('loss', 'val_loss'):
        plt.plot(run.history[key])

plt.title('Model Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend(['Training_original', 'Val_original','Training_new', 'Val_new'], loc='upper left')
plt.ylim(0,1)  # fixed y-range from 0 to 1
plt.show()
#

<IPython.core.display.Javascript object>

In [58]:
# Inspect a single test sample: show the image and compare its one-hot label
# with the class probabilities predicted by the reduced model (model2).
index = 10  # any index into the test set (0 to 9999)

# Predictions are computed for the whole test set; only row `index` is shown.
y_pred = model2.predict(x_test, verbose=0)
y_test_i = y_test[index]
y_pred_i = y_pred[index]

plt.imshow(x_test[index, :, :, 0])
print("\nGround-truth: ", y_test_i)
print("\n   Predicted: ", np.round(y_pred_i, decimals=3))



<IPython.core.display.Javascript object>


Ground-truth:  [1. 0. 0. 0. 0. 0. 0. 0. 0. 0.]

   Predicted:  [0.98  0.    0.006 0.    0.    0.014 0.    0.    0.    0.   ]


# Induce overfitting


In [64]:
# Same architecture as the baseline model (32 and 64 filters). Overfitting is
# provoked later by how this model is trained, not by its layers.
model3 = keras.Sequential()
model3.add(keras.Input(shape=(28, 28, 1)))
model3.add(layers.Conv2D(32, kernel_size=(5, 5), activation="relu"))
model3.add(layers.MaxPooling2D(pool_size=(2, 2)))
model3.add(layers.Conv2D(64, kernel_size=(5, 5), activation="relu"))
model3.add(layers.MaxPooling2D(pool_size=(2, 2)))
model3.add(layers.Flatten())
model3.add(layers.Dense(num_classes, activation="softmax"))

model3.summary()

Model: "sequential_6"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_12 (Conv2D)          (None, 24, 24, 32)        832       
                                                                 
 max_pooling2d_12 (MaxPoolin  (None, 12, 12, 32)       0         
 g2D)                                                            
                                                                 
 conv2d_13 (Conv2D)          (None, 8, 8, 64)          51264     
                                                                 
 max_pooling2d_13 (MaxPoolin  (None, 4, 4, 64)         0         
 g2D)                                                            
                                                                 
 flatten_6 (Flatten)         (None, 1024)              0         
                                                                 
 dense_6 (Dense)             (None, 10)               

In [65]:

# Same optimizer, loss and metric as the baseline model
model3.compile(
    optimizer="adam",
    loss="categorical_crossentropy",
    metrics=["accuracy"],
)

# validation_split=0.999 holds out 99.9% of the training arrays for validation,
# leaving only about 60 of the 60,000 samples to fit on. Training on so little
# data for 30 epochs is what induces the overfitting.
history3 = model3.fit(
    x_train,
    y_train,
    batch_size=128,
    epochs=30,
    validation_split=0.999,
)


Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


In [66]:
import matplotlib.pyplot as plt 
%matplotlib notebook 

# summarize history for loss in training and validation sets to assess Overfitting-Underfitting
plt.plot(history.history['loss']) #
plt.plot(history.history['val_loss'])
plt.title('Model loss')
plt.ylabel('Loss')
plt.xlabel('Epoch')

plt.plot(history3.history['loss']) #
plt.plot(history3.history['val_loss'])
plt.legend(['Training_original', 'Val_original','Training_new', 'Val_new'], loc='upper left')
plt.show()



<IPython.core.display.Javascript object>

In [67]:
# Inspect a single test sample: show the image and compare its one-hot label
# with the class probabilities predicted by model3.
index = 10  # any valid index into the test set

# Predictions are computed for the whole test set; only row `index` is shown.
y_pred = model3.predict(x_test, verbose=0)
y_test_i = y_test[index]
y_pred_i = y_pred[index]

plt.imshow(x_test[index, :, :, 0])
print("\nGround-truth: ", y_test_i)
print("\n   Predicted: ", np.round(y_pred_i, decimals=3))

<IPython.core.display.Javascript object>


Ground-truth:  [1. 0. 0. 0. 0. 0. 0. 0. 0. 0.]

   Predicted:  [0.99  0.    0.009 0.    0.    0.    0.    0.    0.    0.   ]


# Part 2: Run previous example as a Regression Task
### 1. Do not one-hot encode the labels, instead use the original numeric integer labels
### 2. Replace the output layer of `num_classes` neurons with a single output neuron, and change the activation from softmax to linear
### 3. Use a different Loss: Mean Squared Error (Regression)
<img src="files/imgs/MSE.png">

In [69]:



# Reload MNIST (60,000 training and 10,000 test images, 28x28 grayscale) for the
# regression task. The labels are left as integers; no one-hot encoding here.
# Note: this rebinds x_train, y_train, x_test and y_test from Part 1.
(x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()

# Rescale pixel values to the [0, 1] range
x_train = x_train.astype("float32") / 255
x_test = x_test.astype("float32") / 255

print('Input data (images) shape as imported from the MNIST dataset ', x_train[0].shape)

# Append a trailing channel axis so every image has shape (28, 28, 1)
x_train = np.expand_dims(x_train, axis=-1)
x_test = np.expand_dims(x_test, axis=-1)

print("Input data (images) after adding extra dimension to indicate the number of channels:", x_train[0].shape)

# Report the number of samples in each split
print(x_train.shape[0], "train samples")
print(x_test.shape[0], "test samples")

# Show the first training label to confirm it is a plain integer (0 to 9)
print('Randomly picked label (from the training set) : ',y_train[0])

Input data (images) shape as imported from the MNIST dataset  (28, 28)
Input data (images) after adding extra dimension to indicate the number of channels: (28, 28, 1)
60000 train samples
10000 test samples
Randomly picked label (from the training set) :  5


# Build a model with single output neuron and linear activation

In [71]:
# Regression variant of the baseline CNN: identical convolutional stack, but
# the output is a single neuron with linear activation that predicts the digit
# value directly. In a Sequential model each layer has exactly one input tensor
# and one output tensor.
model4 = keras.Sequential()
model4.add(keras.Input(shape=(28, 28, 1)))
model4.add(layers.Conv2D(32, kernel_size=(5, 5), activation="relu"))
model4.add(layers.MaxPooling2D(pool_size=(2, 2)))
model4.add(layers.Conv2D(64, kernel_size=(5, 5), activation="relu"))
model4.add(layers.MaxPooling2D(pool_size=(2, 2)))
model4.add(layers.Flatten())
model4.add(layers.Dense(1, activation="linear"))

model4.summary()

Model: "sequential_7"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_14 (Conv2D)          (None, 24, 24, 32)        832       
                                                                 
 max_pooling2d_14 (MaxPoolin  (None, 12, 12, 32)       0         
 g2D)                                                            
                                                                 
 conv2d_15 (Conv2D)          (None, 8, 8, 64)          51264     
                                                                 
 max_pooling2d_15 (MaxPoolin  (None, 4, 4, 64)         0         
 g2D)                                                            
                                                                 
 flatten_7 (Flatten)         (None, 1024)              0         
                                                                 
 dense_7 (Dense)             (None, 1)                

# Arguments associated with the selected optimizer

In [73]:
# Adam optimizer with every hyperparameter spelled out, so each one can be
# tuned in place. The learning rate is set to 0.005.
adam_settings = dict(
    learning_rate=0.005,
    beta_1=0.9,
    beta_2=0.999,
    epsilon=1e-07,
    amsgrad=False,
    name="Adam",
)
opt = keras.optimizers.Adam(**adam_settings)

# Regression setup: mean squared error is the loss; no extra metrics are tracked.
model4.compile(optimizer=opt, loss="mean_squared_error")


In [74]:

# Train the regression model against the integer labels: 15 epochs,
# mini-batches of 128, 20% of the training arrays held out for validation.
# NOTE(review): the saved output of this cell lists 30 epochs, so it appears to
# come from an earlier run with a different `epochs` value. Re-run to refresh it.
history4 = model4.fit(
    x_train,
    y_train,
    batch_size=128,
    epochs=15,
    validation_split=0.2,
)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


In [75]:
# Average loss (mean squared error) over the 10,000 test images. No metrics
# were compiled in, so the printed result is a single number.
score = model4.evaluate(
    x_test,
    y_test,
    verbose=0,
)
print("Test loss:", score)

Test loss: 0.530856192111969


In [79]:
# Inspect a single test sample: show the image and compare its integer label
# with the value predicted by the regression model.
index = 0  # any valid index into the test set

# Predict the whole test set; y_pred is reused by the parity plot cell below.
y_pred = model4.predict(x_test, verbose=0)
y_test_i = y_test[index]
y_pred_i = y_pred[index]

plt.imshow(x_test[index, :, :, 0])
print("\nGround-truth: ", y_test_i)
print("\n   Predicted: ", np.round(y_pred_i, decimals=3))


<IPython.core.display.Javascript object>


Ground-truth:  7

   Predicted:  [6.931]


In [85]:
# Parity plot: predicted value against true label for every test sample. The
# second scatter plots the labels against themselves, marking where perfect
# predictions would fall.
plt.title('Parity Plot')
plt.xlabel('Ground-truth data')
plt.ylabel('Predicted data')

plt.scatter(y_test, y_pred, label='CNN-based Model')
plt.scatter(y_test, y_test, label='Ground-truth')

plt.legend()
plt.show()


<IPython.core.display.Javascript object>