# Deep Learning: Ex.5 - CIFAR-10 take 2

Submitted by: Noam Bassat 308465434


In [None]:
# TensorFlow and tf.keras
from tensorflow import keras

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten,Conv2D,MaxPooling2D,Dropout,BatchNormalization
# Helper libraries
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
from seaborn import heatmap 


### Load the CIFAR-10 Dataset

When running this command for the first time, it will download the dataset from a remote server, which might take some time (in case of a server error, just try again a bit later).

In [None]:
# Step 1: fetch CIFAR-10 (downloaded on first use) as train/test (images, labels) pairs.
(train_images, train_labels), (test_images, test_labels) = keras.datasets.cifar10.load_data()

# Step 2: collapse the label column vectors to 1-D: (N, 1) -> (N,).
train_labels, test_labels = train_labels.flatten(), test_labels.flatten()

# Step 3: rescale pixels from uint8 to float32 in the range 0.0-1.0.
train_images = train_images.astype('float32') / 255.0
test_images = test_images.astype('float32') / 255.0

# Step 4: human-readable names for the 10 classes, indexed by label value.
class_names = ['airplane', 'automobile', 'bird', 'cat', 'deer','dog', 'frog', 'horse', 'ship', 'truck']

# Step 5: report the shape of every array.
for array_name, array in (('train_images', train_images),
                          ('train_labels', train_labels),
                          ('test_images', test_images),
                          ('test_labels', test_labels)):
    print(array_name + '.shape =', array.shape)


***

- We will try different models (build, fit on training data, and evaluate on test data).

- The input layer is the images (32x32x3), and the output layer is a `softmax` 10 units (one unit for each class).

- For each model, plot the train/test loss & accuracy plots (as shown in class).

- Summarize the results of all models in the table below:


<table>
  <tr>
    <th>Model</th>
    <th>#parameters</th>
    <th>epochs</th>
    <th>train accuracy</th>
    <th>test accuracy</th>
  </tr>
    
  <!-- copy this block once for every model you tested -->  
  <tr> 
    <td>1. VGG-like model</td>   <!-- Model -->
    <td>1,341,226 </td> <!-- #parameters -->
    <td>60</td> <!-- epochs -->
    <td> 0.9756</td> <!-- train accuracy -->
    <td> 0.7789</td> <!-- test accuracy -->
  </tr>
    
   <tr> 
    <td>2. Batch Normalization</td>   <!-- Model -->
    <td>1,345,066 </td> <!-- #parameters -->
    <td>60</td> <!-- epochs -->
    <td>0.9945</td> <!-- train accuracy -->
    <td>0.7868</td> <!-- test accuracy -->
  </tr>

  <tr> 
    <td>3. Data Augmentation</td>   <!-- Model -->
    <td>1,345,066</td> <!-- #parameters -->
    <td>60</td> <!-- epochs -->
    <td>0.6704 </td> <!-- train accuracy -->
    <td>0.7404</td> <!-- test accuracy -->
  </tr>
</table>




In [None]:
 
def compile_and_train_the_model(model, epochs_num):
    """Compile `model` with the Adam optimizer and fit it on the training set.

    Reads the notebook-level arrays `train_images`, `train_labels`,
    `test_images` and `test_labels`. The test set is passed as validation
    data, so the returned history also holds per-epoch validation metrics.

    Args:
        model: Keras model to compile and train (it is modified in place).
        epochs_num: number of training epochs.

    Returns:
        The history object returned by `model.fit`.
    """
    # `learning_rate` is the supported keyword; the legacy `lr` alias is
    # deprecated in TF2 and rejected by newer Keras releases.
    opt = keras.optimizers.Adam(learning_rate=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08)
    model.compile(optimizer=opt,
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])

    # Train the model, validating on the test set after every epoch.
    history = model.fit(train_images, train_labels, epochs=epochs_num, batch_size=128,
                        validation_data=(test_images, test_labels))

    return history

***
### 1. VGG-like model

Apply the following layers (between the input and output layers):
- **Block1:** 32-`Conv2D` + 32-`Conv2D` +  `MaxPooling` 
- **Block2:** 64-`Conv2D` + 64-`Conv2D` +  `MaxPooling` 
- **Block3:** 128-`Conv2D` + 128-`Conv2D` +  `MaxPooling` 
- **Block4:** 512-`Dense` + `Dropout(0.5)` 

Use 3x3 kernels and `padding='same'` in all Conv2D layers.

Use SGD+Momentum or Adam optimizers.

In [None]:
# VGG-like network: three double-convolution blocks followed by a dense classifier.
# Options shared by every convolution layer.
conv_options = dict(kernel_size=(3, 3), activation='relu', padding='same')

model = Sequential([
    # Block 1: two 32-filter convolutions, then 2x2 max pooling.
    Conv2D(32, input_shape=(32, 32, 3), **conv_options),
    Conv2D(32, **conv_options),
    MaxPooling2D(pool_size=(2, 2)),

    # Block 2: two 64-filter convolutions, then 2x2 max pooling.
    Conv2D(64, **conv_options),
    Conv2D(64, **conv_options),
    MaxPooling2D(pool_size=(2, 2)),

    # Block 3: two 128-filter convolutions, then 2x2 max pooling.
    Conv2D(128, **conv_options),
    Conv2D(128, **conv_options),
    MaxPooling2D(pool_size=(2, 2)),

    # Block 4: 512-unit dense layer with dropout.
    Flatten(),
    Dense(512, activation='relu'),
    Dropout(0.5),

    # Output: one softmax unit per class.
    Dense(10, activation='softmax'),
])
model.summary()

In [None]:
# Run 1: train the plain VGG-like model for 60 epochs and keep its metrics.
history = compile_and_train_the_model(model, epochs_num=60)

***
### 2. Batch Normalization

Use the previous model (VGG-like), and add a `BatchNormalization` layer after each of the `Conv2D` or `Dense` layers (except the output layer of course).

Use the same running options as before (batch size, epochs, optimizer), but **use a different variable** to record the `history` of the training results (don't overwrite the previous one).

You should expect the same level of accuracy, but at a shorter convergence time (i.e., fewer epochs).

In [None]:
# VGG-like network with a BatchNormalization layer after every Conv2D/Dense
# layer except the output layer.
# Options shared by every convolution layer.
conv_options = dict(kernel_size=(3, 3), activation='relu', padding='same')

model = Sequential([
    # Block 1: two 32-filter convolutions (each followed by BN), then max pooling.
    Conv2D(32, input_shape=(32, 32, 3), **conv_options),
    BatchNormalization(),
    Conv2D(32, **conv_options),
    BatchNormalization(),
    MaxPooling2D(pool_size=(2, 2)),

    # Block 2: two 64-filter convolutions (each followed by BN), then max pooling.
    Conv2D(64, **conv_options),
    BatchNormalization(),
    Conv2D(64, **conv_options),
    BatchNormalization(),
    MaxPooling2D(pool_size=(2, 2)),

    # Block 3: two 128-filter convolutions (each followed by BN), then max pooling.
    Conv2D(128, **conv_options),
    BatchNormalization(),
    Conv2D(128, **conv_options),
    BatchNormalization(),
    MaxPooling2D(pool_size=(2, 2)),

    # Block 4: 512-unit dense layer, BN, then dropout.
    Flatten(),
    Dense(512, activation='relu'),
    BatchNormalization(),
    Dropout(0.5),

    # Output: one softmax unit per class.
    Dense(10, activation='softmax'),
])
model.summary()

In [None]:
# Run 2: train the batch-normalized model for 60 epochs; stored separately
# so the metrics of the first run are kept.
history2 = compile_and_train_the_model(model, epochs_num=60)

***
### 3. Data Augmentation

In order to achieve better results (higher validation accuracy), we will try to use data augmentation.

Use Keras's `ImageDataGenerator` as described in class to re-train your last model (VGG-like + BN). 

https://www.tensorflow.org/api_docs/python/tf/keras/preprocessing/image/ImageDataGenerator

Note that each batch takes more time to train, as we need to re-generate each batch of samples.

Again, **use a different variable** to record the history of the training results.

Note, when using data augmentation inside `model.fit()`, you must specify `steps_per_epoch` (as the batch is generated outside of this method, and its size is therefore unknown). This number should be set as the (integer) number of total training samples (50,000) divided by the batch size (defined inside the generator `.flow` method).


In [None]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Random geometric perturbations applied to the training images as batches
# are generated.
augmentation_options = dict(
    rotation_range=15,
    width_shift_range=0.2,
    height_shift_range=0.2,
    zoom_range=0.1,
)
datagen = ImageDataGenerator(**augmentation_options)

# Iterator yielding augmented (images, labels) batches of 5 samples each.
data_iter = datagen.flow(train_images, train_labels, batch_size=5)

In [None]:
# Fresh VGG-like + BatchNormalization network, to be trained on augmented data.
model = Sequential()

# Three convolutional blocks: two same-width 3x3 convolutions (each followed
# by BN), then 2x2 max pooling.
for block_index, filters in enumerate((32, 64, 128)):
    for conv_index in range(2):
        # Only the very first layer declares the input image shape.
        is_first_layer = block_index == 0 and conv_index == 0
        extra = {'input_shape': (32, 32, 3)} if is_first_layer else {}
        model.add(Conv2D(filters, kernel_size=(3, 3), activation='relu', padding='same', **extra))
        model.add(BatchNormalization())
    model.add(MaxPooling2D(pool_size=(2, 2)))

# Dense head: 512 units, BN, then dropout.
model.add(Flatten())
model.add(Dense(512, activation='relu'))
model.add(BatchNormalization())
model.add(Dropout(0.5))

# Output: one softmax unit per class.
model.add(Dense(10, activation='softmax'))
model.summary()

In [None]:
# Compile the augmentation model with Adam, sparse categorical cross-entropy
# and an accuracy metric.
# `learning_rate` is the supported keyword; the legacy `lr` alias is
# deprecated in TF2 and rejected by newer Keras releases.
opt = keras.optimizers.Adam(learning_rate=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08)

model.compile(optimizer=opt,
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])


In [None]:
# Run 3: train on augmented batches for 60 epochs, validating on the test set.
# Take steps_per_epoch from the iterator itself (its length is the number of
# batches per pass over the training set), so one epoch covers every training
# sample whatever batch size was given to `datagen.flow`. The previous
# hard-coded `// 10` did not match that batch size.
history3 = model.fit(data_iter, steps_per_epoch=len(data_iter), epochs=60,
                     validation_data=(test_images, test_labels))

---
### Graphical comparison 

For each of the benchmark graphs (train-loss, val-loss, train-acc, val-acc) plot a single graph with all 3 runs (use a different color for each of the runs).

Add graph labels and legends.

In [None]:
# Compare the three training runs: one subplot per recorded metric, one curve
# per run.

# (training history, line color, legend label) for every run.
runs = [
    (history, 'r', '1. VGG-like'),
    (history2, 'b', '2. Batch Normalization'),
    (history3, 'g', '3. Data Augmentation'),
]

# (history key, subplot title, y-axis label, pin the y-axis to [0, 1]?)
panels = [
    ('loss', 'Train loss', 'Loss', False),
    ('accuracy', 'Train accuracy', 'Accuracy', True),
    ('val_loss', 'Validation loss', 'Loss', False),
    ('val_accuracy', 'Validation accuracy', 'Accuracy', True),
]

plt.figure(figsize=(14, 4))
for position, (metric, title, y_label, unit_range) in enumerate(panels, start=1):
    plt.subplot(1, len(panels), position)
    for run_history, color, run_name in runs:
        plt.plot(run_history.history[metric], color, label=run_name)
    if unit_range:
        # Accuracy lies in [0, 1]. Cross-entropy loss is unbounded above, so
        # the loss axes are left to autoscale rather than risk clipping curves.
        plt.ylim([0, 1])
    plt.title(title, fontsize=12)
    plt.xlabel('Epochs', fontsize=12)
    plt.ylabel(y_label, fontsize=12)
    plt.legend()
    plt.grid()

# Keep axis labels of neighbouring subplots from overlapping.
plt.tight_layout()

***
## Good Luck!