# Lab 01 - Train Your First CNN

In [None]:
#Release: 1.1907.1601

### Import Modules

In [None]:
import tensorflow as tf
import keras
import numpy as np

# Report the installed TensorFlow version so a run can be tied to its environment.
print("TensorFlow version: " + tf.__version__)

In [None]:
from keras.datasets import cifar10
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Conv2D, MaxPooling2D
from PIL import Image
from matplotlib import pyplot
from scipy.misc import toimage

%matplotlib inline

###  - Function to Show Image -

In [None]:
#helper function
def show_image(img, label="", zoom=False, index=None):
    """Draw one image, captioned with its class code and class name.

    Args:
        img: Image to display; passed unchanged to ``pyplot.imshow``.
        label: Integer class code used to index the notebook-level
            ``labelname`` list. With the default ``""`` no caption is drawn
            (previously this case raised ``TypeError``).
        zoom: When false, the image is drawn in one cell of a 3x3 grid on the
            current figure. When true, a new 6x3 figure is opened and the
            image is drawn in its left half.
        index: Zero-based cell of the 3x3 grid (0-8); only used when ``zoom``
            is false. When omitted, the notebook-level variable ``i`` is used
            if it exists (the original implicit behaviour), otherwise cell 0.
    """
    if not zoom:
        if index is None:
            # Backward compatibility: existing callers rely on the enclosing
            # loop variable `i` being visible as a global.
            index = globals().get("i", 0)
        # Same cell as the original `330 + 1 + i` shorthand for 0 <= i <= 8.
        pyplot.subplot(3, 3, index + 1)
    else:
        pyplot.figure(figsize=(6, 3))
        pyplot.subplot(1, 2, 1)
        pyplot.grid(False)
    pyplot.xticks([])
    pyplot.yticks([])
    # Only caption the image when a class code was actually supplied.
    if not (isinstance(label, str) and not label):
        pyplot.xlabel('{code} - {label}'.format(code=label, label=labelname[label]))
    pyplot.imshow(img, cmap=pyplot.cm.binary)

### - Function to plot model accuracy and loss -

In [None]:
def _plot_metric_pair(metrics, key, title, ylabel):
    """Plot one training metric and its ``val_``-prefixed counterpart per epoch."""
    pyplot.plot(metrics[key])
    pyplot.plot(metrics['val_' + key])
    pyplot.title(title)
    pyplot.ylabel(ylabel)
    pyplot.xlabel('epoch')
    pyplot.legend(['train', 'test'], loc='upper left')
    pyplot.show()


def plot_model_history(model_history):
    """Plot accuracy and loss curves for a finished training run.

    Args:
        model_history: Object whose ``history`` attribute maps metric names
            to per-epoch value lists (the notebook passes the value returned
            by ``model.fit``).

    Raises:
        KeyError: If the accuracy or loss entries (or their ``val_``
            counterparts) are missing from ``model_history.history``.
    """
    # Use the argument, not the notebook-level `history` variable the
    # original body read by accident.
    metrics = model_history.history

    # Depending on the Keras release, accuracy is recorded as 'acc' or 'accuracy'.
    acc_key = 'acc' if 'acc' in metrics else 'accuracy'

    _plot_metric_pair(metrics, acc_key, 'model accuracy', 'accuracy')
    _plot_metric_pair(metrics, 'loss', 'model loss', 'loss')
    

### - Function Show Image Prediction -

In [None]:
#helper function
def show_image_prediction(i, predictions_array, true_label, img):
    """Show sample ``i`` captioned with predicted class, confidence and true class.

    The caption is blue when the prediction matches the true label and red
    otherwise.

    Args:
        i: Index of the sample to show.
        predictions_array: Per-sample class scores; ``predictions_array[i]``
            is the score vector for sample ``i``.
        true_label: Per-sample integer labels, either one label per row
            (shape ``(n, 1)``, as this notebook passes) or flat ``(n,)``.
        img: Per-sample images; ``img[i]`` is passed to ``pyplot.imshow``.
    """
    scores = predictions_array[i]
    # Reduce the label to a plain int so the comparison and the name lookup
    # do not depend on the label array being exactly one element long.
    actual = int(np.ravel(true_label[i])[0])
    predicted = int(np.argmax(scores))

    pyplot.grid(False)
    pyplot.xticks([])
    pyplot.yticks([])
    pyplot.imshow(img[i], cmap=pyplot.cm.binary)

    color = 'blue' if predicted == actual else 'red'
    caption = "{} {:2.0f}% ({})".format(labelname[predicted],
                                        100 * np.max(scores),
                                        labelname[actual])
    pyplot.xlabel(caption, color=color)

def show_value_prediction(i, predictions_array, true_label):
    """Draw the class-score bar chart for sample ``i``.

    The bar of the predicted class is coloured red and the bar of the true
    class blue; blue is applied last, so a correct prediction ends up blue.

    Args:
        i: Index of the sample to show.
        predictions_array: Per-sample class scores; ``predictions_array[i]``
            is the score vector for sample ``i``.
        true_label: Per-sample integer labels, either one label per row
            (shape ``(n, 1)``, as this notebook passes) or flat ``(n,)``.
    """
    scores = predictions_array[i]
    actual = int(np.ravel(true_label[i])[0])

    pyplot.grid(False)
    pyplot.xticks([])
    pyplot.yticks([])
    # One bar per class score instead of a hard-coded 10.
    bars = pyplot.bar(range(len(scores)), scores, color="#777777")
    pyplot.ylim([0, 1])

    bars[int(np.argmax(scores))].set_color('red')
    bars[actual].set_color('blue')

#### Step 01 - Load CIFAR-10 Data

In [None]:
# Class names, indexed by their integer label code (position in the list).
labelname = [
    "airplane", "automobile", "bird", "cat", "deer",
    "dog", "frog", "horse", "ship", "truck",
]
# Derived from the list so the two can never disagree.
num_classes = len(labelname)

# load data
# The call returns a (train images, train labels) pair and a
# (test images, test labels) pair, unpacked here into four arrays.
(X_train, Y_train), (X_test, Y_test) = cifar10.load_data()

#### Step 02 - Data Exploration

In [None]:
# view data shape
# The first dimension of X_train is the number of training images.
print('X_train shape: ', X_train.shape)
print('Number of image: ', X_train.shape[0])

# Shape of the label array that accompanies X_train.
print('Y_train shape:', Y_train.shape)

In [None]:
# Preview the first nine training images together with their labels.
# NOTE: the loop variable must stay named `i` -- show_image reads the
# notebook-level `i` to choose which cell of its 3x3 grid to draw into.
for i in range(9):
    show_image(Image.fromarray(X_train[i]), label=Y_train[i, 0])

In [None]:
# Display the raw array stored for the first training image.
X_train[0]

In [None]:
# Display the first slice of the first training image
# (presumably its top row of pixels -- confirm against X_train.shape).
X_train[0][0]

In [None]:
# Display the label entry stored for the first training image.
Y_train[0]

In [None]:
# data distribution
# With return_counts=True, np.unique returns the distinct label values (u)
# and the number of occurrences of each one. Despite its name, `indices`
# holds those counts, not positions.
u,indices = np.unique(Y_train,return_counts = True) 
print(u) 
print(indices)

# Make a histogram of the training labels using 10 bins
pyplot.hist(Y_train, 10)

# Show the plot
pyplot.show()

#### Step 03 - Training Data Preparation

In [None]:
# The data, split between train and test sets:
# NOTE: this calls cifar10.load_data() a second time; the arrays loaded in
# Step 01 (X_train, Y_train, ...) are left untouched and these img_*/label_*
# names are the ones used for training and evaluation below.
(img_train, label_train), (img_test, label_test) = cifar10.load_data()
print('img_train shape:', img_train.shape)
print('label_train shape:', label_train.shape)
# The first dimension of each image array is its sample count.
print('Training data: {num} samples'.format(num = img_train.shape[0]))
print('Test data: {num} samples'.format(num = img_test.shape[0]))

In [None]:
# Convert the images to float32 and divide by 255
# (assumes 8-bit pixel values, which maps them into the 0-1 range).
img_train_f = img_train.astype('float32') / 255
img_test_f = img_test.astype('float32') / 255

# Report the resulting shape and the size of each split.
print('x_train shape:', img_train_f.shape)
print(len(img_train_f), 'train samples')
print(len(img_test_f), 'test samples')

In [None]:
# Convert label vectors to one hot encoding.
# Each integer label is expanded to a vector with num_classes entries.
label_train_one_hot = keras.utils.to_categorical(label_train, num_classes)
label_test_one_hot = keras.utils.to_categorical(label_test, num_classes)
print('label_train shape:', label_train_one_hot.shape)
# Show one label before and after conversion for comparison.
print('Original label:', label_train[0])
print('After conversion to one-hot:', label_train_one_hot[0])

#### Step 04 - Create Model

In [None]:
# Define the model: one convolution block (conv -> ReLU -> max-pool ->
# dropout) followed by a flatten step and a softmax classifier with one
# output per class. The input shape is taken from the training images,
# dropping the leading sample dimension.
model = Sequential([
    Conv2D(32, (3, 3), padding='same', input_shape=img_train.shape[1:]),
    Activation('relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),
    Flatten(),
    Dense(num_classes),
    Activation('softmax'),
])

model.summary()

#### Step 05 - Train Model

In [None]:
# Training hyperparameters
learning_rate = 1e-4   # passed to the optimizer as `lr`
decay = 1e-6           # passed to the optimizer as `decay`
batch_size = 32        # samples per batch for model.fit
epochs = 30            # passes over the training set for model.fit

In [None]:
# Initiate the RMSprop optimizer.
# Use the canonical class name `RMSprop`: the lowercase `rmsprop` alias is
# not available in every Keras release, whereas the class name is.
opt = keras.optimizers.RMSprop(lr=learning_rate, decay=decay)

In [None]:
# Let's train the model using RMSprop
# Categorical cross-entropy pairs with the one-hot labels and the softmax
# output layer; accuracy is tracked as an additional metric.
model.compile(loss='categorical_crossentropy',
              optimizer=opt,
              metrics=['accuracy'])

In [None]:
import os
import datetime
import time

# Time only the fit call, so the reported duration is the training time and
# does not include writing the model to disk.
start = time.time()
history = model.fit(img_train_f, label_train_one_hot,
                    batch_size=batch_size,
                    epochs=epochs,
                    validation_data=(img_test_f, label_test_one_hot),
                    shuffle=True)
end = time.time()

# Save model and weights under ./saved_models, with a timestamped file name
# so successive runs do not overwrite each other.
now = datetime.datetime.now()
save_dir = os.path.join(os.getcwd(), 'saved_models')
model_name = 'labs01_model_{batch}.h5'.format(batch=now.strftime("%Y%m%d_%H%M"))

# exist_ok=True replaces the separate isdir() check and its check-then-create gap.
os.makedirs(save_dir, exist_ok=True)
model_path = os.path.join(save_dir, model_name)
model.save(model_path)
print('Saved trained model at %s ' % model_path)

print("Model took {:2.0f} seconds to train".format((end - start)))

In [None]:
# Show the accuracy and loss curves recorded during training.
plot_model_history(history)

#### Step 06 - Evaluate Model

In [None]:
# Evaluate on the normalised test images; the result is unpacked as the loss
# followed by the accuracy metric requested at compile time.
test_loss, test_acc = model.evaluate(img_test_f, label_test_one_hot)
print('Test accuracy:', test_acc)

In [None]:
# Class scores for every test image (one row of scores per image).
predictions = model.predict(img_test_f)

In [None]:
# Inspect a single test sample: the image with its predicted/true class on
# the left and the per-class score bars on the right.
i = 0
pyplot.figure(figsize=(6,3))
pyplot.subplot(1,2,1)
show_image_prediction(i, predictions, label_test, img_test)
pyplot.subplot(1,2,2)
show_value_prediction(i, predictions,  label_test)
# Display the raw score vector for the same sample.
predictions[i]

In [None]:
# Plot the first num_images test images with their predicted and true labels.
# Correct predictions are coloured blue, incorrect ones red.
# Each sample takes two adjacent grid cells: image on the left, score bars
# on the right, hence twice as many grid columns as num_cols.
num_rows, num_cols = 15, 3
num_images = num_rows * num_cols
pyplot.figure(figsize=(4 * num_cols, 2 * num_rows))
for i in range(num_images):
    pyplot.subplot(num_rows, 2 * num_cols, 2 * i + 1)
    show_image_prediction(i, predictions, label_test, img_test)
    pyplot.subplot(num_rows, 2 * num_cols, 2 * i + 2)
    show_value_prediction(i, predictions, label_test)

# What's Next?

#### 1. Retraining with the same data
#### 2. Retraining by using augmented data
#### 3. Experiment with more complex model 