# Import Packages

In [1]:
# Bases
import keras as k
import tensorflow as tf
from keras import backend as K

## data
from tensorflow.keras.preprocessing.image import ImageDataGenerator

## building
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv2D, Flatten, Dropout, MaxPooling2D, LayerNormalization, GlobalAveragePooling2D

## plotting
import os
import numpy as np
import matplotlib.pyplot as plt

## callbacks
from keras.callbacks import TensorBoard
import time

from keras.applications import ResNet152
from keras import Input, Model

import sklearn
from sklearn.metrics import  classification_report, roc_curve

Using TensorFlow backend.


## Environment Configuration for GPU

In [2]:
gpus = tf.config.experimental.list_physical_devices('GPU')

memory_limit=1024
if gpus:
  # Create 2 virtual GPUs with 1GB memory each
  try:
    tf.config.experimental.set_virtual_device_configuration(
        gpus[0],
        [tf.config.experimental.VirtualDeviceConfiguration(memory_limit=memory_limit),
         #tf.config.experimental.VirtualDeviceConfiguration(memory_limit=memory_limit),
         tf.config.experimental.VirtualDeviceConfiguration(memory_limit=memory_limit)])
    logical_gpus = tf.config.experimental.list_logical_devices('GPU')
    print(len(gpus), "Physical GPU,", len(logical_gpus), "Logical GPUs")
  except RuntimeError as e:
    # Virtual devices must be set before GPUs have been initialized
    print(e)

1 Physical GPU, 2 Logical GPUs


In [3]:
tf.test.is_built_with_cuda()

True

In [4]:
tf.config.list_physical_devices()

[PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU'),
 PhysicalDevice(name='/physical_device:XLA_CPU:0', device_type='XLA_CPU'),
 PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU'),
 PhysicalDevice(name='/physical_device:XLA_GPU:0', device_type='XLA_GPU')]

In [5]:
from tensorflow.python.client import device_lib
print(device_lib.list_local_devices())

[name: "/device:CPU:0"
device_type: "CPU"
memory_limit: 268435456
locality {
}
incarnation: 13463972535812607633
, name: "/device:XLA_CPU:0"
device_type: "XLA_CPU"
memory_limit: 17179869184
locality {
}
incarnation: 6731419378047132944
physical_device_desc: "device: XLA_CPU device"
, name: "/device:GPU:0"
device_type: "GPU"
memory_limit: 1073741824
locality {
  bus_id: 1
  links {
  }
}
incarnation: 7074386158972131739
physical_device_desc: "device: 0, name: GeForce GTX 1650, pci bus id: 0000:01:00.0, compute capability: 7.5"
, name: "/device:XLA_GPU:0"
device_type: "XLA_GPU"
memory_limit: 17179869184
locality {
}
incarnation: 17821101728704306533
physical_device_desc: "device: XLA_GPU device"
]


# Data Preparing

The dataset has the following directory ***structure***:

<pre>
<b>Fruit Images Dataset</b>
|__ <b>train</b>
    |______ <b>Apple Braeburn</b>: [0_100.jpg, ..]
    |______ <b>Apple Crimson Snow</b>: [0_100.jpg, ..] 
    ..
    |______ <b>Apple Watermelon</b>: [0_100.jpg, ..]
|__ <b>test</b>
    |______ <b>Apple Braeburn</b>: [0_100.jpg, ..]
    |______ <b>Apple Crimson Snow</b>: [0_100.jpg, ..] 
    ..
    |______ <b>Apple Watermelon</b>: [0_100.jpg, ..]
</pre>

***How to prepare data***

Format the images into appropriately pre-processed floating point tensors before feeding to the network:

1. Read images from the disk.
2. Decode contents of these images and convert it into proper grid format as per their RGB content.
3. Convert them into floating point tensors.
4. Rescale the tensors from values between 0 and 255 to values between 0 and 1, as neural networks prefer to deal with small input values.

Fortunately, all these tasks can be done with the `ImageDataGenerator` class provided by `tf.keras`, which can:
* Read images and preprocess them into proper tensors. 
* Set up generators that convert these images into batches of tensors — helpful when training the network.

In [6]:
# Generator
train_generator = ImageDataGenerator(rescale=1/255, validation_split=0.2,
                                     horizontal_flip=True,
                                      rotation_range=45)
test_generator = ImageDataGenerator(rescale=1/255)

In [7]:
# Variable for pre-processing and training
batch_size = 128
epochs = 15

IMG_HEIGHT = 100
IMG_WIDTH = 100

num_classes = 131

where_train = '/home/maihai/GitHub/Fruit-Images-Dataset/train'
where_test  = '/home/maihai/GitHub/Fruit-Images-Dataset/test'

After defining the generators, the `flow_from_directory` method will:
* Load images from the disk,
* Applies rescaling,
* Applies resizes images into the required dimensions.

In [8]:
train_data_gen = train_generator.flow_from_directory(directory=where_train, 
                                         target_size=(IMG_HEIGHT, IMG_WIDTH),
                                         class_mode='categorical',
                                         shuffle=False,
                                         batch_size=batch_size,
                                         subset='training')

val_data_gen = train_generator.flow_from_directory(directory=where_train,
                                        target_size=(IMG_HEIGHT, IMG_WIDTH),
                                        class_mode='categorical',
                                        shuffle=False,
                                        batch_size=batch_size,
                                        subset='validation')

Found 54190 images belonging to 131 classes.
Found 13502 images belonging to 131 classes.


In [9]:
test_data_gen = test_generator.flow_from_directory(directory=where_test,
                                        target_size=(IMG_HEIGHT, IMG_WIDTH),
                                        class_mode='categorical',
                                        shuffle=True,
                                        batch_size=batch_size)

Found 22688 images belonging to 131 classes.


# The State-of-the-art: ResNet 152

***ResNet 152***, published by Google in 2015. ResNet 152 as its name includes a very deep networks of 152 layers, addressing the problem of information vanishing by Resudual architecture, which skip connection while computing.

ResNet was the winner of all stars in 2015, then embeded in Keras with pre-trained weighted of ***'imagenet'***.

## The typical transfer learning workflow

***Workflow 1***

In this project, we will use transfer learning technique to apply ResNet 152 architecture on our Fruit dataset.

1. ***Instantiate*** the ResNet then load pre-trained weights.
2. ***Freeze*** all layers by setting ResNet152.trainable = False.
3. ***Create*** our custom layers on top of the Resnet's output.
4. ***Train*** only our custom layers.




***Workflow 2***  

Workflow 1 has a backdraw in my computer: out of memory. Though we marked ResNet's core layers to be skiped while training, but all of them still be loaded to memory, the number is around 60 millions. The number that big is unable for my computer to work with. So, let's try an alternative approach, much cheaper and more lightweight.  
1. ***Instantiate*** the ResNet and load pre=trained weights.
2. ***Run*** our Fruit dataset through it, then receive the output. This is called ***feature extraction***.
3. ***Use*** that output as input for a new, smaller model.  

By this workflow, the need for memory would highly decrease.

***Fine tuning***  

Either your choice is Workflow 1 or 2, afterall, we have a basical transfer-learning models, with most of weights belong to the core ResNet and a minority of our custom layers. Then, we need to adjust those ResNet weights to the Fruit dataset. The steps are follow:

1. ***Unfreeze*** ResNet's core layers by setting ResNet152.trainable = True.
2. ***Train*** the entire model, both top layers and the core.  

### The workflow 1

In [11]:
# 1 Instantiate
resnet = ResNet152(weights='imagenet',
                   input_shape=(IMG_HEIGHT, IMG_WIDTH, 3),
                   include_top=False) # to adopt our fruit classifier
# 2 Freeze
resnet.trainable = False

# 3 Create new model on top
inputs  = Input(shape=(IMG_HEIGHT, IMG_WIDTH, 3))
x       = resnet(inputs)
x = k.layers.GlobalAveragePooling2D()(x) # Convert features of shape `resnet.output_shape[1:]` to vectors
outputs = k.layers.Dense(num_classes)(x)

resnet_copycat = Model(inputs, outputs)
resnet_copycat.summary()

Model: "model_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_2 (InputLayer)         (None, 100, 100, 3)       0         
_________________________________________________________________
resnet152 (Model)            (None, 4, 4, 2048)        58370944  
_________________________________________________________________
global_average_pooling2d_1 ( (None, 2048)              0         
_________________________________________________________________
dense_1 (Dense)              (None, 131)               268419    
Total params: 58,639,363
Trainable params: 268,419
Non-trainable params: 58,370,944
_________________________________________________________________


In [12]:
resnet_copycat.compile(optimizer='adam',
                       loss=tf.keras.losses.CategoricalCrossentropy(),
                       metrics=['categorical_accuracy'])

In [12]:
# Model Checkpoint + TensorBoard
checkpoint_path = 'ResNet152'
checkpoint_callback = k.callbacks.ModelCheckpoint(filepath=checkpoint_path,
                                                         save_weights_only=True,
                                                         monitor='val_acc',
                                                         mode='max',
                                                         save_best_only=True)

NAME = 'Resnet_copycat_flow_1_{}'.format(int(time.time()))
tensorboard = TensorBoard(log_dir='logs/{}'.format(NAME))

In [13]:
%%time
# 4 Training our top layers
history = resnet_copycat.fit(train_data_gen,
                             steps_per_epoch=67692 // batch_size,
                             epochs=epochs,
                             validation_data=val_data_gen,
                             validation_steps=22688 // batch_size,
                             callbacks=[tensorboard, checkpoint_callback])

Epoch 1/15
Epoch 2/15




Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
CPU times: user 1h 39min 31s, sys: 21min 29s, total: 2h 1min
Wall time: 1h 47min 14s


In [None]:
resnet_copycat.save_weights('resnet_copycat_weights', overwrite=False)

In [None]:
# Clear VRAM
K.clear_session()

### Fine-tuning of the entire model

Finally, let's unfreeze the base model and train the entire model end-to-end with a low learning rate.

resnet.trainable = True

resnet_copycat.compile(optimizer='adam',
                       loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
                       metrics=['accuracy'])

%%time
#### Please, don't run this block. 
## GPU has not enough VRAM to train it.
## CPU got 38 mins for each epoch.
## This block is so expensive, don't run it.

# Setting up TensorBoard
NAME = 'Handmade_model_resnet_copycat_fineTune{}'.format(int(time.time()))
tensorboard = TensorBoard(log_dir='logs/{}'.format(NAME))

# Training
with tf.device('/cpu:0'):
    history = resnet_copycat.fit(train_data_gen,
                             steps_per_epoch=67692 // batch_size,
                             epochs=epochs,
                             validation_data=val_data_gen,
                             validation_steps=22688 // batch_size,
                             callbacks=[tensorboard]
)

# Classification Report

In [13]:
resnet_copycat.load_weights('resnet_copycat_weights')

In [18]:
%%time
y_pred = resnet_copycat.predict(test_data_gen,
                                    steps=22688/batch_size,
                                    verbose=1)


CPU times: user 1min 43s, sys: 24.2 s, total: 2min 8s
Wall time: 2min


In [19]:
y_pred = np.argmax(y_pred[:22688], axis=-1)

In [20]:
y_true = test_data_gen.classes

print(classification_report(y_true, y_pred))

              precision    recall  f1-score   support

           0       0.00      0.00      0.00       164
           1       0.00      0.00      0.00       148
           2       0.00      0.00      0.00       160
           3       0.00      0.00      0.00       164
           4       0.00      0.00      0.00       161
           5       0.00      0.00      0.00       164
           6       0.00      0.00      0.00       152
           7       0.00      0.00      0.00       164
           8       0.00      0.00      0.00       164
           9       0.00      0.00      0.00       144
          10       0.00      0.00      0.00       166
          11       0.00      0.00      0.00       164
          12       0.00      0.00      0.00       219
          13       0.00      0.00      0.00       164
          14       0.00      0.00      0.00       143
          15       0.00      0.00      0.00       166
          16       0.00      0.00      0.00       166
          17       0.00    