# Import Packages

In [1]:
# Bases
import keras as k
import tensorflow as tf

## data
from tensorflow.keras.preprocessing.image import ImageDataGenerator

## building
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv2D, Flatten, Dropout, MaxPooling2D, LayerNormalization, GlobalAveragePooling2D

## plotting
import os
import numpy as np
import matplotlib.pyplot as plt

## callbacks
from keras.callbacks import TensorBoard
import time

from keras.applications import ResNet152
from keras import Input, Model

Using TensorFlow backend.


## Environment Configuration for GPU

In [2]:
# Split the first physical GPU into two logical devices, each limited to
# `memory_limit` MB (1024 MB = 1 GB).
memory_limit = 1024
gpus = tf.config.experimental.list_physical_devices('GPU')

if gpus:
    try:
        virtual_devices = [
            tf.config.experimental.VirtualDeviceConfiguration(memory_limit=memory_limit)
            for _ in range(2)
        ]
        tf.config.experimental.set_virtual_device_configuration(gpus[0], virtual_devices)

        logical_gpus = tf.config.experimental.list_logical_devices('GPU')
        print(len(gpus), "Physical GPU,", len(logical_gpus), "Logical GPUs")
    except RuntimeError as e:
        # Virtual devices must be set before GPUs have been initialized
        print(e)

1 Physical GPU, 2 Logical GPUs


In [3]:
# Check whether the installed TensorFlow build was compiled with CUDA support.
tf.test.is_built_with_cuda()

True

In [3]:
# List every physical device (CPU, GPU, XLA) that TensorFlow can see.
tf.config.list_physical_devices()

[PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU'),
 PhysicalDevice(name='/physical_device:XLA_CPU:0', device_type='XLA_CPU'),
 PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU'),
 PhysicalDevice(name='/physical_device:XLA_GPU:0', device_type='XLA_GPU')]

In [2]:
# Print TensorFlow's local device list (device name, type and memory limit).
from tensorflow.python.client import device_lib
print(device_lib.list_local_devices())

[name: "/device:CPU:0"
device_type: "CPU"
memory_limit: 268435456
locality {
}
incarnation: 4093476482208990596
, name: "/device:XLA_CPU:0"
device_type: "XLA_CPU"
memory_limit: 17179869184
locality {
}
incarnation: 4050526615020432687
physical_device_desc: "device: XLA_CPU device"
, name: "/device:GPU:0"
device_type: "GPU"
memory_limit: 3034185728
locality {
  bus_id: 1
  links {
  }
}
incarnation: 6871143520370017842
physical_device_desc: "device: 0, name: GeForce GTX 1650, pci bus id: 0000:01:00.0, compute capability: 7.5"
, name: "/device:XLA_GPU:0"
device_type: "XLA_GPU"
memory_limit: 17179869184
locality {
}
incarnation: 4045811008313762940
physical_device_desc: "device: XLA_GPU device"
]


# Data Preparing

The dataset has the following directory ***structure***:

<pre>
<b>Fruit Images Dataset</b>
|__ <b>train</b>
    |______ <b>Apple Braeburn</b>: [0_100.jpg, ..]
    |______ <b>Apple Crimson Snow</b>: [0_100.jpg, ..] 
    ..
    |______ <b>Watermelon</b>: [0_100.jpg, ..]
|__ <b>test</b>
    |______ <b>Apple Braeburn</b>: [0_100.jpg, ..]
    |______ <b>Apple Crimson Snow</b>: [0_100.jpg, ..] 
    ..
    |______ <b>Watermelon</b>: [0_100.jpg, ..]
</pre>

***How to prepare data***

Format the images into appropriately pre-processed floating point tensors before feeding to the network:

1. Read images from the disk.
2. Decode contents of these images and convert it into proper grid format as per their RGB content.
3. Convert them into floating point tensors.
4. Rescale the tensors from values between 0 and 255 to values between 0 and 1, as neural networks prefer to deal with small input values.

Fortunately, all these tasks can be done with the `ImageDataGenerator` class provided by `tf.keras`, which can:
* Read images and preprocess them into proper tensors. 
* Set up generators that convert these images into batches of tensors — helpful when training the network.

In [2]:
# Generators: both splits only rescale pixel values by 1/255.
rescale_factor = 1/255

train_generator = ImageDataGenerator(rescale=rescale_factor)
val_generator = ImageDataGenerator(rescale=rescale_factor)

In [3]:
# Variables for pre-processing and training
batch_size = 128
epochs = 15

# Target image size fed to the network (images are resized on load).
IMG_HEIGHT = 100
IMG_WIDTH = 100

# Number of fruit classes (one sub-folder per class).
num_classes = 131

# Dataset root. Set the FRUIT_DATA_DIR environment variable to run the notebook
# on another machine; when it is unset the original location is used.
DATA_DIR = os.environ.get('FRUIT_DATA_DIR', '/home/maihai/GitHub/Fruit-Images-Dataset')
where_train = os.path.join(DATA_DIR, 'train')
where_test  = os.path.join(DATA_DIR, 'test')

After defining the generators, the `flow_from_directory` method will:
* Load images from the disk,
* Apply rescaling,
* Resize the images to the required dimensions.

In [21]:
# Augmenting generator for the training split. It is created in this cell so
# the cell does not rely on `image_gen` having been defined by a cell further
# down the notebook (which raises NameError on a fresh kernel).
image_gen = ImageDataGenerator(rescale=1./255,
                               horizontal_flip=True,
                               rotation_range=45)

# One-hot ('categorical') labels are requested explicitly, matching the
# validation iterator.
train_data_gen = image_gen.flow_from_directory(batch_size=batch_size,
                                               directory=where_train,
                                               shuffle=True,
                                               target_size=(IMG_HEIGHT, IMG_WIDTH),
                                               class_mode='categorical')

Found 67692 images belonging to 131 classes.


In [4]:
# Validation batches: resized to the network input size, one-hot labels.
validation_image_size = (IMG_HEIGHT, IMG_WIDTH)

val_data_gen = val_generator.flow_from_directory(
    directory=where_test,
    batch_size=batch_size,
    target_size=validation_image_size,
    shuffle=True,
    class_mode='categorical',
)

Found 22688 images belonging to 131 classes.


In [20]:
# Augmenting generator: rescaling plus random horizontal flips and rotations.
augmentation_settings = {
    'rescale': 1./255,
    'horizontal_flip': True,
    'rotation_range': 45,
}
image_gen = ImageDataGenerator(**augmentation_settings)

# The State-of-the-OLD-art: AlexNet - or CaffeNet training on 1 GPU

AlexNet is a famous CNN architecture created in 2012 by a Ukrainian-born researcher, Alex Krizhevsky, and it took first place in that year's ImageNet contest. The author trained AlexNet on 2 GPUs for weeks; a one-GPU version, named CaffeNet, was developed afterwards. We will replicate that work with CaffeNet.

![](caffenet.png)

## Data preprocess

In [None]:
# Settings for the AlexNet / CaffeNet experiment: smaller batches and the
# 224x224 input size used by that architecture.
batch_size = 64
epochs = 15

IMG_HEIGHT = 224
IMG_WIDTH = 224

# Dataset root. Set the FRUIT_DATA_DIR environment variable to run the notebook
# on another machine; when it is unset the original location is used.
DATA_DIR = os.environ.get('FRUIT_DATA_DIR', '/home/maihai/GitHub/Fruit-Images-Dataset')
where_train = os.path.join(DATA_DIR, 'train')
where_test  = os.path.join(DATA_DIR, 'test')

In [None]:
# Generators

## Only the training split is augmented (random horizontal flips and random
## rotations); both splits are rescaled.
augmentation_options = dict(horizontal_flip=True, rotation_range=45)
train_generator = ImageDataGenerator(rescale=1./255, **augmentation_options)

val_generator = ImageDataGenerator(rescale=1/255)

In [None]:
# Shuffled training batches with one-hot labels, resized to the network input.
train_image_size = (IMG_HEIGHT, IMG_WIDTH)

train_data_gen = train_generator.flow_from_directory(
    directory=where_train,
    batch_size=batch_size,
    target_size=train_image_size,
    shuffle=True,
    class_mode='categorical',
)

In [None]:
# Validation batches with one-hot labels, resized to the network input.
validation_image_size = (IMG_HEIGHT, IMG_WIDTH)

val_data_gen = val_generator.flow_from_directory(
    directory=where_test,
    batch_size=batch_size,
    target_size=validation_image_size,
    shuffle=True,
    class_mode='categorical',
)

In [None]:
def plotImages(images_arr):
    """Show the given images side by side in a single row.

    `images_arr` is a sequence of image arrays that `Axes.imshow` can draw.
    The helper is defined here because no earlier cell of the notebook
    provides it.
    """
    if len(images_arr) == 0:
        return
    # squeeze=False keeps `axes` two-dimensional even for a single image.
    fig, axes = plt.subplots(1, len(images_arr), figsize=(20, 20), squeeze=False)
    for img, ax in zip(images_arr, axes[0]):
        ax.imshow(img)
        ax.axis('off')
    plt.tight_layout()
    plt.show()


# Take one batch from the training iterator and preview its first five images.
sample_training_images, _ = next(train_data_gen)
plotImages(sample_training_images[:5])

## Create the model

![](caffenet.png)

In [None]:
# CaffeNet-style classifier.
# The convolutional stack is unchanged. The classifier head is fixed: the
# feature maps are pooled and flattened BEFORE the fully connected layers (a
# Dense layer applied to a 4-D tensor only transforms the last axis, so the
# model used to emit a 4-D output), the fully connected layers get ReLU
# activations and dropout, and the output layer has one unit per fruit class
# instead of 1000.
caffe = Sequential([
    # Layer 1
    Conv2D(96, kernel_size=(11, 11), strides=4, activation='relu', input_shape=(IMG_HEIGHT, IMG_WIDTH, 3)),

    # Layer 2
    Conv2D(256, kernel_size=(5, 5), activation='relu'),
    MaxPooling2D(),
    LayerNormalization(),

    # Layer 3
    Conv2D(384, kernel_size=(3, 3), activation='relu'),
    MaxPooling2D(),
    LayerNormalization(),

    # Layer 4
    Conv2D(384, kernel_size=(3, 3), activation='relu'),

    # Layer 5
    Conv2D(256, kernel_size=(3, 3), activation='relu'),
    MaxPooling2D(),

    # Turn the feature maps into one vector per image for the dense layers
    Flatten(),

    # Layer 6
    Dense(4096, activation='relu'),
    Dropout(0.5),

    # Layer 7
    Dense(4096, activation='relu'),
    Dropout(0.5),

    # Output layer: one probability per fruit class (num_classes = 131)
    Dense(num_classes, activation='softmax')
])

In [None]:
# Print the layer-by-layer summary (output shapes, parameter counts) of the model.
caffe.summary()

# The State-of-the-NEW-art: ResNet 152

***ResNet 152*** was published by Microsoft Research in 2015. As its name suggests, ResNet 152 is a very deep network of 152 layers; it addresses the problem of vanishing information with a residual architecture, which adds skip connections around groups of layers.

ResNet swept the major image-recognition competitions in 2015 and was later included in Keras with weights pre-trained on ***'imagenet'***.

## The typical transfer learning workflow

***Workflow 1***

In this project, we will use transfer learning technique to apply ResNet 152 architecture on our Fruit dataset.

1. ***Instantiate*** the ResNet then load pre-trained weights.
2. ***Freeze*** all layers by setting ResNet152.trainable = False.
3. ***Create*** our custom layers on top of the Resnet's output.
4. ***Train*** only our custom layers.




***Workflow 2***  

Workflow 1 has a drawback on my computer: it runs out of memory. Although we mark ResNet's core layers to be skipped during training, all of their parameters, around 60 million, are still loaded into memory. That is too many for my computer to work with. So, let's try an alternative approach that is much cheaper and more lightweight.  
1. ***Instantiate*** the ResNet and load pre-trained weights.
2. ***Run*** our Fruit dataset through it, then receive the output. This is called ***feature extraction***.
3. ***Use*** that output as input for a new, smaller model.  

By this workflow, the need for memory would highly decrease.

***Fine tuning***  

Whether you choose Workflow 1 or 2, in the end we have a basic transfer-learning model, in which most of the weights belong to the core ResNet and a minority belong to our custom layers. Then, we need to adjust those ResNet weights to the Fruit dataset. The steps are as follows:

1. ***Unfreeze*** ResNet's core layers by setting ResNet152.trainable = True.
2. ***Train*** the entire model, both top layers and the core.  

### The workflow 1

In [17]:
# Shape of one input image: height x width x 3 colour channels.
image_shape = (IMG_HEIGHT, IMG_WIDTH, 3)

# 1 Instantiate: ImageNet weights, without the original classification top
#   so that our own fruit classifier can be attached instead.
resnet = ResNet152(include_top=False,
                   weights='imagenet',
                   input_shape=image_shape)

# 2 Freeze every ResNet layer
resnet.trainable = False

# 3 Create the new model on top of the frozen base
inputs = Input(shape=image_shape)
feature_maps = resnet(inputs)
# Average each feature map down to one value: `resnet.output_shape[1:]` -> vector
x = k.layers.GlobalAveragePooling2D()(feature_maps)
# No activation here: the layer outputs one raw score (logit) per class
outputs = k.layers.Dense(num_classes)(x)

resnet_copycat = Model(inputs, outputs)
resnet_copycat.summary()

Model: "model_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_2 (InputLayer)         (None, 100, 100, 3)       0         
_________________________________________________________________
resnet152 (Model)            (None, 4, 4, 2048)        58370944  
_________________________________________________________________
global_average_pooling2d_1 ( (None, 2048)              0         
_________________________________________________________________
dense_1 (Dense)              (None, 131)               268419    
Total params: 58,639,363
Trainable params: 268,419
Non-trainable params: 58,370,944
_________________________________________________________________


In [18]:
# The generators yield one-hot labels (class_mode='categorical') and the final
# Dense layer has no activation, so this is a single-label multi-class problem
# on logits: use categorical cross-entropy. Binary cross-entropy would treat
# each of the `num_classes` outputs as an independent yes/no decision and makes
# the reported 'accuracy' misleadingly high.
resnet_copycat.compile(optimizer='adam',
                       loss=tf.keras.losses.CategoricalCrossentropy(from_logits=True),
                       metrics=['accuracy'])

In [19]:
# Give each run its own TensorBoard log folder by appending the current Unix time.
run_timestamp = int(time.time())
NAME = 'Resnet_copycat_flow_1_{}'.format(run_timestamp)

run_log_dir = 'logs/{}'.format(NAME)
tensorboard = TensorBoard(log_dir=run_log_dir)

In [None]:
%%time
# 4 Training our top layers
# Step counts are derived from the iterators themselves rather than from
# hard-coded image counts and the global `batch_size`, so they stay correct
# whichever batch size the iterators were created with.
history = resnet_copycat.fit(train_data_gen,
                             steps_per_epoch=train_data_gen.samples // train_data_gen.batch_size,
                             epochs=epochs,
                             validation_data=val_data_gen,
                             validation_steps=val_data_gen.samples // val_data_gen.batch_size,
                             callbacks=[tensorboard])

In [None]:
# NOTE(review): the folder name says "flow_2" although this section is
# workflow 1 -- confirm the intended destination.
where_to_save = '/home/maihai/GitHub/Portfolio/4_Fruit_classification/resnet_copycat_flow_2'

# Save the model trained above. The previous code referenced `model_baseline`,
# a name this notebook never defines.
# overwrite=False: an existing save at this path is not replaced silently.
resnet_copycat.save(filepath=where_to_save, overwrite=False, include_optimizer=True)

### Fine-tuning of the entire model

Finally, let's unfreeze the base model and train the entire model end-to-end with a low learning rate.

In [None]:
# Unfreeze the ResNet core so every layer is trained during fine-tuning.
resnet.trainable = True

# Re-compile so the change of `trainable` takes effect.
# - A very low learning rate (1e-5) keeps the pre-trained weights from being
#   wrecked by large updates; the default 'adam' setting is far higher.
# - Categorical cross-entropy on logits matches the one-hot labels and the
#   activation-free output layer (binary cross-entropy does not).
resnet_copycat.compile(optimizer=k.optimizers.Adam(1e-5),
                       loss=tf.keras.losses.CategoricalCrossentropy(from_logits=True),
                       metrics=['accuracy'])

In [None]:
%%time
#### Please, don't run this block. 
## GPU has not enough VRAM to train it.
## CPU got 38 mins for each epoch.
## This block is so expensive, don't run it.

# Setting up TensorBoard (one log folder per run, keyed by the current Unix time)
NAME = 'Handmade_model_resnet_copycat_fineTune{}'.format(int(time.time()))
tensorboard = TensorBoard(log_dir='logs/{}'.format(NAME))

# Training
# Pinned to the CPU because the GPU does not have enough VRAM (see above).
# Step counts are derived from the iterators themselves rather than from
# hard-coded image counts and the global `batch_size`.
with tf.device('/cpu:0'):
    history = resnet_copycat.fit(train_data_gen,
                                 steps_per_epoch=train_data_gen.samples // train_data_gen.batch_size,
                                 epochs=epochs,
                                 validation_data=val_data_gen,
                                 validation_steps=val_data_gen.samples // val_data_gen.batch_size,
                                 callbacks=[tensorboard])