In [1]:
import time
import math
import random
import numpy as np
import sklearn
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras import datasets, layers, models
%matplotlib inline

In [2]:
tf.test.is_gpu_available()

True

# Configs & Hyperparameters

In [3]:
# batch size
batch_size = 32
# validation split
valid_size = .1
# test split
test_size = 0.2

img_dim = (224, 224)

# paths
cache_path = 'cache'
data_path = '/home/tjy/data/china-birds-images'

# Data Loading

In [4]:
import os

n_birds = 10 # use ten kinds of birds first
bird_file_map = {}

# return array of bird names
birdList = sorted(os.listdir(data_path))
loadedImages = []

n_images = 0
n_birds_loaded = 0
for b in birdList:
    if n_birds_loaded >= n_birds:
        break
    print("Loading images for '" + b + "'")
    curdir = os.path.join(data_path, b)
    if not os.path.isdir(curdir):
        continue
    img_files = os.listdir(curdir)
    
    filenames = [os.path.join(curdir, f) for f in img_files]
    n_f = len(filenames)
    if n_f >= 400: # use data only if more than 400 images are found
        bird_file_map[b] = filenames
        print(n_f, "images loaded for '" + b + "'")
        n_birds_loaded += 1
        n_images += n_f
    else:
        print("Not enought data for '" + b + "'")

Loading images for 'Aberrant Bush-Warbler'
Not enought data for 'Aberrant Bush-Warbler'
Loading images for 'Ala Shan Redstart'
470 images loaded for 'Ala Shan Redstart'
Loading images for 'Aleutian Tern'
Not enought data for 'Aleutian Tern'
Loading images for 'Altai Snowcock'
458 images loaded for 'Altai Snowcock'
Loading images for 'American Wigeon'
500 images loaded for 'American Wigeon'
Loading images for 'Amur Falcon'
Not enought data for 'Amur Falcon'
Loading images for 'Arctic Warbler'
Not enought data for 'Arctic Warbler'
Loading images for 'Ashy Bulbul'
Not enought data for 'Ashy Bulbul'
Loading images for 'Ashy Drongo'
455 images loaded for 'Ashy Drongo'
Loading images for 'Ashy Minivet'
499 images loaded for 'Ashy Minivet'
Loading images for 'Ashy Wood Pigeon'
Not enought data for 'Ashy Wood Pigeon'
Loading images for 'Ashy Woodswallow'
Not enought data for 'Ashy Woodswallow'
Loading images for 'Ashy-throated Parrotbill'
Not enought data for 'Ashy-throated Parrotbill'
Loading

In [5]:
import cv2
n_channels = 3
X = np.zeros((n_images, img_dim[0], img_dim[1], n_channels))
labels = []

i = 0
for k, v in bird_file_map.items():
    for file in v:
        try:
            im = cv2.imread(file)
            if im is None or im.shape[0] < img_dim[0] or im.shape[1] < img_dim[1]:
                continue
            shape = im.shape
            assert len(shape) == 3 # width, length and color channels
            assert shape[-1] == 3 # rgb, three channels
            
            # resizing
            im = cv2.resize(src=im, dsize=img_dim, interpolation=cv2.INTER_LINEAR)
            # Gaussian blurring
            im = cv2.GaussianBlur(im, (5, 5), 0)
            
            X[i] = np.asarray(im)
            labels.append(k)
            i += 1
        except IOError:
            continue
n_images = len(labels)
X = X[:n_images, ]
del i

In [6]:
labels_unique = list(set(labels))

Y = np.zeros((n_images, 1))
for i in range(n_images):
    Y[i, 0] = labels_unique.index(labels[i])

In [7]:
print(X.shape)
print(Y.shape)
print(np.min(X), np.max(X))

(4356, 224, 224, 3)
(4356, 1)
0.0 255.0


## Normalization

In [8]:
for i in range(X.shape[0]):
    m = np.min(X[i,])
    X[i] = (X[i,] - m) / (np.max(X[i,]) - m)

print(np.min(X), np.max(X))

0.0 1.0


# Reducing memory size

In [9]:
# Function to reduce the numpy array size
def reduce_mem_usage(a, verbose=True):
    numerics = ['int16', 'int32', 'int64', 'float16', 'float32', 'float64']
    start_mem = a.nbytes / 1024**2
    dtype = a.dtype
    if dtype in numerics:
        c_min = a.min()
        c_max = a.max()
        if str(dtype)[:3] == 'int':
            if c_min > np.iinfo(np.int8).min and c_max < np.iinfo(np.int8).max:
                a = a.astype(np.int8)
            elif c_min > np.iinfo(np.int16).min and c_max < np.iinfo(np.int16).max:
                a = a.astype(np.int16)
            elif c_min > np.iinfo(np.int32).min and c_max < np.iinfo(np.int32).max:
                a = a.astype(np.int32)
            elif c_min > np.iinfo(np.int64).min and c_max < np.iinfo(np.int64).max:
                a = a.astype(np.int64)  
        else:
            if c_min > np.finfo(np.float16).min and c_max < np.finfo(np.float16).max:
                a = a.astype(np.float16)
            elif c_min > np.finfo(np.float32).min and c_max < np.finfo(np.float32).max:
                a = a.astype(np.float32)
            else:
                a = a.astype(np.float64)
    end_mem = a.nbytes / 1024**2
    if verbose: print('Mem. usage decreased to {:5.2f} Mb ({:.1f}% reduction)'.format(end_mem, 100 * (start_mem - end_mem) / start_mem))
    return a

X = reduce_mem_usage(X)
Y = reduce_mem_usage(Y)

Mem. usage decreased to 1250.65 Mb (75.0% reduction)
Mem. usage decreased to  0.01 Mb (75.0% reduction)


# Data Augmentation

In [10]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator
img_gen_batch_size = 32
image_gen_train = ImageDataGenerator(
                    rotation_range=45,
                    width_shift_range=.1,
                    height_shift_range=.1,
                    horizontal_flip=True,
                    zoom_range=0.1)
image_gen_flow = image_gen_train.flow(X, Y, batch_size=img_gen_batch_size)
X_added = np.zeros((len(image_gen_flow) * img_gen_batch_size, *(X.shape[1:])))
Y_added = np.zeros((len(image_gen_flow) * img_gen_batch_size, *(Y.shape[1:])))
flow_len = len(image_gen_flow)
for i in range(0, flow_len):
    X_batch = image_gen_flow[i][0]
    img_gen_batch_size = X_batch.shape[0]
    X_added[i * img_gen_batch_size: (i + 1) * img_gen_batch_size,] = X_batch
    Y_added[i * img_gen_batch_size: (i + 1) * img_gen_batch_size,] = image_gen_flow[i][1]
del flow_len
del X_batch
del img_gen_batch_size

In [11]:
X = np.vstack([X, X_added])
Y = np.vstack([Y, Y_added])
del X_added
del Y_added

In [12]:
from tensorflow.keras.utils import to_categorical
Y = to_categorical(Y)
print(X.shape)
print(Y.shape)

(8740, 224, 224, 3)
(8740, 10)


# Data Splitting

In [13]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=test_size)
X_train, X_valid, y_train, y_valid = train_test_split(X_train, y_train, test_size=valid_size)

# Building CNN

In [16]:
n_classes = n_birds

model = models.Sequential()
model.add(layers.Conv2D(64, 3, strides=1, input_shape=(img_dim[0], img_dim[1], 3)))
model.add(layers.Conv2D(64, 3, strides=1))
model.add(layers.MaxPooling2D(2, strides=2))

model.add(layers.Conv2D(128, 3, strides=1))
model.add(layers.Conv2D(128, 3, strides=1))
model.add(layers.MaxPooling2D(2, strides=2))

model.add(layers.Conv2D(256, 3, strides=1))
model.add(layers.Conv2D(256, 3, strides=1))
model.add(layers.Conv2D(256, 3, strides=1))
model.add(layers.MaxPooling2D(2, strides=2))

model.add(layers.Conv2D(512, 3, strides=1))
model.add(layers.Conv2D(512, 3, strides=1))
model.add(layers.Conv2D(512, 3, strides=1))
model.add(layers.MaxPooling2D(2, strides=2))

model.add(layers.Conv2D(512, 3, strides=1))
model.add(layers.Conv2D(512, 3, strides=1))
model.add(layers.Conv2D(512, 3, strides=1))
model.add(layers.MaxPooling2D(2, strides=2))

model.add(layers.Flatten())
model.add(layers.Dense(10, activation='relu'))
model.add(layers.Dense(n_classes, activation='relu'))
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_13 (Conv2D)           (None, 222, 222, 64)      1792      
_________________________________________________________________
conv2d_14 (Conv2D)           (None, 220, 220, 64)      36928     
_________________________________________________________________
max_pooling2d_5 (MaxPooling2 (None, 110, 110, 64)      0         
_________________________________________________________________
conv2d_15 (Conv2D)           (None, 108, 108, 128)     73856     
_________________________________________________________________
conv2d_16 (Conv2D)           (None, 106, 106, 128)     147584    
_________________________________________________________________
max_pooling2d_6 (MaxPooling2 (None, 53, 53, 128)       0         
_________________________________________________________________
conv2d_17 (Conv2D)           (None, 51, 51, 256)      

# Training

In [17]:
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

history = model.fit(X_train, y_train, epochs=20, validation_data=(X_test, y_test))

Train on 6292 samples, validate on 1748 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


# Evaluation

In [18]:
test_loss, test_acc = model.evaluate(X_test,  y_test, verbose=2)

1748/1 - 4s - loss: 4.8349 - accuracy: 0.1035


In [19]:
# Save the weights
model.save('model.h5')

In [20]:
del model
del history