In [1]:
import time
import math
import random
import numpy as np
import sklearn
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras import datasets, layers, models
%matplotlib inline

In [2]:
import os
# Expose only the GPUs with ids 4 and 5 to this process.
# NOTE(review): the ids are specific to the machine this notebook was run on;
# presumably this has to execute before TensorFlow first initialises the GPUs
# for it to have any effect - confirm when moving to another host.
os.environ['CUDA_VISIBLE_DEVICES'] = '4,5'
# Bare expression so the notebook displays whether TensorFlow can see a GPU.
# NOTE(review): TensorFlow 2.x documents tf.test.is_gpu_available as deprecated
# in favour of tf.config.list_physical_devices('GPU') - confirm the installed
# version before switching.
tf.test.is_gpu_available()

True

# Configs & Hyperparameters

In [3]:
# --- image geometry ---------------------------------------------------------
# Target size handed to cv2.resize; images smaller than this are skipped.
img_dim = (224, 224)

# --- dataset split / batching -----------------------------------------------
# Fraction of the samples held out as the test set by train_test_split.
test_size = 0.2
# Mini-batch size (not referenced by the cells visible in this notebook section).
batch_size = 32

# --- filesystem locations (relative to the working directory) ---------------
# Root folder whose entries are treated as one sub-directory per bird species.
data_path = 'data-filtered'
# Cache folder (not referenced by the cells visible in this notebook section).
cache_path = 'cache'

# Data Loading

In [4]:
import os

# Collect, per bird species, the list of image file paths found under data_path.
# Results: bird_file_map (species -> paths), n_images (total number of paths)
# and n_birds (number of species actually kept).

n_birds = 10 # maximum number of kinds of birds to categorize
bird_file_map = {}
least_image_n = 1 # a species is kept only if it has at least this many files

# array of bird names; sorted so the first n_birds species are always the same
birdList = sorted(os.listdir(data_path))
loadedImages = []  # not used in this cell; kept in case another cell reads it

n_images = 0
n_birds_loaded = 0
for b in birdList:
    if n_birds_loaded >= n_birds:
        break
    curdir = os.path.join(data_path, b)
    # Check first, announce second: stray files in data_path are not species
    # folders and must not be reported as being loaded.
    if not os.path.isdir(curdir):
        continue
    print("Loading images for '" + b + "'")
    # os.listdir returns entries in arbitrary order; sorting keeps the row
    # order of the image array built from these paths reproducible.
    img_files = sorted(os.listdir(curdir))

    filenames = [os.path.join(curdir, f) for f in img_files]
    n_f = len(filenames)
    if n_f >= least_image_n: # use data only if at least least_image_n files are found
        bird_file_map[b] = filenames
        print(n_f, "images loaded for '" + b + "'")
        n_birds_loaded += 1
        n_images += n_f
    else:
        print("Not enough data for '" + b + "'")

# From here on n_birds is the number of species actually collected.
n_birds = len(bird_file_map.keys())

Loading images for 'Aberrant Bush-Warbler'
270 images loaded for 'Aberrant Bush-Warbler'
Loading images for 'Ala Shan Redstart'
401 images loaded for 'Ala Shan Redstart'
Loading images for 'Aleutian Tern'
282 images loaded for 'Aleutian Tern'
Loading images for 'Altai Snowcock'
244 images loaded for 'Altai Snowcock'
Loading images for 'American Wigeon'
486 images loaded for 'American Wigeon'
Loading images for 'Arctic Warbler'
329 images loaded for 'Arctic Warbler'
Loading images for 'Ashy Bulbul'
310 images loaded for 'Ashy Bulbul'
Loading images for 'Ashy Drongo'
412 images loaded for 'Ashy Drongo'
Loading images for 'Ashy Minivet'
443 images loaded for 'Ashy Minivet'
Loading images for 'Ashy Wood Pigeon'
335 images loaded for 'Ashy Wood Pigeon'
Loading images for 'Ashy Woodswallow'
306 images loaded for 'Ashy Woodswallow'
Loading images for 'Ashy-throated Parrotbill'
252 images loaded for 'Ashy-throated Parrotbill'
Loading images for 'Ashy-throated Warbler'
442 images loaded for 'As

In [5]:
import cv2
n_channels = 3
# One slot per listed file; slots belonging to skipped files are trimmed off
# once the loop has finished.
X = np.zeros(shape=(n_images, img_dim[0], img_dim[1], n_channels))
labels = []

i = 0  # index of the next free row in X
for k, v in bird_file_map.items():
    for file in v:
        try:
            im = cv2.imread(file)
            # Unreadable file: imread handed back None instead of an array.
            if im is None:
                continue
            # Skip images that are smaller than the target size in either dimension.
            if not (im.shape[0] >= img_dim[0] and im.shape[1] >= img_dim[1]):
                continue
            shape = im.shape
            assert len(shape) == 3 # two spatial axes plus the colour-channel axis
            # three colour channels
            # NOTE(review): OpenCV documents imread as returning BGR channel
            # order - confirm that downstream code expects BGR rather than RGB.
            assert shape[-1] == 3

            # bring every image to the common img_dim size
            im = cv2.resize(src=im, dsize=img_dim, interpolation=cv2.INTER_LINEAR)
            # Gaussian blurring (disabled)
            # im = cv2.GaussianBlur(im, (5, 5), 0)
            X[i] = im
            labels.append(k)
            i += 1
        except IOError:
            pass
# Keep only the rows that were actually filled.
n_images = len(labels)
X = X[:n_images]
del i

In [6]:
# Map every species name to an integer class index and store it in Y (n_images, 1).
# The unique names are sorted: iterating a set of strings yields an order that
# changes between interpreter runs (string hash randomisation), which would make
# the name -> index mapping, and therefore any saved labels, irreproducible.
labels_unique = sorted(set(labels))
# Dictionary lookup instead of list.index, which scans the list for every sample.
label_to_index = {name: idx for idx, name in enumerate(labels_unique)}

Y = np.zeros((n_images, 1))
for i in range(n_images):
    Y[i, 0] = label_to_index[labels[i]]

In [7]:
# Sanity check: array shapes (one per line) followed by the global value range of X.
print(X.shape, Y.shape, sep='\n')
print(X.min(), X.max())

(3790, 224, 224, 3)
(3790, 1)
0.0 255.0


## Normalization

In [8]:
# Per-image min-max scaling to [0, 1], written back into X one image at a time
# so that no second full-size copy of X is created.
for i in range(X.shape[0]):
    m = np.min(X[i])
    span = np.max(X[i]) - m
    if span > 0:
        X[i] = (X[i] - m) / span
    else:
        # Constant image (max == min): the formula above would evaluate 0/0 and
        # fill the image with NaN, so map it to all zeros instead.
        X[i] = 0.0

print(np.min(X), np.max(X))

0.0 1.0


# Reducing memory size

In [9]:
# Function to reduce the numpy array size
def reduce_mem_usage(a, verbose=True):
    """Downcast a NumPy array to the narrowest dtype that covers its value range.

    Only arrays whose dtype is int16/int32/int64 or float16/float32/float64 are
    considered. Integer arrays are tried against int8, int16, int32, int64 and
    float arrays against float16, float32, float64, in that order; the first
    dtype whose [min, max] range contains every value of ``a`` is used.

    Arrays of any other dtype, empty arrays, and float arrays whose min/max is
    NaN or infinite are returned unchanged (the array is never widened).

    Note: the float check looks at the value *range* only. Casting to float16
    or float32 can lose precision (float16 keeps roughly three significant
    decimal digits and represents integers exactly only up to 2048).

    Parameters
    ----------
    a : numpy.ndarray
        Array to shrink. It is not modified; when no cast is needed the very
        same object is returned.
    verbose : bool, default True
        If true, print the resulting size in MiB and the percentage saved.

    Returns
    -------
    numpy.ndarray
        ``a`` itself, or a downcast copy of it.
    """
    numerics = ['int16', 'int32', 'int64', 'float16', 'float32', 'float64']
    start_mem = a.nbytes / 1024**2
    dtype = a.dtype
    # min()/max() raise on empty arrays, so those are passed through untouched.
    if a.size > 0 and dtype in numerics:
        c_min = a.min()
        c_max = a.max()
        if np.issubdtype(dtype, np.integer):
            candidates = (np.int8, np.int16, np.int32, np.int64)
            limits_of = np.iinfo
        else:
            candidates = (np.float16, np.float32, np.float64)
            limits_of = np.finfo
        for candidate in candidates:
            bounds = limits_of(candidate)
            # Inclusive bounds: a value equal to the dtype's min/max still fits.
            # NaN/inf extremes fail every comparison, leaving `a` as it is.
            if bounds.min <= c_min and c_max <= bounds.max:
                # copy=False avoids duplicating the array when the dtype
                # already is the narrowest one that fits.
                a = a.astype(candidate, copy=False)
                break
    end_mem = a.nbytes / 1024**2
    # Guard the percentage against zero-byte (empty) inputs.
    reduction = 100 * (start_mem - end_mem) / start_mem if start_mem else 0.0
    if verbose:
        print('Mem. usage decreased to {:5.2f} Mb ({:.1f}% reduction)'.format(end_mem, reduction))
    return a

# Rebind X to its downcast version (reduce_mem_usage casts float arrays to
# float16 when their value range fits).
X = reduce_mem_usage(X)
# NOTE(review): Y holds class indices; float16 represents integers exactly only
# up to 2048, so this is safe only while the number of classes stays below that.
Y = reduce_mem_usage(Y)

Mem. usage decreased to 1088.14 Mb (75.0% reduction)
Mem. usage decreased to  0.01 Mb (75.0% reduction)


# Data Splitting

In [10]:
from sklearn.model_selection import train_test_split
# Fixed seed: without random_state the shuffle differs on every run, so the
# train/test partition (and everything derived from it and saved later) could
# not be reproduced after a kernel restart.
split_seed = 42
X_train, X_test, y_train, y_test = train_test_split(
    X, Y, test_size=test_size, random_state=split_seed)
# X_train, X_valid, y_train, y_valid = train_test_split(X_train, y_train, test_size=valid_size)

In [11]:
from tensorflow.keras.utils import to_categorical
# Pass the class count explicitly. When num_classes is omitted, to_categorical
# infers it from the largest index present in the array it is given, so a split
# that happens to lack the highest class would get a narrower one-hot matrix
# than the other split.
n_classes = len(labels_unique)
y_train = to_categorical(y_train, num_classes=n_classes)
y_test = to_categorical(y_test, num_classes=n_classes)
# y_valid = to_categorical(y_valid)
print(X_train.shape, '|', X_test.shape)
print(y_train.shape, '|', y_test.shape)

(3032, 224, 224, 3) | (758, 224, 224, 3)
(3032, 20) | (758, 20)


# Data Augmentation

In [12]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Create one randomly transformed copy of every training image and append the
# copies (with their labels) to X_train / y_train.
# NOTE: this cell rebinds X_train / y_train, so running it a second time
# augments the already augmented set again.
img_gen_batch_size = 32
image_gen_train = ImageDataGenerator(
                    rotation_range=45,
                    width_shift_range=0.2,
                    height_shift_range=0.2,
                    horizontal_flip=True,
                    shear_range=0.2,
                    zoom_range=0.1)

image_gen_flow = image_gen_train.flow(X_train, y_train, batch_size=img_gen_batch_size)
flow_len = len(image_gen_flow)

# Buffers are sized by the number of training samples, not by
# n_batches * batch_size: the last batch is usually shorter, and the surplus
# rows would otherwise stay all-zero and be appended as bogus samples.
# They use the dtypes of X_train / y_train so that np.vstack below does not
# promote the (memory-reduced) training arrays back to float64.
X_added = np.zeros((X_train.shape[0], *(X_train.shape[1:])), dtype=X_train.dtype)
Y_added = np.zeros((y_train.shape[0], *(y_train.shape[1:])), dtype=y_train.dtype)

n_added = 0  # rows written so far == offset of the next batch
for i in range(0, flow_len):
    # Fetch the batch once: every indexing call re-runs the random transforms.
    X_batch, y_batch = image_gen_flow[i]
    n_batch = X_batch.shape[0]
    # Offsets come from the running total, so a short final batch lands right
    # after the previous one instead of overwriting earlier rows.
    X_added[n_added: n_added + n_batch] = X_batch
    Y_added[n_added: n_added + n_batch] = y_batch
    n_added += n_batch
print('data augmentation sucessful, {} new images in total were added'.format(n_added))
del flow_len
del X_batch
del y_batch
del n_batch
del img_gen_batch_size
# Append only the rows that were actually filled.
X_train = np.vstack([X_train, X_added[:n_added]])
y_train = np.vstack([y_train, Y_added[:n_added]])
del X_added
del Y_added

data augmentation sucessful, 3032 new images in total were added


In [21]:
import pickle

# Persist the four arrays to the working directory, one pickle file each.
# - `with` closes the file even if dump() raises (for example when the disk is full).
# - Protocol 4 is requested explicitly: it supports very large objects, whereas
#   the default protocol of Python versions before 3.8 cannot serialise a
#   single object larger than 4 GiB, which the image arrays can exceed.
arrays_to_save = (
    ('x_train.pkl', X_train),
    ('y_train.pkl', y_train),
    ('x_test.pkl', X_test),
    ('y_test.pkl', y_test),
)
for fname, arr in arrays_to_save:
    with open(fname, 'wb') as f:
        pickle.dump(arr, f, protocol=4)

ValueError: Expected 1D or 2D array, got 4D array instead