## Import library

In [52]:
import numpy as np
import os
import pandas as pd
import seaborn as sns
from datetime import datetime
from pathlib import Path
from glob import glob # parse the files name
from PIL import Image
import matplotlib.pylab as plt
from sklearn.preprocessing import StandardScaler, OneHotEncoder, LabelEncoder
from sklearn.model_selection import train_test_split

from subprocess import check_output

In [41]:
import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras import backend as k
from keras.utils import plot_model
from keras.preprocessing.image import ImageDataGenerator
from keras.preprocessing import image
from keras.utils import multi_gpu_model
from keras.applications import VGG16
from keras.callbacks import TensorBoard
from keras.callbacks import EarlyStopping

In [3]:
# Ask CUDA to enumerate devices by PCI bus id.
os.environ.update(CUDA_DEVICE_ORDER="PCI_BUS_ID")
# os.environ["CUDA_VISIBLE_DEVICES"] = "0,1"  # '' would force CPU-only execution

## Set data path

In [4]:
# Dataset layout: <main_path>/train and <main_path>/test hold the JPEG files,
# train_data_clean.csv holds the table with the 'Image' and 'Id' columns.
main_path = Path('../../../data/humpback-whale-identification-challenge')
train_path, test_path = main_path.joinpath('train'), main_path.joinpath('test')
train_images = glob(str(train_path.joinpath('*.jpg')))
test_image = glob(str(test_path.joinpath('*.jpg')))
df = pd.read_csv(str(main_path.joinpath('train_data_clean.csv')))

In [5]:
# Keep only the globbed training files that are listed in the cleaned CSV.
# Membership is tested against a set, so each lookup is O(1) instead of a
# scan over the whole list for every file.
check_image_list = df['Image'].tolist()
clean_image_set = set(check_image_list)
clean_train_images = [im for im in train_images if im in clean_image_set]
print('Number of clean images:{}'.format(len(clean_train_images)))

Number of clean images:8295


In [6]:
# Report how many JPEG files were globbed from the train and test folders.
print('Number of train images: {}'.format(len(train_images)))
print('Number of test images: {}'.format(len(test_image)))

Number of train images: 9850
Number of test images: 15610


In [7]:
# Map every image path in the CSV to its whale Id.
# df['Image'] = df['Image'].map(lambda x: str(train_path / x))  # already done when the CSV was cleaned
ImageToLabelDict = {path: label for path, label in zip(df['Image'], df['Id'])}

In [8]:
# Inspect format, size and colour mode of the first image listed in the CSV.
# The context manager closes the underlying file handle; .iloc[0] selects the
# first row by position, so it does not depend on the frame's index labels.
with Image.open(df['Image'].iloc[0]) as im:
    print(im.format, im.size, im.mode)

JPEG (699, 500) L


In [32]:
%%time
SIZE = 100
def ImportImage(filename):
    """Load an image file as a (SIZE, SIZE, 3) RGB array.

    Parameters
    ----------
    filename : str or path-like
        Path of the image to open.

    Returns
    -------
    numpy.ndarray
        The image converted to RGB and resized to SIZE x SIZE.

    Notes
    -----
    Every input is converted to RGB, so grayscale, palette, CMYK and RGBA
    files all end up with exactly three channels (grayscale is replicated
    across the channels, as before). The file handle is closed on return.
    """
    with Image.open(filename) as img:
        # The array must be built while the file is still open.
        return np.array(img.convert('RGB').resize((SIZE, SIZE)))
# Load every image listed in the CSV, in row order, into one uint8 array.
x_train = np.array(list(map(ImportImage, df['Image'].values)), dtype=np.uint8)
print('{} train samples'.format(x_train.shape[0]))

8295 train samples


In [10]:
# Histogram of class sizes: for each class size k, how many Ids have exactly
# k rows in the CSV. Series.items() replaces iteritems(), which was removed
# in pandas 2.0.
print('Number of sample/class\t number of classes')
class_size_counts = df['Id'].value_counts().value_counts().sort_index()
for index, val in class_size_counts.items():
    print('{}\t\t\t {}'.format(index, val))

Number of sample/class	 number of classes
1			 2401
2			 961
3			 373
4			 143
5			 61
6			 41
7			 25
8			 17
9			 10
10			 8
11			 5
12			 4
13			 8
14			 6
15			 2
16			 3
17			 3
18			 1
19			 1
20			 1
21			 2
22			 1
23			 1
24			 1
27			 1
634			 1


The table shows that the classes are very **unbalanced**: one class has 634 samples, while 2401 classes have only a single example in the training set. This calls for a lot of data augmentation.

## One hot encoding on the labels
A `LabelEncoder` and a `OneHotEncoder` are composed to one-hot encode the target labels (the whale Ids).

In [11]:
class LabelOneHotEncoder():
    """One-hot encode arbitrary labels by chaining two encoders.

    ``self.le`` (LabelEncoder) maps labels to integer codes and ``self.ohe``
    (OneHotEncoder) maps those codes to one-hot rows.
    """
    def __init__(self):
        self.le = LabelEncoder()    # label -> integer code
        self.ohe = OneHotEncoder()  # integer code -> one-hot row

    def fit_transform(self, x):
        """Fit both encoders on the labels ``x`` and return their one-hot encoding."""
        codes = self.le.fit_transform(x)
        return self.ohe.fit_transform(codes.reshape(-1, 1))

    def transform(self, x):
        """One-hot encode the labels ``x`` with the already fitted encoders."""
        # The label encoder works on a 1-D sequence; only its integer output
        # is reshaped into the single-column 2-D form the one-hot encoder takes.
        codes = self.le.transform(np.asarray(x).ravel())
        return self.ohe.transform(codes.reshape(-1, 1))

    def inverse_transform(self, x):
        """Map one-hot rows ``x`` back to the original labels."""
        # The one-hot encoder returns a single-column 2-D array of codes;
        # flatten it before handing it to the label encoder.
        codes = np.asarray(self.ohe.inverse_transform(x)).ravel()
        return self.le.inverse_transform(codes)

    # Backward-compatible alias for the original (misspelled) method name.
    inverse_tranform = inverse_transform

    def inverse_labels(self, x):
        """Map integer class codes ``x`` back to the original labels."""
        return self.le.inverse_transform(x)

In [13]:
def check_data(x):
    """Return ``x`` unchanged when it is not None, otherwise return None.

    Used as a ``filter`` predicate, so entries whose returned value is falsy
    (None included) are dropped by the caller.
    """
    if x is None:
        return None
    return x
    

In [14]:
# Take the labels straight from the CSV rows so that y[i] is the label of
# x_train[i]: x_train is built by iterating df['Image'] in row order, whereas
# the globbed train_images list comes back in an arbitrary directory order.
y = df['Id'].tolist()
assert len(y) == x_train.shape[0], 'labels and images must have the same number of rows'
lohe = LabelOneHotEncoder()
y_cat = lohe.fit_transform(y)

In [65]:
# Display the one-hot encoded label matrix returned by lohe.fit_transform.
y_cat

<8295x4081 sparse matrix of type '<class 'numpy.float64'>'
	with 8295 stored elements in Compressed Sparse Row format>

In [15]:
# Number of labels next to the shape of their one-hot encoding.
len(y), y_cat.shape

(8295, (8295, 4081))

## Image augmentation with Keras

In [16]:
# Inputs for the Keras image generator used for preprocessing and augmentation.
y_train = y_cat
# Make the image tensor explicitly (n_samples, SIZE, SIZE, 3).
x_train = np.reshape(x_train, (-1, SIZE, SIZE, 3))
# Shape of a single sample, used as the network's input shape.
input_shape = x_train[0].shape

In [17]:
# Augmenting generator: rescales pixels to [0, 1] and applies random
# rotations, shifts and horizontal flips.
image_gen = ImageDataGenerator(
    #featurewise_center=True,
    #featurewise_std_normalization=True,
    rescale=1./255,
    rotation_range=15,
    width_shift_range=0.15,
    height_shift_range=0.15,
    horizontal_flip=True
)
# fit() only computes data-dependent statistics (mean / std / ZCA components),
# so it is only needed when one of those options is enabled. With the current
# settings it would just build an augmented float copy of x_train for nothing.
if (image_gen.featurewise_center
        or image_gen.featurewise_std_normalization
        or image_gen.zca_whitening):
    image_gen.fit(x_train, augment=True)


## Building and training model

In [66]:
batch_size = 256
# Number of classes = number of one-hot columns. Reading it from the shape
# avoids densifying the whole sparse label matrix just to measure one row.
num_classes = y_cat.shape[1]
# NOTE(review): this is the "batches per pass over the data" formula, yet it
# is used as the number of epochs. Value kept as-is; confirm it is intended.
epochs = x_train.shape[0]//batch_size + 1
print('x_train shape:', x_train.shape)
print(x_train.shape[0], 'train samples')


x_train shape: (8295, 100, 100, 3)
8295 train samples


In [67]:
# VGG16 convolutional base with ImageNet weights and without its top
# classifier, sized for the training images.
conv_base = VGG16(
    weights='imagenet',
    include_top=False,
    input_shape=input_shape,
)
# Empty Sequential container; the base and the dense head are added to it later.
model = Sequential()

In [68]:
# Print the layer-by-layer summary of the VGG16 convolutional base.
conv_base.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_2 (InputLayer)         (None, 100, 100, 3)       0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 100, 100, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 100, 100, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 50, 50, 64)        0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 50, 50, 128)       73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 50, 50, 128)       147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 25, 25, 128)       0         
__________

In [69]:
# Freeze the convolutional base: mark each of its layers as non-trainable.
for base_layer in conv_base.layers:
    base_layer.trainable = False

In [70]:
# Print the summary again now that every layer of the base is marked non-trainable.
conv_base.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_2 (InputLayer)         (None, 100, 100, 3)       0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 100, 100, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 100, 100, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 50, 50, 64)        0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 50, 50, 128)       73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 50, 50, 128)       147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 25, 25, 128)       0         
__________

In [71]:
# Re-enable training for the second-to-last layer of the convolutional base only.
# NOTE(review): presumably layers[-1] is a weight-less pooling layer, which would
# make layers[-2] the last layer with trainable weights — confirm against the
# full conv_base.summary() output.
conv_base.layers[-2].trainable = True

In [72]:
# Stack the convolutional base and a small dense classifier head:
# flatten -> (dropout -> 48-unit ReLU) x 2 -> dropout -> softmax over all classes.
network_layers = [
    conv_base,
    Flatten(),
    Dropout(0.5),
    Dense(48, activation='relu'),
    Dropout(0.5),
    Dense(48, activation='relu'),
    Dropout(0.5),
    Dense(num_classes, activation='softmax'),
]
for network_layer in network_layers:
    model.add(network_layer)

In [73]:
# Print the convolutional base's summary once more.
# NOTE(review): this only covers conv_base; model.summary() would also list the
# dense head added to `model` — confirm which one was intended here.
conv_base.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_2 (InputLayer)         (None, 100, 100, 3)       0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 100, 100, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 100, 100, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 50, 50, 64)        0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 50, 50, 128)       73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 50, 50, 128)       147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 25, 25, 128)       0         
__________

In [74]:
# Toggle: 1 replicates the model on 2 GPUs, 0 compiles the plain model.
isMultiGPU = 1
# Compile settings shared by both variants.
compile_kwargs = dict(loss=keras.losses.categorical_crossentropy,
                      optimizer=keras.optimizers.Adadelta(),
                      metrics=['accuracy'])
if isMultiGPU:
    parallel_model = multi_gpu_model(model, 2)
    parallel_model.compile(**compile_kwargs)
else:
    model.compile(**compile_kwargs)

In [75]:
# Render the architecture of `model` to the image file whale_model_01.png.
plot_model(model, to_file='whale_model_01.png')

In [77]:
# Callbacks for training: stop early once the training loss has failed to
# improve by at least 0.001 for 5 consecutive epochs.
tbCallBack = [keras.callbacks.EarlyStopping(monitor='loss',
                                            min_delta=0.001,
                                            patience=5,
                                            verbose=0,
                                            mode='auto')]

# Optional TensorBoard logging (disabled):
#keras.callbacks.TensorBoard(log_dir='/tmp/tensorboard_log',
#                                          histogram_freq=1,
#                                          write_graph=True,
#                                          write_images=True),

# Train whichever model was compiled, with identical settings in both cases.
# The single-GPU call used to pass class_weight=class_weight_dict, a name that
# is never defined in this notebook (NameError). It is dropped so both paths
# behave the same; define the dict and pass class_weight= to weight classes.
training_model = parallel_model if isMultiGPU else model
training_model.fit_generator(image_gen.flow(x_train, y_train.toarray(),
                                            batch_size=batch_size),
                             steps_per_epoch=x_train.shape[0]//batch_size,
                             epochs=epochs,
                             verbose=1,
                             callbacks=tbCallBack)

Epoch 1/33
Epoch 2/33
Epoch 3/33
Epoch 4/33
Epoch 5/33
Epoch 6/33
Epoch 7/33
Epoch 8/33


## Predictions on test samples and export for submission

In [81]:
%%time
import warnings
import os
# Write the submission CSV: one row per test image holding the file name and
# its five most probable Ids, most likely first, separated by spaces.
sub_name = 'submission_{}.csv'.format(datetime.now().strftime('%Y%m%d%H%M%S'))
with open(str(main_path / sub_name), 'w') as f:
    f.write("Image,Id\n")
    with warnings.catch_warnings():
        warnings.filterwarnings('ignore', category=DeprecationWarning)
        for img_path in test_image:
            tmp_img = ImportImage(img_path)
            # Same rescaling as in training, applied to a batch of one image.
            x = image_gen.standardize(
                tmp_img.astype('float32').reshape(1, SIZE, SIZE, 3))
            # Named `probs` so the loop does not overwrite the notebook-level
            # label list `y`. The softmax output of predict() already holds
            # the class probabilities.
            probs = model.predict(x)
            predicted_args = np.argsort(probs)[0][::-1][:5]
            predicted_tags = lohe.inverse_labels(predicted_args)
            f.write('%s,%s\n' % (os.path.basename(img_path),
                                 " ".join(predicted_tags)))
            

CPU times: user 4min 12s, sys: 12.3 s, total: 4min 24s
Wall time: 6min 53s


In [48]:
# Sanity check: shape of the last loaded test image and of the single-image
# batch that was passed to the network.
tmp_img.shape, x.shape

((100, 100, 3), (1, 100, 100, 3))