In [1]:
from __future__ import print_function, division

import numpy as np
import pandas as pd
import json, os, csv, shutil
from glob import glob
import matplotlib.pyplot as plt

In [2]:
import keras
from keras.models import Sequential, Model
from keras.layers import Conv2D, MaxPooling2D
from keras.layers import Dense, Dropout, Lambda, Activation, Flatten, Input
from keras.preprocessing.image import ImageDataGenerator
from keras.optimizers import Adam, RMSprop, SGD
from keras.utils import np_utils

import h5py
import cv2
import PIL
from PIL import Image

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.
  return f(*args, **kwds)


In [3]:
# Root data directory (relative to the working directory); the cells below read
# its train/, valid/ and annos/ sub-folders. The trailing slash matters because
# several paths are built by plain string concatenation.
path = 'data/'

In [4]:
# Mini-batch size handed to model.fit further down.
batch_size = 32

# Data preprocessing

In [5]:
# Every entry of train/ is treated as one class label.
# NOTE(review): os.listdir returns entries in arbitrary order, so the class
# indices derived from this list can differ between machines -- consider
# sorting (and re-running everything below) if a stable mapping is needed.
dir_names = os.listdir(os.path.join(path, 'train/'))
dir_names

['NoF', 'OTHER', 'ALB', 'DOL', 'BET', 'LAG', 'YFT', 'SHARK']

In [6]:
# Class name -> integer index, following the order of dir_names.
dir_dict = {name: idx for idx, name in enumerate(dir_names)}

In [7]:
# Show the class-name -> index mapping.
dir_dict

{'ALB': 2,
 'BET': 4,
 'DOL': 3,
 'LAG': 5,
 'NoF': 0,
 'OTHER': 1,
 'SHARK': 7,
 'YFT': 6}

In [9]:
# Accumulators filled by the training-set loading loop:
#   X_train      - resized image arrays
#   y_train      - one-hot class labels
#   org_size     - original [width, height] of each image
#   trn_filename - image file names
X_train, y_train, org_size, trn_filename = [], [], [], []

In [10]:
# Walk <path>/train/<class>/<image> and collect, per image: the 224x224 resized
# pixels, a one-hot class label, the original [width, height] and the file name.
train_dir = os.path.join(path, 'train')
for folder in os.listdir(train_dir):
    folder_path = os.path.join(train_dir, folder)
    if not os.path.isdir(folder_path):
        # Stray files sitting next to the class folders are not classes.
        continue

    for file in os.listdir(folder_path):
        file_path = os.path.join(folder_path, file)
        if not os.path.isfile(file_path):
            continue

        input_img = cv2.imread(file_path)
        if input_img is None:
            # cv2.imread reports an unreadable / non-image file by returning
            # None instead of raising, which would crash on .shape below.
            print('Skipping unreadable image: ' + file_path)
            continue

        # Keep the original size: the bounding boxes are rescaled with it later.
        height, width, channel = input_img.shape
        input_img = cv2.resize(input_img, (224, 224))
        X_train.append(input_img)

        y_cat = np_utils.to_categorical(dir_dict[folder], 8) # 8 represent number of class
        y_train.append(y_cat)

        org_size.append([width, height])
        trn_filename.append(file)

In [11]:
# Stack the per-image Python lists into NumPy arrays.
# NOTE(review): after this cell the names hold arrays, so re-running the loading
# loop (which calls .append) requires re-running the list-initialisation cell first.
X_train = np.asarray(X_train)
y_train = np.asarray(y_train)

In [12]:
# Sanity check: array shapes plus the first few original sizes and file names.
print(X_train.shape)
print(y_train.shape)
print(org_size[0:5])
print(trn_filename[0:5])

(3025, 224, 224, 3)
(3025, 8)
[[1334, 750], [1276, 718], [1280, 720], [1280, 720], [1280, 720]]
['img_04395.jpg', 'img_06596.jpg', 'img_00681.jpg', 'img_04430.jpg', 'img_01560.jpg']


In [13]:
# Spot-check a single one-hot label row.
y_train[466]

array([0., 1., 0., 0., 0., 0., 0., 0.])

In [19]:
# Accumulators filled by the validation-set loading loop:
#   X_valid      - resized image arrays
#   y_valid      - one-hot class labels
#   val_size     - original [width, height] of each image
#   val_filename - image file names
X_valid, y_valid, val_size, val_filename = [], [], [], []

In [20]:
# Walk <path>/valid/<class>/<image> and collect, per image: the 224x224 resized
# pixels, a one-hot class label, the original [width, height] and the file name.
valid_dir = os.path.join(path, 'valid')
for folder in os.listdir(valid_dir):
    folder_path = os.path.join(valid_dir, folder)
    if not os.path.isdir(folder_path):
        # Stray files sitting next to the class folders are not classes.
        continue

    for file in os.listdir(folder_path):
        file_path = os.path.join(folder_path, file)
        if not os.path.isfile(file_path):
            continue

        input_img = cv2.imread(file_path)
        if input_img is None:
            # cv2.imread reports an unreadable / non-image file by returning
            # None instead of raising, which would crash on .shape below.
            print('Skipping unreadable image: ' + file_path)
            continue

        # Keep the original size: the bounding boxes are rescaled with it later.
        height, width, channel = input_img.shape
        input_img = cv2.resize(input_img, (224, 224))
        X_valid.append(input_img)

        y_cat = np_utils.to_categorical(dir_dict[folder], 8) # 8 represent number of class
        y_valid.append(y_cat)

        val_size.append([width, height])
        val_filename.append(file)

In [21]:
# Stack the per-image Python lists into NumPy arrays.
# NOTE(review): after this cell the names hold arrays, so re-running the loading
# loop (which calls .append) requires re-running the list-initialisation cell first.
X_valid = np.asarray(X_valid)
y_valid = np.asarray(y_valid)

In [22]:
# Sanity check: array shapes plus the first few original sizes and file names.
print(X_valid.shape)
print(y_valid.shape)
print(val_size[0:5])
print(val_filename[0:5])

(752, 224, 224, 3)
(752, 8)
[[1192, 670], [1280, 720], [1276, 718], [1192, 670], [1280, 720]]
['img_05666.jpg', 'img_02685.jpg', 'img_06839.jpg', 'img_07847.jpg', 'img_05936.jpg']


In [23]:
# Spot-check the first one-hot validation label.
y_valid[0]

array([1., 0., 0., 0., 0., 0., 0., 0.])

# Bounding boxes (BB)

In [24]:
import ujson as json

In [25]:
# Lower-case class names used to build the annotation file names
# (<path>annos/<class>_labels.json). There is no entry for the 'NoF' folder.
anno_classes = ['alb', 'bet', 'dol', 'lag', 'other', 'shark', 'yft']

In [26]:
# Map image file name -> bounding-box annotation dict, read from the per-class
# files <path>annos/<class>_labels.json.
bb_json = {}

for c in anno_classes:
    # Context manager so each annotation file is closed as soon as it is parsed.
    with open('{}annos/{}_labels.json'.format(path, c), 'r') as anno_file:
        j = json.load(anno_file)

    for l in j:
        annotations = l.get('annotations')
        if annotations:
            # When an image carries several boxes only the last one is kept.
            bb_json[l['filename']] = annotations[-1]
        # Images without annotations are skipped on purpose: a bare
        # bb_json[...] lookup here would raise KeyError for them. File names
        # still missing from bb_json receive an all-zero box further down.

In [27]:
# Inspect the stored annotation of one image.
bb_json['img_07763.jpg']

{'class': 'rect',
 'height': 127.00000000000045,
 'width': 121.00000000000045,
 'x': 636.0000000000023,
 'y': 353.00000000000125}

In [36]:
# All-zero placeholder box assigned to images that have no entry in bb_json.
empty_bbox = {'height': 0., 'width': 0., 'x': 0., 'y': 0.}

In [37]:
# Give every training / validation image without an annotation the shared
# all-zero placeholder box, so later lookups by file name never miss.
for fname in trn_filename + val_filename:
    if fname not in bb_json:
        bb_json[fname] = empty_bbox

In [28]:
# Empty per-split lists for resize dimensions.
trn_resize_dim, val_resize_dim = [], []

In [63]:
def convert_bb(img, width, height, target_size=224.):
    """Rescale the bounding box of ``img`` to the resized image's coordinates.

    The annotation is looked up in the module-level ``bb_json`` dict and is
    scaled by ``target_size / width`` horizontally and ``target_size / height``
    vertically.

    Args:
        img: file name used as the key into ``bb_json``.
        width: width of the original image.
        height: height of the original image.
        target_size: side length of the square resized image. Defaults to
            224, the size used by the image-loading cells.

    Returns:
        A list ``[height, width, x, y]`` describing the box in resized-image
        coordinates; ``x`` and ``y`` are clamped so they are never negative.

    Raises:
        KeyError: if ``img`` has no entry in ``bb_json``.
        ZeroDivisionError: if ``width`` or ``height`` is zero.
    """
    box = bb_json[img]
    conv_x = float(target_size) / width
    conv_y = float(target_size) / height
    return [
        box['height'] * conv_y,
        box['width'] * conv_x,
        max(box['x'] * conv_x, 0),
        max(box['y'] * conv_y, 0),
    ]

In [64]:
# Number of file names that now have a bounding-box entry.
len(bb_json.keys())

4982

In [65]:
# Per-split accumulators for the rescaled bounding boxes.
trn_bbox, val_bbox = [], []

In [66]:
# Rescale every training image's bounding box; org_size holds the original
# [width, height] of the image at the same index.
for idx, fname in enumerate(trn_filename):
    orig_w, orig_h = org_size[idx][0], org_size[idx][1]
    trn_bbox.append(convert_bb(fname, orig_w, orig_h))

In [67]:
# Rescale every validation image's bounding box; val_size holds the original
# [width, height] of the image at the same index.
for idx, fname in enumerate(val_filename):
    orig_w, orig_h = val_size[idx][0], val_size[idx][1]
    val_bbox.append(convert_bb(fname, orig_w, orig_h))

In [74]:
# Convert the bounding-box lists to arrays so they can be passed to model.fit
# as targets for the 'bb' output.
trn_bbox = np.asarray(trn_bbox)
val_bbox = np.asarray(val_bbox)

In [75]:
# Spot-check one converted box from each split ([height, width, x, y] order).
print(trn_bbox[-1200])
print(val_bbox[-600])

[33.39208926 47.58642591 52.3658486  12.09023922]
[ 16.62407892  20.98789964  39.48218744 131.88435945]


# Neural network (NN)

In [71]:
# Two-headed CNN: a shared convolutional trunk feeds a 4-value bounding-box
# regressor ('bb') and an 8-way softmax classifier ('class').
input_img = Input(shape=(224, 224, 3))

x = Conv2D(32, (3, 3), padding = 'same', activation = 'relu')(input_img)
# x = Conv2D(32, (3, 3), padding = 'same', activation = 'relu')(x)
x = MaxPooling2D((2, 2))(x)

x = Conv2D(64, (3, 3), padding = 'same', activation = 'relu')(x)
# x = Conv2D(64, (3, 3), padding = 'same', activation = 'relu')(x)
x = MaxPooling2D((2, 2))(x)

x = Flatten()(x)
x = Dense(512, activation = 'relu')(x)
# x = Dense(512, activation = 'relu')(x)

# Linear activation for the box regression, softmax for the class scores.
x_bb = Dense(4, name='bb')(x)
x_class = Dense(8, activation='softmax', name='class')(x)

model = Model([input_img], [x_bb, x_class])
# Losses and loss weights are listed in output order (bb, class); the box MSE
# is weighted by 0.001 against a weight of 1 for the classification loss.
# Accuracy is only requested for the 'class' head: it is not a meaningful
# metric for the regressed box coordinates.
model.compile(Adam(lr=0.001), loss=['mse', 'categorical_crossentropy'],
              metrics={'class': 'accuracy'},
              loss_weights=[.001, 1.])

In [72]:
# Print the layer-by-layer architecture with output shapes and parameter counts.
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 224, 224, 3)  0                                            
__________________________________________________________________________________________________
conv2d_1 (Conv2D)               (None, 224, 224, 32) 896         input_1[0][0]                    
__________________________________________________________________________________________________
max_pooling2d_1 (MaxPooling2D)  (None, 112, 112, 32) 0           conv2d_1[0][0]                   
__________________________________________________________________________________________________
conv2d_2 (Conv2D)               (None, 112, 112, 64) 18496       max_pooling2d_1[0][0]            
__________________________________________________________________________________________________
max_poolin

In [76]:
# Train both heads jointly; the targets are listed in the same order as the
# model outputs (bounding box first, class second). `epochs` is the Keras 2
# name of the argument formerly called `nb_epoch`.
model.fit(X_train, [trn_bbox, y_train], batch_size=batch_size, epochs=3,
          validation_data=(X_valid, [val_bbox, y_valid]))

  


Train on 3025 samples, validate on 752 samples
Epoch 1/3
 384/3025 [==>...........................] - ETA: 40:58 - loss: 23663.9363 - bb_loss: 23648697.9042 - class_loss: 15.2366 - bb_acc: 0.2266 - class_acc: 0.0547

KeyboardInterrupt: 