In [2]:
import os, random, glob, pickle, collections, math, json
import numpy as np
import pandas as pd
from __future__ import division
from __future__ import print_function
from PIL import Image
from sklearn.model_selection import train_test_split
from sklearn.metrics import log_loss
from sklearn.preprocessing import LabelEncoder

import matplotlib.pyplot as plt
%matplotlib inline 

from keras.models import Sequential, Model, load_model, model_from_json
from keras import layers
from keras.layers import GlobalAveragePooling2D, Flatten, Dropout, Dense, LeakyReLU, Conv2D, Input, BatchNormalization, Activation
from keras.regularizers import l2
from keras.optimizers import Adam, RMSprop
from keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau, TensorBoard
from keras.preprocessing.image import ImageDataGenerator
from keras.utils import np_utils
from keras.preprocessing import image
from keras import backend as K
K.set_image_dim_ordering('tf')

Using TensorFlow backend.


In [3]:
# --- Paths and model identifiers -------------------------------------------
TEST_DIR = '../RFCN/JPEGImages/'
RFCN_MODEL = 'resnet101_rfcn_ohem_iter_30000'
CROP_MODEL = 'resnet50_FT_Hybrid_woNoF'

# Only warn (do not abort) when the notebook is not run from a directory
# named after CROP_MODEL; the relative paths below are resolved from the cwd.
if os.getcwd().rsplit('/', 1)[-1] != CROP_MODEL:
    print('WRONG CROP_MODEL DIR!!!')

# Working directories, created on first use.
CHECKPOINT_DIR = './checkpoint/'
LOG_DIR = './log/'
OUTPUT_DIR = './output/'
for work_dir in (CHECKPOINT_DIR, LOG_DIR, OUTPUT_DIR):
    if not os.path.exists(work_dir):
        os.mkdir(work_dir)

# --- Classes -----------------------------------------------------------------
FISH_CLASSES = ['NoF', 'ALB', 'BET', 'DOL', 'LAG', 'OTHER', 'SHARK', 'YFT']
# The crop classifier has no 'NoF' output: same order as FISH_CLASSES without it.
CROP_CLASSES = [fish_class for fish_class in FISH_CLASSES if fish_class != 'NoF']

# --- Hyper-parameters ----------------------------------------------------------
TESTFRAC = 0.9          # fraction of image files that goes to the 'train' split
CONF_THRESH = 0.8
ROWS = 224              # crop height fed to the network
COLS = 224              # crop width fed to the network
BATCHSIZE = 128
LEARNINGRATE = 1e-4
def featurewise_center(x):
    """Return a copy of ``x`` with the per-channel mean subtracted.

    The mean is taken first over axis 0 and then over axes 1 and 2, keeping
    all dimensions so the result broadcasts against ``x``. The input array is
    not modified.
    """
    channel_mean = np.mean(
        np.mean(x, axis=0, keepdims=True),
        axis=(1, 2),
        keepdims=True,
    )
    return x - channel_mean

def featurewise_mean(x):
    """Return the mean of ``x`` over its first three axes.

    Axis 0 is averaged first, then the two leading axes of that result, so
    only the last axis of ``x`` remains.
    """
    return np.mean(np.mean(x, axis=0), axis=(0, 1))

def preprocess_featurewise(x, featurewise_mean):
    """Subtract a 3-element mean from ``x`` in place and return ``x``.

    ``featurewise_mean`` is reshaped to (1, 1, 3) so it broadcasts over the
    two leading axes of ``x``. Because the subtraction is in place, the
    caller's array is modified.
    """
    channel_offsets = np.reshape(featurewise_mean, [1, 1, 3])
    x -= channel_offsets
    return x

def load_img(path, bbox, target_size=None):
    """Open an image, crop it to ``bbox`` and optionally resize the crop.

    path        -- image file to open.
    bbox        -- sequence whose first four items are passed to ``crop`` as
                   (bbox[0], bbox[1], bbox[2], bbox[3]).
    target_size -- optional (rows, cols); when truthy the crop is resized to
                   (cols, rows) with bilinear resampling.

    Crops that are taller than wide are transposed first so that every
    returned crop is at least as wide as it is tall.
    """
    crop = Image.open(path).crop((bbox[0], bbox[1], bbox[2], bbox[3]))
    crop_width, crop_height = crop.size
    if crop_height > crop_width:
        # NOTE(review): 2 is presumably PIL's ROTATE_90 constant -- confirm
        # against the installed Pillow version.
        crop = crop.transpose(method=2)
    if not target_size:
        return crop
    rows, cols = target_size[0], target_size[1]
    return crop.resize((cols, rows), Image.BILINEAR)

def get_best_model(checkpoint_dir = CHECKPOINT_DIR):
    """Load the checkpoint with the lowest validation loss.

    Checkpoint files are expected to be named like
    'weights.<epoch>-<val_loss>.hdf5'; the validation loss is parsed from the
    text between the last '-' and the '.hdf5' extension.

    checkpoint_dir -- directory holding the '.hdf5' checkpoints.

    Returns a tuple ``(model, model_name)`` where ``model_name`` is the file
    name (without directory) of the checkpoint that was loaded.

    Raises ValueError when the directory holds no parsable checkpoint.
    """
    candidates = []
    # Only look at .hdf5 files so logs or temporary files left in the
    # directory cannot break the val_loss parsing.
    for path in glob.glob(os.path.join(checkpoint_dir, '*.hdf5')):
        stem = os.path.splitext(os.path.basename(path))[0]
        try:
            val_loss = float(stem.rsplit('-', 1)[-1])
        except ValueError:
            # Not named by the checkpoint pattern; ignore it.
            continue
        candidates.append((val_loss, path))
    if not candidates:
        raise ValueError('No checkpoint files found in ' + checkpoint_dir)

    # min() returns the first minimal entry, i.e. the same tie-break as
    # list.index(min(...)) on the glob order.
    best_path = min(candidates, key=lambda candidate: candidate[0])[1]
    print('Loading model from checkpoint file ' + best_path)
    model = load_model(best_path)
    model_name = os.path.basename(best_path)
    print('Loading model Done!')
    return (model, model_name)

def data_from_df(df):
    """Build the image tensor and one-hot labels for every row of ``df``.

    Each row must provide 'image_file', 'crop_class' and the box columns
    'xmin', 'ymin', 'xmax', 'ymax'. The box is cut out of TEST_DIR+image_file
    and resized to (ROWS, COLS) by ``load_img``.

    Returns ``(X, y)``: ``X`` is a uint8 array of shape
    (len(df), ROWS, COLS, 3) and ``y`` a one-hot array of shape
    (len(df), len(CROP_CLASSES)) indexed by position in CROP_CLASSES.
    """
    n_samples = df.shape[0]
    X = np.ndarray((n_samples, ROWS, COLS, 3), dtype=np.uint8)
    y = np.zeros((n_samples, len(CROP_CLASSES)), dtype=K.floatx())
    # Rows are written by position, independent of the frame's index labels.
    for position, (_, record) in enumerate(df.iterrows()):
        image_file = record['image_file']
        fish = record['crop_class']
        box = [record[column] for column in ('xmin', 'ymin', 'xmax', 'ymax')]
        crop = load_img(TEST_DIR + image_file, box, target_size=(ROWS, COLS))
        X[position] = np.asarray(crop)
        y[position, CROP_CLASSES.index(fish)] = 1
    return (X, y)

def data_load(name):
    """Return ``(X, y)`` for one split, cached on disk as a pickle.

    name -- 'train' or 'valid' (rows of the global GTbbox_df whose 'split'
            column equals ``name``) or 'all' (every row).

    The uint8 image tensor and the labels are cached in
    OUTPUT_DIR/'data_<name>_<ROWS>_<COLS>.pickle'; when that file exists it
    is loaded instead of being regenerated. ``X`` is returned as float32
    scaled by 1/255.

    Raises ValueError when no cache file exists and ``name`` is not one of
    the supported splits.
    """
    file_name = 'data_'+name+'_{}_{}.pickle'.format(ROWS, COLS)
    if os.path.exists(OUTPUT_DIR+file_name):
        print ('Loading from file '+file_name)
        # NOTE: unpickling can execute arbitrary code; only load cache files
        # that were produced by this notebook.
        with open(OUTPUT_DIR+file_name, 'rb') as f:
            data = pickle.load(f)
        X = data['X']
        y = data['y']
    else:
        print ('Generating file '+file_name)

        if name=='train' or name=='valid':
            df = GTbbox_df[GTbbox_df['split']==name]
        elif name=='all':
            df = GTbbox_df
        else:
            # Fail with a clear error instead of printing and then crashing
            # on the unbound `df` below.
            raise ValueError('Invalid name '+name)

        X, y = data_from_df(df)

        data = {'X': X,'y': y}
        with open(OUTPUT_DIR+file_name, 'wb') as f:
            pickle.dump(data, f)
    # The cache stores uint8 pixels; convert to float32 in [0, 1] on every load.
    X = X.astype(np.float32)
    X /= 255.
    return (X, y)

In [4]:
# GTbbox_df columns: ['image_file','image_class','crop_index','crop_class','xmin','ymin','xmax','ymax','split']
# One row per ground-truth bounding box; 'split' is 'train' or 'valid' and is
# assigned per image file so all boxes of one image land in the same split.

file_name = 'GTbbox_df.pickle'
if os.path.exists(OUTPUT_DIR+file_name):
    print ('Loading from file '+file_name)
    GTbbox_df = pd.read_pickle(OUTPUT_DIR+file_name)
else:
    print ('Generating file '+file_name)
    bbox_columns = ['image_file','image_class','crop_index','crop_class','xmin','ymin','xmax','ymax']

    # Image-level labels, keyed by file basename. Each line is read as a
    # 9-character basename, one separator character, then the class label.
    # setdefault keeps the first occurrence of a basename. Stripping the line
    # ending (instead of dropping the last character) also handles a final
    # line without a trailing newline.
    image_class_by_basename = {}
    with open("../RFCN/ImageSets/Main/train_test.txt","r") as f:
        for line in f:
            image_class_by_basename.setdefault(line[:9], line[10:].rstrip('\r\n'))

    # Collect rows in a list and build the frame once; appending to a
    # DataFrame row by row is quadratic.
    records = []
    for c in CROP_CLASSES:
        print(c)
        with open('../data/BBannotations/{}.json'.format(c), 'r') as f:
            annotated_images = json.load(f)
        for entry in annotated_images:
            image_file = os.path.split(entry["filename"])[1]
            basename = os.path.splitext(image_file)[0]
            if basename not in image_class_by_basename:
                # Previously a miss silently reused the class of the previous
                # image, which corrupts the stratified split below.
                raise ValueError('No image class found in train_test.txt for ' + image_file)
            image_class = image_class_by_basename[basename]
            # Only the size is needed; close the file right away.
            with Image.open(os.path.join(TEST_DIR, image_file)) as img:
                width_image, height_image = img.size
            for crop_index, a in enumerate(entry["annotations"]):
                # Clip the annotated box to the image.
                xmin = max(a["x"], 0)
                ymin = max(a["y"], 0)
                xmax = min(a["x"] + a["width"], width_image)
                ymax = min(a["y"] + a["height"], height_image)
                assert xmin < xmax, 'empty box (x) in ' + image_file
                assert ymin < ymax, 'empty box (y) in ' + image_file
                records.append([image_file, image_class, crop_index, a["class"], xmin, ymin, xmax, ymax])

    GTbbox_df = pd.DataFrame(records, columns=bbox_columns)

    # Split by image file, stratified on the image-level class.
    image_files_df = GTbbox_df[['image_file','image_class']].drop_duplicates()
    # TESTFRAC is the fraction kept for training; the remainder is validation.
    test_file_nb = image_files_df.shape[0] - int(math.ceil(image_files_df.shape[0]*TESTFRAC))
    train_files_df, valid_files_df = train_test_split(image_files_df, test_size=test_file_nb, random_state=1986, stratify=image_files_df.image_class)
    train_files = set(train_files_df.image_file)
    GTbbox_df['split'] = ['train' if image_file in train_files else 'valid' for image_file in GTbbox_df.image_file]

    GTbbox_df.to_pickle(OUTPUT_DIR+file_name)
    print('Done')

# Sanity check: crops whose class differs from the class of their image.
# GTbbox_df.loc[(GTbbox_df['image_class']!='NoF') & (GTbbox_df['crop_class']!='NoF') & (GTbbox_df['image_class']!=GTbbox_df['crop_class'])]

Loading from file GTbbox_df.pickle


In [11]:
#Load data

# Each call returns float32 images scaled to [0, 1] plus one-hot labels.
X_all, y_all = data_load('all')
X_train, y_train = data_load('train')
X_valid, y_valid = data_load('valid')
print('Loading data done.')

loaded_splits = (('all', X_all), ('train', X_train), ('valid', X_valid))

# Sample counts first, then the per-channel means, in the same split order.
for split_name, split_images in loaded_splits:
    print(split_name + ' sample ', split_images.shape[0])

for split_name, split_images in loaded_splits:
    print('featurewise mean of X_' + split_name + ' is ', featurewise_mean(split_images))


Loading from file data_all_224_224.pickle
Loading from file data_train_224_224.pickle
Loading from file data_valid_224_224.pickle
Loading data done.
all sample  4371
train sample  3948
valid sample  423
featurewise mean of X_all is  [ 0.40696082  0.43766695  0.3947188 ]
featurewise mean of X_train is  [ 0.40635169  0.43670505  0.39435506]
featurewise mean of X_valid is  [ 0.41260275  0.4466438   0.39812338]


In [12]:
# #class weight = n_samples / (n_classes * np.bincount(y))
# class_weight_fish = dict(GTbbox_df.groupby('crop_class').size())
# class_weight = {}
# n_samples = GTbbox_df.shape[0]
# for key,value in class_weight_fish.items():
#         class_weight[CROP_CLASSES.index(key)] = n_samples / (len(CROP_CLASSES)*value)
# class_weight

# Class weights from the training split only: the most frequent crop class
# gets weight 1 and every other class gets (largest count / its own count).
train_df = GTbbox_df[GTbbox_df['split']=='train']
class_weight_fish = dict(train_df.groupby('crop_class').size())
ref = max(class_weight_fish.values())
# Keys are positions in CROP_CLASSES, matching the one-hot label layout.
class_weight = {
    CROP_CLASSES.index(crop_class): ref/count
    for crop_class, count in class_weight_fish.items()
}
class_weight

{0: 1.0,
 1: 8.1035714285714278,
 2: 19.903508771929825,
 3: 24.138297872340427,
 4: 7.5132450331125824,
 5: 13.269005847953217,
 6: 3.1601671309192199}

In [13]:
#data preprocessing

# Per-channel mean over all crops (train and valid together).
all_mean = featurewise_mean(X_all)

def preprocessing_function(x):
    """Subtract the global per-channel mean from one image, in place.

    ``all_mean`` is looked up when the function is called, so it follows any
    later reassignment of that global.
    """
    channel_offsets = np.reshape(all_mean, [1, 1, 3])
    x -= channel_offsets
    return x

# Random augmentation applied to training batches only.
train_datagen = ImageDataGenerator(
    rotation_range=180,
    shear_range=0.2,
    zoom_range=0.1,
    width_shift_range=0.1,
    height_shift_range=0.1,
    horizontal_flip=True,
    vertical_flip=True,
    preprocessing_function=preprocessing_function)
train_generator = train_datagen.flow(X_train, y_train, batch_size=BATCHSIZE, shuffle=True, seed=None)

# Whole batches only; a trailing partial batch is not counted in an epoch.
steps_per_epoch = X_train.shape[0] // BATCHSIZE

# Validation images get the same centering but no augmentation.
X_valid_centered = X_valid - np.reshape(all_mean, [1, 1, 1, 3])


In [16]:
#callbacks

# Stop when val_loss has not improved for 25 epochs.
early_stopping = EarlyStopping(
    monitor='val_loss',
    min_delta=0,
    patience=25,
    verbose=1,
    mode='auto')

# Save the full model (not just weights) whenever val_loss improves; the file
# name carries the epoch and the val_loss, which get_best_model() parses.
model_checkpoint = ModelCheckpoint(
    filepath=CHECKPOINT_DIR+'weights.{epoch:03d}-{val_loss:.4f}.hdf5',
    monitor='val_loss',
    verbose=1,
    save_best_only=True,
    save_weights_only=False,
    mode='auto')

# Divide the learning rate by 10 after 10 epochs without val_loss improvement.
learningrate_schedule = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.1,
    patience=10,
    verbose=1,
    mode='auto',
    epsilon=0.0001,
    cooldown=0,
    min_lr=0)

tensorboard = TensorBoard(
    log_dir=LOG_DIR,
    histogram_freq=0,
    write_graph=False,
    write_images=True)


In [None]:
# Resnet50
# Stage 0: train only the new classification head until early stopping.

from keras.applications.resnet50 import ResNet50

# ImageNet-pretrained convolutional base without its original classifier.
base_model = ResNet50(weights='imagenet', include_top=False)

# New head: global average pooling followed by a softmax over the crop classes.
# (A Dropout(0.5) between pooling and the Dense layer was tried and disabled.)
x = GlobalAveragePooling2D()(base_model.output)
predictions = Dense(len(CROP_CLASSES), kernel_initializer='glorot_normal', activation='softmax')(x)

model = Model(inputs=base_model.input, outputs=predictions)

# Freeze the whole pretrained base so only the randomly initialised head learns.
for base_layer in base_model.layers:
    base_layer.trainable = False

# Compile after changing the trainable flags so they take effect.
optimizer = Adam(lr=1e-4)
model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])

print('BATCHSIZE:', BATCHSIZE)
model.fit_generator(
    train_generator,
    steps_per_epoch=steps_per_epoch,
    epochs=2000,
    verbose=1,
    callbacks=[early_stopping, model_checkpoint, learningrate_schedule, tensorboard],
    validation_data=(X_valid_centered, y_valid),
    class_weight=class_weight,
    workers=3,
    pickle_safe=True)

In [12]:
### Resnet50
# fine tuning stg1
# 164 conv5c+top
# 142 conv5+top
# 80 conv4+conv5+top
# 38 conv3+conv4+conv5+top
# 6 conv2+conv3+conv4+conv5+top
# 2 conv1+conv2+conv3+conv4+conv5+top i.e. FROM SCRATCH
start_layer = 164  # index of the first trainable layer

# Resume from the checkpoint with the lowest validation loss so far.
# To pin a specific checkpoint instead:
#     model_name = 'weights.029-0.3772.hdf5'
#     model = load_model(CHECKPOINT_DIR + model_name)
model, model_name = get_best_model()

# Checkpoints are named 'weights.<epoch>-<val_loss>.hdf5'. In this notebook's
# logs a run resumed from weights.349 starts at "Epoch 351", i.e. the epoch in
# the name is 0-based and training continues at that epoch + 1. Deriving the
# value keeps it in sync with whichever checkpoint was actually loaded.
initial_epoch = int(model_name.split('.')[1].split('-')[0]) + 1

# Freeze every layer below start_layer and train the rest.
for layer_index, layer in enumerate(model.layers):
    layer.trainable = layer_index >= start_layer

# Recompile so the trainable flags take effect; Adam with a low learning rate.
optimizer = Adam(lr=1e-5)
model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])

print('BATCHSIZE', BATCHSIZE)
model.fit_generator(train_generator, steps_per_epoch=steps_per_epoch, epochs=2000, verbose=1,
                    callbacks=[early_stopping, model_checkpoint, learningrate_schedule, tensorboard],
                    validation_data=(X_valid_centered, y_valid),
                    class_weight=class_weight, workers=3, pickle_safe=True,
                    initial_epoch=initial_epoch)

Loading model from checkpoint file ./checkpoint/weights.349-0.8371.hdf5
Loading model Done!
BATCHSIZE 128
Epoch 351/2000
Epoch 352/2000
Epoch 353/2000
Epoch 354/2000
Epoch 355/2000
Epoch 356/2000
Epoch 357/2000
Epoch 358/2000
Epoch 359/2000
Epoch 360/2000
Epoch 361/2000
Epoch 362/2000
Epoch 363/2000
Epoch 364/2000
Epoch 365/2000
Epoch 366/2000
Epoch 367/2000
Epoch 368/2000
Epoch 369/2000
Epoch 370/2000
Epoch 371/2000
Epoch 372/2000
Epoch 373/2000
Epoch 374/2000
Epoch 375/2000
Epoch 376/2000
Epoch 377/2000
Epoch 378/2000
Epoch 379/2000
Epoch 380/2000
Epoch 381/2000
Epoch 382/2000
Epoch 383/2000
Epoch 384/2000
Epoch 385/2000
Epoch 386/2000
Epoch 387/2000
Epoch 388/2000
Epoch 389/2000
Epoch 390/2000
Epoch 391/2000
Epoch 392/2000
Epoch 393/2000
Epoch 394/2000
Epoch 395/2000
Epoch 396/2000
Epoch 397/2000
Epoch 398/2000
Epoch 399/2000
Epoch 400/2000
Epoch 401/2000
Epoch 402/2000
Epoch 403/2000
Epoch 404/2000
Epoch 405/2000
Epoch 406/2000
Epoch 407/2000
Epoch 408/2000
Epoch 409/2000
Epoch 410

<keras.callbacks.History at 0x7fbfa61f3c50>

In [8]:
### Resnet50
# fine tuning stg2
# 164 conv5c+top
# 154 conv5b+conv5c+top
# 142 conv5+top
# 80 conv4+conv5+top
# 38 conv3+conv4+conv5+top
# 6 conv2+conv3+conv4+conv5+top
# 2 conv1+conv2+conv3+conv4+conv5+top i.e. FROM SCRATCH
start_layer = 154  # index of the first trainable layer

# Pinned checkpoint for this stage (alternative: model, model_name = get_best_model()).
# The name is written once so the loaded file and the reported name cannot diverge.
model_name = 'weights.539-0.3229.hdf5'
model = load_model(CHECKPOINT_DIR + model_name)
print('model_name', model_name)

# Checkpoints are named 'weights.<epoch>-<val_loss>.hdf5'. In this notebook's
# logs a run resumed from weights.539 starts at "Epoch 541", i.e. the epoch in
# the name is 0-based and training continues at that epoch + 1.
initial_epoch = int(model_name.split('.')[1].split('-')[0]) + 1

# Freeze every layer below start_layer and train the rest.
for layer_index, layer in enumerate(model.layers):
    layer.trainable = layer_index >= start_layer

# Recompile so the trainable flags take effect; Adam with a low learning rate.
optimizer = Adam(lr=1e-5)
model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])

print('BATCHSIZE', BATCHSIZE)
model.fit_generator(train_generator, steps_per_epoch=steps_per_epoch, epochs=2000, verbose=1,
                    callbacks=[early_stopping, model_checkpoint, learningrate_schedule, tensorboard],
                    validation_data=(X_valid_centered, y_valid),
                    class_weight=class_weight, workers=3, pickle_safe=True,
                    initial_epoch=initial_epoch)

model_name weights.539-0.3229.hdf5
BATCHSIZE 128
Epoch 541/2000
Epoch 542/2000
Epoch 543/2000
Epoch 544/2000
Epoch 545/2000
Epoch 546/2000
Epoch 547/2000
Epoch 548/2000
Epoch 549/2000
Epoch 550/2000
Epoch 551/2000
Epoch 552/2000
Epoch 553/2000
Epoch 554/2000
Epoch 555/2000
Epoch 556/2000
Epoch 557/2000
Epoch 558/2000
Epoch 559/2000
Epoch 560/2000
Epoch 561/2000
Epoch 562/2000
Epoch 563/2000
Epoch 564/2000
Epoch 565/2000
Epoch 566/2000
Epoch 567/2000
Epoch 568/2000
Epoch 569/2000
Epoch 570/2000
Epoch 571/2000
Epoch 572/2000
Epoch 573/2000
Epoch 574/2000
Epoch 575/2000
Epoch 576/2000
Epoch 577/2000
Epoch 578/2000
Epoch 579/2000
Epoch 580/2000
Epoch 581/2000
Epoch 582/2000
Epoch 583/2000
Epoch 584/2000

Epoch 00583: reducing learning rate to 9.99999974738e-07.
Epoch 585/2000
Epoch 586/2000
Epoch 587/2000
Epoch 588/2000
Epoch 589/2000
Epoch 590/2000
Epoch 591/2000
Epoch 592/2000
Epoch 593/2000
Epoch 594/2000
Epoch 595/2000
Epoch 596/2000
Epoch 597/2000
Epoch 598/2000
Epoch 599/2000
Epoch 6

<keras.callbacks.History at 0x7face852be90>

In [9]:
### Resnet50
# fine tuning stg3
# 164 conv5c+top
# 154 conv5b+conv5c+top
# 142 conv5+top i.e. conv5a+conv5b+conv5c+top
# 80 conv4+conv5+top
# 38 conv3+conv4+conv5+top
# 6 conv2+conv3+conv4+conv5+top
# 2 conv1+conv2+conv3+conv4+conv5+top i.e. FROM SCRATCH
start_layer = 142  # index of the first trainable layer

# Resume from the checkpoint with the lowest validation loss so far.
# To pin a specific checkpoint instead:
#     model_name = 'weights.539-0.3229.hdf5'
#     model = load_model(CHECKPOINT_DIR + model_name)
model, model_name = get_best_model()

# Checkpoints are named 'weights.<epoch>-<val_loss>.hdf5'. In this notebook's
# logs a run resumed from weights.630 starts at "Epoch 632", i.e. the epoch in
# the name is 0-based and training continues at that epoch + 1. Deriving the
# value keeps it in sync with whichever checkpoint was actually loaded.
initial_epoch = int(model_name.split('.')[1].split('-')[0]) + 1

# Freeze every layer below start_layer and train the rest.
for layer_index, layer in enumerate(model.layers):
    layer.trainable = layer_index >= start_layer

# Recompile so the trainable flags take effect; Adam with a low learning rate.
optimizer = Adam(lr=1e-5)
model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])

print('BATCHSIZE', BATCHSIZE)
model.fit_generator(train_generator, steps_per_epoch=steps_per_epoch, epochs=2000, verbose=1,
                    callbacks=[early_stopping, model_checkpoint, learningrate_schedule, tensorboard],
                    validation_data=(X_valid_centered, y_valid),
                    class_weight=class_weight, workers=3, pickle_safe=True,
                    initial_epoch=initial_epoch)

Loading model from checkpoint file ./checkpoint/weights.630-0.2407.hdf5
Loading model Done!
BATCHSIZE 128
Epoch 632/2000
Epoch 633/2000
Epoch 634/2000
Epoch 635/2000
Epoch 636/2000
Epoch 637/2000
Epoch 638/2000
Epoch 639/2000
Epoch 640/2000
Epoch 641/2000
Epoch 642/2000
Epoch 643/2000
Epoch 644/2000
Epoch 645/2000
Epoch 646/2000
Epoch 647/2000
Epoch 648/2000
Epoch 649/2000
Epoch 650/2000
Epoch 651/2000
Epoch 652/2000
Epoch 653/2000
Epoch 654/2000
Epoch 655/2000
Epoch 656/2000
Epoch 657/2000
Epoch 658/2000
Epoch 659/2000
Epoch 660/2000
Epoch 661/2000
Epoch 662/2000
Epoch 663/2000
Epoch 664/2000
Epoch 665/2000
Epoch 666/2000
Epoch 667/2000
Epoch 668/2000
Epoch 669/2000
Epoch 670/2000
Epoch 671/2000
Epoch 672/2000
Epoch 673/2000
Epoch 674/2000
Epoch 675/2000
Epoch 676/2000
Epoch 677/2000
Epoch 678/2000
Epoch 679/2000
Epoch 680/2000
Epoch 681/2000
Epoch 682/2000
Epoch 683/2000
Epoch 684/2000
Epoch 685/2000
Epoch 686/2000
Epoch 687/2000
Epoch 688/2000
Epoch 689/2000
Epoch 690/2000
Epoch 691

<keras.callbacks.History at 0x7facd3ff0210>

In [12]:
### Resnet50
# fine tuning stg4
# 164 conv5c+top
# 154 conv5b+conv5c+top
# 142 conv5+top i.e. conv5a+conv5b+conv5c+top
# 132 conv4f+conv5+top
# 80 conv4+conv5+top
# 38 conv3+conv4+conv5+top
# 6 conv2+conv3+conv4+conv5+top
# 2 conv1+conv2+conv3+conv4+conv5+top i.e. FROM SCRATCH
start_layer = 132  # index of the first trainable layer

# Resume from the checkpoint with the lowest validation loss so far.
# To pin a specific checkpoint instead:
#     model_name = 'weights.539-0.3229.hdf5'
#     model = load_model(CHECKPOINT_DIR + model_name)
model, model_name = get_best_model()

# Checkpoints are named 'weights.<epoch>-<val_loss>.hdf5'. In this notebook's
# logs a run resumed from weights.683 starts at "Epoch 685", i.e. the epoch in
# the name is 0-based and training continues at that epoch + 1. Deriving the
# value keeps it in sync with whichever checkpoint was actually loaded.
initial_epoch = int(model_name.split('.')[1].split('-')[0]) + 1

# Freeze every layer below start_layer and train the rest.
for layer_index, layer in enumerate(model.layers):
    layer.trainable = layer_index >= start_layer

# Recompile so the trainable flags take effect; Adam with a low learning rate.
optimizer = Adam(lr=1e-5)
model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])

print('BATCHSIZE', BATCHSIZE)
model.fit_generator(train_generator, steps_per_epoch=steps_per_epoch, epochs=2000, verbose=1,
                    callbacks=[early_stopping, model_checkpoint, learningrate_schedule, tensorboard],
                    validation_data=(X_valid_centered, y_valid),
                    class_weight=class_weight, workers=3, pickle_safe=True,
                    initial_epoch=initial_epoch)

Loading model from checkpoint file ./checkpoint/weights.683-0.1587.hdf5
Loading model Done!
BATCHSIZE 128
Epoch 685/2000
Epoch 686/2000
Epoch 687/2000
Epoch 688/2000
Epoch 689/2000
Epoch 690/2000
Epoch 691/2000
Epoch 692/2000
Epoch 693/2000
Epoch 694/2000
Epoch 695/2000
Epoch 696/2000
Epoch 697/2000
Epoch 698/2000
Epoch 699/2000
Epoch 700/2000
Epoch 701/2000
Epoch 702/2000
Epoch 703/2000
Epoch 704/2000
Epoch 705/2000
Epoch 706/2000
Epoch 707/2000
Epoch 708/2000
Epoch 709/2000
Epoch 710/2000
Epoch 711/2000
Epoch 712/2000
Epoch 713/2000
Epoch 714/2000
Epoch 715/2000
Epoch 716/2000
Epoch 717/2000
Epoch 718/2000
Epoch 719/2000
Epoch 720/2000
Epoch 721/2000
Epoch 722/2000
Epoch 723/2000
Epoch 724/2000
Epoch 725/2000
Epoch 726/2000
Epoch 727/2000
Epoch 728/2000
Epoch 729/2000
Epoch 730/2000
Epoch 731/2000
Epoch 732/2000
Epoch 733/2000
Epoch 734/2000
Epoch 735/2000
Epoch 736/2000

Epoch 00735: reducing learning rate to 9.99999974738e-07.
Epoch 737/2000
Epoch 738/2000
Epoch 739/2000
Epoch 740/

<keras.callbacks.History at 0x7faca23e9ad0>

In [None]:
### Resnet50
# fine tuning stg5
# 164 conv5c+top
# 154 conv5b+conv5c+top
# 142 conv5+top i.e. conv5a+conv5b+conv5c+top
# 132 conv4f+conv5+top
# 122 conv4e+conv4f+conv5+top
# 80 conv4+conv5+top
# 38 conv3+conv4+conv5+top
# 6 conv2+conv3+conv4+conv5+top
# 2 conv1+conv2+conv3+conv4+conv5+top i.e. FROM SCRATCH
start_layer = 80  # index of the first trainable layer

# Resume from the checkpoint with the lowest validation loss so far.
# To pin a specific checkpoint instead:
#     model_name = 'weights.539-0.3229.hdf5'
#     model = load_model(CHECKPOINT_DIR + model_name)
model, model_name = get_best_model()

# Checkpoints are named 'weights.<epoch>-<val_loss>.hdf5'. In this notebook's
# logs a run resumed from weights.724 starts at "Epoch 726", i.e. the epoch in
# the name is 0-based and training continues at that epoch + 1. Deriving the
# value keeps it in sync with whichever checkpoint was actually loaded.
initial_epoch = int(model_name.split('.')[1].split('-')[0]) + 1

# Freeze every layer below start_layer and train the rest.
for layer_index, layer in enumerate(model.layers):
    layer.trainable = layer_index >= start_layer

# Recompile so the trainable flags take effect; Adam with a low learning rate.
optimizer = Adam(lr=1e-5)
model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])

print('BATCHSIZE', BATCHSIZE)
model.fit_generator(train_generator, steps_per_epoch=steps_per_epoch, epochs=2000, verbose=1,
                    callbacks=[early_stopping, model_checkpoint, learningrate_schedule, tensorboard],
                    validation_data=(X_valid_centered, y_valid),
                    class_weight=class_weight, workers=3, pickle_safe=True,
                    initial_epoch=initial_epoch)

Loading model from checkpoint file ./checkpoint/weights.724-0.1355.hdf5
Loading model Done!
BATCHSIZE 128
Epoch 726/2000
Epoch 727/2000
Epoch 728/2000
Epoch 729/2000
Epoch 730/2000
Epoch 731/2000
Epoch 732/2000
Epoch 733/2000
Epoch 734/2000
Epoch 735/2000
Epoch 736/2000
Epoch 737/2000
Epoch 738/2000
Epoch 739/2000
Epoch 740/2000
Epoch 741/2000
Epoch 742/2000
Epoch 743/2000
Epoch 744/2000
Epoch 745/2000
Epoch 746/2000
Epoch 747/2000
Epoch 748/2000
Epoch 749/2000
Epoch 750/2000
Epoch 751/2000
Epoch 752/2000
Epoch 753/2000
Epoch 754/2000
Epoch 755/2000
Epoch 756/2000
Epoch 757/2000
Epoch 758/2000
Epoch 759/2000
Epoch 760/2000
Epoch 761/2000
Epoch 762/2000
Epoch 763/2000
Epoch 764/2000
Epoch 765/2000
Epoch 766/2000
Epoch 767/2000
Epoch 768/2000
Epoch 769/2000
Epoch 770/2000
Epoch 771/2000
Epoch 773/2000
Epoch 774/2000
Epoch 775/2000
Epoch 776/2000
Epoch 777/2000
Epoch 778/2000
Epoch 779/2000
Epoch 780/2000
Epoch 781/2000

Epoch 00780: reducing learning rate to 9.99999997475e-08.
Epoch 782/

<keras.callbacks.History at 0x7fab364f4990>

In [8]:
### Resnet50
# fine tuning stg6 (start_layer = 38: conv3 and above)
# 164 conv5c+top
# 154 conv5b+conv5c+top
# 142 conv5+top i.e. conv5a+conv5b+conv5c+top
# 132 conv4f+conv5+top
# 122 conv4e+conv4f+conv5+top
# 80 conv4+conv5+top
# 38 conv3+conv4+conv5+top
# 6 conv2+conv3+conv4+conv5+top
# 2 conv1+conv2+conv3+conv4+conv5+top i.e. FROM SCRATCH
start_layer = 38  # index of the first trainable layer

# Resume from the checkpoint with the lowest validation loss so far.
# To pin a specific checkpoint instead:
#     model_name = 'weights.539-0.3229.hdf5'
#     model = load_model(CHECKPOINT_DIR + model_name)
model, model_name = get_best_model()

# Checkpoints are named 'weights.<epoch>-<val_loss>.hdf5'. In this notebook's
# logs a run resumed from weights.771 starts at "Epoch 773", i.e. the epoch in
# the name is 0-based and training continues at that epoch + 1. Deriving the
# value keeps it in sync with whichever checkpoint was actually loaded.
initial_epoch = int(model_name.split('.')[1].split('-')[0]) + 1

# Freeze every layer below start_layer and train the rest.
for layer_index, layer in enumerate(model.layers):
    layer.trainable = layer_index >= start_layer

# Recompile so the trainable flags take effect; Adam with a low learning rate.
optimizer = Adam(lr=1e-5)
model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])

print('BATCHSIZE', BATCHSIZE)
model.fit_generator(train_generator, steps_per_epoch=steps_per_epoch, epochs=2000, verbose=1,
                    callbacks=[early_stopping, model_checkpoint, learningrate_schedule, tensorboard],
                    validation_data=(X_valid_centered, y_valid),
                    class_weight=class_weight, workers=3, pickle_safe=True,
                    initial_epoch=initial_epoch)

Loading model from checkpoint file ./checkpoint/weights.771-0.1162.hdf5
Loading model Done!
BATCHSIZE 128
Epoch 773/2000
Epoch 774/2000
Epoch 775/2000
Epoch 776/2000
Epoch 777/2000
Epoch 778/2000
Epoch 779/2000
Epoch 780/2000
Epoch 781/2000
Epoch 782/2000
Epoch 783/2000
Epoch 784/2000

Epoch 00783: reducing learning rate to 9.99999974738e-07.
Epoch 785/2000
Epoch 786/2000
Epoch 787/2000
Epoch 788/2000
Epoch 789/2000
Epoch 790/2000
Epoch 791/2000
Epoch 792/2000
Epoch 793/2000
Epoch 794/2000
Epoch 795/2000
Epoch 796/2000
Epoch 797/2000
Epoch 798/2000
Epoch 799/2000
Epoch 800/2000
Epoch 801/2000
Epoch 802/2000
Epoch 803/2000
Epoch 804/2000
Epoch 805/2000
Epoch 806/2000
Epoch 807/2000
Epoch 808/2000
Epoch 809/2000
Epoch 810/2000
Epoch 811/2000
Epoch 812/2000
Epoch 813/2000

Epoch 00812: reducing learning rate to 9.99999997475e-08.
Epoch 814/2000
Epoch 815/2000
Epoch 816/2000
Epoch 817/2000
Epoch 818/2000
Epoch 819/2000
Epoch 820/2000
Epoch 821/2000
Epoch 822/2000
Epoch 823/2000

Epoch 0082

<keras.callbacks.History at 0x7fad6959ec10>

In [15]:
### Resnet50
# fine tuning stg7 (start_layer = 2: every layer from conv1 up is trainable)
# 164 conv5c+top
# 154 conv5b+conv5c+top
# 142 conv5+top i.e. conv5a+conv5b+conv5c+top
# 132 conv4f+conv5+top
# 122 conv4e+conv4f+conv5+top
# 80 conv4+conv5+top
# 38 conv3+conv4+conv5+top
# 6 conv2+conv3+conv4+conv5+top
# 2 conv1+conv2+conv3+conv4+conv5+top i.e. FROM SCRATCH
start_layer = 2  # index of the first trainable layer

# Pinned checkpoint for this stage (alternative: model, model_name = get_best_model()).
# The name is written once so the loaded file and the reported name cannot diverge.
model_name = 'weights.771-0.1162.hdf5'
model = load_model(CHECKPOINT_DIR + model_name)
print('model_name', model_name)

# Checkpoints are named 'weights.<epoch>-<val_loss>.hdf5'. In this notebook's
# logs a run resumed from weights.771 starts at "Epoch 773", i.e. the epoch in
# the name is 0-based and training continues at that epoch + 1.
initial_epoch = int(model_name.split('.')[1].split('-')[0]) + 1

# Freeze every layer below start_layer and train the rest.
for layer_index, layer in enumerate(model.layers):
    layer.trainable = layer_index >= start_layer

# Recompile so the trainable flags take effect; Adam with a low learning rate.
optimizer = Adam(lr=1e-5)
model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])

print('BATCHSIZE', BATCHSIZE)
model.fit_generator(train_generator, steps_per_epoch=steps_per_epoch, epochs=2000, verbose=1,
                    callbacks=[early_stopping, model_checkpoint, learningrate_schedule, tensorboard],
                    validation_data=(X_valid_centered, y_valid),
                    class_weight=class_weight, workers=3, pickle_safe=True,
                    initial_epoch=initial_epoch)

model_name weights.771-0.1162.hdf5
BATCHSIZE 64
Epoch 773/2000
Epoch 774/2000
Epoch 775/2000
Epoch 776/2000
Epoch 777/2000
Epoch 778/2000
Epoch 779/2000
Epoch 780/2000
Epoch 781/2000
Epoch 782/2000
Epoch 783/2000
Epoch 784/2000
Epoch 785/2000
Epoch 786/2000
Epoch 787/2000
Epoch 788/2000
Epoch 789/2000
Epoch 790/2000
Epoch 791/2000
Epoch 792/2000
Epoch 793/2000
Epoch 794/2000
Epoch 795/2000
Epoch 796/2000
Epoch 797/2000
Epoch 798/2000
Epoch 799/2000
Epoch 800/2000
Epoch 801/2000
Epoch 802/2000
Epoch 803/2000
Epoch 804/2000
Epoch 805/2000
Epoch 806/2000
Epoch 807/2000

Epoch 00806: reducing learning rate to 9.99999974738e-07.
Epoch 808/2000
Epoch 809/2000
Epoch 810/2000
Epoch 811/2000
Epoch 812/2000
Epoch 813/2000
Epoch 814/2000
Epoch 815/2000
Epoch 816/2000
Epoch 817/2000

Epoch 00816: reducing learning rate to 9.99999997475e-08.
Epoch 818/2000
Epoch 819/2000
Epoch 820/2000
Epoch 821/2000
Epoch 822/2000
Epoch 00821: early stopping


<keras.callbacks.History at 0x7fad3517c390>

In [17]:
### Resnet50
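# fine tuning, alternative run: unfreeze every layer at once (start_layer = 2), starting from the weights.349 checkpoint that stg1 also started from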
# 164 conv5c+top
# 154 conv5b+conv5c+top
# 142 conv5+top i.e. conv5a+conv5b+conv5c+top
# 132 conv4f+conv5+top
# 122 conv4e+conv4f+conv5+top
# 80 conv4+conv5+top
# 38 conv3+conv4+conv5+top
# 6 conv2+conv3+conv4+conv5+top
# 2 conv1+conv2+conv3+conv4+conv5+top i.e. FROM SCRATCH
start_layer = 2  # index of the first trainable layer

# Pinned checkpoint for this run (alternative: model, model_name = get_best_model()).
# The name is written once so the loaded file and the reported name cannot diverge.
model_name = 'weights.349-0.8371.hdf5'
model = load_model(CHECKPOINT_DIR + model_name)
print('model_name', model_name)

# Checkpoints are named 'weights.<epoch>-<val_loss>.hdf5'. In this notebook's
# logs a run resumed from weights.349 starts at "Epoch 351", i.e. the epoch in
# the name is 0-based and training continues at that epoch + 1.
initial_epoch = int(model_name.split('.')[1].split('-')[0]) + 1

# Freeze every layer below start_layer and train the rest.
for layer_index, layer in enumerate(model.layers):
    layer.trainable = layer_index >= start_layer

# Recompile so the trainable flags take effect; Adam with a low learning rate.
optimizer = Adam(lr=1e-5)
model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])

print('BATCHSIZE', BATCHSIZE)
model.fit_generator(train_generator, steps_per_epoch=steps_per_epoch, epochs=2000, verbose=1,
                    callbacks=[early_stopping, model_checkpoint, learningrate_schedule, tensorboard],
                    validation_data=(X_valid_centered, y_valid),
                    class_weight=class_weight, workers=3, pickle_safe=True,
                    initial_epoch=initial_epoch)

model_name weights.349-0.8371.hdf5
BATCHSIZE 64
Epoch 351/2000
Epoch 352/2000
Epoch 353/2000
Epoch 354/2000
Epoch 355/2000
Epoch 356/2000
Epoch 357/2000
Epoch 358/2000
Epoch 359/2000
Epoch 360/2000
Epoch 361/2000
Epoch 362/2000
Epoch 363/2000
Epoch 364/2000
Epoch 365/2000
Epoch 366/2000
Epoch 367/2000
Epoch 368/2000
Epoch 369/2000
Epoch 370/2000
Epoch 371/2000
Epoch 372/2000
Epoch 373/2000
Epoch 374/2000
Epoch 375/2000
Epoch 376/2000
Epoch 377/2000
Epoch 378/2000
Epoch 379/2000
Epoch 380/2000
Epoch 381/2000
Epoch 382/2000
Epoch 383/2000
Epoch 384/2000
Epoch 385/2000
Epoch 386/2000
Epoch 387/2000
Epoch 388/2000
Epoch 389/2000
Epoch 390/2000
Epoch 391/2000
Epoch 392/2000
Epoch 393/2000
Epoch 394/2000
Epoch 395/2000
Epoch 396/2000
Epoch 397/2000
Epoch 398/2000
Epoch 399/2000
Epoch 400/2000
Epoch 401/2000
Epoch 402/2000
Epoch 403/2000
Epoch 404/2000
Epoch 405/2000
Epoch 406/2000
Epoch 407/2000
Epoch 408/2000
Epoch 409/2000
Epoch 410/2000
Epoch 411/2000
Epoch 412/2000
Epoch 413/2000
Epoch 4

<keras.callbacks.History at 0x7fad25380e50>

In [None]:
#resume training

# Continue with the best checkpoint exactly as it was saved: the model is not
# recompiled here, so it keeps the optimizer and trainable flags stored in the
# checkpoint file. To pin a checkpoint or reset the optimizer instead:
#     model = load_model(CHECKPOINT_DIR + 'weights.011-1.7062.hdf5')
#     optimizer = Adam(lr=1e-4)
#     model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])
model, model_name = get_best_model()

# Checkpoints are named 'weights.<epoch>-<val_loss>.hdf5'. In this notebook's
# logs a run resumed from weights.759 starts at "Epoch 761", i.e. the epoch in
# the name is 0-based and training continues at that epoch + 1. Deriving the
# value keeps it in sync with whichever checkpoint was actually loaded.
initial_epoch = int(model_name.split('.')[1].split('-')[0]) + 1

model.fit_generator(train_generator, steps_per_epoch=steps_per_epoch, epochs=2000, verbose=1,
                    callbacks=[early_stopping, model_checkpoint, learningrate_schedule, tensorboard],
                    validation_data=(X_valid_centered,y_valid),
                    class_weight=class_weight, workers=3, pickle_safe=True,
                    initial_epoch=initial_epoch)

Loading model from checkpoint file ./checkpoint/weights.759-0.1231.hdf5
Loading model Done!
Epoch 761/2000
Epoch 762/2000
Epoch 763/2000
Epoch 764/2000
Epoch 765/2000
Epoch 766/2000
Epoch 767/2000
Epoch 768/2000
Epoch 769/2000
Epoch 770/2000
Epoch 771/2000
Epoch 772/2000
Epoch 773/2000
Epoch 774/2000
Epoch 775/2000
Epoch 776/2000

Epoch 00775: reducing learning rate to 9.99999974738e-07.
Epoch 777/2000
Epoch 778/2000
Epoch 779/2000
Epoch 780/2000
Epoch 781/2000
Epoch 782/2000
Epoch 783/2000
Epoch 784/2000
Epoch 785/2000
Epoch 786/2000

Epoch 00785: reducing learning rate to 9.99999997475e-08.
Epoch 787/2000
Epoch 790/2000
Epoch 791/2000
Epoch 792/2000
Epoch 793/2000
Epoch 794/2000
Epoch 795/2000
Epoch 796/2000

Epoch 00795: reducing learning rate to 1.00000001169e-08.
Epoch 797/2000
Epoch 798/2000
Epoch 00797: early stopping


<keras.callbacks.History at 0x7f250b82f150>

In [32]:
#test prepare

# Checkpoint evaluated by the test cells. To use whichever checkpoint has the
# lowest validation loss instead:
#     test_model, test_model_name = get_best_model()
test_model_name = 'weights.771-0.1162.hdf5'
test_model = load_model(CHECKPOINT_DIR + test_model_name)
print('test_model_name', test_model_name)

def test_generator(df, mean, datagen = None, batch_size = BATCHSIZE):
    """Endlessly yield preprocessed image batches for the rows of ``df``.

    df         -- frame with 'image_file', 'xmin', 'ymin', 'xmax', 'ymax'
                  columns; rows are consumed in order and the generator wraps
                  around to the first row after the last batch.
    mean       -- 3-element per-channel mean subtracted from every image.
    datagen    -- optional object with ``random_transform(x)``; when given,
                  each image is randomly transformed before centering.
    batch_size -- number of rows per batch; the last batch of a pass may be
                  smaller.

    Yields float arrays of shape (batch, ROWS, COLS, 3). A pass over ``df``
    takes ceil(len(df) / batch_size) batches.
    """
    n = df.shape[0]
    batch_index = 0
    while 1:
        start = batch_index * batch_size
        stop = min(start + batch_size, n)
        # Wrap as soon as the last row has been consumed. Comparing against
        # n here (rather than testing for a full batch) avoids yielding an
        # empty batch when n is an exact multiple of batch_size.
        if stop >= n:
            batch_index = 0
        else:
            batch_index += 1
        # Positional slice, independent of the frame's index labels.
        batch_df = df.iloc[start:stop]
        batch_x = np.zeros((batch_df.shape[0], ROWS, COLS, 3), dtype=K.floatx())
        for position, (_, row) in enumerate(batch_df.iterrows()):
            image_file = row['image_file']
            bbox = [row['xmin'],row['ymin'],row['xmax'],row['ymax']]
            cropped = load_img(TEST_DIR+image_file,bbox,target_size=(ROWS,COLS))
            # Same scaling as the cached training data: float pixels in [0, 1].
            x = np.asarray(cropped, dtype=K.floatx())
            x /= 255.
            if datagen is not None: x = datagen.random_transform(x)
            x = preprocess_featurewise(x, mean)
            batch_x[position] = x
        # Progress trace: every 50 batches and at the end of each pass.
        if batch_index%50 == 0: print('batch_index', batch_index)
        yield(batch_x)
        
# Random transforms for test-time augmentation. No preprocessing_function is
# set here: test_generator subtracts the mean itself after the transform.
test_aug_params = dict(
    rotation_range=180,
    shear_range=0.2,
    zoom_range=0.1,
    width_shift_range=0.1,
    height_shift_range=0.1,
    horizontal_flip=True,
    vertical_flip=True,
)
test_aug_datagen = ImageDataGenerator(**test_aug_params)


test_model_name weights.771-0.1162.hdf5


In [14]:
# Recompute the per-channel mean of all crops; the prediction cell passes it
# to test_generator as `mean`.
# train_mean = [0.37698776,  0.41491762,  0.38681713]
X_all, y_all = data_load('all')
all_mean = featurewise_mean(X_all)
print('all_mean:', all_mean)

Loading from file data_all_224_224.pickle
all_mean: [ 0.40696082  0.43766695  0.3947188 ]


In [33]:
#GTbbox_CROPpred_df = ['image_file','crop_index','crop_class','xmin','ymin','xmax','ymax','split'
#                      'ALB', 'BET', 'DOL', 'LAG', 'OTHER', 'SHARK', 'YFT', 'logloss']
# Crop-model predictions for the boxes listed in GTbbox_df, plus a per-row
# log loss (-log of the probability predicted for the row's own crop_class).
# The frame is cached as a pickle in OUTPUT_DIR, keyed by the checkpoint name.

file_name = 'GTbbox_CROPpred_df_'+test_model_name+'_.pickle'
if os.path.exists(OUTPUT_DIR+file_name):
    print ('Loading from file '+file_name)
    GTbbox_CROPpred_df = pd.read_pickle(OUTPUT_DIR+file_name)
else:
    print ('Generating file '+file_name) 
    nb_augmentation = 1
    # float() keeps this a true division even if `from __future__ import division` is not active
    nb_steps = int(math.ceil(GTbbox_df.shape[0]/float(BATCHSIZE)))
    if nb_augmentation ==1:
        test_preds = test_model.predict_generator(test_generator(df=GTbbox_df, mean=all_mean), 
                                                  steps=nb_steps, workers=1, pickle_safe=False)
    else:
        # The predictions fill one column per crop class (no 'NoF', see the frame
        # built below), so the accumulator is sized with CROP_CLASSES; sizing it
        # with FISH_CLASSES made the '+=' fail on a shape mismatch.
        test_preds = np.zeros((GTbbox_df.shape[0], len(CROP_CLASSES)), dtype=K.floatx())
        for idx in range(nb_augmentation):
            print('{}th augmentation for testing ...'.format(idx+1))
            test_preds += test_model.predict_generator(test_generator(df=GTbbox_df, mean=all_mean, datagen=test_aug_datagen), 
                                                       steps=nb_steps, workers=1, pickle_safe=False)
        test_preds /= nb_augmentation

    CROPpred_df = pd.DataFrame(test_preds, columns=list(CROP_CLASSES))
    # column-wise concat: rows are matched on index labels
    GTbbox_CROPpred_df = pd.concat([GTbbox_df,CROPpred_df], axis=1)
    GTbbox_CROPpred_df['logloss'] = GTbbox_CROPpred_df.apply(lambda row: -math.log(row[row['crop_class']]), axis=1)
    GTbbox_CROPpred_df.to_pickle(OUTPUT_DIR+file_name) 

# Summary losses; both values are reused later in the submission file name.
crop_valid_loss = GTbbox_CROPpred_df.loc[GTbbox_CROPpred_df['split']=='valid', 'logloss'].mean()
print('crop_valid_loss:', crop_valid_loss)
crop_all_loss = GTbbox_CROPpred_df['logloss'].mean()
print('crop_all_loss:', crop_all_loss)

Loading from file GTbbox_CROPpred_df_weights.771-0.1162.hdf5_.pickle
crop_valid_loss: 0.117746006953
crop_all_loss: 0.0167786059978


In [34]:
# Mean crop-level log loss, overall and per crop_class, for all rows and for
# the 'train' / 'valid' splits.
train_CROPpred_df = GTbbox_CROPpred_df[GTbbox_CROPpred_df['split']=='train']
valid_CROPpred_df = GTbbox_CROPpred_df[GTbbox_CROPpred_df['split']=='valid']

for loss_label, loss_df in (('all crop loss:', GTbbox_CROPpred_df),
                            ('train crop loss:', train_CROPpred_df),
                            ('valid crop loss:', valid_CROPpred_df)):
    print(loss_label, loss_df['logloss'].mean())
    print(loss_df.groupby(['crop_class'])['logloss'].mean())

all crop loss: 0.0167786059978
crop_class
ALB      0.009740
BET      0.056503
DOL      0.000681
LAG      0.000156
OTHER    0.060335
SHARK    0.000861
YFT      0.014039
Name: logloss, dtype: float64
train crop loss: 0.00596067018111
crop_class
ALB      0.007633
BET      0.006089
DOL      0.000584
LAG      0.000159
OTHER    0.005810
SHARK    0.000904
YFT      0.003506
Name: logloss, dtype: float64
valid crop loss: 0.117746006953
crop_class
ALB      0.029328
BET      0.599428
DOL      0.001607
LAG      0.000128
OTHER    0.591509
SHARK    0.000448
YFT      0.107406
Name: logloss, dtype: float64


In [35]:
# RFCNbbox_RFCNpred_df = ['image_class','image_file','crop_index','xmin','ymin','xmax','ymax',
#                          'NoF_RFCN', 'ALB_RFCN', 'BET_RFCN', 'DOL_RFCN',
#                          'LAG_RFCN', 'OTHER_RFCN', 'SHARK_RFCN', 'YFT_RFCN']
# select fish_conf >= CONF_THRESH
# One row per kept detection box. A box is kept when the sum of its fish scores
# (detection columns 5 and up) reaches CONF_THRESH; an image without such a box
# keeps its single highest-scoring box instead. Cached as a pickle in OUTPUT_DIR.

file_name = 'RFCNbbox_RFCNpred_df_conf{:.2f}.pickle'.format(CONF_THRESH)
if os.path.exists(OUTPUT_DIR+file_name):
    print ('Loading from file '+file_name)
    RFCNbbox_RFCNpred_df = pd.read_pickle(OUTPUT_DIR+file_name)
else:
    print ('Generating file '+file_name)        
    rfcn_columns = ['image_class','image_file','crop_index','xmin','ymin','xmax','ymax',
                    'NoF_RFCN', 'ALB_RFCN', 'BET_RFCN', 'DOL_RFCN',
                    'LAG_RFCN', 'OTHER_RFCN', 'SHARK_RFCN', 'YFT_RFCN']

    # NOTE: pickle.load can execute arbitrary code; only point this at detection files you trust.
    with open('../data/RFCN_detections/detections_full_AGNOSTICnms_'+RFCN_MODEL+'_stg12.pkl','rb') as f:
        detections_full_AGNOSTICnms = pickle.load(f) 
    with open("../RFCN/ImageSets/Main/test.txt","r") as f:
        test_files = f.readlines()
    with open("../RFCN/ImageSets/Main/train_test.txt","r") as f:
        train_file_labels = f.readlines()
    assert len(detections_full_AGNOSTICnms) == len(test_files)

    # Label lookup built once: characters [:9] of a line are the key and [10:-1]
    # the label. setdefault keeps the first line for a key, as the former
    # scan-and-break did.
    label_by_basename = {}
    for label_line in train_file_labels:
        label_by_basename.setdefault(label_line[:9], label_line[10:-1])
    
    count = np.zeros(len(detections_full_AGNOSTICnms))
    rfcn_rows = []
    
    for im in range(len(detections_full_AGNOSTICnms)):
        if im%1000 == 0: print(im)
        basename = test_files[im][:-1]
        if im<1000:
            image_class = '--'
        else:
            # Images without a label line are marked '--' explicitly. The former
            # scan silently kept whatever image_class the previous iteration left behind.
            image_class = label_by_basename.get(basename, '--')
        # Context manager closes the file handle; the local name also avoids
        # rebinding `image`, which is the imported keras.preprocessing.image module.
        with Image.open(TEST_DIR+'/'+basename+'.jpg') as pil_image:
            width_image, height_image = pil_image.size
        
        detects_im = detections_full_AGNOSTICnms[im]
        bboxes = [detects_im[i,:] for i in range(len(detects_im))
                  if np.sum(detects_im[i,5:]) >= CONF_THRESH]
        count[im] = len(bboxes)
        if len(bboxes) == 0:
            # nothing passed the threshold: fall back to the best-scoring box
            ind = np.argmax(np.sum(detects_im[:,5:], axis=1))
            bboxes.append(detects_im[ind,:])
        bboxes = np.asarray(bboxes)

        for j in range(len(bboxes)):    
            bbox = bboxes[j]
            xmin, ymin, xmax, ymax = bbox[0], bbox[1], bbox[2], bbox[3]
            # the box clipped to the image must keep a positive width and height
            assert max(xmin,0)<min(xmax,width_image)
            assert max(ymin,0)<min(ymax,height_image)
            rfcn_rows.append([image_class,basename+'.jpg',j,max(xmin,0),max(ymin,0),
                              min(xmax,width_image),min(ymax,height_image),
                              bbox[4],bbox[5],bbox[6],bbox[7],bbox[8],bbox[9],bbox[10],bbox[11]])
    
    # build the frame once instead of growing it row by row
    RFCNbbox_RFCNpred_df = pd.DataFrame(rfcn_rows, columns=rfcn_columns)
    RFCNbbox_RFCNpred_df.to_pickle(OUTPUT_DIR+file_name)      

Loading from file RFCNbbox_RFCNpred_df_conf0.80.pickle


In [36]:
# RFCNbbox_RFCNpred_CROPpred_HYBRIDpred_df = ['image_class', 'image_file','crop_index','xmin','ymin','xmax','ymax',
#                                    'NoF_RFCN', 'ALB_RFCN', 'BET_RFCN', 'DOL_RFCN',
#                                    'LAG_RFCN', 'OTHER_RFCN', 'SHARK_RFCN', 'YFT_RFCN',
#                                    'ALB_CROP', 'BET_CROP', 'DOL_CROP',
#                                    'LAG_CROP', 'OTHER_CROP', 'SHARK_CROP', 'YFT_CROP',
#                                    'NoF', 'ALB', 'BET', 'DOL', 'LAG', 'OTHER', 'SHARK', 'YFT']
# Hybrid score per detection box: 'NoF' is taken from the RFCN detector and each
# fish class is (1 - NoF_RFCN) * crop-model probability.
# The 'TA10' tag in the cache name corresponds to nb_augmentation = 10 below.

file_name = 'RFCNbbox_RFCNpred_CROPpred_HYBRIDpred_df_'+test_model_name+'_TA10.pickle'
if os.path.exists(OUTPUT_DIR+file_name):
    print ('Loading from file '+file_name)
    RFCNbbox_RFCNpred_CROPpred_HYBRIDpred_df = pd.read_pickle(OUTPUT_DIR+file_name)
else:
    print ('Generating file '+file_name)  
    nb_augmentation = 10
    nb_rows = RFCNbbox_RFCNpred_df.shape[0]
    # float() keeps this a true division even if `from __future__ import division` is not active
    nb_steps = int(math.ceil(nb_rows/float(BATCHSIZE)))
    if nb_augmentation ==1:
        test_preds = test_model.predict_generator(test_generator(df=RFCNbbox_RFCNpred_df, mean=all_mean), 
                                                  steps=nb_steps, workers=1, pickle_safe=False)
    else:
        # average the predictions of nb_augmentation randomly augmented passes
        test_preds = np.zeros((nb_rows, len(CROP_CLASSES)), dtype=K.floatx())
        for idx in range(nb_augmentation):
            print('{}th augmentation for testing ...'.format(idx+1))
            test_preds += test_model.predict_generator(test_generator(df=RFCNbbox_RFCNpred_df, mean=all_mean, datagen=test_aug_datagen), 
                                                       steps=nb_steps, workers=1, pickle_safe=False)
        test_preds /= nb_augmentation

    # predictions are attached to the boxes by position, so the row counts must agree
    assert test_preds.shape[0] == nb_rows, 'number of predictions differs from number of boxes'
    CROPpred_df = pd.DataFrame(test_preds, columns=[fish+'_CROP' for fish in CROP_CLASSES])
    RFCNbbox_RFCNpred_CROPpred_HYBRIDpred_df = pd.concat([RFCNbbox_RFCNpred_df,CROPpred_df], axis=1)
    
    RFCNbbox_RFCNpred_CROPpred_HYBRIDpred_df['NoF'] = RFCNbbox_RFCNpred_CROPpred_HYBRIDpred_df['NoF_RFCN']
    # Column-wise product instead of a row-wise apply that returned a one-element
    # Series per row; astype(float) guards against an object-typed score column.
    fish_weight = 1 - RFCNbbox_RFCNpred_CROPpred_HYBRIDpred_df['NoF_RFCN'].astype(float)
    for fish in CROP_CLASSES:
        RFCNbbox_RFCNpred_CROPpred_HYBRIDpred_df[fish] = fish_weight * RFCNbbox_RFCNpred_CROPpred_HYBRIDpred_df[fish+'_CROP']

    RFCNbbox_RFCNpred_CROPpred_HYBRIDpred_df.to_pickle(OUTPUT_DIR+file_name) 

Generating file RFCNbbox_RFCNpred_CROPpred_HYBRIDpred_df_weights.771-0.1162.hdf5_TA10.pickle
1th augmentation for testing ...
batch_index 50
batch_index 100
batch_index 0
2th augmentation for testing ...
batch_index 50
batch_index 100
batch_index 0
3th augmentation for testing ...
batch_index 50
batch_index 100
batch_index 0
4th augmentation for testing ...
batch_index 50
batch_index 100
batch_index 0
5th augmentation for testing ...
batch_index 50
batch_index 100
batch_index 0
6th augmentation for testing ...
batch_index 50
batch_index 100
batch_index 0
7th augmentation for testing ...
batch_index 50
batch_index 100
batch_index 0
8th augmentation for testing ...
batch_index 50
batch_index 100
batch_index 0
9th augmentation for testing ...
batch_index 50
batch_index 100
batch_index 0
10th augmentation for testing ...
batch_index 50
batch_index 100
batch_index 0


In [37]:
# clsMaxAve and hybrid RFCNpred&CROPpred such that RFCNpred for NoF and CROPpred for fish
# test_pred_df = ['logloss','image_class','image_file','NoF', 'ALB', 'BET', 'DOL', 'LAG', 'OTHER', 'SHARK', 'YFT', 'split']
# RFCNbbox_RFCNpred_CROPpred_HYBRIDpred_df = ['image_class', 'image_file','crop_index','xmin','ymin','xmax','ymax',
#                                    'NoF_RFCN', 'ALB_RFCN', 'BET_RFCN', 'DOL_RFCN',
#                                    'LAG_RFCN', 'OTHER_RFCN', 'SHARK_RFCN', 'YFT_RFCN',
#                                    'ALB_CROP', 'BET_CROP', 'DOL_CROP',
#                                    'LAG_CROP', 'OTHER_CROP', 'SHARK_CROP', 'YFT_CROP',
#                                    'NoF', 'ALB', 'BET', 'DOL', 'LAG', 'OTHER', 'SHARK', 'YFT']
# Image-level prediction ("clsMaxAve"): group an image's boxes by their top-scoring
# class, keep the best box of each group, and average the kept boxes weighted by
# the size of the group each one represents.

file_name = 'test_pred_df_Hybrid_'+test_model_name+'_TA10.pickle'
if os.path.exists(OUTPUT_DIR+file_name):
    print ('Loading from file '+file_name)
    test_pred_df = pd.read_pickle(OUTPUT_DIR+file_name)
else:
    print ('Generating file '+file_name)  
    with open("../RFCN/ImageSets/Main/test.txt","r") as f:
        test_files = f.readlines()
    
    # group the boxes per image once instead of masking the whole frame for every image
    boxes_by_image = RFCNbbox_RFCNpred_CROPpred_HYBRIDpred_df.groupby('image_file', sort=False)
    image_rows = []
    for j in range(len(test_files)): 
        image_file = test_files[j][:-1]+'.jpg'
        boxes_im_df = boxes_by_image.get_group(image_file)
        image_class = boxes_im_df.iloc[0]['image_class']
        # astype returns a new frame, so nothing below writes into a slice of the source frame
        test_pred_im_df = boxes_im_df[FISH_CLASSES].astype(float)
        max_score = test_pred_im_df.max(axis=1)
        max_cls = test_pred_im_df.idxmax(axis=1)
        # number of boxes sharing the same top class
        cls_count = max_cls.groupby(max_cls).transform('count')
        # boxes holding the highest top score within their top-class group
        is_cls_max = max_score.groupby(max_cls).transform('max') == max_score
        pred = np.average(test_pred_im_df[is_cls_max].values, axis=0,
                          weights=cls_count[is_cls_max].values.astype(float), returned=False).tolist()
        if image_class!='--':
            ind = FISH_CLASSES.index(image_class)
            logloss = -math.log(pred[ind]) 
        else:
            # '--' marks an image without a label: no loss can be computed
            logloss = np.nan
        image_rows.append([logloss,image_class,image_file] + pred)
    
    # build the frame once instead of growing it row by row
    test_pred_df = pd.DataFrame(image_rows, columns=['logloss','image_class','image_file'] + FISH_CLASSES)
    
    image_split_df = GTbbox_df[['image_file','split']].drop_duplicates()
    # Left join: keeps exactly the predicted images, in test_files order. Later
    # cells select rows by position, which an outer join does not guarantee.
    test_pred_df = pd.merge(test_pred_df, image_split_df, how='left', on='image_file')

    test_pred_df.to_pickle(OUTPUT_DIR+file_name) 

# Summary losses over labelled images; both values are reused in the submission file name.
image_valid_loss = test_pred_df.loc[(test_pred_df['image_class']!='--') & (test_pred_df['split']=='valid'), 'logloss'].mean()
print('image_valid_loss:', image_valid_loss)
image_all_loss = test_pred_df.loc[test_pred_df['image_class']!='--', 'logloss'].mean()
print('image_all_loss:', image_all_loss)

Generating file test_pred_df_Hybrid_weights.771-0.1162.hdf5_TA10.pickle
image_valid_loss: 0.077320468337
image_all_loss: 0.0297610819685


In [38]:
# Mean image-level log loss, overall and per image_class, over the labelled
# images (image_class != '--') and over their 'train' / 'valid' splits.
all_IMAGEpred_df = test_pred_df[test_pred_df['image_class']!='--']
train_IMAGEpred_df = all_IMAGEpred_df[all_IMAGEpred_df['split']=='train']
valid_IMAGEpred_df = all_IMAGEpred_df[all_IMAGEpred_df['split']=='valid']

for loss_label, loss_df in (('all image loss:', all_IMAGEpred_df),
                            ('train image loss:', train_IMAGEpred_df),
                            ('valid image loss:', valid_IMAGEpred_df)):
    print(loss_label, loss_df['logloss'].mean())
    print(loss_df.groupby(['image_class'])['logloss'].mean())

all image loss: 0.0297610819685
image_class
ALB      0.012839
BET      0.024662
DOL      0.007605
LAG      0.001182
NoF      0.125905
OTHER    0.040452
SHARK    0.046408
YFT      0.007359
Name: logloss, dtype: float64
train image loss: 0.00940486895619
image_class
ALB      0.010418
BET      0.001128
DOL      0.008128
LAG      0.000926
OTHER    0.002570
SHARK    0.051142
YFT      0.003006
Name: logloss, dtype: float64
valid image loss: 0.077320468337
image_class
ALB      0.034588
BET      0.236469
DOL      0.003028
LAG      0.003374
OTHER    0.380120
SHARK    0.002132
YFT      0.046776
Name: logloss, dtype: float64


In [None]:
#### visualization
# Draws every 30th of the first 1000 rows of test_pred_df: detector boxes in red
# (best RFCN class and best hybrid class), ground-truth boxes in green
# (true crop class and best crop-model class).
# RFCNbbox_RFCNpred_CROPpred_HYBRIDpred_df = ['image_class', 'image_file','crop_index','xmin','ymin','xmax','ymax',
#                                    'NoF_RFCN', 'ALB_RFCN', 'BET_RFCN', 'DOL_RFCN',
#                                    'LAG_RFCN', 'OTHER_RFCN', 'SHARK_RFCN', 'YFT_RFCN',
#                                    'ALB_CROP', 'BET_CROP', 'DOL_CROP',
#                                    'LAG_CROP', 'OTHER_CROP', 'SHARK_CROP', 'YFT_CROP',
#                                    'NoF', 'ALB', 'BET', 'DOL', 'LAG', 'OTHER', 'SHARK', 'YFT']
#GTbbox_CROPpred_df = ['image_file','crop_index','crop_class','xmin','ymin','xmax','ymax','split'
#                      'ALB', 'BET', 'DOL', 'LAG', 'OTHER', 'SHARK', 'YFT', 'logloss']
# test_pred_df = ['logloss','image_class','image_file','NoF', 'ALB', 'BET', 'DOL', 'LAG', 'OTHER', 'SHARK', 'YFT', 'split']

# step through the sampled rows directly instead of visiting every row and skipping most
for j in range(0, min(1000, test_pred_df.shape[0]), 30):
    image_logloss = test_pred_df['logloss'].iat[j]
    image_class = test_pred_df['image_class'].iat[j]
    image_file = test_pred_df['image_file'].iat[j]
    with Image.open(TEST_DIR+image_file) as pil_im:
        im = np.asarray(pil_im)
    fig, ax = plt.subplots(figsize=(10, 8))
    ax.imshow(im, aspect='equal')
    RFCN_dets = RFCNbbox_RFCNpred_CROPpred_HYBRIDpred_df.loc[RFCNbbox_RFCNpred_CROPpred_HYBRIDpred_df['image_file']==image_file]
    for index,row in RFCN_dets.iterrows():
        bbox = [row['xmin'],row['ymin'],row['xmax'],row['ymax']]
        RFCN = [row[cls+'_RFCN'] for cls in FISH_CLASSES]
        # the un-suffixed class columns of this frame hold the hybrid scores
        CROP = [row[cls] for cls in FISH_CLASSES]
        score_RFCN = max(RFCN)
        score_CROP = max(CROP)
        class_RFCN = FISH_CLASSES[RFCN.index(score_RFCN)]
        class_CROP = FISH_CLASSES[CROP.index(score_CROP)]
        ax.add_patch(plt.Rectangle((bbox[0], bbox[1]), bbox[2] - bbox[0], bbox[3] - bbox[1], fill=False, edgecolor='red', linewidth=2))
        ax.text(bbox[0], bbox[1] - 2, 'RFCN_{:s} {:.3f} \nHYBRID_{:s} {:.3f}'.format(class_RFCN, score_RFCN, class_CROP, score_CROP), bbox=dict(facecolor='red', alpha=0.5), fontsize=8, color='white')       
    GT_dets = GTbbox_CROPpred_df.loc[GTbbox_CROPpred_df['image_file']==image_file]
    for index,row in GT_dets.iterrows():
        bbox = [row['xmin'],row['ymin'],row['xmax'],row['ymax']]
        # GTbbox_CROPpred_df only receives the crop-class columns (no 'NoF'), so
        # both the scores and the class names come from CROP_CLASSES.
        CROP = [row[cls] for cls in CROP_CLASSES]
        score_CROP = max(CROP)
        class_CROP = CROP_CLASSES[CROP.index(score_CROP)]
        ax.add_patch(plt.Rectangle((bbox[0], bbox[1]), bbox[2] - bbox[0], bbox[3] - bbox[1], fill=False, edgecolor='green', linewidth=2))
        ax.text(bbox[0], bbox[3] + 40, 'GT_{:s} \nCROP_{:s} {:.3f}'.format(row['crop_class'], class_CROP, score_CROP), bbox=dict(facecolor='green', alpha=0.5), fontsize=8, color='white')
    ax.set_title(('Image {:s}    FISH {:s}    logloss {}').format(image_file, image_class, image_logloss), fontsize=10) 
    plt.axis('off')
    plt.tight_layout()
    plt.draw()

In [39]:
#temperature
# Temperature scaling: every probability is raised to the power 1/T
# (exp(log(p)/T)) and each row is renormalised to sum to 1.
T = 2.5
test_pred_array = test_pred_df[FISH_CLASSES].values
test_pred_T_array = np.exp(np.log(test_pred_array)/T)
test_pred_T_array = test_pred_T_array/np.sum(test_pred_T_array, axis=1, keepdims=True)
test_pred_T_df = pd.DataFrame(test_pred_T_array, columns=FISH_CLASSES)
test_pred_T_df = pd.concat([test_pred_df[['image_class','image_file']],test_pred_T_df], axis=1)

#test submission
# Rows labelled 0..13152 are written out -- presumably these are the unlabelled
# test images; verify against the ordering of test.txt.
# .copy() makes `submission` an independent frame, so the column added below is
# not written into a slice of test_pred_T_df.
submission = test_pred_T_df.loc[:13152,['image_file','NoF', 'ALB', 'BET', 'DOL', 'LAG', 'OTHER', 'SHARK', 'YFT']].copy()
# file names starting with 'image' get the 'test_stg2/' prefix, others stay as they are
submission['image'] = ['test_stg2/'+name if name[:5]=='image' else name
                       for name in submission['image_file']]
submission = submission[['image', 'NoF', 'ALB', 'BET', 'DOL', 'LAG', 'OTHER', 'SHARK', 'YFT']]
# .format applies only to the adjacent literals ('_TA10' + '_clsMaxAve...'), not to the concatenated prefix
sub_file = ('RFCN_AGONOSTICnms_'+RFCN_MODEL+'_'+CROP_MODEL+'_'+test_model_name+ '_TA10'
            '_clsMaxAve_conf{:.2f}_cropvalidloss{:.4f}_cropallloss{:.4f}_imagevalidloss{:.4f}_imageallloss{:.4f}_T{}.csv'
            .format(CONF_THRESH, crop_valid_loss, crop_all_loss, image_valid_loss, image_all_loss, T))
submission.to_csv(OUTPUT_DIR + sub_file, index=False)
print('Done!'+sub_file)



Done!RFCN_AGONOSTICnms_resnet101_rfcn_ohem_iter_30000_resnet50_FT_Hybrid_woNoF_weights.771-0.1162.hdf5_TA10_clsMaxAve_conf0.80_cropvalidloss0.1177_cropallloss0.0168_imagevalidloss0.0773_imageallloss0.0298_T2.5.csv
