In [1]:
import os, random, glob, pickle, collections
import numpy as np
import pandas as pd
import ujson as json
from PIL import Image
from sklearn.model_selection import train_test_split
from sklearn.metrics import log_loss
from sklearn.preprocessing import LabelEncoder

import matplotlib.pyplot as plt
from matplotlib import ticker
import seaborn as sns
%matplotlib inline 

from keras.models import Sequential, Model, load_model, model_from_json
from keras.layers import GlobalAveragePooling2D, Flatten, Dropout, Dense
from keras.optimizers import Adam
from keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau, TensorBoard
from keras.preprocessing.image import ImageDataGenerator
from keras.utils import np_utils
from keras import backend as K
K.set_image_dim_ordering('tf')

from skimage.data import imread
from skimage.io import imshow,imsave
import cv2
from skimage.util import crop
from skimage.transform import rotate
from skimage.transform import resize
import math

Using TensorFlow backend.


In [2]:
TRAIN_DIR = '../data/train/'
TEST_DIR = '../RFCN/JPEGImages/'
TRAIN_CROP_DIR = '../data/train_crop/'
TEST_CROP_DIR = '../data/test_stg1_crop/'
FISH_CLASSES = ['ALB', 'BET', 'DOL', 'LAG', 'NoF', 'OTHER', 'SHARK', 'YFT']
CONF_THRESH = 0.8
ROWS = 224
COLS = 224
BatchSize = 128
LearningRate = 1e-4
le = LabelEncoder()
le.fit(FISH_CLASSES)
le.transform(FISH_CLASSES)

array([0, 1, 2, 3, 4, 5, 6, 7])

In [3]:
#Loading data
import pickle

def get_images(fish):
    """Load files from train folder"""
    fish_dir = TRAIN_CROP_DIR+'{}'.format(fish)
    images = [fish+'/'+im for im in os.listdir(fish_dir)]
    return images

def read_image(src):
    """Read and resize individual images"""
    im = Image.open(src)
    im = im.resize((COLS, ROWS), Image.BILINEAR)
    im = np.asarray(im)
    return im

if os.path.exists('../data/data_train_BBCrop_{}_{}.pickle'.format(ROWS, COLS)):
    print ('Exist data_train_BBCrop_{}_{}.pickle. Loading data from file.'.format(ROWS, COLS))
    with open('../data/data_train_BBCrop_{}_{}.pickle'.format(ROWS, COLS), 'rb') as f:
        data_train = pickle.load(f)
    X_train = data_train['X_train']
    y_train = data_train['y_train']
else:
    print ('Loading data from original images. Generating data_train_BBCrop_{}_{}.pickle.'.format(ROWS, COLS))
    
    files = []
    y_train = []

    for fish in FISH_CLASSES:
        fish_files = get_images(fish)
        files.extend(fish_files)

        y_fish = np.tile(fish, len(fish_files))
        y_train.extend(y_fish)
        #print("{0} photos of {1}".format(len(fish_files), fish))

    y_train = np.array(y_train)
    X_train = np.ndarray((len(files), ROWS, COLS, 3), dtype=np.uint8)

    for i, im in enumerate(files): 
        X_train[i] = read_image(TRAIN_CROP_DIR+im)
        if i%1000 == 0: print('Processed {} of {}'.format(i, len(files)))

    #X_train = X_train / 255.
    #print(X_train.shape)

    # One Hot Encoding Labels
    y_train = le.transform(y_train)
    y_train = np_utils.to_categorical(y_train)
    
    #save data to file
    data_train = {'X_train': X_train,'y_train': y_train }

    with open('../data/data_train_BBCrop_{}_{}.pickle'.format(ROWS, COLS), 'wb') as f:
        pickle.dump(data_train, f)

#rescale
X_train = X_train / 255.
X_train = X_train.astype(np.float32)
X_train, X_valid, y_train, y_valid = train_test_split(X_train, y_train, test_size=0.2, random_state=None, stratify=y_train)

Exist data_train_BBCrop_224_224.pickle. Loading data from file.


In [4]:
X_train.shape

(15583, 224, 224, 3)

In [5]:
#data preprocessing

train_datagen = ImageDataGenerator(
    featurewise_center=True,
    #featurewise_std_normalization=True,
    #rescale=1./255,
    rotation_range=20,
    shear_range=0.2,
    zoom_range=[0.9,1.1],
    width_shift_range=0.1,
    height_shift_range=0.1,
    horizontal_flip=True,
    vertical_flip=True)
train_datagen.fit(X_train)
train_generator = train_datagen.flow(X_train, y_train, batch_size=BatchSize, shuffle=True, seed=None)

valid_datagen = ImageDataGenerator(
    featurewise_center=True)
    #featurewise_std_normalization=True)
    #rescale=1./255
valid_datagen.fit(X_valid)   
valid_generator = valid_datagen.flow(X_valid, y_valid, batch_size=BatchSize, shuffle=True, seed=None)

In [6]:
#callbacks

early_stopping = EarlyStopping(monitor='val_loss', min_delta=0, patience=10, verbose=1, mode='auto')        

model_checkpoint = ModelCheckpoint(filepath='./checkpoints/checkpoint2/weights.{epoch:03d}-{val_loss:.4f}.hdf5', monitor='val_loss', verbose=1, save_best_only=True, save_weights_only=False, mode='auto')
        
learningrate_schedule = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=5, verbose=1, mode='auto', epsilon=0.001, cooldown=0, min_lr=0)

tensorboard = TensorBoard(log_dir='./logs/log2', histogram_freq=0, write_graph=True, write_images=True)


In [None]:
#Resnet50
#stg1 training

from keras.applications.resnet50 import ResNet50

base_model = ResNet50(weights='imagenet', include_top=False)
x = base_model.output
x = GlobalAveragePooling2D()(x)
#x = Flatten()(x)
#x = Dense(256, init='glorot_normal', activation='relu')(x)
#x = LeakyReLU(alpha=0.33)(x)
#x = Dropout(0.5)(x)
#x = Dense(256, init='glorot_normal', activation='relu')(x)
#x = LeakyReLU(alpha=0.33)(x)
#x = Dropout(0.5)(x)
predictions = Dense(len(FISH_CLASSES), init='glorot_normal', activation='softmax')(x)

# this is the model we will train
model = Model(input=base_model.input, output=predictions)

# first: train only the top layers (which were randomly initialized)
# i.e. freeze all convolutional VGG16 layers
for layer in base_model.layers:
    layer.trainable = False

# compile the model (should be done *after* setting layers to non-trainable)
optimizer = Adam(lr=1e-5)
model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])

# train the model on the new data for a few epochs
model.fit_generator(train_generator, samples_per_epoch=len(X_train), nb_epoch=30, verbose=1, 
                    callbacks=[early_stopping, model_checkpoint, learningrate_schedule, tensorboard], 
                    validation_data=valid_generator, nb_val_samples=len(X_valid), nb_worker=3, pickle_safe=True)

Epoch 1/30



Epoch 00000: val_loss improved from inf to 1.40994, saving model to ./checkpoints/checkpoint2/weights.000-1.4099.hdf5
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30

In [10]:
#Resnet50
#stg2 training

# files = glob.glob('./checkpoints/*')
# val_losses = [float(f.split('-')[-1][:-5]) for f in files]
# index = val_losses.index(min(val_losses))
print('Loading model from checkpoints file weights.028-0.4157.hdf5')
model = load_model('./checkpoints/checkpoint2/weights.028-0.4157.hdf5')

from keras.applications.resnet50 import ResNet50

base_model = ResNet50(weights='imagenet', include_top=False)
# at this point, the top layers are well trained and we can start fine-tuning
# convolutional layers from inception V3. We will freeze the bottom N layers
# and train the remaining top layers.

# let's visualize layer names and layer indices to see how many layers
# we should freeze:
for i, layer in enumerate(base_model.layers):
   print(i, layer.name)

# we chose to train the top 2 inception blocks, i.e. we will freeze
# the first 172 layers and unfreeze the rest:
#164
for layer in model.layers[:142]:
   layer.trainable = False
for layer in model.layers[142:]:
   layer.trainable = True

# we need to recompile the model for these modifications to take effect
# we use SGD with a low learning rate
optimizer = Adam(lr=LearningRate)
model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])

model.fit_generator(train_generator, samples_per_epoch=len(X_train), nb_epoch=300, verbose=1, 
                    callbacks=[early_stopping, model_checkpoint, learningrate_schedule, tensorboard], 
                    validation_data=valid_generator, nb_val_samples=len(X_valid), nb_worker=3, pickle_safe=True)

Loading model from checkpoints file weights.028-0.4157.hdf5
0 input_1
1 zeropadding2d_1
2 conv1
3 bn_conv1
4 activation_1
5 maxpooling2d_1
6 res2a_branch2a
7 bn2a_branch2a
8 activation_2
9 res2a_branch2b
10 bn2a_branch2b
11 activation_3
12 res2a_branch2c
13 res2a_branch1
14 bn2a_branch2c
15 bn2a_branch1
16 merge_1
17 activation_4
18 res2b_branch2a
19 bn2b_branch2a
20 activation_5
21 res2b_branch2b
22 bn2b_branch2b
23 activation_6
24 res2b_branch2c
25 bn2b_branch2c
26 merge_2
27 activation_7
28 res2c_branch2a
29 bn2c_branch2a
30 activation_8
31 res2c_branch2b
32 bn2c_branch2b
33 activation_9
34 res2c_branch2c
35 bn2c_branch2c
36 merge_3
37 activation_10
38 res3a_branch2a
39 bn3a_branch2a
40 activation_11
41 res3a_branch2b
42 bn3a_branch2b
43 activation_12
44 res3a_branch2c
45 res3a_branch1
46 bn3a_branch2c
47 bn3a_branch1
48 merge_4
49 activation_13
50 res3b_branch2a
51 bn3b_branch2a
52 activation_14
53 res3b_branch2b
54 bn3b_branch2b
55 activation_15
56 res3b_branch2c
57 bn3b_branch2c




Epoch 00000: val_loss improved from 0.41571 to 0.22631, saving model to ./checkpoints/checkpoint2/weights.000-0.2263.hdf5
Epoch 2/300
Epoch 3/300
Epoch 4/300
Epoch 5/300
Epoch 6/300
Epoch 7/300
Epoch 8/300
Epoch 9/300
Epoch 10/300
Epoch 11/300

Epoch 00010: reducing learning rate to 9.999999747378752e-06.
Epoch 12/300
Epoch 13/300
Epoch 14/300
Epoch 15/300
Epoch 16/300
Epoch 17/300
Epoch 18/300
Epoch 19/300

KeyboardInterrupt: 

In [9]:
#resume training

files = glob.glob('./checkpoints/checkpoint2/*')
val_losses = [float(f.split('-')[-1][:-5]) for f in files]
index = val_losses.index(min(val_losses))
print('Loading model from checkpoints file ' + files[index])
model = load_model(files[index])

model.fit_generator(train_generator, samples_per_epoch=len(X_train), nb_epoch=30, verbose=1, 
                    callbacks=[early_stopping, model_checkpoint, learningrate_schedule, tensorboard], 
                    validation_data=valid_generator, nb_val_samples=len(X_valid), nb_worker=3, pickle_safe=True)

Loading model from checkpoints file ./checkpoints/checkpoint2/weights.004-1.1775.hdf5
Epoch 1/30



Epoch 00000: val_loss improved from inf to 1.02681, saving model to ./checkpoints/checkpoint2/weights.000-1.0268.hdf5
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<keras.callbacks.History at 0x7f3c16fcce10>

In [3]:
#get bbox from detections_full_AGNOSTICnms.pkl
RFCN_MODEL = 'resnet101_rfcn_ohem_iter_30000'

import pickle 
with open('../data/RFCN_detections/detections_full_AGNOSTICnms_'+RFCN_MODEL+'.pkl','rb') as f:
    detections_full_AGNOSTICnms = pickle.load(f, encoding='latin1') 
    
outputs = []
count = np.zeros(len(detections_full_AGNOSTICnms))

for im in range(len(detections_full_AGNOSTICnms)):
    outputs_im = []
    detects_im = detections_full_AGNOSTICnms[im]
    for i in range(len(detects_im)):
        if np.max(detects_im[i,5:]) >= CONF_THRESH:
            outputs_im.append(detects_im[i,:]) 
    count[im] = len(outputs_im)
    if len(outputs_im) == 0:
        ind = np.argmax(np.max(detects_im[:,5:], axis=1))
        outputs_im.append(detects_im[ind,:])
    outputs_im = np.asarray(outputs_im)
    outputs.append(outputs_im)
    
#crop test images and cache to TEST_CROP_DIR

# if not os.path.exists(TEST_CROP_DIR):
#     os.mkdir(TEST_CROP_DIR)
# files = glob.glob(TEST_CROP_DIR+'*')
# for f in files:
#     os.remove(f)
    
# with open("../RFCN/ImageSets/Main/test.txt","r") as f:
#     ims = f.readlines()
# test_files = [im[:-1]+'.jpg' for im in ims]

# for i in range(len(outputs)):
#     if i%1000 == 0:
#         print(i)
#     filename = test_files[i]
#     bboxes = outputs[i]
#     basename, file_extension = os.path.splitext(filename) 
#     image = Image.open(TEST_DIR+filename)
#     for j in range(len(bboxes)):
#         bbox = bboxes[j]
#         xmin = bbox[0]
#         ymin = bbox[1]
#         xmax = bbox[2]
#         ymax = bbox[3]
#         file_crop = TEST_CROP_DIR+basename+'_{}'.format(j)+'.jpg'
#         cropped = image.crop((xmin, ymin, xmax, ymax))
#         width_cropped, height_cropped = cropped.size
#         if height_cropped > width_cropped: cropped = cropped.transpose(method=2)
#         cropped.save(file_crop)
        
print(sum([outputs[i].shape[0] for i in range(len(outputs))]))

6037


In [63]:
test_crop_preds = np.vstack(outputs)[:,4:]

columns = ['NoF_RFCN', 'ALB_RFCN', 'BET_RFCN', 'DOL_RFCN', 'LAG_RFCN', 'OTHER_RFCN', 'SHARK_RFCN', 'YFT_RFCN']
RFCN_preds_df = pd.DataFrame(test_crop_preds, columns=columns)


with open("../RFCN/ImageSets/Main/test.txt","r") as f:
    ims = f.readlines()
test_files = [im[:-1]+'.jpg' for im in ims]

test_crop_files_RFCN = []
for i in range(len(outputs)):
    filename = test_files[i]
    basename, file_extension = os.path.splitext(filename) 
    for j in range(len(outputs[i])):
        file_crop = basename+'_{}_'.format(j)+'.jpg'
        test_crop_files_RFCN.append(file_crop)
        
RFCN_preds_df.insert(0, 'test_crop_files', test_crop_files_RFCN)

In [65]:
RFCN_preds_df.head()

Unnamed: 0,test_crop_files,NoF_RFCN,ALB_RFCN,BET_RFCN,DOL_RFCN,LAG_RFCN,OTHER_RFCN,SHARK_RFCN,YFT_RFCN
0,img_00005_0_.jpg,0.999997,2e-06,7.765914e-08,2.145238e-07,5.994206e-07,2.103843e-07,1.2822e-07,2.089381e-07
1,img_00007_0_.jpg,0.000127,8.3e-05,6.000044e-05,3.268235e-06,8.629868e-07,9.324418e-08,3.096656e-06,0.9997229
2,img_00009_0_.jpg,0.000411,0.998497,0.0009396088,7.386075e-07,6.028753e-07,0.0001481739,6.061006e-07,2.904518e-06
3,img_00009_1_.jpg,8e-05,0.997262,0.002547039,2.058682e-07,4.018886e-07,0.0001093486,4.156222e-07,9.210435e-07
4,img_00009_2_.jpg,0.011148,0.985716,0.0004040803,3.50566e-05,7.493963e-05,0.002543065,9.936455e-06,6.904111e-05


In [41]:
#Load test data

import datetime

def read_image(src):
    """Read and resize individual images"""
    im = Image.open(src)
    im = im.resize((COLS, ROWS), Image.BILINEAR)
    im = np.asarray(im)
    return im

if os.path.exists('../data/data_test_BBCrop_{}_{}.pickle'.format(ROWS, COLS)):
    print ('Exist data_test_BBCrop_{}_{}.pickle. Loading test data from file.'.format(ROWS, COLS))
    with open('../data/data_test_BBCrop_{}_{}.pickle'.format(ROWS, COLS), 'rb') as f:
        data_test = pickle.load(f)
    X_test_crop = data_test['X_test_crop']
    test_crop_files = data_test['test_crop_files']
else:
    print ('Loading test data from original images. Generating data_test_BBCrop_{}_{}.pickle.'.format(ROWS, COLS))

    test_crop_files = sorted([im for im in os.listdir(TEST_CROP_DIR)])
    X_test_crop = np.ndarray((len(test_crop_files), ROWS, COLS, 3), dtype=np.uint8)

    for i, im in enumerate(test_crop_files): 
        X_test_crop[i] = read_image(TEST_CROP_DIR+im)
        if i%1000 == 0: print('Processed {} of {}'.format(i, len(test_crop_files)))
            
    data_test = {'X_test_crop': X_test_crop,'test_crop_files': test_crop_files }
    
    with open('../data/data_test_BBCrop_{}_{}.pickle'.format(ROWS, COLS), 'wb') as f:
        pickle.dump(data_test, f, protocol=4)
        
X_test_crop = X_test_crop / 255.

Exist data_test_BBCrop_224_224.pickle. Loading test data from file.


In [69]:
print('Loading model from weights.004-0.0565.hdf5')
model = load_model('./checkpoints/checkpoint2/weights.004-0.0565.hdf5')
test_crop_preds = model.predict(X_test_crop, batch_size=BatchSize, verbose=1)

columns = ['ALB_BBCROP', 'BET_BBCROP', 'DOL_BBCROP', 'LAG_BBCROP', 'NoF_BBCROP', 'OTHER_BBCROP', 'SHARK_BBCROP', 'YFT_BBCROP']
BBCROP_preds_df = pd.DataFrame(test_crop_preds, columns=columns)

test_crop_files_BBCROP = test_crop_files
BBCROP_preds_df.insert(0, 'test_crop_files', test_crop_files_BBCROP)

Loading model from weights.004-0.0565.hdf5


In [70]:
BBCROP_preds_df.head()

Unnamed: 0,test_crop_files,ALB_BBCROP,BET_BBCROP,DOL_BBCROP,LAG_BBCROP,NoF_BBCROP,OTHER_BBCROP,SHARK_BBCROP,YFT_BBCROP
0,img_00003_0_.jpg,0.650035,3.1e-05,3.2e-05,0.303847,0.013463,0.000206,4e-06,0.032382
1,img_00003_1_.jpg,0.612278,0.000123,0.000318,0.007562,0.353335,0.000562,0.002353,0.023469
2,img_00003_2_.jpg,0.828563,0.00093,0.000137,0.079166,0.04001,0.001362,1.8e-05,0.049815
3,img_00003_3_.jpg,0.035011,0.000273,0.000792,0.00341,0.923263,0.005048,0.00504,0.027163
4,img_00004_0_.jpg,0.001726,0.005767,0.002891,8.9e-05,0.018004,4.3e-05,0.001965,0.969516


In [71]:
test_preds_df = pd.merge(RFCN_preds_df, BBCROP_preds_df)  

In [75]:
for c in FISH_CLASSES:
    test_preds_df[c+'_RFCN-BBCROP'] = test_preds_df[c+'_RFCN'] - test_preds_df[c+'_BBCROP']



In [77]:
columns_diff = [c+'_RFCN-BBCROP' for c in FISH_CLASSES]
test_preds_df['max_diff'] = test_preds_df[columns_diff].max(axis=1)

In [80]:
test_preds_df[test_preds_df["max_diff"]>=0.8].head(10)

Unnamed: 0,test_crop_files,NoF_RFCN,ALB_RFCN,BET_RFCN,DOL_RFCN,LAG_RFCN,OTHER_RFCN,SHARK_RFCN,YFT_RFCN,ALB_BBCROP,...,YFT_BBCROP,ALB_RFCN-BBCROP,BET_RFCN-BBCROP,DOL_RFCN-BBCROP,LAG_RFCN-BBCROP,NoF_RFCN-BBCROP,OTHER_RFCN-BBCROP,SHARK_RFCN-BBCROP,YFT_RFCN-BBCROP,max_diff
2,img_00009_0_.jpg,0.000411,0.998497,0.00094,7.386075e-07,6.028753e-07,0.000148,6.061006e-07,2.904518e-06,0.185161,...,0.051031,0.813336,-0.006452,-0.005766,-0.023524,-0.713069,-0.013078,-0.000419,-0.051029,0.813336
3,img_00009_1_.jpg,8e-05,0.997262,0.002547,2.058682e-07,4.018886e-07,0.000109,4.156222e-07,9.210435e-07,0.171151,...,0.002431,0.82611,0.002122,-9.2e-05,-0.047888,-0.777867,5.5e-05,-1e-05,-0.00243,0.82611
7,img_00027_0_.jpg,0.000204,0.989823,0.004538,6.229545e-05,2.735355e-05,0.004744,2.124482e-05,0.0005801505,0.002152,...,0.004572,0.987671,0.004517,-9.7e-05,1.9e-05,-0.000363,0.003326,-0.991083,-0.003992,0.987671
11,img_00053_0_.jpg,0.003392,0.995045,0.000144,3.611618e-05,4.381267e-05,0.000147,1.218681e-05,0.001179397,0.042701,...,6e-05,0.952344,6.1e-05,-4.9e-05,-0.00062,-0.95243,-6.1e-05,-0.000363,0.001119,0.952344
17,img_00102_1_.jpg,0.00249,0.996992,1.7e-05,1.210032e-06,2.301208e-06,4.4e-05,8.039682e-06,0.000445509,0.078716,...,0.000295,0.918276,-3.7e-05,-5.6e-05,-0.000343,-0.913665,-0.000414,-0.003913,0.000151,0.918276
28,img_00138_0_.jpg,0.002167,0.993929,0.000827,4.470697e-05,5.833802e-05,0.000369,2.139192e-05,0.002583566,0.085957,...,0.00267,0.907972,-0.001575,-0.001822,-0.005077,-0.898538,-0.000104,-0.00077,-8.7e-05,0.907972
33,img_00170_0_.jpg,0.010553,0.005909,0.97026,0.0002192429,0.009156979,0.001424,7.300457e-05,0.00240552,0.024338,...,0.016697,-0.018429,0.96346,4e-06,-0.262183,-0.668788,0.00042,-0.000193,-0.014292,0.96346
40,img_00223_0_.jpg,0.001502,0.99525,0.000643,7.540416e-05,0.0001170128,0.001112,0.0001148711,0.001185521,0.104764,...,0.000118,0.890486,0.000572,-0.000239,-0.000713,-0.891655,0.000971,-0.000489,0.001067,0.890486
45,img_00232_0_.jpg,0.002118,9.2e-05,0.000109,9.505967e-05,4.251345e-06,0.008513,0.9889659,0.0001038441,0.059854,...,0.003797,-0.059762,-0.002659,-0.000338,-0.003148,-0.914922,0.007113,0.977409,-0.003693,0.977409
49,img_00282_0_.jpg,0.051981,0.000411,6.5e-05,0.0001735692,3.506795e-05,0.947223,3.669118e-05,7.468431e-05,0.02559,...,0.000721,-0.025179,-0.000539,-0.00047,-0.000421,-0.916746,0.945165,-0.001164,-0.000646,0.945165


In [31]:
#test preds clsMaxAve
FISH_CLASSES = ['ALB', 'BET', 'DOL', 'LAG', 'NoF', 'OTHER', 'SHARK', 'YFT']

# files = glob.glob('./checkpoints/checkpoint2/*')
# val_losses = [float(f.split('-')[-1][:-5]) for f in files]
# index = val_losses.index(min(val_losses))
# print('Loading model from', files[index])
# model = load_model(files[index])
print('Loading model from weights.004-0.0565.hdf5')
model = load_model('./checkpoints/checkpoint2/weights.004-0.0565.hdf5')

test_crop_preds = model.predict(X_test_crop, batch_size=BatchSize, verbose=1)
#test_crop_preds = np.vstack(outputs)[:,4:]

with open("../RFCN/ImageSets/Main/test.txt","r") as f:
    ims = f.readlines()
test_files = [im[:-1]+'.jpg' for im in ims]

count = np.zeros(len(test_files))

test_preds = np.ndarray((len(test_files), test_crop_preds.shape[1]), dtype=np.float32)
for j in range(len(test_files)):
    if j%1000 == 0:
        print(j)
    file = test_files[j]
    test_preds_im = []
    for i in range(len(test_crop_files)):
        if test_crop_files[i][:9] == file[:9]:
            test_preds_im.append(test_crop_preds[i])
    test_preds_im = np.asarray(test_preds_im)
    score_max = np.max(test_preds_im, axis=1)
    inds = np.argmax(test_preds_im, axis=1)
    labels = [FISH_CLASSES[ind] for ind in inds]
    columns = FISH_CLASSES[:]
    test_preds_im_df = pd.DataFrame(test_preds_im, columns=columns)
    test_preds_im_df['max_cls'] = labels
    test_preds_im_df['max_score'] = score_max 
    test_preds_im_df['Counts'] = test_preds_im_df.groupby(['max_cls'])['max_cls'].transform('count')
    idx = test_preds_im_df.groupby(['max_cls'])['max_score'].transform(max) == test_preds_im_df['max_score']
    test_preds_im_df = test_preds_im_df[idx]
    count[j] = test_preds_im_df.shape[0]
    l = FISH_CLASSES.copy()
    l.append('Counts')
    test_preds_im_array = test_preds_im_df[l].as_matrix() 
    test_preds[j] = np.average(test_preds_im_array[:,:-1], axis=0, weights=test_preds_im_array[:,-1], returned=False)

Loading model from weights.003-0.0761.hdf5
0
1000
2000
3000
4000


In [33]:
#temperature
T = 2.5
test_preds_T = np.exp(np.log(test_preds)/T)
test_preds_T = test_preds_T/np.sum(test_preds_T, axis=1, keepdims=True)

In [34]:
#calculate train logloss
FISH_CLASSES = ['ALB', 'BET', 'DOL', 'LAG', 'NoF', 'OTHER', 'SHARK', 'YFT']
#FISH_CLASSES = ['NoF', 'ALB', 'BET', 'DOL', 'LAG', 'OTHER', 'SHARK', 'YFT']

train_files = test_files[1000:]
train_preds = test_preds_T[1000:,:]
with open("../RFCN/ImageSets/Main/train_test.txt","r") as f:
    train_file_labels = f.readlines()

log_losses = []
for i in range(len(train_preds)):
    im = train_files[i][:-4]
    for im_label in train_file_labels:
        if im_label[:9] == im:
            label = im_label[10:-1]
            index = FISH_CLASSES.index(label)
            log_losses.append(-math.log(train_preds[i,index]))
log_loss = sum(log_losses) / float(len(log_losses))
print('logloss of train is', log_loss )

logloss of train is 0.9195452458092187


In [53]:
#test submission
FISH_CLASSES = ['ALB', 'BET', 'DOL', 'LAG', 'NoF', 'OTHER', 'SHARK', 'YFT']
submission = pd.DataFrame(test_preds_T[:1000,:], columns=FISH_CLASSES)
submission.insert(0, 'image', test_files[:1000])

info = 'RFCN_AGONOSTICnms_'+RFCN_MODEL+'_BBCROP_resnet50_clsMaxAve_conf{:.2f}_T{}_'.format(CONF_THRESH, T) + '{:.4f}'.format(log_loss)
sub_file = 'submission_' + info + '.csv'
submission.to_csv(sub_file, index=False)

In [None]:
###clear checkpoints folder

if not os.path.exists('./checkpoints'):
    os.mkdir('./checkpoints')
files = glob.glob('./checkpoints/*')
for f in files:
    os.remove(f)

In [None]:
###clear logs folder

if not os.path.exists('./logs'):
    os.mkdir('./logs')
files = glob.glob('./logs/*')
for f in files:
    os.remove(f)