In [1]:
import warnings
warnings.filterwarnings('ignore')

import re
import os
import cv2
import glob
import json
import datetime
import numpy as np
import pandas as pd
from tqdm import tqdm

from keras import backend as K
from models.net_models import resnet50_model, vgg19_model

Using TensorFlow backend.


In [2]:
# --- Run configuration -------------------------------------------------------
# balance:    when True, predicted class indices are translated through
#             landmark_id_map.json; when False they are used as-is.
# input_size: images are resized to input_size x input_size before inference.
# batch_size: number of images sent to the model per predict call.
balance = False
input_size = 197
batch_size = 64

data_dir = '/disk/landmark_rec/'
test_df = pd.read_csv(os.path.join(data_dir, 'test.csv'))

if balance:
    # A context manager closes the file handle deterministically
    # (json.load(open(...)) leaves it open until garbage collection).
    with open(os.path.join(data_dir, 'landmark_id_map.json')) as map_file:
        landmark_id_map = json.load(map_file)
    # JSON object keys are always strings; turn them back into integers so
    # the map can be indexed with an argmax result.
    landmark_id_map = {int(k): v for k, v in landmark_id_map.items()}
    num_class = len(landmark_id_map)
else:
    # Width of the classifier output when no id map is used.
    num_class = 14951

test_df.head()

Unnamed: 0,id,url
0,000088da12d664db,https://lh3.googleusercontent.com/-k45wfamuhT8...
1,0001623c6d808702,https://lh3.googleusercontent.com/-OQ0ywv8KVIA...
2,0001bbb682d45002,https://lh3.googleusercontent.com/-kloLenz1xZk...
3,0002362830cfe3a3,https://lh3.googleusercontent.com/-N6z79jNZYTg...
4,000270c9100de789,https://lh3.googleusercontent.com/-keriHaVOq1U...


In [3]:
# Keep only the test ids whose image was actually downloaded to disk.
# os.path.splitext removes just the trailing extension; the previous regex
# '.jpg' had an unescaped dot and no anchor, so it would also have removed
# "<any character>jpg" from the middle of a file name.
all_ids = [os.path.splitext(os.path.basename(path))[0]
           for path in glob.glob(os.path.join(data_dir, 'test', '*.jpg'))]

# Series of ids (original test_df row labels are preserved).
test_X = test_df.loc[test_df['id'].isin(all_ids), 'id']

In [4]:
# Start from a clean Keras session, then build the ResNet50-based network
# and load its trained weights from <data_dir>/weights/<model_name>.hdf5.
K.clear_session()

model_name = 'hadamard_resnet50'
model = resnet50_model(
    input_shape=(input_size, input_size, 3),
    num_classes=num_class,
    weight_path=os.path.join(data_dir, 'weights/{0}.hdf5'.format(model_name)),
)
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
batch_normalization_1 (Batch (None, 197, 197, 3)       12        
_________________________________________________________________
resnet50 (Model)             multiple                  23587712  
_________________________________________________________________
average_pooling2d_1 (Average (None, 4, 4, 2048)        0         
_________________________________________________________________
flatten_1 (Flatten)          (None, 32768)             0         
_________________________________________________________________
hadamard_classifier_1 (Hadam (None, 14951)             14952     
_________________________________________________________________
activation_50 (Activation)   (None, 14951)             0         
Total params: 23,602,676
Trainable params: 23,549,550
Non-trainable params: 53,126
___________________________________________________________

In [5]:
# Run the current model over every downloaded test image, one batch at a time.
#
# Ids and probability rows are gathered in plain lists and converted to a
# DataFrame once at the end: concatenating onto a growing frame inside the
# loop re-copies all previously collected rows on every iteration.
submission_ids = []
submission_probs = []

for start in tqdm(range(0, len(test_X), batch_size)):
    end = min(start + batch_size, len(test_X))
    test_X_batch = test_X[start:end]

    x_batch = []
    for id_ in test_X_batch.values:
        img_path = data_dir+'test/{}.jpg'.format(id_)
        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread reports an unreadable/corrupt file by returning None;
            # fail with the offending path instead of an opaque resize error.
            raise IOError('Could not read image: {0}'.format(img_path))
        x_batch.append(cv2.resize(img, (input_size, input_size)))

    # Scale pixel values by 1/255 before feeding the network.
    x_batch = np.array(x_batch, np.float32) / 255
    preds = model.predict_on_batch(x_batch)

    submission_ids.extend(test_X_batch.values)
    submission_probs.extend(preds.tolist())

# One row per image: its id and the list of per-class probabilities.
submission = pd.DataFrame({'id': submission_ids, 'landmarks': submission_probs},
                          columns=['id', 'landmarks'])

100%|██████████| 1805/1805 [07:24<00:00,  4.06it/s]


In [9]:
# Drop the previous graph/session, then build the VGG19-based network and
# load its trained weights from <data_dir>/weights/<model_name>.hdf5.
K.clear_session()

model_name = 'hadamard_vgg19'
model = vgg19_model(
    input_shape=(input_size, input_size, 3),
    num_classes=num_class,
    weight_path=os.path.join(data_dir, 'weights/{0}.hdf5'.format(model_name)),
)
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
batch_normalization_1 (Batch (None, 197, 197, 3)       12        
_________________________________________________________________
vgg19 (Model)                (None, 6, 6, 512)         20024384  
_________________________________________________________________
flatten_1 (Flatten)          (None, 18432)             0         
_________________________________________________________________
hadamard_classifier_1 (Hadam (None, 14951)             14952     
_________________________________________________________________
activation_1 (Activation)    (None, 14951)             0         
Total params: 20,039,348
Trainable params: 20,039,342
Non-trainable params: 6
_________________________________________________________________


In [14]:
# Blend the current model's probabilities into the ones already stored in
# `submission`:  new = 0.8 * stored + 0.2 * current model.
for start in tqdm(range(0, len(test_X), batch_size)):
    end = min(start + batch_size, len(test_X))
    test_X_batch = test_X[start:end]

    x_batch = []
    for id_ in test_X_batch.values:
        img_path = data_dir+'test/{}.jpg'.format(id_)
        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread reports an unreadable/corrupt file by returning None.
            raise IOError('Could not read image: {0}'.format(img_path))
        x_batch.append(cv2.resize(img, (input_size, input_size)))

    # Scale pixel values by 1/255 before feeding the network.
    x_batch = np.array(x_batch, np.float32) / 255
    preds = model.predict_on_batch(x_batch)

    # Pair every new probability row with its image id. The previous code
    # wrapped `preds` in a Series labelled 0..b-1 and added it to rows
    # labelled start..end-1; pandas aligns on labels, so every batch after
    # the first was blended into NaN.
    preds_by_id = dict(zip(test_X_batch.values, preds))

    idx = submission['id'].isin(test_X_batch)
    batch_rows = submission.loc[idx, ['id', 'landmarks']]
    blended = pd.Series(
        [np.asarray(stored) * 0.8 + preds_by_id[row_id] * 0.2
         for row_id, stored in zip(batch_rows['id'], batch_rows['landmarks'])],
        index=batch_rows.index,
    )

    # A single .loc call on the frame (not submission['landmarks'].loc[...]),
    # so the write is guaranteed to land in `submission` itself.
    submission.loc[idx, 'landmarks'] = blended


100%|██████████| 1805/1805 [08:49<00:00,  3.41it/s]


In [None]:
if balance:
    def proba2idx(proba, threshold=.5):
        """Format the top-scoring class as '<landmark id> <score>'.

        The argmax position is translated through ``landmark_id_map``; when it
        maps to the sentinel id 99999 an empty string is returned instead.
        ``threshold`` is accepted but not consulted in this variant.
        """
        best = np.argmax(proba)
        landmark = landmark_id_map[best]
        if landmark == 99999:
            return ''
        return '{0} {1:.5f}'.format(landmark, proba[best])
else:
    def proba2idx(proba, threshold=0.1):
        """Format the top-scoring class as '<class index> <score>'.

        Returns an empty string when the best score is below ``threshold``.
        """
        best = np.argmax(proba)
        score = proba[best]
        return '' if score < threshold else '{0} {1:.5f}'.format(best, score)

        
# Collapse each probability vector into its submission string.
submission['landmarks'] = submission['landmarks'].apply(proba2idx)

# Test ids without a prediction still need a row, with an empty landmarks
# field. Selecting only 'id' and adding the column through assign() builds a
# fresh frame, rather than renaming columns by position on a slice of test_df
# and then writing into that slice.
missing_id = (
    test_df.loc[~test_df['id'].isin(submission['id']), ['id']]
    .assign(landmarks='')
)

# ignore_index gives the combined frame unique 0..n-1 row labels instead of
# carrying over overlapping labels from the two inputs.
submission = pd.concat([submission, missing_id], axis=0, ignore_index=True)
submission.head()

In [None]:
# Write the submission to <data_dir>/submit/sub_ensemble_<MMDD>.csv.
# `os` and `datetime` are already imported in the notebook's first cell, so
# they are not re-imported here.
date = datetime.date.today().strftime('%m%d')  # zero-padded month + day

# Create the output folder if needed so the write cannot fail on a missing
# directory after the long inference cells have finished.
submit_dir = os.path.join(data_dir, 'submit')
if not os.path.isdir(submit_dir):
    os.makedirs(submit_dir)

submission.to_csv(os.path.join(submit_dir, 'sub_{0}_{1}.csv'.format('ensemble', date)),
                  index=False)

In [None]:
# Show the last 20 rows of the final frame. Rows for ids without a prediction
# are appended after the predicted ones, so they appear at this end.
submission.tail(20)