In [1]:
import os
import os.path as pth
import json
import shutil
import numpy as np
import pandas as pd
from tqdm import tqdm
import tensorflow as tf
import tensorflow.keras as keras

In [2]:
gpus = tf.config.experimental.list_physical_devices('GPU')
tf.config.experimental.set_memory_growth(gpus[0], True)

In [3]:
BASE_MODEL_NAME = 'ResNet50V2-kfold'
my_model_base = keras.applications.resnet_v2
my_model = my_model_base.ResNet50V2

config = {
    'is_zscore':True,
    
    # 'input_shape': (540, 960, 3),
    'aug': {
        'resize': (270, 480),
        #'resize': (297, 528),
    },
    # 'input_shape': (224, 360, 3),
    #'input_shape': (270, 480, 3),
    'input_shape': (270, 480, 3),

    'output_activation': 'softmax',
    'num_class': 1049,
    'output_size': 1049,
    
    'conv':{
        'conv_num': (0,), # (3,5,3),
        'base_channel': 0, # 4,
        'kernel_size': 0, # 3,
        'padding':'same',
        'stride':'X'
    },
    'pool':{
        'type':'X',
        'size':'X',
        'stride':'X',
        'padding':'same'
    },
    'fc':{
        'fc_num': 0,
     },
    
    'activation':'relu',
    
    'between_type': 'avg',
    
    'is_batchnorm': True,
    'is_dropout': False,
    'dropout_rate': 0.5,
    
    'batch_size': 80,
    'buffer_size': 256,
    'loss': 'CategoricalCrossentropy',
    
    #'num_epoch': 10000,
    'learning_rate': 1e-3,
    
    'random_state': 7777
}

In [4]:
image_feature_description = {
    'image_raw': tf.io.FixedLenFeature([], tf.string),
    'landmark_id': tf.io.FixedLenFeature([], tf.int64),
    # 'id': tf.io.FixedLenFeature([], tf.string),
}

def _parse_image_function(example_proto):
    return tf.io.parse_single_example(example_proto, image_feature_description)

def map_func(target_record):
    img = target_record['image_raw']
    label = target_record['landmark_id']
    img = tf.image.decode_jpeg(img, channels=3)
    img = tf.dtypes.cast(img, tf.float32)
    return img, label

def resize_and_crop_func(image, label):
    result_image = tf.image.resize(image, config['aug']['resize'])
    #result_image = tf.image.random_crop(image, size=config['input_shape'], seed=7777)  # crop revived.
    return result_image, label

def image_aug_func(image, label):
    pass
    return image, label

def post_process_func(image, label):
    # result_image = result_image / 255
    result_image = my_model_base.preprocess_input(image)
    onehot_label = tf.one_hot(label, depth=config['num_class'])
    return result_image, onehot_label

In [5]:
data_base_path = pth.join('data', 'public') 
os.makedirs(data_base_path, exist_ok=True)

In [6]:
category_csv_name = 'category.csv'
category_json_name = 'category.json'
submission_csv_name = 'sample_submisstion.csv'
train_csv_name = 'train.csv'

# train_zip_name = 'train.zip'
train_tfrecord_name = 'all_train.tfrecords'
train_tfrecord_path = pth.join(data_base_path, train_tfrecord_name)
val_tfrecord_name = 'all_val.tfrecords'
val_tfrecord_path = pth.join(data_base_path, val_tfrecord_name)
# test_zip_name = 'test.zip'
test_tfrecord_name = 'test.tfrecords'
test_tfrecord_path = pth.join(data_base_path, test_tfrecord_name)

In [7]:
train_csv_path = pth.join(data_base_path, train_csv_name)
train_df = pd.read_csv(train_csv_path)
train_dict = {k:v for k, v in train_df.values}

submission_csv_path = pth.join(data_base_path, submission_csv_name)
submission_df = pd.read_csv(submission_csv_path)
# submission_df.head()

category_csv_path = pth.join(data_base_path, category_csv_name)
category_df = pd.read_csv(category_csv_path)
category_dict = {k:v for k, v in category_df.values}
# category_df.head()

### Model

In [8]:
import tensorflow as tf
from tensorflow.keras.preprocessing import image
import cv2

import matplotlib.pyplot as plt
from PIL import Image

from sklearn.model_selection import train_test_split, KFold, RepeatedKFold, GroupKFold, RepeatedStratifiedKFold
from sklearn.utils import shuffle

import numpy as np
import pandas as pd
import os
import os.path as pth
import shutil
import time
from tqdm import tqdm

import itertools
from itertools import product, combinations

import numpy as np
from PIL import Image

from IPython.display import clear_output

from multiprocessing import Process, Queue
import datetime

import tensorflow.keras as keras

from tensorflow.keras.utils import to_categorical, Sequence
from tensorflow.keras.layers import Input, Dense, Activation, BatchNormalization, \
                                    Flatten, Conv3D, AveragePooling3D, MaxPooling3D, Dropout, \
                                    Concatenate, GlobalMaxPool3D, GlobalAvgPool3D
from tensorflow.keras.models import Sequential, Model, load_model
from tensorflow.keras.optimizers import SGD, Adam
from tensorflow.keras.callbacks import ModelCheckpoint,LearningRateScheduler, \
                                        EarlyStopping
from tensorflow.keras.losses import mean_squared_error, mean_absolute_error
from tensorflow.keras import backend as K
from tensorflow.keras.constraints import max_norm

In [9]:
def build_cnn(config):
    input_layer = Input(shape=config['input_shape'], name='input_layer')
    pret_model = my_model(
        input_tensor=input_layer, include_top=False, weights='imagenet', 
        input_shape=config['input_shape'], pooling=config['between_type'], 
        classes=config['output_size']
    )

    pret_model.trainable = False
    
    x = pret_model.output
    
    if config['between_type'] == None:
        x = Flatten(name='flatten_layer')(x)
        
    if config['is_dropout']:
        x = Dropout(config['dropout_rate'], name='output_dropout')(x)    
            
    x = Dense(config['output_size'], activation=config['output_activation'], 
          name='output_fc')(x)
#     x = Activation(activation=config['output_activation'], name='output_activation')(x)
    model = Model(inputs=input_layer, outputs=x, name='{}'.format(BASE_MODEL_NAME))

    return model

In [10]:
model = build_cnn(config)
model.summary(line_length=150)
del model

Model: "ResNet50V2-kfold"
______________________________________________________________________________________________________________________________________________________
Layer (type)                                     Output Shape                     Param #           Connected to                                      
input_layer (InputLayer)                         [(None, 270, 480, 3)]            0                                                                   
______________________________________________________________________________________________________________________________________________________
conv1_pad (ZeroPadding2D)                        (None, 276, 486, 3)              0                 input_layer[0][0]                                 
______________________________________________________________________________________________________________________________________________________
conv1_conv (Conv2D)                              (None, 135, 240, 64

In [10]:
model_base_path = data_base_path
model_checkpoint_path = pth.join(model_base_path, 'checkpoint')

In [11]:
def seed_everything(seed):
    random.seed(seed)
    np.random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    tf.random.set_seed(seed)

In [12]:
AUTO     = tf.data.experimental.AUTOTUNE
FILENAMES = tf.io.gfile.glob(pth.join(data_base_path, 'train_tfrec', '*'))
TEST_FILENAMES = tf.io.gfile.glob(pth.join(data_base_path, 'test_tfrec', '*'))

In [13]:
# training tfrecords 로드
def read_tr_tfrecord(example):
    TFREC_FORMAT = {
        "image_raw": tf.io.FixedLenFeature([], tf.string), 
        "landmark_id": tf.io.FixedLenFeature([], tf.int64),
        'id': tf.io.FixedLenFeature([], tf.string),
         }
    example = tf.io.parse_single_example(example, TFREC_FORMAT)
    return example
#     image = example['image_raw']
#     target = tf.cast(example['landmark_id'], tf.int64)
#     return image, target

# validation tfrecords 로드
def read_val_tfrecord(example):
    TFREC_FORMAT = {
        "image_raw": tf.io.FixedLenFeature([], tf.string), 
        "landmark_id": tf.io.FixedLenFeature([], tf.int64),
        'id': tf.io.FixedLenFeature([], tf.string),
         }
    example = tf.io.parse_single_example(example, TFREC_FORMAT)
    return example
#     image = example['image_raw']
#     target = tf.cast(example['landmark_id'], tf.int64)
#     return image, target

# test tfrecords 로드
def read_test_tfrecord(example):
    TFREC_FORMAT = {
        "image_raw": tf.io.FixedLenFeature([], tf.string), 
        'id': tf.io.FixedLenFeature([], tf.string),
         }
    example = tf.io.parse_single_example(example, TFREC_FORMAT)
    return example
#     image = example['image_raw']
#     id = example['id']
#     return image, id

In [14]:
def get_training_dataset(filenames, ordered = False):
    ignore_order = tf.data.Options()
    if not ordered:
        ignore_order.experimental_deterministic = False 
        
    dataset = tf.data.TFRecordDataset(filenames, num_parallel_reads = AUTO)
    dataset = dataset.with_options(ignore_order)
    dataset = dataset.map(read_tr_tfrecord, num_parallel_calls = AUTO)

    #dataset = dataset.map(_parse_image_function, num_parallel_calls=tf.data.experimental.AUTOTUNE)
    # dataset = dataset.cache()
    dataset = dataset.map(map_func, num_parallel_calls=tf.data.experimental.AUTOTUNE)
    dataset = dataset.map(resize_and_crop_func, num_parallel_calls=tf.data.experimental.AUTOTUNE)
    dataset = dataset.map(image_aug_func, num_parallel_calls=tf.data.experimental.AUTOTUNE)
    dataset = dataset.repeat()
    dataset = dataset.shuffle(config['buffer_size'])
    dataset = dataset.batch(config['batch_size'])
    dataset = dataset.map(post_process_func, num_parallel_calls=tf.data.experimental.AUTOTUNE)
    dataset = dataset.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)
    
    return dataset

In [15]:
def get_validation_dataset(filenames, ordered = True, prediction = False):
    ignore_order = tf.data.Options()
    if not ordered:
        ignore_order.experimental_deterministic = False 
        
    dataset = tf.data.TFRecordDataset(filenames, num_parallel_reads = AUTO)
    dataset = dataset.with_options(ignore_order)
    dataset = dataset.map(read_val_tfrecord, num_parallel_calls = AUTO)
    
    dataset = dataset.map(map_func, num_parallel_calls=tf.data.experimental.AUTOTUNE)
    dataset = dataset.map(resize_and_crop_func, num_parallel_calls=tf.data.experimental.AUTOTUNE)
    dataset = dataset.map(image_aug_func, num_parallel_calls=tf.data.experimental.AUTOTUNE)
    
    if prediction:
        dataset = dataset.batch(config['batch_size'] * 4)  # why 4 times?
    else:
        dataset = dataset.batch(config['batch_size'])
    dataset = dataset.map(post_process_func, num_parallel_calls=tf.data.experimental.AUTOTUNE)
    dataset = dataset.prefetch(AUTO) 
    return dataset

In [16]:
def test_just_image(image, id):
    return image
def test_just_id(image, id):
    return id

def get_test_dataset(filenames, ordered=True, prediction=False, name=False):
    ignore_order = tf.data.Options()
    if not ordered:
        ignore_order.experimental_deterministic = False 
        
    dataset = tf.data.TFRecordDataset(filenames, num_parallel_reads = AUTO)
    dataset = dataset.with_options(ignore_order)
    dataset = dataset.map(read_test_tfrecord, num_parallel_calls = AUTO)
    
    #dataset = load_dataset(filenames, tr='test', ordered = ordered)
    if name:
        dataset = dataset.map(test_just_id, num_parallel_calls = AUTO)
    else:
        dataset = dataset.map(test_just_image, num_parallel_calls = AUTO)
    dataset = dataset.batch(config['batch_size'])
    dataset = dataset.prefetch(AUTO)
    return dataset

In [17]:
# USE DIFFERENT SEED FOR DIFFERENT STRATIFIED KFOLD
SEED = 42

# NUMBER OF FOLDS. USE 3, 5, OR 15 
FOLDS = 5

#BATCH_SIZES = [32]*FOLDS
EPOCHS = [8]*FOLDS

PRE_TRAIN_EPOCH = 1

In [18]:
def get_lr_callback():
    lr_start   = 0.000001*10*0.5
    lr_max     = 0.0000005 * config['batch_size'] * 10*0.5
    lr_min     = 0.000001 * 10*0.5
    #lr_ramp_ep = 3 #### TODO: NEED TO BE CONSIDERED WISELY.  # 5
    lr_ramp_ep = 3 #### (small lr) going up -> ramp (large max lr) -> going down (small lr)
    lr_sus_ep  = 0
    lr_decay   = 0.8
     
    def lrfn(epoch):
        if epoch < lr_ramp_ep:
            lr = (lr_max - lr_start) / lr_ramp_ep * epoch + lr_start   
        elif epoch < lr_ramp_ep + lr_sus_ep:
            lr = lr_max    
        else:
            lr = (lr_max - lr_min) * lr_decay**(epoch - lr_ramp_ep - lr_sus_ep) + lr_min    
        print('lr=',lr)
        return lr

    lr_callback = tf.keras.callbacks.LearningRateScheduler(lrfn, verbose = False)
    return lr_callback

In [19]:
base = BASE_MODEL_NAME

base += '_resize_{}'.format(config['aug']['resize'][0])
#base += '_input_{}'.format(config['input_shape'][0])
base += '_conv_{}'.format('-'.join(map(lambda x:str(x),config['conv']['conv_num'])))
base += '_basech_{}'.format(config['conv']['base_channel'])
base += '_act_{}'.format(config['activation'])
base += '_pool_{}'.format(config['pool']['type'])
base += '_betw_{}'.format(config['between_type'])
base += '_fc_{}'.format(config['fc']['fc_num'])
base += '_zscore_{}'.format(config['is_zscore'])
base += '_batch_{}'.format(config['batch_size'])
if config['is_dropout']:
    base += '_DO_'+str(config['dropout_rate']).replace('.', '')
if config['is_batchnorm']:
    base += '_BN'+'_O'
else:
    base += '_BN'+'_X'

model_name = base

In [None]:
import gc
from sklearn.model_selection import KFold
FILENAMES = np.array(FILENAMES)

oof_pred = []; oof_tar = []; oof_val = []; oof_names = []; oof_folds = [] 
#preds = np.zeros((count_data_items(files_test),1))

skf = KFold(n_splits = FOLDS, shuffle=True,random_state=SEED)
for fold, (tr_index, val_index) in enumerate(skf.split(FILENAMES)):

#     if fold == 0:
#         continue

    print('#'*25); print('#### FOLD',fold+1)
    #gc.collect()
    
    #print('################', 'lr=', LEARNING_RATE)
    print(model_name)

    TRAINING_FILENAMES, VALIDATION_FILENAMES = FILENAMES[tr_index], FILENAMES[val_index]
    #NUM_TRAINING_IMAGES = count_data_items(TRAINING_FILENAMES)
  
    np.random.shuffle(TRAINING_FILENAMES); print('#'*25)
    #seed_everything(SEED)
    
    train_dataset = get_training_dataset(TRAINING_FILENAMES,ordered = False)
    val_dataset = get_validation_dataset(VALIDATION_FILENAMES,ordered = True, prediction = False)
    
    print('FILENAMES=', len(FILENAMES))
    print('TRAINING_FILENAMES=', len(TRAINING_FILENAMES))
    print('VALIDATION_FILENAMES=', len(VALIDATION_FILENAMES))
    STEPS_PER_EPOCH = np.ceil(len(TRAINING_FILENAMES)/config['batch_size'])
    print('STEPS_PER_EPOCH=', STEPS_PER_EPOCH)

    model_path = pth.join(
        model_checkpoint_path, model_name, 
    )
    model = build_cnn(config)
    #         model.summary()
#     model.compile(loss=config['loss'], optimizer=Adam(lr=config['learning_rate']),
#                   metrics=['acc', 'Precision', 'Recall', 'AUC'])
    initial_epoch = 0

    if pth.isdir(model_path) and len([_ for _ in os.listdir(model_path) if _.endswith('hdf5')]) >= 1:
        for layer in model.layers[:166]:
            layer.trainable = False
        for layer in model.layers[166:]:
            layer.trainable = True
            
        model.compile(loss=config['loss'], optimizer=Adam(lr=config['learning_rate']),
                  metrics=['acc', 'Precision', 'Recall', 'AUC'])

        model_chk_name = sorted(os.listdir(model_path))[-1]
        initial_epoch = int(model_chk_name.split('-')[0])
        model.load_weights(pth.join(model_path, model_chk_name))
    else:
        model.compile(optimizer='rmsprop', loss='categorical_crossentropy',
                     metrics=['acc', 'Precision', 'Recall', 'AUC'])
        
        
        model.fit(
            x=train_dataset, epochs=PRE_TRAIN_EPOCH, # train only top layers for just a few epochs.
            validation_data=val_dataset, shuffle=True,
            steps_per_epoch=STEPS_PER_EPOCH,
            #callbacks = [checkpointer, es], #batch_size=config['batch_size']
            initial_epoch=initial_epoch,
            # steps_per_epoch=train_num_steps, validation_steps=val_num_steps,
            verbose=1)
        
        for i, layer in enumerate(model.layers):
            print(i, layer.name)
        
        for layer in model.layers[:166]:
            layer.trainable = False
        for layer in model.layers[166:]:
            layer.trainable = True
        
        model.compile(loss=config['loss'], optimizer=Adam(lr=config['learning_rate']),
                  metrics=['acc', 'Precision', 'Recall', 'AUC'])
        
        initial_epoch=PRE_TRAIN_EPOCH
            
    # ### Freeze first layer
    # conv_list = [layer for layer in model.layers if isinstance(layer, keras.layers.Conv2D)]
    # conv_list[0].trainable = False
    # # conv_list[1].trainable = False

    os.makedirs(model_path, exist_ok=True)
    model_filename = pth.join(model_path, '{fold:02d}-{epoch:06d}-{val_loss:0.6f}-{loss:0.6f}.hdf5')
    checkpointer = ModelCheckpoint(
        filepath=model_filename, verbose=1, 
        period=1, save_best_only=True, 
        monitor='val_loss'
    )
    es = EarlyStopping(monitor='val_loss', verbose=1, patience=10)

    hist = model.fit(
        x=train_dataset, #epochs=config['num_epoch'], 
        #batch_size = BATCH_SIZES[fold],
        epochs=EPOCHS[fold], 
        steps_per_epoch=STEPS_PER_EPOCH,
        validation_data=val_dataset, shuffle=True,
        callbacks = [get_lr_callback(), checkpointer], #, es], #batch_size=config['batch_size']
        initial_epoch=0, #### JUST 0 TO FIXED EPOCH COUNT #initial_epoch,
        # steps_per_epoch=train_num_steps, validation_steps=val_num_steps,
        verbose=1
    )
    
    print('Loading best model...')
    model.load_weights('fold-%i.h5'%fold)

    #K.clear_session()
    #del(model)
    
    chk_name_list = sorted([name for name in os.listdir(model_path) if name != '000000_last.hdf5'])
    for chk_name in chk_name_list[:-20]:
        os.remove(pth.join(model_path, chk_name))
    # clear_output()

#########################
#### FOLD 2
ResNet50V2-kfold_resize_270_conv_0_basech_0_act_relu_pool_X_betw_avg_fc_0_zscore_True_batch_80_BN_O
#########################
FILENAMES= 88102
TRAINING_FILENAMES= 70481
VALIDATION_FILENAMES= 17621
STEPS_PER_EPOCH= 882.0
lr= 4.9999999999999996e-06
Epoch 1/8
Epoch 00001: val_loss improved from inf to 0.00121, saving model to data/public/checkpoint/ResNet50V2-kfold_resize_270_conv_0_basech_0_act_relu_pool_X_betw_avg_fc_0_zscore_True_batch_80_BN_O/000001-0.001208-0.052671.hdf5
lr= 7e-05
Epoch 2/8
Epoch 00002: val_loss did not improve from 0.00121
lr= 0.00013499999999999997
Epoch 3/8
Epoch 00003: val_loss did not improve from 0.00121
lr= 0.00019999999999999998
Epoch 4/8
Epoch 00004: val_loss did not improve from 0.00121
lr= 0.000161
Epoch 5/8
Epoch 00005: val_loss improved from 0.00121 to 0.00092, saving model to data/public/checkpoint/ResNet50V2-kfold_resize_270_conv_0_basech_0_act_relu_pool_X_betw_avg_fc_0_zscore_True_batch_80_BN_O/000005-0.000918-0.0

### Inference

In [20]:
image_feature_description_for_test = {
    'image_raw': tf.io.FixedLenFeature([], tf.string),
    # 'randmark_id': tf.io.FixedLenFeature([], tf.int64),
    # 'id': tf.io.FixedLenFeature([], tf.string),
}

def _parse_image_function_for_test(example_proto):
    return tf.io.parse_single_example(example_proto, image_feature_description_for_test)

def map_func_for_test(target_record):
    img = target_record['image_raw']
    img = tf.image.decode_jpeg(img, channels=3)
    img = tf.dtypes.cast(img, tf.float32)
    return img

def resize_and_crop_func_for_test(image):
    result_image = tf.image.resize(image, config['aug']['resize'])
    #result_image = tf.image.random_crop(image, size=config['input_shape'], seed=7777)  # revive
    return result_image

def post_process_func_for_test(image):
    # result_image = result_image / 255
    result_image = my_model_base.preprocess_input(image)
    return result_image

In [21]:
submission_base_path = pth.join(data_base_path, 'submission')
os.makedirs(submission_base_path, exist_ok=True)

In [22]:
preds = []
# for conv_comb, activation, base_channel, \
#     between_type, fc_num, batch_size \
#         in itertools.product(conv_comb_list, activation_list,
#                               base_channel_list, between_type_list, fc_list,
#                               batch_size_list):
#     config['conv']['conv_num'] = conv_comb
#     config['conv']['base_channel'] = base_channel
#     config['activation'] = activation
#     config['between_type'] = between_type
#     config['fc']['fc_num'] = fc_num
#     config['batch_size'] = batch_size

for LEARNING_RATE in [1e-3]: #, 1e-4, 1e-5]: # just once
    base = BASE_MODEL_NAME

    base += '_resize_{}'.format(config['aug']['resize'][0])
    #base += '_input_{}'.format(config['input_shape'][0])
    base += '_conv_{}'.format('-'.join(map(lambda x:str(x),config['conv']['conv_num'])))
    base += '_basech_{}'.format(config['conv']['base_channel'])
    base += '_act_{}'.format(config['activation'])
    base += '_pool_{}'.format(config['pool']['type'])
    base += '_betw_{}'.format(config['between_type'])
    base += '_fc_{}'.format(config['fc']['fc_num'])
    base += '_zscore_{}'.format(config['is_zscore'])
    base += '_batch_{}'.format(config['batch_size'])
    if config['is_dropout']:
        base += '_DO_'+str(config['dropout_rate']).replace('.', '')
    if config['is_batchnorm']:
        base += '_BN'+'_O'
    else:
        base += '_BN'+'_X'

    model_name = base
    print(model_name)

    ### Define dataset
    test_dataset = tf.data.TFRecordDataset(test_tfrecord_path, compression_type='GZIP')
    test_dataset = test_dataset.map(_parse_image_function_for_test, num_parallel_calls=tf.data.experimental.AUTOTUNE)
    test_dataset = test_dataset.map(map_func_for_test, num_parallel_calls=tf.data.experimental.AUTOTUNE)
    test_dataset = test_dataset.map(resize_and_crop_func_for_test, num_parallel_calls=tf.data.experimental.AUTOTUNE)
    test_dataset = test_dataset.batch(config['batch_size'])
    test_dataset = test_dataset.map(post_process_func_for_test, num_parallel_calls=tf.data.experimental.AUTOTUNE)
    test_dataset = test_dataset.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)

    model_path = pth.join(
        model_checkpoint_path, model_name, 
    )
    model = build_cnn(config)
    #         model.summary()
    model.compile(loss=config['loss'], optimizer=Adam(lr=config['learning_rate']),
                  metrics=['acc', 'Precision', 'Recall', 'AUC'])
    initial_epoch = 0

    model_chk_name = sorted(os.listdir(model_path))[-1]
    print('selected weight to load=', model_chk_name)
    initial_epoch = int(model_chk_name.split('-')[0])
    model.load_weights(pth.join(model_path, model_chk_name))

    preds = model.predict(test_dataset, verbose=1)
    
    #pred_labels = np.argmax(preds, axis=1)
    #pred_probs = np.array([pred[indice] for pred, indice in zip(preds, pred_labels)])
    
    # argmax --> top3
    pred_labels = np.argsort(-preds)
    
    submission_csv_path = pth.join(data_base_path, submission_csv_name)
    submission_df = pd.read_csv(submission_csv_path)
    
    merged_df = []
    
    RANK_TO_SAVE = 5
    for i in range(RANK_TO_SAVE):
        tmp_df = submission_df.copy()
        
        tmp_labels = pred_labels[:, i]
        tmp_df['landmark_id'] = tmp_labels
        tmp_df['conf'] = np.array([pred[indice] for pred, indice in zip(preds, tmp_labels)])
        merged_df.append(tmp_df)
    
    submission_df = pd.concat(merged_df)
    
    #submission_df['landmark_id'] = pred_labels
    #submission_df['conf'] = pred_probs

    today_str = datetime.date.today().strftime('%Y%m%d')
    result_filename = '{}.csv'.format(model_name)
    submission_csv_fileaname = pth.join(submission_base_path, '_'.join([today_str, result_filename]))
    submission_df.to_csv(submission_csv_fileaname, index=False)

ResNet50V2-kfold_resize_270_conv_0_basech_0_act_relu_pool_X_betw_avg_fc_0_zscore_True_batch_80_BN_O
selected weight to load= 000006-0.000843-0.000243.hdf5


In [25]:
submission_csv_path = pth.join(data_base_path, submission_csv_name)
submission_df = pd.read_csv(submission_csv_path)

merged_df = []

RANK_TO_SAVE = 1
for i in range(RANK_TO_SAVE):
    tmp_df = submission_df.copy()

    tmp_labels = pred_labels[:, i]
    tmp_df['landmark_id'] = tmp_labels
    tmp_df['conf'] = np.array([pred[indice] for pred, indice in zip(preds, tmp_labels)])
    merged_df.append(tmp_df)

submission_df = pd.concat(merged_df)

#submission_df['landmark_id'] = pred_labels
#submission_df['conf'] = pred_probs

today_str = datetime.date.today().strftime('%Y%m%d')
result_filename = '{}_top1.csv'.format(model_name)
submission_csv_fileaname = pth.join(submission_base_path, '_'.join([today_str, result_filename]))
submission_df.to_csv(submission_csv_fileaname, index=False)