In [8]:
# Standard library
import os
import random

# Third-party: NumPy for numerical computing, pandas for DataFrames
import numpy as np
import pandas as pd

# Use the full option name. pandas resolves option names by pattern match, so
# the truncated 'display.max_column' only worked while it matched exactly one
# registered option.
pd.set_option('display.max_columns', 100)

# Seed both NumPy's and the stdlib's global RNGs with the same value so that
# anything drawing from them is repeatable across kernel restarts.
SEED = 123
np.random.seed(SEED)
random.seed(SEED)

In [10]:
import sys

# Make the helper modules under ./utils importable. The membership check keeps
# this cell idempotent: re-running it does not stack duplicate entries onto
# sys.path.
if './utils' not in sys.path:
    sys.path.append('./utils')

# Project-local helpers (expected to live in ./utils).
from data import Data
from models import Models
from tags import Tags

# Shared Tags instance; pred_to_output() calls tags.pred_to_tags() on it.
tags = Tags()

In [9]:
# Dataset root: use the shared /data location when it exists; otherwise fall
# back to the local copy (the fallback itself is not checked for existence).
PLANET_KAGGLE_ROOT = (
    '/data/planet-data/'
    if os.path.exists('/data/planet-data/')
    else '/Users/jiayou/Documents/Kaggle Data/Amazon'
)

# Number of 'test_*' images, number of 'file_*' images, and their total.
N_TEST_T, N_TEST_F = 40669, 20522
N_TEST = N_TEST_F + N_TEST_T

In [1]:
# Decision thresholds handed to tags.pred_to_tags() as `thres`.
# NOTE(review): 17 values, presumably one per tag class in the order Tags
# uses -- confirm against utils/tags.py.
thres = [
    0.23067564,
    0.27402788,
    0.15499838,
    0.18645976,
    0.12418672,
    0.093219191,
    0.14909597,
    0.13256209,
    0.041971382,
    0.17731731,
    0.10376091,
    0.25468382,
    0.090709485,
    0.13336645,
    0.13344041,
    0.10004906,
    0.036582272,
]

def pred_to_output(pred):
    """Turn raw prediction rows into a submission-shaped DataFrame.

    The frame always has N_TEST rows and two columns: 'image_name'
    ('test_0' .. 'test_{N_TEST_T-1}' followed by 'file_0' ..
    'file_{N_TEST_F-1}') and 'tags' (the value returned by
    tags.pred_to_tags(row, thres=thres) for the row at the same position).

    Args:
        pred: Indexable sequence of per-image predictions, in the same order
            as the image names above. Each element is passed unchanged to
            tags.pred_to_tags together with the module-level `thres`.

    Returns:
        pandas.DataFrame with columns 'image_name' and 'tags'. If `pred` has
        fewer than N_TEST rows, the trailing rows keep an empty-string tag.

    Raises:
        IndexError: if `pred` has more rows than there are test images.
    """
    n_pred = len(pred)
    # Fail before doing any work; previously this surfaced as an
    # out-of-bounds positional write part-way through the loop.
    if n_pred > N_TEST:
        raise IndexError(
            'got {} predictions but only {} test images are expected'.format(
                n_pred, N_TEST))

    image_names = ['test_{}'.format(i) for i in range(N_TEST_T)]
    image_names += ['file_{}'.format(i) for i in range(N_TEST_F)]

    # Build the whole column up front and construct the frame once, instead
    # of writing into the DataFrame one cell at a time.
    tag_column = [tags.pred_to_tags(row, thres=thres) for row in pred]
    # Rows without a prediction keep the blank tag they had before.
    tag_column += [''] * (N_TEST - n_pred)

    return pd.DataFrame({'image_name': image_names, 'tags': tag_column})

def predict(model_path, batch_size=100,
            raw_pred_path='raw_pred.v9.npy',
            submission_path='submission.v9.csv'):
    """Run the model over the test generator and write the prediction files.

    Loads a model via Models.load_resnet50(model_path), feeds it every batch
    produced by Data(train=[]).gen_test(batch_size), saves the stacked raw
    predictions with np.save, converts them with pred_to_output() and writes
    the resulting frame as CSV without an index column.

    Args:
        model_path: Passed to Models.load_resnet50.
        batch_size: Passed to Data.gen_test. NOTE(review): presumably the
            number of images per yielded batch -- confirm in utils/data.py.
        raw_pred_path: Destination for the raw prediction array (np.save).
        submission_path: Destination for the submission CSV.

    Returns:
        The submission DataFrame produced by pred_to_output().

    Raises:
        ValueError: if the test generator yields no batches. Raised before
            any file is written.
    """
    model = Models.load_resnet50(model_path)
    print('Model weights loaded')

    d = Data(train=[])

    # Collect per-batch outputs and stack them once at the end; concatenating
    # inside the loop re-copies everything accumulated so far on every batch.
    batches = []
    cnt = 0
    print('Start predicting..')
    for X_test in d.gen_test(batch_size):
        y_test = model.predict_on_batch(X_test)
        batches.append(y_test)
        cnt += len(y_test)
        print('Predicted {} images'.format(cnt))
    print('Predicted all {} images'.format(cnt))

    if not batches:
        # Previously this path saved a useless file and then crashed inside
        # pred_to_output(); stop early with a clear message instead.
        raise ValueError('test generator yielded no batches; nothing to predict')

    pred = np.concatenate(batches)

    print('Saving raw predictions...')
    np.save(raw_pred_path, pred)
    print('Saved')

    result = pred_to_output(pred)
    print('Saving submission file...')
    # `index` is documented as a bool; False omits the row index from the CSV.
    result.to_csv(submission_path, index=False)
    print('Saved')
    return result

In [3]:
# Run inference with the v9 weights. Besides returning the submission frame,
# predict() writes the raw-prediction and submission files to disk.
weights_path = '/data/kaggle-planet/weights-v9.hdf5'
out = predict(weights_path)

In [None]:
# Rebuild a submission from the raw predictions saved on disk, without
# re-running the model (e.g. after editing `thres`).
pred = np.load('raw_pred.v9.npy')
result = pred_to_output(pred)
# `index` is documented as a bool; False omits the row index from the CSV.
result.to_csv('submission.v9-2.csv', index=False)