In [1]:
# Pandas for DataFrames
import pandas as pd
# Spell out the full option name: abbreviated names such as 'display.max_column'
# only resolve through pandas' pattern matching of option keys.
pd.set_option('display.max_columns', 100)

# NumPy for numerical computing
import numpy as np
import random

# One named seed shared by NumPy's and the stdlib's global generators,
# instead of repeating the literal at each call site.
SEED = 123
np.random.seed(SEED)
random.seed(SEED)

import os

# Matplotlib for visualization
from matplotlib import pyplot as plt

# display plots in the notebook
%matplotlib inline

In [2]:
import sys

# Make the helper modules under ./utils importable. The membership check keeps
# the cell idempotent: re-running it no longer stacks duplicate entries.
if './utils' not in sys.path:
    sys.path.append('./utils')

# Project-local helpers (presumably resolved via the './utils' entry on sys.path)
from data import Data
from models import Models
from tags import Tags
# Shared Tags instance; pred_to_output() calls tags.pred_to_tags() on it
tags = Tags()

Using TensorFlow backend.


In [3]:
# Data root. The PLANET_KAGGLE_ROOT environment variable, when set, takes
# precedence over the built-in default so the notebook can run on another
# machine without editing this cell. With the variable unset the behavior is
# unchanged.
PLANET_KAGGLE_ROOT = os.environ.get('PLANET_KAGGLE_ROOT', '/data/planet-data/')
if not os.path.exists(PLANET_KAGGLE_ROOT):
    # Hard-coded local fallback used when the primary location is missing
    PLANET_KAGGLE_ROOT = '/Users/jiayou/Documents/Kaggle Data/Amazon'

N_TAGS = 17       # number of columns in every prediction row
N_TEST_T = 40669  # number of images named 'test_<i>'
N_TEST_F = 20522  # number of images named 'file_<i>'
N_TEST = N_TEST_T + N_TEST_F

In [5]:
# Per-tag thresholds (presumably from an earlier tuning pass). They are kept
# under their own name so they are no longer a dead store that the next
# assignment silently discards.
thres_tuned = [0.23067564, 0.27402788, 0.15499838, 0.18645976, 0.12418672, 0.093219191, 0.14909597, 0.13256209, 0.041971382, 0.17731731, 0.10376091, 0.25468382, 0.090709485, 0.13336645, 0.13344041, 0.10004906, 0.036582272]
# Thresholds actually read by pred_to_output(): a uniform 0.2, one entry per tag.
thres = [0.2] * len(thres_tuned)

def pred_to_output(pred):
    """Turn a matrix of per-tag scores into a submission-style DataFrame.

    The ``image_name`` column lists ``test_0 .. test_<N_TEST_T - 1>`` followed
    by ``file_0 .. file_<N_TEST_F - 1>``. Row ``i`` of ``pred`` is handed to
    ``tags.pred_to_tags`` together with the module-level ``thres`` and the
    returned value is written into the ``tags`` column of row ``i``. Rows
    beyond ``len(pred)`` keep an empty tag string.
    """
    image_names = ['test_{}'.format(n) for n in range(N_TEST_T)]
    image_names += ['file_{}'.format(n) for n in range(N_TEST_F)]
    result = pd.DataFrame({
        'image_name': image_names,
        'tags': [''] * N_TEST,
    })

    tags_col = 1  # positional index of the 'tags' column
    for row, scores in enumerate(pred):
        result.iat[row, tags_col] = tags.pred_to_tags(scores, thres=thres)
    return result

def consolidate(pred):
    """Average ``pred`` along its first axis and return the result.

    ``predict`` passes in the block of rows belonging to one image and stores
    the returned vector as that image's prediction.
    """
    averaged = pred.mean(axis=0)
    return averaged

# Notebook-level placeholders, both initialised to None.
pred = pred8 = None

def predict(model_path, toy=None, batch_size=20, out_tag='v9.tta'):
    """Predict the test set, average each image's rows, and save the results.

    Every batch returned by the model is treated as consecutive groups of 8
    rows per image (presumably 8 augmented views of the image -- confirm
    against ``Data.gen_test_augmented``). Each group is reduced to a single row
    with ``consolidate``.

    Parameters
    ----------
    model_path
        Passed to ``Models.load_resnet50``.
    toy
        Forwarded unchanged to ``Data(train=[], toy=toy)``.
    batch_size
        Forwarded to ``Data.gen_test_augmented``.
    out_tag : str
        Label embedded in the output file names. The default reproduces the
        original names ``raw_pred.v9.tta.npy``, ``raw_pred.v9.tta8.npy`` and
        ``submission.v9.tta.csv``; pass another value to avoid overwriting
        them when predicting with different weights.

    Returns
    -------
    The DataFrame produced by ``pred_to_output`` for the averaged predictions.

    Raises
    ------
    ValueError
        If a batch's row count is not a multiple of 8, or if the generator
        yields more than ``N_TEST`` images in total.
    """
    rows_per_image = 8

    model = Models.load_resnet50(model_path)
    print('Model weights loaded')

    d = Data(train=[], toy=toy)

    cnt = 0
    # Local buffers use their own names so they do not shadow the
    # notebook-level `pred` / `pred8` variables.
    avg_pred = np.zeros((N_TEST, N_TAGS))
    raw_pred = np.zeros((N_TEST * rows_per_image, N_TAGS))
    print('Start predicting..')
    for X_test in d.gen_test_augmented(batch_size):
        y_test = model.predict_on_batch(X_test)
        n_rows = len(y_test)
        # Fail with an explicit message instead of a broadcasting error from
        # the slice assignment below.
        if n_rows % rows_per_image != 0:
            raise ValueError(
                'Batch of {} rows is not a multiple of {}'.format(n_rows, rows_per_image))
        k = n_rows // rows_per_image
        if cnt + k > N_TEST:
            raise ValueError(
                'Received {} images but only {} were expected'.format(cnt + k, N_TEST))

        raw_pred[cnt * rows_per_image:(cnt + k) * rows_per_image, :] = y_test
        for i in range(k):
            group = y_test[rows_per_image * i:rows_per_image * (i + 1), :]
            avg_pred[cnt + i, :] = consolidate(group)
        cnt += k
        print('Predicted {} images'.format(cnt))
    print('Predicted all {} images'.format(cnt))

    print('Saving raw predictions...')
    np.save('raw_pred.{}.npy'.format(out_tag), avg_pred)
    np.save('raw_pred.{}8.npy'.format(out_tag), raw_pred)
    print('Saved')

    result = pred_to_output(avg_pred)
    print('Saving submission file...')
    result.to_csv('submission.{}.csv'.format(out_tag), index=False)
    print('Saved')
    return result

In [7]:
# Run inference with the v9 weights; predict() also writes the raw prediction
# arrays and the submission CSV into the working directory.
out = predict('./weights-v9.hdf5')

In [None]:
# Rebuild a submission from saved raw predictions using the current `thres`.
# NOTE(review): 'raw_pred.v9.npy' is not one of the files predict() writes in
# this notebook (it saves 'raw_pred.v9.tta.npy' and 'raw_pred.v9.tta8.npy'), so
# this cell presumably depends on a file left by an earlier run -- confirm it
# exists before running.
pred = np.load('raw_pred.v9.npy')
result = pred_to_output(pred)
result.to_csv('submission.v9-2.csv', index = None)