In [165]:
import pandas as pd 
from tqdm import tqdm_notebook
import numpy as np
import bcolz

In [166]:
def save_array(fname, arr):
    """Write `arr` to a bcolz carray rooted at `fname` (opened with mode='w') and flush it."""
    bcolz.carray(arr, rootdir=fname, mode='w').flush()


def load_array(fname):
    """Open the bcolz store at `fname` and return everything in it via a `[:]` slice."""
    stored = bcolz.open(fname)
    return stored[:]


In [210]:
# Load a previously written prediction CSV. The encoding loop below unpacks
# every row as an (f, tags) pair, so exactly two columns are expected.
df_test = pd.read_csv("data/resnet_full_224x224_blend_2resnets.csv")

In [211]:

# Label names in column order: position i in this list names column i of the
# prediction arrays (they are passed as `columns=` when the scores are wrapped
# in a DataFrame further down).
labels = [
    'blow_down',
    'bare_ground',
    'conventional_mine',
    'blooming',
    'cultivation',
    'artisinal_mine',
    'haze',
    'primary',
    'slash_burn',
    'habitation',
    'clear',
    'road',
    'selective_logging',
    'partly_cloudy',
    'agriculture',
    'water',
    'cloudy',
]

# Name -> column index, derived from `labels` so the two can never disagree.
# This yields exactly the mapping that used to be spelled out by hand.
label_map = {name: idx for idx, name in enumerate(labels)}

In [212]:
# Encode each row's space-separated tag string as a 0/1 vector with one slot
# per label (slot positions come from `label_map`).
y_test = []

for f, tags in tqdm_notebook(df_test.values, miniters=1000):
    targets = np.zeros(len(label_map))

    # An empty tags field is read back from CSV as NaN, and an empty string
    # means "no tags". split() with no argument also tolerates repeated or
    # leading/trailing spaces, whereas split(' ') would produce '' entries
    # and fail the label_map lookup with KeyError.
    tag_list = [] if pd.isnull(tags) else tags.split()
    for t in tag_list:
        targets[label_map[t]] = 1

    y_test.append(targets)

y_test = np.array(y_test).astype(np.uint8)




In [213]:
# Per-label counts over the rows that carry the column-16 ('cloudy') tag.
y_test[y_test[:, 16] > 0].sum(axis=0)

array([   0,    0,    0,    0,    0,    1,  796, 1595,    0,    1,  566,
         24,    0,  497,  210,  676, 4240], dtype=uint32)

In [171]:
# Display the label order so the per-column counts above can be read by name.
labels

['blow_down',
 'bare_ground',
 'conventional_mine',
 'blooming',
 'cultivation',
 'artisinal_mine',
 'haze',
 'primary',
 'slash_burn',
 'habitation',
 'clear',
 'road',
 'selective_logging',
 'partly_cloudy',
 'agriculture',
 'water',
 'cloudy']

In [245]:
# Cut-off applied to the column-16 ('cloudy') score in the cells below: rows
# scoring above it are selected and later overwritten as cloudy-only.
threshold = 0.75

In [246]:
# Raw per-label scores stored on disk; this copy is only inspected, never modified.
resnet_preds = load_array("data/raw_preds_224x224_blend_2resnets_5a_n_4a_ft_0.92905_armin_ordering.dat/")

In [247]:
# Column-wise maximum score among rows whose column-16 score exceeds `threshold`.
resnet_preds[resnet_preds[:, 16] > threshold].max(axis=0)

array([  1.12302008e-03,   1.84770692e-02,   6.34928947e-05,
         1.77549387e-04,   4.11946774e-02,   4.83893818e-04,
         3.84885222e-01,   3.00102293e-01,   3.53877316e-04,
         1.67951752e-02,   3.41507107e-01,   2.20302612e-01,
         1.73517648e-04,   3.57347369e-01,   3.15416217e-01,
         5.53288698e-01,   9.99991536e-01], dtype=float32)

In [248]:
# Same path as the one loaded into `resnet_preds`, read again under a second
# name; this is the array that gets overwritten in place further down.
oli_res_preds = load_array("data/raw_preds_224x224_blend_2resnets_5a_n_4a_ft_0.92905_armin_ordering.dat/")
# One threshold per label column (17 values, shown in a later cell).
oli_res_thres = load_array("data/resnet_thresholds_simple_aug_10xtta.dat/")

In [249]:
# Wrap the raw scores in a DataFrame whose columns are named by `labels`, then
# show the first row as a check of the column order.
# NOTE(review): this is built before `oli_res_preds` is overwritten in place
# further down. Whether `result` reflects that overwrite depends on whether
# pandas copied the array here — confirm before relying on `result` afterwards.
result = pd.DataFrame(oli_res_preds, columns = labels)
result.head(1)

Unnamed: 0,blow_down,bare_ground,conventional_mine,blooming,cultivation,artisinal_mine,haze,primary,slash_burn,habitation,clear,road,selective_logging,partly_cloudy,agriculture,water,cloudy
0,9.2e-05,0.000507,8.139934e-10,0.092864,0.001403,3.497085e-09,0.003379,0.999979,4e-06,0.000204,0.998511,0.002732,0.018024,8.8e-05,0.002829,0.00167,5.057313e-07


In [250]:
# How many rows have a column-16 ('cloudy') score above `threshold`.
oli_res_preds[oli_res_preds[:, 16] > threshold].shape

(2603L, 17L)

In [251]:
# Highest score per label among those rows, before they are overwritten.
oli_res_preds[oli_res_preds[:, 16] > threshold].max(axis=0)

array([  1.12302008e-03,   1.84770692e-02,   6.34928947e-05,
         1.77549387e-04,   4.11946774e-02,   4.83893818e-04,
         3.84885222e-01,   3.00102293e-01,   3.53877316e-04,
         1.67951752e-02,   3.41507107e-01,   2.20302612e-01,
         1.73517648e-04,   3.57347369e-01,   3.15416217e-01,
         5.53288698e-01,   9.99991536e-01], dtype=float32)

In [252]:
# Rows whose column-16 ('cloudy') score exceeds `threshold` are overwritten in
# place with a one-hot vector: 0 for the first 16 labels, 1 for the last.
oli_res_preds[oli_res_preds[:, 16] > threshold] = np.array([0] * 16 + [1])

In [253]:
# Display the per-label thresholds that the tag-building cell compares against.
oli_res_thres

array([ 0.58,  0.2 ,  0.12,  0.22,  0.29,  0.2 ,  0.25,  0.17,  0.22,
        0.23,  0.17,  0.25,  0.16,  0.28,  0.2 ,  0.23,  0.24])

In [254]:
# Turn scores into space-separated tag strings: a label is emitted for a row
# when its score is strictly greater than that label's entry in `oli_res_thres`.
#
# The comparison reads `oli_res_preds` directly instead of the `result`
# DataFrame. `result` was constructed before the in-place cloudy override, so
# it only carries that override when pandas shares memory with the array.
# One whole-array comparison also replaces the deprecated `.ix` lookup and the
# per-row apply/transpose round-trip.
above_threshold = oli_res_preds > oli_res_thres

# Labels are joined in `labels` order; a row with no label above its
# threshold produces an empty string.
label_preds = [
    ' '.join(name for name, hit in zip(labels, row) if hit)
    for row in above_threshold
]

.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#deprecate_ix
  app.launch_new_instance()





In [255]:
# Store the thresholded tag strings in the "tags" column of df_test (modifies
# df_test in place; the tags loaded from the CSV are no longer available after this).
# NOTE(review): assumes "tags" is an existing column — the row unpacking into
# two values both before and after this cell suggests so; confirm.
df_test["tags"] = label_preds

In [256]:
# Re-encode the thresholded tag strings as 0/1 vectors, one slot per label.
y_test = []

for f, tags in tqdm_notebook(df_test.values, miniters=1000):
    targets = np.zeros(len(label_map))

    # A row with no label above its threshold has an empty tag string.
    # split(' ') would turn that into [''] and fail the label_map lookup with
    # KeyError, while split() yields no tags at all. Missing values are
    # treated the same way.
    tag_list = [] if pd.isnull(tags) else tags.split()
    for t in tag_list:
        targets[label_map[t]] = 1

    y_test.append(targets)




In [257]:
# Stack the per-row vectors into a single array and store it as uint8.
y_test = np.asarray(y_test).astype(np.uint8)

In [258]:
# Per-label counts over the rows tagged 'cloudy' (column 16) after filtering.
y_test[y_test[:, 16] > 0].sum(axis=0)

array([   0,    0,    0,    0,    0,    1,  721, 1315,    0,    1,  476,
         24,    0,  476,  207,  627, 4240], dtype=uint32)

In [259]:
# Build the file name from `threshold` so a changed cut-off can never be
# written under a stale name. With threshold = 0.75 this is the same
# 'resnet_weather_filtered_th0.75.csv' as before.
submission_file = 'resnet_weather_filtered_th{}.csv'.format(threshold)
df_test.to_csv(submission_file, index=False)

Threshold 0.75 — per-label counts among rows tagged `cloudy`:
array([   0,    0,    0,    0,    0,    1,  721, 1315,    0,    1,  476,
         24,    0,  476,  207,  627, 4240], dtype=uint32)

Threshold 0.7 — per-label counts among rows tagged `cloudy`:
array([   0,    0,    0,    0,    0,    1,  647, 1184,    0,    1,  422,
         24,    0,  446,  205,  581, 4240], dtype=uint32)

Original predictions (before the cloudy override) — per-label counts among rows tagged `cloudy`:

array([   0,    0,    0,    0,    0,    1,  796, 1595,    0,    1,  566,
         24,    0,  497,  210,  676, 4240], dtype=uint32)

In [243]:
# Display the label order again for reading the count arrays above by name.
labels

['blow_down',
 'bare_ground',
 'conventional_mine',
 'blooming',
 'cultivation',
 'artisinal_mine',
 'haze',
 'primary',
 'slash_burn',
 'habitation',
 'clear',
 'road',
 'selective_logging',
 'partly_cloudy',
 'agriculture',
 'water',
 'cloudy']