In [2]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os
import gc

import keras as k
import tensorflow as tf
from keras.models import Sequential, Model
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D, GlobalAveragePooling2D, Activation, BatchNormalization
from keras.callbacks import EarlyStopping
from keras.applications.inception_v3 import InceptionV3
from keras.layers import Input
from keras import backend as K
from keras.optimizers import Adam
from keras.preprocessing.image import ImageDataGenerator
import cv2
from tqdm import tqdm

from sklearn.model_selection import train_test_split
from sklearn.metrics import fbeta_score, precision_score 
from skimage import io,transform

import time

import os
import fnmatch

In [3]:
datagen = ImageDataGenerator(
    rotation_range=90,
    fill_mode='reflect',
    horizontal_flip=True,
    vertical_flip=True)

def multilabelmetrics(y_true,y_pred):
    '''y_true and y_pred should be boolean np arrays
    of shape num_example x num_classes '''
    total = np.sum(y_true,axis = 0)
    tp = np.sum(y_true*y_pred,axis=0)
    tn = np.sum((1-y_true)*(1-y_pred),axis=0)
    fp = np.sum((1-y_true)*y_pred,axis=0)
    fn = np.sum(y_true*(1-y_pred),axis=0)
    return total,tp,tn,fp,fn

def combine_predictions(x,y,y1,y2,thresh,thresh1,thresh2,thresh3):
    y_pred = np.zeros((x.shape[0],17),np.uint8)
    y_bool = np.array((y > thresh),np.uint8)
    y1_bool = np.array((y1 > thresh1),np.uint8)
    y2_bool = np.array((y2 > thresh2)*np.tile(y1[:,0]>thresh3,(7,1)).T,np.uint8)
    y_pred[:,:7] = y2_bool
    y_pred[:,7:13] = y1_bool[:,1:]
    y_pred[:,13:] = y_bool
    return y_pred

callbacks = [EarlyStopping(monitor='val_loss', patience=2, verbose=0)]

In [5]:
x_train = np.zeros((40479,32,32,4), np.float32)
y_train = []

df_train = pd.read_csv('train_v2.csv')

labels = ['blow_down',
 'bare_ground',
 'conventional_mine',
 'blooming',
 'artisinal_mine',
 'selective_logging',         
 'slash_burn', 
 'cultivation',
 'habitation',
 'road',
 'agriculture',
 'water',
 'primary',
 'partly_cloudy', 
 'cloudy',
 'clear',
 'haze',]

label_map = {l: i for i, l in enumerate(labels)}
inv_label_map = {i: l for l, i in label_map.items()}

i=0

for f, tags in tqdm(df_train.values[:40479], miniters=1000):    
    img = io.imread('train-tif-v2/{}.tif'.format(f))
    targets = np.zeros(17)
    for t in tags.split(' '):
        targets[label_map[t]] = 1 
    x_train[i,:,:,:] = np.array(transform.resize(img,(32,32),mode = 'constant'),np.float32)#automatically scales to [0,1] float
    i+=1
    y_train.append(targets)
  
y_train = np.array(y_train, np.uint8)

print(x_train.shape)
print(y_train.shape)


  0%|          | 0/40479 [00:00<?, ?it/s][A
100%|██████████| 40479/40479 [30:06<00:00, 22.41it/s]

(40479, 32, 32, 4)
(40479, 17)





In [6]:
#subtracting mean
train_mean = np.mean(x_train,axis = 0)
x_train -= train_mean

In [6]:
#weather classifier (last four labels - mutually exclusive)
x_train, x_val, y_train_w, y_val_w = train_test_split(x_train,y_train[:,-4:],test_size=0.1)
print(x_train.shape)
print(y_train_w.shape)
print(x_val.shape)
print(y_val_w.shape)

(36431, 32, 32, 4)
(36431, 4)
(4048, 32, 32, 4)
(4048, 4)


In [7]:
model = Sequential()#using same architecture for all three models
model.add(Conv2D(32, (3, 3), padding = 'same', input_shape=(32, 32, 4)))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(Conv2D(48, (3, 3), padding = 'same'))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(Conv2D(48, (3, 3), padding = 'same'))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(Conv2D(48, (3, 3), padding = 'same'))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Conv2D(64, (3, 3), padding = 'same'))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(Conv2D(64, (3, 3), padding = 'same'))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(Conv2D(64, (3, 3), padding = 'same'))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Conv2D(128, (3, 3), padding = 'same'))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(Conv2D(128, (3, 3), padding = 'same'))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(Conv2D(128, (3, 3), padding = 'same'))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Flatten())
model.add(Dense(2048))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(Dense(1024))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(Dense(4, activation='softmax'))

In [10]:
model.compile(loss='categorical_crossentropy', 
              optimizer='adam',
              metrics=['accuracy'])    
model.fit_generator(datagen.flow(x_train,y_train_w, batch_size = 128), validation_data=(x_val, y_val_w),
                  verbose=2, epochs=10, steps_per_epoch=x_train.shape[0]/ 128, callbacks=callbacks,
                  )

Epoch 1/10
51s - loss: 0.2798 - acc: 0.9020 - val_loss: 0.3372 - val_acc: 0.8958
Epoch 2/10
50s - loss: 0.2739 - acc: 0.9037 - val_loss: 0.3277 - val_acc: 0.8849
Epoch 3/10
50s - loss: 0.2693 - acc: 0.9066 - val_loss: 0.5435 - val_acc: 0.8172
Epoch 4/10
50s - loss: 0.2695 - acc: 0.9054 - val_loss: 0.3443 - val_acc: 0.8755
Epoch 5/10
50s - loss: 0.2675 - acc: 0.9077 - val_loss: 0.3291 - val_acc: 0.8799


<keras.callbacks.History at 0x7fe7cc05ff98>

In [11]:
y_pred = model.predict(x_val,batch_size=128)
for thresh in [0.05,0.1,0.15,0.2,0.25,0.3,0.35]:
    print("thresh:",thresh,"\tF2 score:",fbeta_score(y_val_w, np.array(y_pred)>thresh, beta=2, average='samples'))

thresh: 0.05 	F2 score: 0.895511304818
thresh: 0.1 	F2 score: 0.911627435065
thresh: 0.15 	F2 score: 0.917131328816
thresh: 0.2 	F2 score: 0.912972896669
thresh: 0.25 	F2 score: 0.909155608884
thresh: 0.3 	F2 score: 0.906779361942
thresh: 0.35 	F2 score: 0.901515151515


  'precision', 'predicted', average, warn_for)


In [17]:
#continue with reduced learning rate
model.compile(loss='categorical_crossentropy', 
              optimizer=Adam(lr=0.0005),
              metrics=['accuracy']) 
model.fit_generator(datagen.flow(x_train,y_train_w, batch_size = 128), validation_data=(x_val, y_val_w),
                  verbose=2, epochs=10, steps_per_epoch=x_train.shape[0]/ 128, callbacks=callbacks,
                  )

Epoch 1/10
53s - loss: 0.2324 - acc: 0.9170 - val_loss: 0.5679 - val_acc: 0.7735
Epoch 2/10
50s - loss: 0.2301 - acc: 0.9178 - val_loss: 0.2679 - val_acc: 0.9103
Epoch 3/10
50s - loss: 0.2311 - acc: 0.9182 - val_loss: 1.5175 - val_acc: 0.4333
Epoch 4/10
50s - loss: 0.2258 - acc: 0.9198 - val_loss: 0.2574 - val_acc: 0.9118
Epoch 5/10
50s - loss: 0.2295 - acc: 0.9182 - val_loss: 0.3053 - val_acc: 0.8970
Epoch 6/10
50s - loss: 0.2283 - acc: 0.9197 - val_loss: 0.8871 - val_acc: 0.5949
Epoch 7/10
50s - loss: 0.2259 - acc: 0.9197 - val_loss: 0.3420 - val_acc: 0.8654


<keras.callbacks.History at 0x7fe792d8ff28>

In [18]:
y_pred = model.predict(x_val,batch_size=128)
for thresh in [0.05,0.1,0.15,0.2,0.25,0.3,0.35]:
    print("thresh:",thresh,"\tF2 score:",fbeta_score(y_val_w, np.array(y_pred)>thresh, beta=2, average='samples'))

thresh: 0.05 	F2 score: 0.885275503482
thresh: 0.1 	F2 score: 0.904314888011
thresh: 0.15 	F2 score: 0.910733401562
thresh: 0.2 	F2 score: 0.911067193676
thresh: 0.25 	F2 score: 0.904403115001
thresh: 0.3 	F2 score: 0.896115659703
thresh: 0.35 	F2 score: 0.885787220026


  'precision', 'predicted', average, warn_for)


In [20]:
#continue with reduced learning rate
model.compile(loss='categorical_crossentropy', 
              optimizer=Adam(lr=0.0002),
              metrics=['accuracy']) 
model.fit_generator(datagen.flow(x_train,y_train_w, batch_size = 128), validation_data=(x_val, y_val_w),
                  verbose=2, epochs=10, steps_per_epoch=x_train.shape[0]/ 128, callbacks=callbacks,
                  )

Epoch 1/10
53s - loss: 0.2128 - acc: 0.9230 - val_loss: 0.2325 - val_acc: 0.9163
Epoch 2/10
50s - loss: 0.2148 - acc: 0.9224 - val_loss: 0.2392 - val_acc: 0.9150
Epoch 3/10
50s - loss: 0.2130 - acc: 0.9227 - val_loss: 0.2427 - val_acc: 0.9155
Epoch 4/10
50s - loss: 0.2119 - acc: 0.9235 - val_loss: 0.2319 - val_acc: 0.9202
Epoch 5/10
50s - loss: 0.2115 - acc: 0.9232 - val_loss: 0.2357 - val_acc: 0.9190
Epoch 6/10
50s - loss: 0.2121 - acc: 0.9240 - val_loss: 0.2370 - val_acc: 0.9155
Epoch 7/10
50s - loss: 0.2121 - acc: 0.9231 - val_loss: 0.2348 - val_acc: 0.9163


<keras.callbacks.History at 0x7fe7900d3240>

In [21]:
y_pred = model.predict(x_val,batch_size=128)
for thresh in [0.05,0.1,0.15,0.2,0.25,0.3,0.35]:
    print("thresh:",thresh,"\tF2 score:",fbeta_score(y_val_w, np.array(y_pred)>thresh, beta=2, average='samples'))

thresh: 0.05 	F2 score: 0.929538984566
thresh: 0.1 	F2 score: 0.939752729155
thresh: 0.15 	F2 score: 0.94052912667
thresh: 0.2 	F2 score: 0.939029267834
thresh: 0.25 	F2 score: 0.93878811406
thresh: 0.3 	F2 score: 0.936964756258
thresh: 0.35 	F2 score: 0.931282938076


In [22]:
#continue with reduced learning rate
model.compile(optimizer=Adam(lr=0.0001), loss='categorical_crossentropy', metrics=['accuracy'])
model.fit_generator(datagen.flow(x_train,y_train_w, batch_size = 128), validation_data=(x_val, y_val_w),
                  verbose=2, epochs=10, steps_per_epoch=x_train.shape[0]/ 128, callbacks=callbacks,
                  )

Epoch 1/10
53s - loss: 0.2076 - acc: 0.9243 - val_loss: 0.2295 - val_acc: 0.9200
Epoch 2/10
50s - loss: 0.2067 - acc: 0.9255 - val_loss: 0.2311 - val_acc: 0.9187
Epoch 3/10
50s - loss: 0.2064 - acc: 0.9247 - val_loss: 0.2290 - val_acc: 0.9190
Epoch 4/10
50s - loss: 0.2074 - acc: 0.9247 - val_loss: 0.2339 - val_acc: 0.9175
Epoch 5/10
50s - loss: 0.2055 - acc: 0.9257 - val_loss: 0.2328 - val_acc: 0.9185
Epoch 6/10
50s - loss: 0.2045 - acc: 0.9265 - val_loss: 0.2300 - val_acc: 0.9185


<keras.callbacks.History at 0x7fe78e572668>

In [23]:
y_pred = model.predict(x_val,batch_size=128)
for thresh in [0.05,0.1,0.15,0.2,0.25,0.3,0.35]:
    print("thresh:",thresh,"\tF2 score:",fbeta_score(y_val_w, np.array(y_pred)>thresh, beta=2, average='samples'))

thresh: 0.05 	F2 score: 0.931025609354
thresh: 0.1 	F2 score: 0.942417184265
thresh: 0.15 	F2 score: 0.942870082816
thresh: 0.2 	F2 score: 0.940570299266
thresh: 0.25 	F2 score: 0.939952710333
thresh: 0.3 	F2 score: 0.937552936194
thresh: 0.35 	F2 score: 0.933300395257


  'precision', 'predicted', average, warn_for)


In [25]:
#continue with reduced learning rate
model.compile(optimizer=Adam(lr=0.00005), loss='categorical_crossentropy', metrics=['accuracy'])
model.fit_generator(datagen.flow(x_train,y_train_w, batch_size = 128), validation_data=(x_val, y_val_w),
                  verbose=2, epochs=20, steps_per_epoch=x_train.shape[0]/ 128, #callbacks=callbacks,
                  )

Epoch 1/20
54s - loss: 0.2023 - acc: 0.9262 - val_loss: 0.2309 - val_acc: 0.9172
Epoch 2/20
50s - loss: 0.2024 - acc: 0.9275 - val_loss: 0.2283 - val_acc: 0.9205
Epoch 3/20
50s - loss: 0.2031 - acc: 0.9263 - val_loss: 0.2317 - val_acc: 0.9195
Epoch 4/20
50s - loss: 0.2023 - acc: 0.9259 - val_loss: 0.2301 - val_acc: 0.9192
Epoch 5/20
50s - loss: 0.2019 - acc: 0.9266 - val_loss: 0.2308 - val_acc: 0.9150
Epoch 6/20
50s - loss: 0.2004 - acc: 0.9269 - val_loss: 0.2294 - val_acc: 0.9195
Epoch 7/20
50s - loss: 0.2016 - acc: 0.9265 - val_loss: 0.2282 - val_acc: 0.9190
Epoch 8/20
50s - loss: 0.2006 - acc: 0.9273 - val_loss: 0.2308 - val_acc: 0.9170
Epoch 9/20
50s - loss: 0.2005 - acc: 0.9259 - val_loss: 0.2298 - val_acc: 0.9200
Epoch 10/20
50s - loss: 0.2004 - acc: 0.9267 - val_loss: 0.2321 - val_acc: 0.9170
Epoch 11/20
50s - loss: 0.2016 - acc: 0.9264 - val_loss: 0.2303 - val_acc: 0.9185
Epoch 12/20
50s - loss: 0.2010 - acc: 0.9272 - val_loss: 0.2277 - val_acc: 0.9195
Epoch 13/20
50s - loss: 0

<keras.callbacks.History at 0x7fe78ad51940>

In [None]:
y_pred = model.predict(x_val,batch_size=128)
for thresh in [0.05,0.1,0.15,0.2,0.25,0.3,0.35]:
    print("thresh:",thresh,"\tF2 score:",fbeta_score(y_val_w, np.array(y_pred)>thresh, beta=2, average='samples'))

In [26]:
model.save("simple_tif_weather")

In [27]:
x_val = []
x_train = []
x_train = np.zeros((40479,32,32,4), np.float32)
y_train = []

df_train = pd.read_csv('train_v2.csv')

labels = ['blow_down',
 'bare_ground',
 'conventional_mine',
 'blooming',
 'artisinal_mine',
 'selective_logging',         
 'slash_burn', 
 'cultivation',
 'habitation',
 'road',
 'agriculture',
 'water',
 'primary',
 'partly_cloudy', 
 'cloudy',
 'clear',
 'haze',]

label_map = {l: i for i, l in enumerate(labels)}
inv_label_map = {i: l for l, i in label_map.items()}

i=0

for f, tags in tqdm(df_train.values[:40479], miniters=1000):    
    img = io.imread('train-tif-v2/{}.tif'.format(f))
    targets = np.zeros(17)
    for t in tags.split(' '):
        targets[label_map[t]] = 1 
    x_train[i,:,:,:] = np.array(transform.resize(img,(32,32),mode = 'constant'),np.float32)#automatically scales to [0,1] float
    i+=1
    y_train.append(targets)
  
y_train = np.array(y_train, np.uint8)

print(x_train.shape)
print(y_train.shape)

100%|██████████| 40479/40479 [02:03<00:00, 326.65it/s]

(40479, 32, 32, 4)
(40479, 17)





In [9]:
y_train_2 = np.zeros((y_train.shape[0],7))
y_train_2[:,1:] = y_train[:,7:13]
y_train_2[:,0] = (np.sum(y_train[:,:7],axis=1)>0)
y_train_2 = np.array(y_train_2,np.uint8)

In [10]:
print(y_train[100,:])
print(y_train_2[100,:])

[0 1 0 0 0 0 0 0 0 0 0 1 0 0 0 1 0]
[1 0 0 0 0 1 0]


In [11]:
x_train -= train_mean
x_train, x_val, y_train_2, y_val_2 = train_test_split(x_train,y_train_2,test_size=0.1)
print(x_train.shape)
print(y_train_2.shape)
print(x_val.shape)
print(y_val_2.shape)

(36431, 32, 32, 4)
(36431, 7)
(4048, 32, 32, 4)
(4048, 7)


In [31]:
#model for the more common classes + 1 class for others
model1 = Sequential()
model1.add(Conv2D(32, (3, 3), padding = 'same', input_shape=(32, 32, 4)))
model1.add(BatchNormalization())
model1.add(Activation('relu'))
model1.add(Conv2D(48, (3, 3), padding = 'same'))
model1.add(BatchNormalization())
model1.add(Activation('relu'))
model1.add(Conv2D(48, (3, 3), padding = 'same'))
model1.add(BatchNormalization())
model1.add(Activation('relu'))
model1.add(Conv2D(48, (3, 3), padding = 'same'))
model1.add(BatchNormalization())
model1.add(Activation('relu'))
model1.add(MaxPooling2D(pool_size=(2, 2)))
model1.add(Conv2D(64, (3, 3), padding = 'same'))
model1.add(BatchNormalization())
model1.add(Activation('relu'))
model1.add(Conv2D(64, (3, 3), padding = 'same'))
model1.add(BatchNormalization())
model1.add(Activation('relu'))
model1.add(Conv2D(64, (3, 3), padding = 'same'))
model1.add(BatchNormalization())
model1.add(Activation('relu'))
model1.add(MaxPooling2D(pool_size=(2, 2)))
model1.add(Conv2D(128, (3, 3), padding = 'same'))
model1.add(BatchNormalization())
model1.add(Activation('relu'))
model1.add(Conv2D(128, (3, 3), padding = 'same'))
model1.add(BatchNormalization())
model1.add(Activation('relu'))
model1.add(Conv2D(128, (3, 3), padding = 'same'))
model1.add(BatchNormalization())
model1.add(Activation('relu'))
model1.add(MaxPooling2D(pool_size=(2, 2)))
model1.add(Flatten())
model1.add(Dense(2048))
model1.add(BatchNormalization())
model1.add(Activation('relu'))
model1.add(Dense(1024))
model1.add(BatchNormalization())
model1.add(Activation('relu'))
model1.add(Dense(7, activation='sigmoid'))

In [16]:
model1.compile(loss='binary_crossentropy', 
              optimizer='adam',
              metrics=['accuracy'])

model1.fit_generator(datagen.flow(x_train,y_train_2, batch_size = 128), validation_data=(x_val, y_val_2),
                  verbose=2, epochs=10, steps_per_epoch=x_train.shape[0]/ 128, callbacks=callbacks,
                  )

Epoch 1/10
52s - loss: 0.1931 - acc: 0.9239 - val_loss: 0.2500 - val_acc: 0.9066
Epoch 2/10
51s - loss: 0.1905 - acc: 0.9254 - val_loss: 0.2186 - val_acc: 0.9136
Epoch 3/10
51s - loss: 0.1905 - acc: 0.9252 - val_loss: 0.2065 - val_acc: 0.9205
Epoch 4/10
50s - loss: 0.1899 - acc: 0.9257 - val_loss: 0.2079 - val_acc: 0.9193
Epoch 5/10
51s - loss: 0.1896 - acc: 0.9253 - val_loss: 0.2203 - val_acc: 0.9195
Epoch 6/10
51s - loss: 0.1901 - acc: 0.9254 - val_loss: 0.2191 - val_acc: 0.9118


<keras.callbacks.History at 0x7f627a96a0f0>

In [17]:
y_pred = model1.predict(x_val,batch_size=128)
for thresh in [0.05,0.1,0.15,0.2,0.25,0.3,0.35]:
    print("thresh:",thresh,"\tF2 score:",fbeta_score(y_val_2, np.array(y_pred)>thresh, beta=2, average='samples'))

thresh: 0.05 	F2 score: 0.838365032791
thresh: 0.1 	F2 score: 0.847489631401
thresh: 0.15 	F2 score: 0.846188746368
thresh: 0.2 	F2 score: 0.840427564313
thresh: 0.25 	F2 score: 0.835974225949
thresh: 0.3 	F2 score: 0.82996729379
thresh: 0.35 	F2 score: 0.823564123933


  'precision', 'predicted', average, warn_for)
  'recall', 'true', average, warn_for)


In [19]:
#continue with reduced learning rate
model1.compile(loss='binary_crossentropy', 
              optimizer=Adam(lr=0.0005),
              metrics=['accuracy']) 
model1.fit_generator(datagen.flow(x_train,y_train_2, batch_size = 128), validation_data=(x_val, y_val_2),
                  verbose=2, epochs=10, steps_per_epoch=x_train.shape[0]/ 128, callbacks=callbacks,
                  )

Epoch 1/10
53s - loss: 0.1789 - acc: 0.9296 - val_loss: 0.1823 - val_acc: 0.9291
Epoch 2/10
50s - loss: 0.1790 - acc: 0.9293 - val_loss: 0.1898 - val_acc: 0.9243
Epoch 3/10
50s - loss: 0.1781 - acc: 0.9299 - val_loss: 0.1912 - val_acc: 0.9259
Epoch 4/10
51s - loss: 0.1777 - acc: 0.9294 - val_loss: 0.1844 - val_acc: 0.9281


<keras.callbacks.History at 0x7f62731aae10>

In [20]:
y_pred = model1.predict(x_val,batch_size=128)
for thresh in [0.05,0.1,0.15,0.2,0.25,0.3,0.35]:
    print("thresh:",thresh,"\tF2 score:",fbeta_score(y_val_2, np.array(y_pred)>thresh, beta=2, average='samples'))

thresh: 0.05 	F2 score: 0.830908523274
thresh: 0.1 	F2 score: 0.852707221514
thresh: 0.15 	F2 score: 0.862059537022
thresh: 0.2 	F2 score: 0.862268589165
thresh: 0.25 	F2 score: 0.859701759946
thresh: 0.3 	F2 score: 0.856623214579
thresh: 0.35 	F2 score: 0.852857421463


  'precision', 'predicted', average, warn_for)
  'recall', 'true', average, warn_for)


In [22]:
#continue with reduced learning rate
model1.compile(loss='binary_crossentropy', 
              optimizer=Adam(lr=0.0002),
              metrics=['accuracy']) 
model1.fit_generator(datagen.flow(x_train,y_train_2, batch_size = 128), validation_data=(x_val, y_val_2),
                  verbose=2, epochs=10, steps_per_epoch=x_train.shape[0]/ 128, callbacks=callbacks,
                  )

Epoch 1/10
53s - loss: 0.1720 - acc: 0.9320 - val_loss: 0.1810 - val_acc: 0.9295
Epoch 2/10
51s - loss: 0.1712 - acc: 0.9320 - val_loss: 0.1786 - val_acc: 0.9306
Epoch 3/10
51s - loss: 0.1712 - acc: 0.9320 - val_loss: 0.1772 - val_acc: 0.9312
Epoch 4/10
51s - loss: 0.1708 - acc: 0.9321 - val_loss: 0.1781 - val_acc: 0.9307
Epoch 5/10
51s - loss: 0.1709 - acc: 0.9324 - val_loss: 0.1832 - val_acc: 0.9297
Epoch 6/10
51s - loss: 0.1698 - acc: 0.9326 - val_loss: 0.1790 - val_acc: 0.9303


<keras.callbacks.History at 0x7f626fa3f470>

In [23]:
y_pred = model1.predict(x_val,batch_size=128)
for thresh in [0.05,0.1,0.15,0.2,0.25,0.3,0.35]:
    print("thresh:",thresh,"\tF2 score:",fbeta_score(y_val_2, np.array(y_pred)>thresh, beta=2, average='samples'))

thresh: 0.05 	F2 score: 0.829298568509
thresh: 0.1 	F2 score: 0.85345522709
thresh: 0.15 	F2 score: 0.861311190811
thresh: 0.2 	F2 score: 0.862977485
thresh: 0.25 	F2 score: 0.862122573625
thresh: 0.3 	F2 score: 0.85891857489
thresh: 0.35 	F2 score: 0.854532251186


  'precision', 'predicted', average, warn_for)
  'recall', 'true', average, warn_for)


In [24]:
#continue with reduced learning rate
model1.compile(loss='binary_crossentropy', 
              optimizer=Adam(lr=0.0001),
              metrics=['accuracy']) 
model1.fit_generator(datagen.flow(x_train,y_train_2, batch_size = 128), validation_data=(x_val, y_val_2),
                  verbose=2, epochs=10, steps_per_epoch=x_train.shape[0]/ 128, callbacks=callbacks,
                  )

Epoch 1/10
54s - loss: 0.1683 - acc: 0.9330 - val_loss: 0.1757 - val_acc: 0.9312
Epoch 2/10
51s - loss: 0.1679 - acc: 0.9336 - val_loss: 0.1778 - val_acc: 0.9302
Epoch 3/10
51s - loss: 0.1681 - acc: 0.9333 - val_loss: 0.1764 - val_acc: 0.9307
Epoch 4/10
51s - loss: 0.1672 - acc: 0.9336 - val_loss: 0.1759 - val_acc: 0.9307


<keras.callbacks.History at 0x7f626de75d68>

In [25]:
y_pred = model1.predict(x_val,batch_size=128)
for thresh in [0.05,0.1,0.15,0.2,0.25,0.3,0.35]:
    print("thresh:",thresh,"\tF2 score:",fbeta_score(y_val_2, np.array(y_pred)>thresh, beta=2, average='samples'))

thresh: 0.05 	F2 score: 0.832387936603
thresh: 0.1 	F2 score: 0.856254497222
thresh: 0.15 	F2 score: 0.863327351744
thresh: 0.2 	F2 score: 0.863871335725
thresh: 0.25 	F2 score: 0.863582872569
thresh: 0.3 	F2 score: 0.859694592965
thresh: 0.35 	F2 score: 0.85628394751


  'precision', 'predicted', average, warn_for)
  'recall', 'true', average, warn_for)


In [26]:
#continue with reduced learning rate
model1.compile(loss='binary_crossentropy', 
              optimizer=Adam(lr=0.00005),
              metrics=['accuracy']) 
model1.fit_generator(datagen.flow(x_train,y_train_2, batch_size = 128), validation_data=(x_val, y_val_2),
                  verbose=2, epochs=10, steps_per_epoch=x_train.shape[0]/ 128, callbacks=callbacks,
                  )

Epoch 1/10
53s - loss: 0.1668 - acc: 0.9337 - val_loss: 0.1756 - val_acc: 0.9312
Epoch 2/10
51s - loss: 0.1664 - acc: 0.9341 - val_loss: 0.1760 - val_acc: 0.9308
Epoch 3/10
51s - loss: 0.1662 - acc: 0.9339 - val_loss: 0.1765 - val_acc: 0.9310
Epoch 4/10
51s - loss: 0.1663 - acc: 0.9339 - val_loss: 0.1761 - val_acc: 0.9311


<keras.callbacks.History at 0x7f626c245908>

In [27]:
y_pred = model1.predict(x_val,batch_size=128)
for thresh in [0.05,0.1,0.15,0.2,0.25,0.3,0.35]:
    print("thresh:",thresh,"\tF2 score:",fbeta_score(y_val_2, np.array(y_pred)>thresh, beta=2, average='samples'))

thresh: 0.05 	F2 score: 0.83124676292
thresh: 0.1 	F2 score: 0.855295891568
thresh: 0.15 	F2 score: 0.862961359881
thresh: 0.2 	F2 score: 0.864819977261
thresh: 0.25 	F2 score: 0.863869289087
thresh: 0.3 	F2 score: 0.861268434262
thresh: 0.35 	F2 score: 0.857054414313


  'precision', 'predicted', average, warn_for)
  'recall', 'true', average, warn_for)


In [28]:
y_pred = model1.predict(x_val,batch_size=128)
bestthresh = 0
bestF2score = 0
for thresh in [0.05,0.1,0.15,0.2,0.25,0.3,0.35]:
    F2score = fbeta_score(y_val_2, np.array(y_pred)>thresh, beta=2, average='samples')
    print("thresh:",thresh,"\tF2 score:",F2score)
    if F2score > bestF2score:
        bestthresh = thresh
        bestF2score = F2score        

thresh: 0.05 	F2 score: 0.83124676292
thresh: 0.1 	F2 score: 0.855295891568
thresh: 0.15 	F2 score: 0.862961359881
thresh: 0.2 	F2 score: 0.864819977261
thresh: 0.25 	F2 score: 0.863869289087
thresh: 0.3 	F2 score: 0.861268434262
thresh: 0.35 	F2 score: 0.857054414313


  'precision', 'predicted', average, warn_for)
  'recall', 'true', average, warn_for)


In [29]:
total,tp,tn,fp,fn = multilabelmetrics(y_val_2,np.array(y_pred)>bestthresh)
d = {'Total':total,'TP':tp,'TN':tn,'FP':fp,'FN':fn}
pd.DataFrame(d, index=['others']+labels[7:13])

Unnamed: 0,FN,FP,TN,TP,Total
others,108,131,3687,122,230
cultivation,140,352,3233,323,463
habitation,79,297,3411,261,340
road,81,408,2842,717,798
agriculture,87,414,2418,1129,1216
water,107,325,2950,666,773
primary,4,159,159,3726,3730


In [30]:
num_rare = np.sum(np.sum(y_train[:,:7],axis=1)>0)
print(num_rare)

2180


In [31]:
model1.save("simple_tif_major")

In [32]:
x_train = []
x_train = np.zeros((num_rare,32,32,4), np.float32)
y_train = []

df_train = pd.read_csv('train_v2.csv')

labels = ['blow_down',
 'bare_ground',
 'conventional_mine',
 'blooming',
 'artisinal_mine',
 'selective_logging',         
 'slash_burn', 
 'cultivation',
 'habitation',
 'road',
 'agriculture',
 'water',
 'primary',
 'partly_cloudy', 
 'cloudy',
 'clear',
 'haze',]

label_map = {l: i for i, l in enumerate(labels)}
inv_label_map = {i: l for l, i in label_map.items()}

i=0

for f, tags in tqdm(df_train.values[:40479], miniters=1000):    
    targets = np.zeros(17)
    for t in tags.split(' '):
        targets[label_map[t]] = 1 
    if(np.sum(targets[:7])>0):
        img = io.imread('train-tif-v2/{}.tif'.format(f))
        x_train[i,:,:,:] = np.array(transform.resize(img,(32,32),mode = 'constant'),np.float32)#automatically scales to [0,1] float
        i+=1
        y_train.append(targets)
    
y_train = np.array(y_train, np.uint8)

print(x_train.shape)
print(y_train.shape)

100%|██████████| 40479/40479 [00:06<00:00, 6083.39it/s]

(2180, 32, 32, 4)
(2180, 17)





In [33]:
x_train -= train_mean
x_train, x_val, y_train_3, y_val_3 = train_test_split(x_train,y_train[:,:7],test_size=0.1)
print(x_train.shape)
print(y_train_3.shape)
print(x_val.shape)
print(y_val_3.shape)

(1962, 32, 32, 4)
(1962, 7)
(218, 32, 32, 4)
(218, 7)


In [35]:
#model for the rarer classes
from keras.models import load_model
model2 = load_model("simple_tif_major")
for layer in model2.layers[:-1]:
    layer.trainable = False
model2.layers[-1].trainable = True

In [36]:
model2.compile(loss='binary_crossentropy', 
              optimizer='adam',
              metrics=['accuracy'])

model2.fit_generator(datagen.flow(x_train,y_train_3, batch_size = 128), validation_data=(x_val, y_val_3),
                  verbose=2, epochs=10, steps_per_epoch=10*x_train.shape[0]/ 128, callbacks=callbacks,
                  )#more steps per epoch to compensate for fewer images

Epoch 1/10
11s - loss: 0.7326 - acc: 0.7932 - val_loss: 0.3158 - val_acc: 0.8761
Epoch 2/10
8s - loss: 0.2782 - acc: 0.8951 - val_loss: 0.2675 - val_acc: 0.9050
Epoch 3/10
8s - loss: 0.2551 - acc: 0.9039 - val_loss: 0.2530 - val_acc: 0.9069
Epoch 4/10
8s - loss: 0.2421 - acc: 0.9061 - val_loss: 0.2433 - val_acc: 0.9102
Epoch 5/10
8s - loss: 0.2317 - acc: 0.9085 - val_loss: 0.2330 - val_acc: 0.9128
Epoch 6/10
8s - loss: 0.2238 - acc: 0.9097 - val_loss: 0.2252 - val_acc: 0.9089
Epoch 7/10
8s - loss: 0.2196 - acc: 0.9109 - val_loss: 0.2206 - val_acc: 0.9076
Epoch 8/10
8s - loss: 0.2158 - acc: 0.9124 - val_loss: 0.2186 - val_acc: 0.9102
Epoch 9/10
8s - loss: 0.2117 - acc: 0.9124 - val_loss: 0.2184 - val_acc: 0.9076
Epoch 10/10
8s - loss: 0.2084 - acc: 0.9134 - val_loss: 0.2165 - val_acc: 0.9102


<keras.callbacks.History at 0x7f6262d84b70>

In [37]:
y_pred = model2.predict(x_val,batch_size=128)
for thresh in [0.05,0.1,0.15,0.2,0.25,0.3,0.35]:
    print("thresh:",thresh,"\tF2 score:",fbeta_score(y_val_3, np.array(y_pred)>thresh, beta=2, average='samples'))

thresh: 0.05 	F2 score: 0.732410619452
thresh: 0.1 	F2 score: 0.753445331427
thresh: 0.15 	F2 score: 0.759942974966
thresh: 0.2 	F2 score: 0.747626990746
thresh: 0.25 	F2 score: 0.729351178892
thresh: 0.3 	F2 score: 0.707487721249
thresh: 0.35 	F2 score: 0.690112130479


  'precision', 'predicted', average, warn_for)


In [38]:
model2.compile(loss='binary_crossentropy', 
              optimizer=Adam(lr=0.0005),
              metrics=['accuracy']) 
model2.fit_generator(datagen.flow(x_train,y_train_3, batch_size = 128), validation_data=(x_val, y_val_3),
                  verbose=2, epochs=10, steps_per_epoch=10*x_train.shape[0]/ 128, callbacks=callbacks,
                  )

Epoch 1/10
11s - loss: 0.2041 - acc: 0.9148 - val_loss: 0.2133 - val_acc: 0.9109
Epoch 2/10
8s - loss: 0.2044 - acc: 0.9146 - val_loss: 0.2133 - val_acc: 0.9135
Epoch 3/10
8s - loss: 0.2027 - acc: 0.9156 - val_loss: 0.2121 - val_acc: 0.9115
Epoch 4/10
8s - loss: 0.2028 - acc: 0.9148 - val_loss: 0.2110 - val_acc: 0.9122
Epoch 5/10
8s - loss: 0.1985 - acc: 0.9167 - val_loss: 0.2120 - val_acc: 0.9122
Epoch 6/10
8s - loss: 0.1988 - acc: 0.9161 - val_loss: 0.2116 - val_acc: 0.9135
Epoch 7/10
8s - loss: 0.1986 - acc: 0.9164 - val_loss: 0.2132 - val_acc: 0.9115


<keras.callbacks.History at 0x7f6262bd24e0>

In [39]:
y_pred = model2.predict(x_val,batch_size=128)
for thresh in [0.05,0.1,0.15,0.2,0.25,0.3,0.35]:
    print("thresh:",thresh,"\tF2 score:",fbeta_score(y_val_3, np.array(y_pred)>thresh, beta=2, average='samples'))

thresh: 0.05 	F2 score: 0.732055660955
thresh: 0.1 	F2 score: 0.751115347446
thresh: 0.15 	F2 score: 0.76605670069
thresh: 0.2 	F2 score: 0.751221255808
thresh: 0.25 	F2 score: 0.743665356051
thresh: 0.3 	F2 score: 0.735401194117
thresh: 0.35 	F2 score: 0.690112130479


  'precision', 'predicted', average, warn_for)


In [41]:
model2.compile(loss='binary_crossentropy', 
              optimizer=Adam(lr=0.0002),
              metrics=['accuracy']) 
model2.fit_generator(datagen.flow(x_train,y_train_3, batch_size = 128), validation_data=(x_val, y_val_3),
                  verbose=2, epochs=10, steps_per_epoch=10*x_train.shape[0]/ 128, callbacks=callbacks,
                  )

Epoch 1/10
11s - loss: 0.1957 - acc: 0.9172 - val_loss: 0.2112 - val_acc: 0.9122
Epoch 2/10
8s - loss: 0.1959 - acc: 0.9170 - val_loss: 0.2114 - val_acc: 0.9135
Epoch 3/10
8s - loss: 0.1962 - acc: 0.9172 - val_loss: 0.2114 - val_acc: 0.9122
Epoch 4/10
8s - loss: 0.1967 - acc: 0.9168 - val_loss: 0.2111 - val_acc: 0.9128
Epoch 5/10
8s - loss: 0.1949 - acc: 0.9178 - val_loss: 0.2112 - val_acc: 0.9122
Epoch 6/10
8s - loss: 0.1960 - acc: 0.9170 - val_loss: 0.2113 - val_acc: 0.9128
Epoch 7/10
8s - loss: 0.1946 - acc: 0.9179 - val_loss: 0.2113 - val_acc: 0.9135


<keras.callbacks.History at 0x7f626285d588>

In [42]:
y_pred = model2.predict(x_val,batch_size=128)
for thresh in [0.05,0.1,0.15,0.2,0.25,0.3,0.35]:
    print("thresh:",thresh,"\tF2 score:",fbeta_score(y_val_3, np.array(y_pred)>thresh, beta=2, average='samples'))

thresh: 0.05 	F2 score: 0.734141559256
thresh: 0.1 	F2 score: 0.75802338589
thresh: 0.15 	F2 score: 0.767722275176
thresh: 0.2 	F2 score: 0.761378529727
thresh: 0.25 	F2 score: 0.749308286006
thresh: 0.3 	F2 score: 0.72873889617
thresh: 0.35 	F2 score: 0.690112130479


  'precision', 'predicted', average, warn_for)


In [43]:
for layer in model2.layers[-7:]:
    layer.trainable = True
model2.layers[-7:]

[<keras.layers.core.Dense at 0x7f6266056e80>,
 <keras.layers.normalization.BatchNormalization at 0x7f626601a198>,
 <keras.layers.core.Activation at 0x7f626602db70>,
 <keras.layers.core.Dense at 0x7f626602da58>,
 <keras.layers.normalization.BatchNormalization at 0x7f6265ff5eb8>,
 <keras.layers.core.Activation at 0x7f6265fabfd0>,
 <keras.layers.core.Dense at 0x7f6265fabeb8>]

In [44]:
model2.compile(loss='binary_crossentropy', 
              optimizer=Adam(lr=0.0001),
              metrics=['accuracy']) 
model2.fit_generator(datagen.flow(x_train,y_train_3, batch_size = 128), validation_data=(x_val, y_val_3),
                  verbose=2, epochs=10, steps_per_epoch=10*x_train.shape[0]/ 128, callbacks=callbacks,
                  )

Epoch 1/10
12s - loss: 0.1913 - acc: 0.9191 - val_loss: 0.2107 - val_acc: 0.9128
Epoch 2/10
10s - loss: 0.1877 - acc: 0.9201 - val_loss: 0.2098 - val_acc: 0.9135
Epoch 3/10
10s - loss: 0.1833 - acc: 0.9220 - val_loss: 0.2100 - val_acc: 0.9135
Epoch 4/10
10s - loss: 0.1803 - acc: 0.9231 - val_loss: 0.2107 - val_acc: 0.9135
Epoch 5/10
10s - loss: 0.1754 - acc: 0.9258 - val_loss: 0.2102 - val_acc: 0.9128


<keras.callbacks.History at 0x7f6262300c18>

In [45]:
y_pred = model2.predict(x_val,batch_size=128)
for thresh in [0.05,0.1,0.15,0.2,0.25,0.3,0.35]:
    print("thresh:",thresh,"\tF2 score:",fbeta_score(y_val_3, np.array(y_pred)>thresh, beta=2, average='samples'))

thresh: 0.05 	F2 score: 0.74512076863
thresh: 0.1 	F2 score: 0.777656976051
thresh: 0.15 	F2 score: 0.770387359837
thresh: 0.2 	F2 score: 0.764708023882
thresh: 0.25 	F2 score: 0.743993010048
thresh: 0.3 	F2 score: 0.74006116208
thresh: 0.35 	F2 score: 0.705402650357


  'precision', 'predicted', average, warn_for)


In [46]:
model2.compile(loss='binary_crossentropy', 
              optimizer=Adam(lr=0.00005),
              metrics=['accuracy']) 
model2.fit_generator(datagen.flow(x_train,y_train_3, batch_size = 128), validation_data=(x_val, y_val_3),
                  verbose=2, epochs=10, steps_per_epoch=10*x_train.shape[0]/ 128, callbacks=callbacks,
                  )

Epoch 1/10
12s - loss: 0.1734 - acc: 0.9264 - val_loss: 0.2111 - val_acc: 0.9135
Epoch 2/10
10s - loss: 0.1721 - acc: 0.9264 - val_loss: 0.2112 - val_acc: 0.9135
Epoch 3/10
10s - loss: 0.1699 - acc: 0.9275 - val_loss: 0.2119 - val_acc: 0.9128
Epoch 4/10
10s - loss: 0.1678 - acc: 0.9283 - val_loss: 0.2127 - val_acc: 0.9122


<keras.callbacks.History at 0x7f6261d1f208>

In [47]:
y_pred = model2.predict(x_val,batch_size=128)
bestthresh = 0
bestF2score = 0
for thresh in [0.05,0.1,0.15,0.2,0.25,0.3,0.35]:
    F2score = fbeta_score(y_val_3, np.array(y_pred)>thresh, beta=2, average='samples')
    print("thresh:",thresh,"\tF2 score:",F2score)
    if F2score > bestF2score:
        bestthresh = thresh
        bestF2score = F2score    

thresh: 0.05 	F2 score: 0.748106060606
thresh: 0.1 	F2 score: 0.782571786012
thresh: 0.15 	F2 score: 0.777823285277
thresh: 0.2 	F2 score: 0.75629823795
thresh: 0.25 	F2 score: 0.754696373962
thresh: 0.3 	F2 score: 0.727319062181
thresh: 0.35 	F2 score: 0.699541284404


  'precision', 'predicted', average, warn_for)


In [48]:
total,tp,tn,fp,fn = multilabelmetrics(y_val_3,np.array(y_pred)>bestthresh)
d = {'Total':total,'TP':tp,'TN':tn,'FP':fp,'FN':fn}
pd.DataFrame(d, index=labels[:7])

Unnamed: 0,FN,FP,TN,TP,Total
blow_down,3,7,203,5,8
bare_ground,0,69,71,78,78
conventional_mine,7,15,191,5,12
blooming,2,23,155,38,40
artisinal_mine,3,12,167,36,39
selective_logging,2,60,133,23,25
slash_burn,5,35,157,21,26


In [49]:
model2.save("simple_tif_rare")

In [50]:
#F2 score on training set
x_val = []
x_train = []
x_train = np.zeros((40479,32,32,4), np.float32)
y_train = []

df_train = pd.read_csv('train_v2.csv')

labels = ['blow_down',
 'bare_ground',
 'conventional_mine',
 'blooming',
 'artisinal_mine',
 'selective_logging',         
 'slash_burn', 
 'cultivation',
 'habitation',
 'road',
 'agriculture',
 'water',
 'primary',
 'partly_cloudy', 
 'cloudy',
 'clear',
 'haze',]

label_map = {l: i for i, l in enumerate(labels)}
inv_label_map = {i: l for l, i in label_map.items()}

i=0

for f, tags in tqdm(df_train.values[:40479], miniters=1000):    
    img = io.imread('train-tif-v2/{}.tif'.format(f))
    targets = np.zeros(17)
    for t in tags.split(' '):
        targets[label_map[t]] = 1 
    x_train[i,:,:,:] = np.array(transform.resize(img,(32,32),mode = 'constant'),np.float32)#automatically scales to [0,1] float
    i+=1
    y_train.append(targets)

y_train = np.array(y_train, np.uint8)

print(x_train.shape)
print(y_train.shape)

100%|██████████| 40479/40479 [02:01<00:00, 334.45it/s]

(40479, 32, 32, 4)
(40479, 17)





In [51]:
x_train -= train_mean

In [52]:
y = model.predict(x_train,batch_size=128)
y1 = model1.predict(x_train,batch_size=128)
y2 = model2.predict(x_train,batch_size=128)

In [53]:
print(y.shape)
print(y1.shape)
print(y2.shape)

(40479, 4)
(40479, 7)
(40479, 7)


In [54]:
f2scorelist = []
for thresh in [0.05,0.1,0.15,0.2,0.25,0.3]:
    for thresh1 in [0.05,0.1,0.15,0.2,0.25,0.3]:
        for thresh2 in [0.05,0.1,0.15,0.2,0.25,0.3]:
            for thresh3 in [0.05,0.1,0.15,0.2,0.25,0.3]:
                y_pred = combine_predictions(x_train,y,y1,y2,thresh,thresh1,thresh2,thresh3)
                f2scorelist.append([thresh,thresh1,thresh2,thresh3,fbeta_score(y_train,y_pred , beta=2, average='samples')])

In [55]:
f2scorelist

[[0.05, 0.05, 0.05, 0.05, 0.88024434047066269],
 [0.05, 0.05, 0.05, 0.1, 0.88798136787450066],
 [0.05, 0.05, 0.05, 0.15, 0.89143854612135653],
 [0.05, 0.05, 0.05, 0.2, 0.89299359567455816],
 [0.05, 0.05, 0.05, 0.25, 0.89369772988216756],
 [0.05, 0.05, 0.05, 0.3, 0.89408325365173713],
 [0.05, 0.05, 0.1, 0.05, 0.88416027406757103],
 [0.05, 0.05, 0.1, 0.1, 0.89013825172708327],
 [0.05, 0.05, 0.1, 0.15, 0.89274956847031361],
 [0.05, 0.05, 0.1, 0.2, 0.89387349685793716],
 [0.05, 0.05, 0.1, 0.25, 0.89433658470109534],
 [0.05, 0.05, 0.1, 0.3, 0.89457522569089254],
 [0.05, 0.05, 0.15, 0.05, 0.8864166618380066],
 [0.05, 0.05, 0.15, 0.1, 0.8913441878560624],
 [0.05, 0.05, 0.15, 0.15, 0.89345000378185502],
 [0.05, 0.05, 0.15, 0.2, 0.89431919238121582],
 [0.05, 0.05, 0.15, 0.25, 0.89467327633553118],
 [0.05, 0.05, 0.15, 0.3, 0.89483615667706229],
 [0.05, 0.05, 0.2, 0.05, 0.88795134217534011],
 [0.05, 0.05, 0.2, 0.1, 0.89217287146163216],
 [0.05, 0.05, 0.2, 0.15, 0.89392959397531113],
 [0.05, 0.05,

In [56]:
max([l[4] for l in f2scorelist])

0.92127759212018911

In [57]:
#Test set
x_train = []
x_val = []
x_test = np.zeros((61191,32,32,4), np.float32)
y_train = []

df_test = pd.read_csv('sample_submission_v2.csv')

i = 0 
for f, tags in tqdm(df_test.values, miniters=1000):
    img = io.imread('test-tif-v2/{}.tif'.format(f))
    x_test[i,:,:,:] = np.array(transform.resize(img,(32,32),mode = 'constant'),np.float32)#automatically scales to [0,1] float
    i+=1
print(x_test.shape)

100%|██████████| 61191/61191 [47:09<00:00, 25.20it/s]

(61191, 32, 32, 4)





In [58]:
x_test -= train_mean

y = model.predict(x_test,batch_size=128)
y1 = model1.predict(x_test,batch_size=128)
y2 = model2.predict(x_test,batch_size=128)

In [59]:
thresh,thresh1,thresh2,thresh3 = 0.2, 0.2, 0.3, 0.3
y_pred = combine_predictions(x_test,y,y1,y2,thresh,thresh1,thresh2,thresh3)
print(y_pred.shape)

(61191, 17)


In [60]:
labels_np = np.array(labels)
preds = [' '.join(labels_np[np.array(y_pred[i,:],bool)]) for i in range(y_pred.shape[0])]
subm = pd.DataFrame()
subm['image_name'] = df_test.values[:,0]
subm['tags'] = preds
subm.to_csv('submission_3net_tif_1.csv', index=False)
#test set score:0.91086

In [None]:
subm

In [61]:
thresh,thresh1,thresh2,thresh3 = 0.2, 0.2, 0.25, 0.3
y_pred = combine_predictions(x_test,y,y1,y2,thresh,thresh1,thresh2,thresh3)
print(y_pred.shape)

(61191, 17)


In [62]:
labels_np = np.array(labels)
preds = [' '.join(labels_np[np.array(y_pred[i,:],bool)]) for i in range(y_pred.shape[0])]
subm = pd.DataFrame()
subm['image_name'] = df_test.values[:,0]
subm['tags'] = preds
subm.to_csv('submission_3net_tif_2.csv', index=False)
#test set score:0.91079

In [63]:
thresh,thresh1,thresh2,thresh3 = 0.2, 0.2, 0.3, 0.25
y_pred = combine_predictions(x_test,y,y1,y2,thresh,thresh1,thresh2,thresh3)
print(y_pred.shape)

(61191, 17)


In [64]:
labels_np = np.array(labels)
preds = [' '.join(labels_np[np.array(y_pred[i,:],bool)]) for i in range(y_pred.shape[0])]
subm = pd.DataFrame()
subm['image_name'] = df_test.values[:,0]
subm['tags'] = preds
subm.to_csv('submission_3net_tif_3.csv', index=False)
#test set score:0.91077

In [65]:
thresh,thresh1,thresh2,thresh3 = 0.15, 0.2, 0.3, 0.3
y_pred = combine_predictions(x_test,y,y1,y2,thresh,thresh1,thresh2,thresh3)
print(y_pred.shape)

(61191, 17)


In [66]:
labels_np = np.array(labels)
preds = [' '.join(labels_np[np.array(y_pred[i,:],bool)]) for i in range(y_pred.shape[0])]
subm = pd.DataFrame()
subm['image_name'] = df_test.values[:,0]
subm['tags'] = preds
subm.to_csv('submission_3net_tif_4.csv', index=False)
#test set score:0.

In [67]:
thresh,thresh1,thresh2,thresh3 = 0.2, 0.2, 0.35, 0.35
y_pred = combine_predictions(x_test,y,y1,y2,thresh,thresh1,thresh2,thresh3)
print(y_pred.shape)

(61191, 17)


In [68]:
labels_np = np.array(labels)
preds = [' '.join(labels_np[np.array(y_pred[i,:],bool)]) for i in range(y_pred.shape[0])]
subm = pd.DataFrame()
subm['image_name'] = df_test.values[:,0]
subm['tags'] = preds
subm.to_csv('submission_3net_tif_5.csv', index=False)
#test set score:0.91083