In [1]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os
import gc

import keras as k
import tensorflow as tf
from keras.models import Sequential, Model
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D, GlobalAveragePooling2D, Activation, BatchNormalization
from keras.callbacks import EarlyStopping
from keras.applications.inception_v3 import InceptionV3
from keras.layers import Input
from keras import backend as K
from keras.optimizers import Adam
from keras.preprocessing.image import ImageDataGenerator
import cv2
from tqdm import tqdm

from sklearn.model_selection import train_test_split
from sklearn.metrics import fbeta_score, precision_score 
from skimage import io,transform

import time

import os
import fnmatch

Using TensorFlow backend.


In [2]:
# Augmentation applied on the fly to training batches: random rotations of up to
# 90 degrees (borders filled by reflection) plus random horizontal and vertical flips.
datagen = ImageDataGenerator(
    rotation_range=90,
    fill_mode='reflect',
    horizontal_flip=True,
    vertical_flip=True)

def multilabelmetrics(y_true,y_pred):
    '''Per-class confusion counts for multi-label predictions.

    y_true and y_pred are 0/1 (or boolean) np arrays of shape
    num_example x num_classes.

    Returns a tuple (total, tp, tn, fp, fn); every entry is an array with one
    count per class, where total is the number of positive ground-truth
    examples of that class.
    '''
    # Complements are formed arithmetically so integer and boolean inputs both work.
    not_true = 1 - y_true
    not_pred = 1 - y_pred

    def column_count(left, right):
        # Number of examples (rows) where both indicators are set, per class.
        return np.sum(left * right, axis=0)

    positives = np.sum(y_true, axis=0)
    true_pos = column_count(y_true, y_pred)
    true_neg = column_count(not_true, not_pred)
    false_pos = column_count(not_true, y_pred)
    false_neg = column_count(y_true, not_pred)
    return positives, true_pos, true_neg, false_pos, false_neg

def combine_predictions(x,y,y1,y2,thresh,thresh1,thresh2,thresh3):
    '''Merge the three models' scores into one 17-column 0/1 label matrix.

    x       -- only x.shape[0] (number of examples) is used
    y       -- weather scores, written to columns 13-16 after thresholding at thresh
    y1      -- major-class scores; column 0 is the "others" (any rare label) score,
               columns 1-6 are thresholded at thresh1 and written to columns 7-12
    y2      -- rare-class scores, thresholded at thresh2 and written to columns 0-6,
               but only for rows whose "others" score in y1 exceeds thresh3

    Returns a uint8 array of shape (x.shape[0], 17).
    '''
    # Row-wise gate for the rare labels, repeated across the 7 rare columns.
    others_fired = y1[:, 0] > thresh3
    rare_gate = np.repeat(others_fired[:, np.newaxis], 7, axis=1)

    rare_part = np.array((y2 > thresh2) * rare_gate, np.uint8)
    major_part = np.array(y1 > thresh1, np.uint8)[:, 1:]
    weather_part = np.array(y > thresh, np.uint8)

    combined = np.zeros((x.shape[0], 17), np.uint8)
    combined[:, 13:] = weather_part
    combined[:, 7:13] = major_part
    combined[:, :7] = rare_part
    return combined

# Shared by every training run below: stop once val_loss has not improved for 2 epochs.
callbacks = [EarlyStopping(monitor='val_loss', patience=2, verbose=0)]

In [3]:
# Read every training image at 64x64 into one float32 array (pixel values divided
# by 255) and encode each row's space-separated tags as a 17-element multi-hot vector.
x_train = np.zeros((40479,64,64,3), np.float32)
y_train = []

df_train = pd.read_csv('train_v2.csv')

# Position in this list is the label's column index in y_train. Later cells rely
# on the order: columns 0-6 are the rarer classes, 7-12 the more common classes
# and the last four the weather labels.
labels = ['blow_down',
 'bare_ground',
 'conventional_mine',
 'blooming',
 'artisinal_mine',
 'selective_logging',
 'slash_burn',
 'cultivation',
 'habitation',
 'road',
 'agriculture',
 'water',
 'primary',
 'partly_cloudy',
 'cloudy',
 'clear',
 'haze',]

label_map = {l: i for i, l in enumerate(labels)}
inv_label_map = {i: l for l, i in label_map.items()}

i=0

for f, tags in tqdm(df_train.values[:40479], miniters=1000):
    img_path = 'train-jpg/{}.jpg'.format(f)
    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread does not raise on a missing or unreadable file; fail here
        # with the offending path instead of an opaque error inside cv2.resize.
        raise FileNotFoundError('could not read image {}'.format(img_path))
    targets = np.zeros(17)
    for t in tags.split(' '):
        targets[label_map[t]] = 1
    # original note: 139 is the minimum size for inception
    x_train[i,:,:,:] = np.array(cv2.resize(img, (64, 64)),np.float32)/255.
    i+=1
    y_train.append(targets)

y_train = np.array(y_train, np.uint8)

print(x_train.shape)
print(y_train.shape)

100%|██████████| 40479/40479 [09:38<00:00, 69.94it/s]

(40479, 64, 64, 3)
(40479, 17)





In [4]:
#subtracting mean
# Mean image over all loaded examples (axis 0), subtracted in place. train_mean is
# kept because later cells reload the images and centre them with the same mean.
train_mean = np.mean(x_train,axis = 0)
x_train -= train_mean

In [5]:
#weather classifier (last four labels - mutually exclusive)
# Hold out 10% of the examples for validation. The fixed seed makes the split (and
# therefore the validation scores and tuned thresholds) reproducible on re-run.
x_train, x_val, y_train_w, y_val_w = train_test_split(
    x_train, y_train[:,-4:], test_size=0.1, random_state=42)
print(x_train.shape)
print(y_train_w.shape)
print(x_val.shape)
print(y_val_w.shape)

(36431, 64, 64, 3)
(36431, 4)
(4048, 64, 64, 3)
(4048, 4)


In [6]:
#using same architecture for all three models
# Three convolutional stages (every 3x3 conv is followed by batch-norm and ReLU,
# every stage ends with 2x2 max-pooling), then two dense blocks and a 4-way
# softmax output for the weather labels.
model = Sequential()
conv_stages = ((32, 48, 48, 48), (64, 64, 64), (128, 128, 128))
for stage_idx, stage_filters in enumerate(conv_stages):
    for conv_idx, n_filters in enumerate(stage_filters):
        if stage_idx == 0 and conv_idx == 0:
            # Only the very first layer declares the input shape.
            conv_layer = Conv2D(n_filters, (3, 3), padding='same', input_shape=(64, 64, 3))
        else:
            conv_layer = Conv2D(n_filters, (3, 3), padding='same')
        model.add(conv_layer)
        model.add(BatchNormalization())
        model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Flatten())
for n_units in (2048, 1024):
    model.add(Dense(n_units))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
model.add(Dense(4, activation='softmax'))

In [7]:
# First training run of the weather model, using Adam with its default settings.
model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])
# steps_per_epoch is documented as an integer; round the batch count up so the
# last, partial batch of each pass over the data is still covered.
model.fit_generator(datagen.flow(x_train, y_train_w, batch_size=128),
                    steps_per_epoch=int(np.ceil(x_train.shape[0] / 128)),
                    epochs=10,
                    validation_data=(x_val, y_val_w),
                    callbacks=callbacks,
                    verbose=2)

Epoch 1/10
182s - loss: 0.4063 - acc: 0.8605 - val_loss: 0.7732 - val_acc: 0.7922
Epoch 2/10
170s - loss: 0.3030 - acc: 0.8926 - val_loss: 0.4070 - val_acc: 0.8584
Epoch 3/10
170s - loss: 0.2789 - acc: 0.8991 - val_loss: 0.3092 - val_acc: 0.8797
Epoch 4/10
170s - loss: 0.2598 - acc: 0.9049 - val_loss: 0.4549 - val_acc: 0.8449
Epoch 5/10
170s - loss: 0.2498 - acc: 0.9082 - val_loss: 0.2317 - val_acc: 0.9096
Epoch 6/10
170s - loss: 0.2399 - acc: 0.9137 - val_loss: 0.2934 - val_acc: 0.8844
Epoch 7/10
170s - loss: 0.2334 - acc: 0.9172 - val_loss: 0.2912 - val_acc: 0.8854
Epoch 8/10
170s - loss: 0.2291 - acc: 0.9160 - val_loss: 0.2570 - val_acc: 0.9069


<keras.callbacks.History at 0x7f8e138ef438>

In [8]:
# Validation sweep: sample-averaged F2 of the weather model at each probability cut-off.
y_pred = model.predict(x_val, batch_size=128)
for thresh in (0.05, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35):
    y_val_bin = np.array(y_pred) > thresh
    print("thresh:", thresh, "\tF2 score:",
          fbeta_score(y_val_w, y_val_bin, beta=2, average='samples'))

thresh: 0.05 	F2 score: 0.9361251294
thresh: 0.1 	F2 score: 0.93676624553
thresh: 0.15 	F2 score: 0.933676830416
thresh: 0.2 	F2 score: 0.930041878411
thresh: 0.25 	F2 score: 0.925024703557
thresh: 0.3 	F2 score: 0.921495623941
thresh: 0.35 	F2 score: 0.917201910408


  'recall', 'true', average, warn_for)


In [9]:
#continue with reduced learning rate
# Re-compile with Adam at lr=0.0005 and keep training the same weather model.
model.compile(loss='categorical_crossentropy',
              optimizer=Adam(lr=0.0005),
              metrics=['accuracy'])
# steps_per_epoch is documented as an integer; round the batch count up so the
# last, partial batch of each pass over the data is still covered.
model.fit_generator(datagen.flow(x_train, y_train_w, batch_size=128),
                    steps_per_epoch=int(np.ceil(x_train.shape[0] / 128)),
                    epochs=10,
                    validation_data=(x_val, y_val_w),
                    callbacks=callbacks,
                    verbose=2)

Epoch 1/10
172s - loss: 0.2214 - acc: 0.9195 - val_loss: 0.2089 - val_acc: 0.9165
Epoch 2/10
170s - loss: 0.2130 - acc: 0.9226 - val_loss: 0.2198 - val_acc: 0.9190
Epoch 3/10
170s - loss: 0.2108 - acc: 0.9233 - val_loss: 0.5115 - val_acc: 0.8505
Epoch 4/10
170s - loss: 0.2103 - acc: 0.9237 - val_loss: 0.1950 - val_acc: 0.9214
Epoch 5/10
170s - loss: 0.2043 - acc: 0.9255 - val_loss: 0.1999 - val_acc: 0.9224
Epoch 6/10
170s - loss: 0.2111 - acc: 0.9226 - val_loss: 0.1980 - val_acc: 0.9254
Epoch 7/10
170s - loss: 0.2034 - acc: 0.9255 - val_loss: 0.2102 - val_acc: 0.9160


<keras.callbacks.History at 0x7f8e0c0e8cf8>

In [10]:
# Validation sweep: sample-averaged F2 of the weather model at each probability cut-off.
y_pred = model.predict(x_val, batch_size=128)
for thresh in (0.05, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35):
    y_val_bin = np.array(y_pred) > thresh
    print("thresh:", thresh, "\tF2 score:",
          fbeta_score(y_val_w, y_val_bin, beta=2, average='samples'))

thresh: 0.05 	F2 score: 0.93630599473
thresh: 0.1 	F2 score: 0.944633152174
thresh: 0.15 	F2 score: 0.945753634952
thresh: 0.2 	F2 score: 0.943787643516
thresh: 0.25 	F2 score: 0.938076416337
thresh: 0.3 	F2 score: 0.932818087709
thresh: 0.35 	F2 score: 0.929718379447


  'recall', 'true', average, warn_for)
  'precision', 'predicted', average, warn_for)


In [11]:
#continue with reduced learning rate
# Re-compile with Adam at lr=0.0002 and keep training the same weather model.
model.compile(loss='categorical_crossentropy',
              optimizer=Adam(lr=0.0002),
              metrics=['accuracy'])
# steps_per_epoch is documented as an integer; round the batch count up so the
# last, partial batch of each pass over the data is still covered.
model.fit_generator(datagen.flow(x_train, y_train_w, batch_size=128),
                    steps_per_epoch=int(np.ceil(x_train.shape[0] / 128)),
                    epochs=10,
                    validation_data=(x_val, y_val_w),
                    callbacks=callbacks,
                    verbose=2)

Epoch 1/10
172s - loss: 0.1923 - acc: 0.9293 - val_loss: 0.1992 - val_acc: 0.9237
Epoch 2/10
171s - loss: 0.1914 - acc: 0.9300 - val_loss: 0.1862 - val_acc: 0.9259
Epoch 3/10
170s - loss: 0.1871 - acc: 0.9312 - val_loss: 0.1868 - val_acc: 0.9242
Epoch 4/10
170s - loss: 0.1831 - acc: 0.9321 - val_loss: 0.1883 - val_acc: 0.9237
Epoch 5/10
170s - loss: 0.1849 - acc: 0.9310 - val_loss: 0.1877 - val_acc: 0.9234


<keras.callbacks.History at 0x7f8df54bfc50>

In [12]:
# Validation sweep: sample-averaged F2 of the weather model at each probability cut-off.
y_pred = model.predict(x_val, batch_size=128)
for thresh in (0.05, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35):
    y_val_bin = np.array(y_pred) > thresh
    print("thresh:", thresh, "\tF2 score:",
          fbeta_score(y_val_w, y_val_bin, beta=2, average='samples'))

thresh: 0.05 	F2 score: 0.941627552701
thresh: 0.1 	F2 score: 0.948378387916
thresh: 0.15 	F2 score: 0.949428289102
thresh: 0.2 	F2 score: 0.948704827781
thresh: 0.25 	F2 score: 0.945740400903
thresh: 0.3 	F2 score: 0.943093591191
thresh: 0.35 	F2 score: 0.938652832675


  'recall', 'true', average, warn_for)
  'precision', 'predicted', average, warn_for)


In [13]:
#continue with reduced learning rate
# Re-compile with Adam at lr=0.0001 and keep training the same weather model.
model.compile(optimizer=Adam(lr=0.0001), loss='categorical_crossentropy', metrics=['accuracy'])
# steps_per_epoch is documented as an integer; round the batch count up so the
# last, partial batch of each pass over the data is still covered.
model.fit_generator(datagen.flow(x_train, y_train_w, batch_size=128),
                    steps_per_epoch=int(np.ceil(x_train.shape[0] / 128)),
                    epochs=10,
                    validation_data=(x_val, y_val_w),
                    callbacks=callbacks,
                    verbose=2)

Epoch 1/10
172s - loss: 0.1783 - acc: 0.9336 - val_loss: 0.1822 - val_acc: 0.9318
Epoch 2/10
171s - loss: 0.1790 - acc: 0.9335 - val_loss: 0.1854 - val_acc: 0.9266
Epoch 3/10
171s - loss: 0.1748 - acc: 0.9338 - val_loss: 0.1824 - val_acc: 0.9259
Epoch 4/10
171s - loss: 0.1761 - acc: 0.9342 - val_loss: 0.1840 - val_acc: 0.9274


<keras.callbacks.History at 0x7f8d952dc6a0>

In [14]:
# Validation sweep: sample-averaged F2 of the weather model at each probability cut-off.
y_pred = model.predict(x_val, batch_size=128)
for thresh in (0.05, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35):
    y_val_bin = np.array(y_pred) > thresh
    print("thresh:", thresh, "\tF2 score:",
          fbeta_score(y_val_w, y_val_bin, beta=2, average='samples'))

thresh: 0.05 	F2 score: 0.946557971014
thresh: 0.1 	F2 score: 0.948837168267
thresh: 0.15 	F2 score: 0.951228119706
thresh: 0.2 	F2 score: 0.94972826087
thresh: 0.25 	F2 score: 0.949063617542
thresh: 0.3 	F2 score: 0.943040654997
thresh: 0.35 	F2 score: 0.941576086957


  'recall', 'true', average, warn_for)


In [None]:
#continue with reduced learning rate
# NOTE(review): this cell has no execution count or output in the saved notebook;
# confirm whether it actually ran before the weather model was saved.
# Re-compile with Adam at lr=0.00005 and keep training the same weather model.
model.compile(optimizer=Adam(lr=0.00005), loss='categorical_crossentropy', metrics=['accuracy'])
# steps_per_epoch is documented as an integer; round the batch count up so the
# last, partial batch of each pass over the data is still covered.
model.fit_generator(datagen.flow(x_train, y_train_w, batch_size=128),
                    steps_per_epoch=int(np.ceil(x_train.shape[0] / 128)),
                    epochs=10,
                    validation_data=(x_val, y_val_w),
                    callbacks=callbacks,
                    verbose=2)

In [16]:
# Validation sweep: sample-averaged F2 of the weather model at each probability cut-off.
y_pred = model.predict(x_val, batch_size=128)
for thresh in (0.05, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35):
    y_val_bin = np.array(y_pred) > thresh
    print("thresh:", thresh, "\tF2 score:",
          fbeta_score(y_val_w, y_val_bin, beta=2, average='samples'))

thresh: 0.05 	F2 score: 0.945916854884
thresh: 0.1 	F2 score: 0.949700322323
thresh: 0.15 	F2 score: 0.951445746283
thresh: 0.2 	F2 score: 0.949792960663
thresh: 0.25 	F2 score: 0.949422407303
thresh: 0.3 	F2 score: 0.945440429136
thresh: 0.35 	F2 score: 0.939270421607


  'recall', 'true', average, warn_for)
  'precision', 'predicted', average, warn_for)


In [17]:
# Persist the trained weather model to disk.
model.save("simple_64_weather")

In [18]:
# Drop the references to the previous split before allocating the full array again.
x_val = []
x_train = []
# Reload every training image at 64x64 (pixel values divided by 255) together with
# its 17-element multi-hot label vector.
x_train = np.zeros((40479,64,64,3), np.float32)
y_train = []

df_train = pd.read_csv('train_v2.csv')

# Position in this list is the label's column index in y_train.
labels = ['blow_down',
 'bare_ground',
 'conventional_mine',
 'blooming',
 'artisinal_mine',
 'selective_logging',
 'slash_burn',
 'cultivation',
 'habitation',
 'road',
 'agriculture',
 'water',
 'primary',
 'partly_cloudy',
 'cloudy',
 'clear',
 'haze',]

label_map = {l: i for i, l in enumerate(labels)}
inv_label_map = {i: l for l, i in label_map.items()}

i=0

for f, tags in tqdm(df_train.values[:40479], miniters=1000):
    img_path = 'train-jpg/{}.jpg'.format(f)
    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread does not raise on a missing or unreadable file; fail here
        # with the offending path instead of an opaque error inside cv2.resize.
        raise FileNotFoundError('could not read image {}'.format(img_path))
    targets = np.zeros(17)
    for t in tags.split(' '):
        targets[label_map[t]] = 1
    # original note: 139 is the minimum size for inception
    x_train[i,:,:,:] = np.array(cv2.resize(img, (64, 64)),np.float32)/255.
    i+=1
    y_train.append(targets)

y_train = np.array(y_train, np.uint8)

print(x_train.shape)
print(y_train.shape)

100%|██████████| 40479/40479 [00:57<00:00, 701.15it/s]

(40479, 64, 64, 3)
(40479, 17)





In [19]:
# Targets for the major-class model: column 0 flags "at least one of the first
# seven labels is set", columns 1-6 copy label columns 7-12 unchanged.
has_rare_label = np.sum(y_train[:, :7], axis=1) > 0
y_train_2 = np.column_stack((has_rare_label, y_train[:, 7:13])).astype(np.uint8)

In [20]:
# Spot-check one example: its full 17-label vector next to the derived 7-column target.
print(y_train[100,:])
print(y_train_2[100,:])

[0 1 0 0 0 0 0 0 0 0 0 1 0 0 0 1 0]
[1 0 0 0 0 1 0]


In [21]:
# Centre the reloaded images with the mean image computed earlier.
x_train -= train_mean
# Hold out 10% of the examples for validation. The fixed seed makes the split (and
# therefore the validation scores and tuned thresholds) reproducible on re-run.
x_train, x_val, y_train_2, y_val_2 = train_test_split(
    x_train, y_train_2, test_size=0.1, random_state=42)
print(x_train.shape)
print(y_train_2.shape)
print(x_val.shape)
print(y_val_2.shape)

(36431, 64, 64, 3)
(36431, 7)
(4048, 64, 64, 3)
(4048, 7)


In [24]:
#model for the more common classes + 1 class for others
# Three convolutional stages (every 3x3 conv is followed by batch-norm and ReLU,
# every stage ends with 2x2 max-pooling), then two dense blocks and seven
# independent sigmoid outputs.
model1 = Sequential()
conv_stages = ((32, 48, 48, 48), (64, 64, 64), (128, 128, 128))
for stage_idx, stage_filters in enumerate(conv_stages):
    for conv_idx, n_filters in enumerate(stage_filters):
        if stage_idx == 0 and conv_idx == 0:
            # Only the very first layer declares the input shape.
            conv_layer = Conv2D(n_filters, (3, 3), padding='same', input_shape=(64,64,3))
        else:
            conv_layer = Conv2D(n_filters, (3, 3), padding='same')
        model1.add(conv_layer)
        model1.add(BatchNormalization())
        model1.add(Activation('relu'))
    model1.add(MaxPooling2D(pool_size=(2, 2)))
model1.add(Flatten())
for n_units in (2048, 1024):
    model1.add(Dense(n_units))
    model1.add(BatchNormalization())
    model1.add(Activation('relu'))
model1.add(Dense(7, activation='sigmoid'))

In [25]:
# First training run of the major-class model, using Adam with its default settings.
model1.compile(loss='binary_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

# steps_per_epoch is documented as an integer; round the batch count up so the
# last, partial batch of each pass over the data is still covered.
model1.fit_generator(datagen.flow(x_train, y_train_2, batch_size=128),
                     steps_per_epoch=int(np.ceil(x_train.shape[0] / 128)),
                     epochs=10,
                     validation_data=(x_val, y_val_2),
                     callbacks=callbacks,
                     verbose=2)

Epoch 1/10
173s - loss: 0.2903 - acc: 0.8815 - val_loss: 0.4686 - val_acc: 0.8462
Epoch 2/10
170s - loss: 0.2455 - acc: 0.8993 - val_loss: 0.3574 - val_acc: 0.8333
Epoch 3/10
171s - loss: 0.2266 - acc: 0.9084 - val_loss: 0.2270 - val_acc: 0.9076
Epoch 4/10
170s - loss: 0.2182 - acc: 0.9122 - val_loss: 0.3393 - val_acc: 0.8865
Epoch 5/10
171s - loss: 0.2097 - acc: 0.9159 - val_loss: 0.2161 - val_acc: 0.9103
Epoch 6/10
171s - loss: 0.2058 - acc: 0.9177 - val_loss: 0.2638 - val_acc: 0.9001
Epoch 7/10
171s - loss: 0.2021 - acc: 0.9198 - val_loss: 0.1982 - val_acc: 0.9206
Epoch 8/10
171s - loss: 0.1968 - acc: 0.9216 - val_loss: 0.2804 - val_acc: 0.9082
Epoch 9/10
171s - loss: 0.1945 - acc: 0.9224 - val_loss: 0.1878 - val_acc: 0.9236
Epoch 10/10
171s - loss: 0.1916 - acc: 0.9237 - val_loss: 0.5316 - val_acc: 0.8945


<keras.callbacks.History at 0x7f8cffe61ba8>

In [26]:
# Validation sweep: sample-averaged F2 of the major-class model at each probability cut-off.
y_pred = model1.predict(x_val, batch_size=128)
for thresh in (0.05, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35):
    y_val_bin = np.array(y_pred) > thresh
    print("thresh:", thresh, "\tF2 score:",
          fbeta_score(y_val_2, y_val_bin, beta=2, average='samples'))

thresh: 0.05 	F2 score: 0.746801426864
thresh: 0.1 	F2 score: 0.763314402342
thresh: 0.15 	F2 score: 0.765838270168
thresh: 0.2 	F2 score: 0.760554254006
thresh: 0.25 	F2 score: 0.755449838243
thresh: 0.3 	F2 score: 0.750917448075
thresh: 0.35 	F2 score: 0.745271867972


  'precision', 'predicted', average, warn_for)
  'recall', 'true', average, warn_for)


In [27]:
#continue with reduced learning rate
# Re-compile with Adam at lr=0.0005 and keep training the same major-class model.
model1.compile(loss='binary_crossentropy',
              optimizer=Adam(lr=0.0005),
              metrics=['accuracy'])
# steps_per_epoch is documented as an integer; round the batch count up so the
# last, partial batch of each pass over the data is still covered.
model1.fit_generator(datagen.flow(x_train, y_train_2, batch_size=128),
                     steps_per_epoch=int(np.ceil(x_train.shape[0] / 128)),
                     epochs=10,
                     validation_data=(x_val, y_val_2),
                     callbacks=callbacks,
                     verbose=2)

Epoch 1/10
173s - loss: 0.1857 - acc: 0.9255 - val_loss: 0.1736 - val_acc: 0.9297
Epoch 2/10
171s - loss: 0.1828 - acc: 0.9275 - val_loss: 0.1717 - val_acc: 0.9309
Epoch 3/10
171s - loss: 0.1805 - acc: 0.9278 - val_loss: 0.1859 - val_acc: 0.9241
Epoch 4/10
171s - loss: 0.1793 - acc: 0.9290 - val_loss: 0.1820 - val_acc: 0.9285
Epoch 5/10
171s - loss: 0.1783 - acc: 0.9290 - val_loss: 0.1695 - val_acc: 0.9315
Epoch 6/10
171s - loss: 0.1773 - acc: 0.9295 - val_loss: 0.1745 - val_acc: 0.9309
Epoch 7/10
171s - loss: 0.1752 - acc: 0.9297 - val_loss: 0.1751 - val_acc: 0.9310
Epoch 8/10
171s - loss: 0.1743 - acc: 0.9304 - val_loss: 0.1967 - val_acc: 0.9174


<keras.callbacks.History at 0x7f8d8efbfc18>

In [28]:
# Validation sweep: sample-averaged F2 of the major-class model at each probability cut-off.
y_pred = model1.predict(x_val, batch_size=128)
for thresh in (0.05, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35):
    y_val_bin = np.array(y_pred) > thresh
    print("thresh:", thresh, "\tF2 score:",
          fbeta_score(y_val_2, y_val_bin, beta=2, average='samples'))

thresh: 0.05 	F2 score: 0.843964081591
thresh: 0.1 	F2 score: 0.861422880754
thresh: 0.15 	F2 score: 0.866151013948
thresh: 0.2 	F2 score: 0.867404932153
thresh: 0.25 	F2 score: 0.863969777581
thresh: 0.3 	F2 score: 0.858761041045
thresh: 0.35 	F2 score: 0.853935328968


  'precision', 'predicted', average, warn_for)
  'recall', 'true', average, warn_for)


In [29]:
#continue with reduced learning rate
# Re-compile with Adam at lr=0.0002 and keep training the same major-class model.
model1.compile(loss='binary_crossentropy',
              optimizer=Adam(lr=0.0002),
              metrics=['accuracy'])
# steps_per_epoch is documented as an integer; round the batch count up so the
# last, partial batch of each pass over the data is still covered.
model1.fit_generator(datagen.flow(x_train, y_train_2, batch_size=128),
                     steps_per_epoch=int(np.ceil(x_train.shape[0] / 128)),
                     epochs=10,
                     validation_data=(x_val, y_val_2),
                     callbacks=callbacks,
                     verbose=2)

Epoch 1/10
173s - loss: 0.1685 - acc: 0.9330 - val_loss: 0.1595 - val_acc: 0.9353
Epoch 2/10
171s - loss: 0.1671 - acc: 0.9334 - val_loss: 0.1609 - val_acc: 0.9349
Epoch 3/10
171s - loss: 0.1655 - acc: 0.9336 - val_loss: 0.1587 - val_acc: 0.9370
Epoch 4/10
170s - loss: 0.1644 - acc: 0.9347 - val_loss: 0.1629 - val_acc: 0.9356
Epoch 5/10
170s - loss: 0.1640 - acc: 0.9343 - val_loss: 0.1586 - val_acc: 0.9370
Epoch 6/10
171s - loss: 0.1640 - acc: 0.9347 - val_loss: 0.1621 - val_acc: 0.9348
Epoch 7/10
171s - loss: 0.1629 - acc: 0.9350 - val_loss: 0.1610 - val_acc: 0.9348
Epoch 8/10
171s - loss: 0.1624 - acc: 0.9353 - val_loss: 0.1589 - val_acc: 0.9362


<keras.callbacks.History at 0x7f8d8d390828>

In [30]:
# Validation sweep: sample-averaged F2 of the major-class model at each probability cut-off.
y_pred = model1.predict(x_val, batch_size=128)
for thresh in (0.05, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35):
    y_val_bin = np.array(y_pred) > thresh
    print("thresh:", thresh, "\tF2 score:",
          fbeta_score(y_val_2, y_val_bin, beta=2, average='samples'))

thresh: 0.05 	F2 score: 0.844870196703
thresh: 0.1 	F2 score: 0.868406765499
thresh: 0.15 	F2 score: 0.877830717057
thresh: 0.2 	F2 score: 0.88164152037
thresh: 0.25 	F2 score: 0.879562725052
thresh: 0.3 	F2 score: 0.87795035327
thresh: 0.35 	F2 score: 0.87598201732


  'precision', 'predicted', average, warn_for)
  'recall', 'true', average, warn_for)


In [31]:
#continue with reduced learning rate
# Re-compile with Adam at lr=0.0001 and keep training the same major-class model.
model1.compile(loss='binary_crossentropy',
              optimizer=Adam(lr=0.0001),
              metrics=['accuracy'])
# steps_per_epoch is documented as an integer; round the batch count up so the
# last, partial batch of each pass over the data is still covered.
model1.fit_generator(datagen.flow(x_train, y_train_2, batch_size=128),
                     steps_per_epoch=int(np.ceil(x_train.shape[0] / 128)),
                     epochs=10,
                     validation_data=(x_val, y_val_2),
                     callbacks=callbacks,
                     verbose=2)

Epoch 1/10
174s - loss: 0.1597 - acc: 0.9363 - val_loss: 0.1564 - val_acc: 0.9371
Epoch 2/10
171s - loss: 0.1592 - acc: 0.9366 - val_loss: 0.1550 - val_acc: 0.9376
Epoch 3/10
171s - loss: 0.1585 - acc: 0.9368 - val_loss: 0.1552 - val_acc: 0.9372
Epoch 4/10
171s - loss: 0.1578 - acc: 0.9369 - val_loss: 0.1565 - val_acc: 0.9377
Epoch 5/10
171s - loss: 0.1575 - acc: 0.9369 - val_loss: 0.1613 - val_acc: 0.9355


<keras.callbacks.History at 0x7f8d8b7d4278>

In [32]:
# Validation sweep: sample-averaged F2 of the major-class model at each probability cut-off.
y_pred = model1.predict(x_val, batch_size=128)
for thresh in (0.05, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35):
    y_val_bin = np.array(y_pred) > thresh
    print("thresh:", thresh, "\tF2 score:",
          fbeta_score(y_val_2, y_val_bin, beta=2, average='samples'))

thresh: 0.05 	F2 score: 0.85135681471
thresh: 0.1 	F2 score: 0.869788309809
thresh: 0.15 	F2 score: 0.876747651648
thresh: 0.2 	F2 score: 0.878477332892
thresh: 0.25 	F2 score: 0.877457525162
thresh: 0.3 	F2 score: 0.874820019708
thresh: 0.35 	F2 score: 0.871043856364


  'precision', 'predicted', average, warn_for)
  'recall', 'true', average, warn_for)


In [33]:
#continue with reduced learning rate
# Re-compile with Adam at lr=0.00005 and keep training the same major-class model.
model1.compile(loss='binary_crossentropy',
              optimizer=Adam(lr=0.00005),
              metrics=['accuracy'])
# steps_per_epoch is documented as an integer; round the batch count up so the
# last, partial batch of each pass over the data is still covered.
model1.fit_generator(datagen.flow(x_train, y_train_2, batch_size=128),
                     steps_per_epoch=int(np.ceil(x_train.shape[0] / 128)),
                     epochs=10,
                     validation_data=(x_val, y_val_2),
                     callbacks=callbacks,
                     verbose=2)

Epoch 1/10
173s - loss: 0.1569 - acc: 0.9374 - val_loss: 0.1548 - val_acc: 0.9383
Epoch 2/10
171s - loss: 0.1558 - acc: 0.9379 - val_loss: 0.1549 - val_acc: 0.9383
Epoch 3/10
171s - loss: 0.1551 - acc: 0.9381 - val_loss: 0.1542 - val_acc: 0.9380
Epoch 4/10
171s - loss: 0.1548 - acc: 0.9385 - val_loss: 0.1546 - val_acc: 0.9384
Epoch 5/10
171s - loss: 0.1544 - acc: 0.9381 - val_loss: 0.1555 - val_acc: 0.9387
Epoch 6/10
171s - loss: 0.1556 - acc: 0.9379 - val_loss: 0.1546 - val_acc: 0.9377


<keras.callbacks.History at 0x7f8d89c17c18>

In [34]:
# Validation sweep: sample-averaged F2 of the major-class model at each probability cut-off.
y_pred = model1.predict(x_val, batch_size=128)
for thresh in (0.05, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35):
    y_val_bin = np.array(y_pred) > thresh
    print("thresh:", thresh, "\tF2 score:",
          fbeta_score(y_val_2, y_val_bin, beta=2, average='samples'))

thresh: 0.05 	F2 score: 0.855414233622
thresh: 0.1 	F2 score: 0.872541230496
thresh: 0.15 	F2 score: 0.879843690059
thresh: 0.2 	F2 score: 0.881157837357
thresh: 0.25 	F2 score: 0.881747899563
thresh: 0.3 	F2 score: 0.879954028018
thresh: 0.35 	F2 score: 0.877321386263


  'precision', 'predicted', average, warn_for)
  'recall', 'true', average, warn_for)


In [35]:
# Sweep the cut-offs once more for the major-class model and remember the one with
# the highest validation F2 (the first one wins on ties).
y_pred = model1.predict(x_val, batch_size=128)
bestthresh, bestF2score = 0, 0
for thresh in (0.05, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35):
    F2score = fbeta_score(y_val_2, np.array(y_pred) > thresh, beta=2, average='samples')
    print("thresh:", thresh, "\tF2 score:", F2score)
    if F2score > bestF2score:
        bestthresh, bestF2score = thresh, F2score

thresh: 0.05 	F2 score: 0.855414233622
thresh: 0.1 	F2 score: 0.872541230496
thresh: 0.15 	F2 score: 0.879843690059
thresh: 0.2 	F2 score: 0.881157837357
thresh: 0.25 	F2 score: 0.881747899563
thresh: 0.3 	F2 score: 0.879954028018
thresh: 0.35 	F2 score: 0.877321386263


  'precision', 'predicted', average, warn_for)
  'recall', 'true', average, warn_for)


In [36]:
# Per-class confusion counts of the major-class model at the best validation threshold.
total, tp, tn, fp, fn = multilabelmetrics(y_val_2, np.array(y_pred) > bestthresh)
d = dict(Total=total, TP=tp, TN=tn, FP=fp, FN=fn)
# Row 0 is the aggregated "others" column, rows 1-6 are labels 7-12.
pd.DataFrame(d, index=['others'] + labels[7:13])

Unnamed: 0,FN,FP,TN,TP,Total
others,101,115,3734,98,199
cultivation,142,249,3355,302,444
habitation,57,208,3475,308,365
road,62,310,2946,730,792
agriculture,86,352,2474,1136,1222
water,133,255,3050,610,743
primary,11,140,136,3761,3772


In [37]:
# Count the examples that carry at least one of the first seven (rare) labels.
rare_hits_per_example = y_train[:, :7].sum(axis=1)
num_rare = (rare_hits_per_example > 0).sum()
print(num_rare)

2180


In [38]:
# Persist the major-class model; the rare-class model below is initialised from this file.
model1.save("simple_64_major")

In [39]:
# Drop the references to the previous split before allocating again.
x_val = []
x_train = []
# Load only the images that carry at least one of the first seven (rare) labels;
# num_rare is the count of such rows, so the array is sized exactly.
x_train = np.zeros((num_rare,64,64,3), np.float32)
y_train = []

df_train = pd.read_csv('train_v2.csv')

# Position in this list is the label's column index in y_train.
labels = ['blow_down',
 'bare_ground',
 'conventional_mine',
 'blooming',
 'artisinal_mine',
 'selective_logging',
 'slash_burn',
 'cultivation',
 'habitation',
 'road',
 'agriculture',
 'water',
 'primary',
 'partly_cloudy',
 'cloudy',
 'clear',
 'haze',]

label_map = {l: i for i, l in enumerate(labels)}
inv_label_map = {i: l for l, i in label_map.items()}

# i is the write cursor into x_train; it only advances for rows that are kept.
i=0

for f, tags in tqdm(df_train.values[:40479], miniters=1000):
    img_path = 'train-jpg/{}.jpg'.format(f)
    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread does not raise on a missing or unreadable file; fail here
        # with the offending path instead of an opaque error inside cv2.resize.
        raise FileNotFoundError('could not read image {}'.format(img_path))
    targets = np.zeros(17)
    for t in tags.split(' '):
        targets[label_map[t]] = 1
    if(np.sum(targets[:7])>0):
        # original note: 139 is the minimum size for inception
        x_train[i,:,:,:] = np.array(cv2.resize(img, (64, 64)),np.float32)/255.
        i+=1
        y_train.append(targets)

y_train = np.array(y_train, np.uint8)

print(x_train.shape)
print(y_train.shape)

100%|██████████| 40479/40479 [00:53<00:00, 756.77it/s]

(2180, 64, 64, 3)
(2180, 17)





In [40]:
# Centre the rare-label images with the mean image computed earlier.
x_train -= train_mean
# Hold out 10% of the rare-label examples for validation, keeping only the first
# seven label columns as targets. The fixed seed makes the split reproducible.
x_train, x_val, y_train_3, y_val_3 = train_test_split(
    x_train, y_train[:,:7], test_size=0.1, random_state=42)
print(x_train.shape)
print(y_train_3.shape)
print(x_val.shape)
print(y_val_3.shape)

(1962, 64, 64, 3)
(1962, 7)
(218, 64, 64, 3)
(218, 7)


In [41]:
#model for the rarer classes
from keras.models import load_model

# Start from the saved major-class model and leave only its final layer trainable.
model2 = load_model("simple_64_major")
*frozen_layers, output_layer = model2.layers
for layer in frozen_layers:
    layer.trainable = False
output_layer.trainable = True

In [42]:
# First fine-tuning run of the rare-class model, using Adam with its default settings.
model2.compile(loss='binary_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

#more steps per epoch to compensate for fewer images
# steps_per_epoch is documented as an integer, so the 10x batch count is rounded up.
model2.fit_generator(datagen.flow(x_train, y_train_3, batch_size=128),
                     steps_per_epoch=int(np.ceil(10 * x_train.shape[0] / 128)),
                     epochs=10,
                     validation_data=(x_val, y_val_3),
                     callbacks=callbacks,
                     verbose=2)

Epoch 1/10
31s - loss: 0.5756 - acc: 0.8271 - val_loss: 0.2896 - val_acc: 0.8847
Epoch 2/10
27s - loss: 0.2586 - acc: 0.9021 - val_loss: 0.2594 - val_acc: 0.8958
Epoch 3/10
27s - loss: 0.2296 - acc: 0.9095 - val_loss: 0.2473 - val_acc: 0.9017
Epoch 4/10
27s - loss: 0.2179 - acc: 0.9129 - val_loss: 0.2423 - val_acc: 0.9017
Epoch 5/10
27s - loss: 0.2144 - acc: 0.9129 - val_loss: 0.2367 - val_acc: 0.9076
Epoch 6/10
27s - loss: 0.2088 - acc: 0.9157 - val_loss: 0.2359 - val_acc: 0.9069
Epoch 7/10
27s - loss: 0.2043 - acc: 0.9175 - val_loss: 0.2283 - val_acc: 0.9076
Epoch 8/10
27s - loss: 0.2039 - acc: 0.9175 - val_loss: 0.2277 - val_acc: 0.9069
Epoch 9/10
27s - loss: 0.2000 - acc: 0.9192 - val_loss: 0.2365 - val_acc: 0.9063
Epoch 10/10
27s - loss: 0.1989 - acc: 0.9200 - val_loss: 0.2310 - val_acc: 0.9102


<keras.callbacks.History at 0x7f8d873189b0>

In [43]:
# Validation sweep: sample-averaged F2 of the rare-class model at each probability cut-off.
y_pred = model2.predict(x_val, batch_size=128)
for thresh in (0.05, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35):
    y_val_bin = np.array(y_pred) > thresh
    print("thresh:", thresh, "\tF2 score:",
          fbeta_score(y_val_3, y_val_bin, beta=2, average='samples'))

thresh: 0.05 	F2 score: 0.697888948462
thresh: 0.1 	F2 score: 0.732765068774
thresh: 0.15 	F2 score: 0.753088718112
thresh: 0.2 	F2 score: 0.728170466129
thresh: 0.25 	F2 score: 0.710228100136
thresh: 0.3 	F2 score: 0.668450560652
thresh: 0.35 	F2 score: 0.652395514781


  'precision', 'predicted', average, warn_for)


In [44]:
# Re-compile with Adam at lr=0.0005 and keep fine-tuning the rare-class model.
model2.compile(loss='binary_crossentropy',
              optimizer=Adam(lr=0.0005),
              metrics=['accuracy'])
# Ten times the per-pass batch count per epoch (few images); steps_per_epoch is
# documented as an integer, so the value is rounded up.
model2.fit_generator(datagen.flow(x_train, y_train_3, batch_size=128),
                     steps_per_epoch=int(np.ceil(10 * x_train.shape[0] / 128)),
                     epochs=10,
                     validation_data=(x_val, y_val_3),
                     callbacks=callbacks,
                     verbose=2)

Epoch 1/10
31s - loss: 0.1967 - acc: 0.9202 - val_loss: 0.2275 - val_acc: 0.9076
Epoch 2/10
27s - loss: 0.1950 - acc: 0.9210 - val_loss: 0.2262 - val_acc: 0.9089
Epoch 3/10
27s - loss: 0.1951 - acc: 0.9209 - val_loss: 0.2262 - val_acc: 0.9076
Epoch 4/10
27s - loss: 0.1935 - acc: 0.9207 - val_loss: 0.2293 - val_acc: 0.9096
Epoch 5/10
27s - loss: 0.1923 - acc: 0.9211 - val_loss: 0.2249 - val_acc: 0.9102
Epoch 6/10
27s - loss: 0.1924 - acc: 0.9220 - val_loss: 0.2262 - val_acc: 0.9096
Epoch 7/10
27s - loss: 0.1912 - acc: 0.9218 - val_loss: 0.2251 - val_acc: 0.9076
Epoch 8/10
27s - loss: 0.1897 - acc: 0.9217 - val_loss: 0.2255 - val_acc: 0.9089


<keras.callbacks.History at 0x7f8d87167390>

In [45]:
# Validation sweep: sample-averaged F2 of the rare-class model at each probability cut-off.
y_pred = model2.predict(x_val, batch_size=128)
for thresh in (0.05, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35):
    y_val_bin = np.array(y_pred) > thresh
    print("thresh:", thresh, "\tF2 score:",
          fbeta_score(y_val_3, y_val_bin, beta=2, average='samples'))

thresh: 0.05 	F2 score: 0.706999549889
thresh: 0.1 	F2 score: 0.737734487734
thresh: 0.15 	F2 score: 0.74578021367
thresh: 0.2 	F2 score: 0.73107963647
thresh: 0.25 	F2 score: 0.700252856216
thresh: 0.3 	F2 score: 0.681156254551
thresh: 0.35 	F2 score: 0.651849424785


  'precision', 'predicted', average, warn_for)


In [46]:
# Re-compile with Adam at lr=0.0002 and keep fine-tuning the rare-class model.
model2.compile(loss='binary_crossentropy',
              optimizer=Adam(lr=0.0002),
              metrics=['accuracy'])
# Ten times the per-pass batch count per epoch (few images); steps_per_epoch is
# documented as an integer, so the value is rounded up.
model2.fit_generator(datagen.flow(x_train, y_train_3, batch_size=128),
                     steps_per_epoch=int(np.ceil(10 * x_train.shape[0] / 128)),
                     epochs=10,
                     validation_data=(x_val, y_val_3),
                     callbacks=callbacks,
                     verbose=2)

Epoch 1/10
30s - loss: 0.1901 - acc: 0.9223 - val_loss: 0.2267 - val_acc: 0.9056
Epoch 2/10
27s - loss: 0.1888 - acc: 0.9229 - val_loss: 0.2272 - val_acc: 0.9069
Epoch 3/10
27s - loss: 0.1882 - acc: 0.9228 - val_loss: 0.2256 - val_acc: 0.9083
Epoch 4/10
27s - loss: 0.1889 - acc: 0.9222 - val_loss: 0.2252 - val_acc: 0.9083
Epoch 5/10
27s - loss: 0.1883 - acc: 0.9232 - val_loss: 0.2262 - val_acc: 0.9076
Epoch 6/10
27s - loss: 0.1885 - acc: 0.9225 - val_loss: 0.2252 - val_acc: 0.9089
Epoch 7/10
27s - loss: 0.1892 - acc: 0.9222 - val_loss: 0.2243 - val_acc: 0.9083
Epoch 8/10
27s - loss: 0.1888 - acc: 0.9228 - val_loss: 0.2264 - val_acc: 0.9063
Epoch 9/10
27s - loss: 0.1893 - acc: 0.9222 - val_loss: 0.2253 - val_acc: 0.9076
Epoch 10/10
27s - loss: 0.1881 - acc: 0.9228 - val_loss: 0.2247 - val_acc: 0.9089


<keras.callbacks.History at 0x7f8d86fa5cc0>

In [47]:
# Validation sweep: sample-averaged F2 of the rare-class model at each probability cut-off.
y_pred = model2.predict(x_val, batch_size=128)
for thresh in (0.05, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35):
    y_val_bin = np.array(y_pred) > thresh
    print("thresh:", thresh, "\tF2 score:",
          fbeta_score(y_val_3, y_val_bin, beta=2, average='samples'))

thresh: 0.05 	F2 score: 0.710094059865
thresh: 0.1 	F2 score: 0.734348729762
thresh: 0.15 	F2 score: 0.756947754081
thresh: 0.2 	F2 score: 0.737960370414
thresh: 0.25 	F2 score: 0.706150628169
thresh: 0.3 	F2 score: 0.67249162662
thresh: 0.35 	F2 score: 0.658511722732


  'precision', 'predicted', average, warn_for)


In [48]:
# Make the last seven layers trainable again and display them for confirmation.
head_layers = model2.layers[-7:]
for layer in head_layers:
    layer.trainable = True
head_layers

[<keras.layers.core.Dense at 0x7f8d8917aba8>,
 <keras.layers.normalization.BatchNormalization at 0x7f8d8912d828>,
 <keras.layers.core.Activation at 0x7f8d8913ef28>,
 <keras.layers.core.Dense at 0x7f8d8913ed30>,
 <keras.layers.normalization.BatchNormalization at 0x7f8d890a9048>,
 <keras.layers.core.Activation at 0x7f8d890b94a8>,
 <keras.layers.core.Dense at 0x7f8d8904d780>]

In [49]:
# Re-compile with Adam at lr=0.0001 and continue training the rare-class model
# with whichever layers are currently marked trainable.
model2.compile(loss='binary_crossentropy',
              optimizer=Adam(lr=0.0001),
              metrics=['accuracy'])
# Ten times the per-pass batch count per epoch (few images); steps_per_epoch is
# documented as an integer, so the value is rounded up.
model2.fit_generator(datagen.flow(x_train, y_train_3, batch_size=128),
                     steps_per_epoch=int(np.ceil(10 * x_train.shape[0] / 128)),
                     epochs=10,
                     validation_data=(x_val, y_val_3),
                     callbacks=callbacks,
                     verbose=2)

Epoch 1/10
34s - loss: 0.1819 - acc: 0.9255 - val_loss: 0.2190 - val_acc: 0.9069
Epoch 2/10
31s - loss: 0.1706 - acc: 0.9297 - val_loss: 0.2162 - val_acc: 0.9089
Epoch 3/10
31s - loss: 0.1625 - acc: 0.9327 - val_loss: 0.2115 - val_acc: 0.9115
Epoch 4/10
31s - loss: 0.1538 - acc: 0.9363 - val_loss: 0.2102 - val_acc: 0.9109
Epoch 5/10
31s - loss: 0.1472 - acc: 0.9396 - val_loss: 0.2117 - val_acc: 0.9128
Epoch 6/10
31s - loss: 0.1399 - acc: 0.9425 - val_loss: 0.2106 - val_acc: 0.9135
Epoch 7/10
31s - loss: 0.1328 - acc: 0.9460 - val_loss: 0.2110 - val_acc: 0.9148


<keras.callbacks.History at 0x7f8d86a55358>

In [50]:
# Validation sweep: sample-averaged F2 of the rare-class model at each probability cut-off.
y_pred = model2.predict(x_val, batch_size=128)
for thresh in (0.05, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35):
    y_val_bin = np.array(y_pred) > thresh
    print("thresh:", thresh, "\tF2 score:",
          fbeta_score(y_val_3, y_val_bin, beta=2, average='samples'))

thresh: 0.05 	F2 score: 0.775207845162
thresh: 0.1 	F2 score: 0.779803275216
thresh: 0.15 	F2 score: 0.772800912136
thresh: 0.2 	F2 score: 0.753842487787
thresh: 0.25 	F2 score: 0.736695261925
thresh: 0.3 	F2 score: 0.726355958007
thresh: 0.35 	F2 score: 0.69730397024


  'precision', 'predicted', average, warn_for)


In [51]:
# Re-compile with Adam at lr=0.00005 and continue training the rare-class model.
model2.compile(loss='binary_crossentropy',
              optimizer=Adam(lr=0.00005),
              metrics=['accuracy'])
# Ten times the per-pass batch count per epoch (few images); steps_per_epoch is
# documented as an integer, so the value is rounded up.
model2.fit_generator(datagen.flow(x_train, y_train_3, batch_size=128),
                     steps_per_epoch=int(np.ceil(10 * x_train.shape[0] / 128)),
                     epochs=10,
                     validation_data=(x_val, y_val_3),
                     callbacks=callbacks,
                     verbose=2)

Epoch 1/10
34s - loss: 0.1276 - acc: 0.9486 - val_loss: 0.2127 - val_acc: 0.9102
Epoch 2/10
31s - loss: 0.1261 - acc: 0.9491 - val_loss: 0.2124 - val_acc: 0.9122
Epoch 3/10
31s - loss: 0.1241 - acc: 0.9500 - val_loss: 0.2140 - val_acc: 0.9109
Epoch 4/10
31s - loss: 0.1198 - acc: 0.9519 - val_loss: 0.2154 - val_acc: 0.9135
Epoch 5/10
31s - loss: 0.1166 - acc: 0.9542 - val_loss: 0.2184 - val_acc: 0.9102


<keras.callbacks.History at 0x7f8d86462908>

In [52]:
# Sweep the cut-offs for the rare-class model and remember the one with the highest
# validation F2 (the first one wins on ties).
y_pred = model2.predict(x_val, batch_size=128)
bestthresh, bestF2score = 0, 0
for thresh in (0.05, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35):
    F2score = fbeta_score(y_val_3, np.array(y_pred) > thresh, beta=2, average='samples')
    print("thresh:", thresh, "\tF2 score:", F2score)
    if F2score > bestF2score:
        bestthresh, bestF2score = thresh, F2score

thresh: 0.05 	F2 score: 0.774461522168
thresh: 0.1 	F2 score: 0.771036048559
thresh: 0.15 	F2 score: 0.755524610456
thresh: 0.2 	F2 score: 0.751576214306
thresh: 0.25 	F2 score: 0.735603081933
thresh: 0.3 	F2 score: 0.705329838357
thresh: 0.35 	F2 score: 0.689311198486


  'precision', 'predicted', average, warn_for)


In [53]:
# Per-class confusion counts of the rare-class model at the best validation threshold.
total, tp, tn, fp, fn = multilabelmetrics(y_val_3, np.array(y_pred) > bestthresh)
d = dict(Total=total, TP=tp, TN=tn, FP=fp, FN=fn)
pd.DataFrame(d, index=labels[:7])

Unnamed: 0,FN,FP,TN,TP,Total
blow_down,2,20,183,13,15
bare_ground,1,72,60,85,86
conventional_mine,1,17,192,8,9
blooming,3,27,155,33,36
artisinal_mine,2,30,154,32,34
selective_logging,5,57,133,23,28
slash_burn,1,46,157,14,15


In [54]:
# Persist the fine-tuned rare-class model to disk.
model2.save("simple_64_rare")

In [55]:
# F2 score on training set: reload all 40479 training images (64x64, scaled to
# [0, 1]) together with their 17-dimensional multi-hot label vectors.

# Drop the references to the previous arrays before allocating the new one.
x_val = []
x_train = []
x_train = np.zeros((40479, 64, 64, 3), np.float32)
y_train = []

df_train = pd.read_csv('train_v2.csv')

# Column order of the label vector; combine_predictions writes its output in
# this same order.
labels = ['blow_down',
          'bare_ground',
          'conventional_mine',
          'blooming',
          'artisinal_mine',
          'selective_logging',
          'slash_burn',
          'cultivation',
          'habitation',
          'road',
          'agriculture',
          'water',
          'primary',
          'partly_cloudy',
          'cloudy',
          'clear',
          'haze']

label_map = {l: i for i, l in enumerate(labels)}
inv_label_map = {i: l for l, i in label_map.items()}

for i, (f, tags) in enumerate(tqdm(df_train.values[:40479], miniters=1000)):
    path = 'train-jpg/{}.jpg'.format(f)
    img = cv2.imread(path)
    # cv2.imread signals a missing/unreadable file by returning None instead of
    # raising; fail here with the offending path rather than inside cv2.resize.
    if img is None:
        raise IOError('could not read image: {}'.format(path))

    # Multi-hot encode the space-separated tag string.
    targets = np.zeros(17)
    for t in tags.split(' '):
        targets[label_map[t]] = 1

    # Resize to the 64x64 input used here and scale pixel values to [0, 1].
    # (The original comment noted 139 as the minimum size for Inception; that
    # size is not used in this cell.)
    x_train[i, :, :, :] = np.array(cv2.resize(img, (64, 64)), np.float32) / 255.
    y_train.append(targets)

y_train = np.array(y_train, np.uint8)

print(x_train.shape)
print(y_train.shape)

100%|██████████| 40479/40479 [00:57<00:00, 705.11it/s]

(40479, 64, 64, 3)
(40479, 17)





In [56]:
# Subtract train_mean (defined earlier in the notebook) from the reloaded images.
# NOTE(review): presumably the same mean that was used when the networks were trained - confirm.
# NOTE(review): this is an in-place update, so re-running the cell subtracts the mean a second time.
x_train -= train_mean

In [57]:
# Run the three networks over the centered training images, in the order
# model -> y, model1 -> y1, model2 -> y2.
y, y1, y2 = [net.predict(x_train, batch_size=128) for net in (model, model1, model2)]

In [58]:
# Sanity check: one output shape per network, one per line.
print(y.shape, y1.shape, y2.shape, sep="\n")

(40479, 4)
(40479, 7)
(40479, 7)


In [59]:
# Coarse grid search over the four thresholds taken by combine_predictions.
# Each entry appended to f2scorelist is
# [thresh, thresh1, thresh2, thresh3, samples-averaged F2 on (x_train, y_train)].
# NOTE(review): the scores are computed on the training arrays loaded above; if
# the networks were fit on (part of) this data the values are optimistic - confirm.
f2scorelist = []
coarse_grid = [0.05,0.1,0.15,0.2,0.25,0.3]
for thresh in coarse_grid:
    for thresh1 in coarse_grid:
        for thresh2 in coarse_grid:
            for thresh3 in coarse_grid:
                y_pred = combine_predictions(x_train, y, y1, y2,
                                             thresh, thresh1, thresh2, thresh3)
                score = fbeta_score(y_train, y_pred, beta=2, average='samples')
                f2scorelist.append([thresh, thresh1, thresh2, thresh3, score])

In [60]:
# Rank the coarse-grid rows by their F2 score (index 4), best first, and show the top ten.
f2scorelist = sorted(f2scorelist, key=lambda row: row[4], reverse=True)
print(f2scorelist[:10])

[[0.15, 0.25, 0.3, 0.25, 0.92987068926812499], [0.15, 0.25, 0.3, 0.2, 0.92981925261849585], [0.15, 0.25, 0.25, 0.25, 0.92979034269120875], [0.15, 0.25, 0.3, 0.3, 0.92978847562965194], [0.15, 0.25, 0.25, 0.3, 0.92973720344823729], [0.15, 0.2, 0.3, 0.25, 0.92972472618296254], [0.15, 0.25, 0.25, 0.2, 0.92968429441344314], [0.15, 0.2, 0.3, 0.2, 0.92966962429766165], [0.15, 0.25, 0.2, 0.25, 0.92965995026552828], [0.2, 0.25, 0.3, 0.25, 0.92965857834047638]]


In [69]:
# Finer grid search, with a separate candidate list per threshold.
# Rows have the same layout as in the coarse search:
# [thresh, thresh1, thresh2, thresh3, samples-averaged F2 on (x_train, y_train)].
fine_grid = [0.12,0.14,0.15,0.16,0.18,0.2,0.22]
fine_grid1 = [0.18,0.2,0.22,0.24,0.25,0.26,0.28]
fine_grid2 = [0.3,0.32,0.34,0.36,0.38,0.4,0.42]
fine_grid3 = [0.18,0.2,0.22,0.24,0.26,0.28,0.3]

f2scorelistfiner = []
for thresh in fine_grid:
    for thresh1 in fine_grid1:
        for thresh2 in fine_grid2:
            for thresh3 in fine_grid3:
                y_pred = combine_predictions(x_train, y, y1, y2,
                                             thresh, thresh1, thresh2, thresh3)
                score = fbeta_score(y_train, y_pred, beta=2, average='samples')
                f2scorelistfiner.append([thresh, thresh1, thresh2, thresh3, score])

In [70]:
# Rank the fine-grid rows by F2 score (index 4), best first, and display the top ten as a table.
f2scorelistfiner = sorted(f2scorelistfiner, key=lambda row: row[4], reverse=True)
pd.DataFrame(f2scorelistfiner[:10])

Unnamed: 0,0,1,2,3,4
0,0.18,0.22,0.38,0.22,0.930067
1,0.18,0.22,0.42,0.22,0.930053
2,0.18,0.24,0.38,0.22,0.930052
3,0.18,0.22,0.4,0.22,0.930051
4,0.16,0.22,0.38,0.22,0.930042
5,0.18,0.22,0.36,0.22,0.930039
6,0.18,0.24,0.42,0.22,0.930036
7,0.18,0.24,0.4,0.22,0.930034
8,0.18,0.25,0.38,0.22,0.930033
9,0.16,0.24,0.38,0.22,0.930028


In [71]:
# Show the first four entries (thresh, thresh1, thresh2, thresh3) of the first
# fine-grid row; index 4 of each row holds the F2 score.
f2scorelistfiner[0][:4]

[0.18, 0.22, 0.38, 0.22]

In [72]:
# Test set: load every image named in the sample submission file, resized to
# 64x64 and scaled to [0, 1].

# Drop the references to the training/validation arrays before allocating x_test.
x_train = []
x_val = []
x_test = np.zeros((61191, 64, 64, 3), np.float32)
y_train = []

df_test = pd.read_csv('sample_submission_v2.csv')

# Only the first column (the image name) is needed; the second column of the
# sample submission is not used when loading.
for i, f in enumerate(tqdm(df_test.values[:, 0], miniters=1000)):
    path = 'test-jpg/{}.jpg'.format(f)
    img = cv2.imread(path)
    # cv2.imread signals a missing/unreadable file by returning None instead of
    # raising; fail here with the offending path rather than inside cv2.resize.
    if img is None:
        raise IOError('could not read image: {}'.format(path))
    x_test[i, :, :, :] = np.array(cv2.resize(img, (64, 64)), np.float32) / 255.
print(x_test.shape)

100%|██████████| 61191/61191 [08:12<00:00, 124.20it/s]

(61191, 64, 64, 3)





In [73]:
# Center the test images with train_mean (in place), then run the three
# networks over them: model -> y, model1 -> y1, model2 -> y2.
x_test -= train_mean

y, y1, y2 = [net.predict(x_test, batch_size=128) for net in (model, model1, model2)]

In [74]:
# Combine the three networks' test predictions using the thresholds stored in
# row 0 of f2scorelistfiner.
cutoffs = f2scorelistfiner[0][:4]
thresh, thresh1, thresh2, thresh3 = cutoffs
y_pred = combine_predictions(x_test, y, y1, y2, *cutoffs)
print(y_pred.shape)

(61191, 17)


In [75]:
# Turn each row of the 0/1 prediction matrix into a space-separated tag string
# and write the submission file.
labels_np = np.array(labels)
preds = [' '.join(labels_np[row.astype(bool)]) for row in y_pred]
subm = pd.DataFrame({'image_name': df_test.values[:, 0], 'tags': preds})
subm.to_csv('submission_64_3net_1.csv', index=False)
# test set score: 0.92298

In [76]:
# Combine the three networks' test predictions using the thresholds stored in
# row 1 of f2scorelistfiner.
cutoffs = f2scorelistfiner[1][:4]
thresh, thresh1, thresh2, thresh3 = cutoffs
y_pred = combine_predictions(x_test, y, y1, y2, *cutoffs)
print(y_pred.shape)

(61191, 17)


In [77]:
# Turn each row of the 0/1 prediction matrix into a space-separated tag string
# and write the submission file.
labels_np = np.array(labels)
preds = [' '.join(labels_np[row.astype(bool)]) for row in y_pred]
subm = pd.DataFrame({'image_name': df_test.values[:, 0], 'tags': preds})
subm.to_csv('submission_64_3net_2.csv', index=False)
# test set score: 0.92300

In [78]:
# Combine the three networks' test predictions using the thresholds stored in
# row 2 of f2scorelistfiner.
cutoffs = f2scorelistfiner[2][:4]
thresh, thresh1, thresh2, thresh3 = cutoffs
y_pred = combine_predictions(x_test, y, y1, y2, *cutoffs)
print(y_pred.shape)

(61191, 17)


In [79]:
# Turn each row of the 0/1 prediction matrix into a space-separated tag string
# and write the submission file.
labels_np = np.array(labels)
preds = [' '.join(labels_np[row.astype(bool)]) for row in y_pred]
subm = pd.DataFrame({'image_name': df_test.values[:, 0], 'tags': preds})
subm.to_csv('submission_64_3net_3.csv', index=False)
# test set score: 0.92285

In [80]:
# Combine the three networks' test predictions using the thresholds stored in
# row 3 of f2scorelistfiner.
cutoffs = f2scorelistfiner[3][:4]
thresh, thresh1, thresh2, thresh3 = cutoffs
y_pred = combine_predictions(x_test, y, y1, y2, *cutoffs)
print(y_pred.shape)

(61191, 17)


In [81]:
# Turn each row of the 0/1 prediction matrix into a space-separated tag string
# and write the submission file.
labels_np = np.array(labels)
preds = [' '.join(labels_np[row.astype(bool)]) for row in y_pred]
subm = pd.DataFrame({'image_name': df_test.values[:, 0], 'tags': preds})
subm.to_csv('submission_64_3net_4.csv', index=False)
# test set score: 0.92300

In [82]:
# Combine the three networks' test predictions using the thresholds stored in
# row 4 of f2scorelistfiner.
cutoffs = f2scorelistfiner[4][:4]
thresh, thresh1, thresh2, thresh3 = cutoffs
y_pred = combine_predictions(x_test, y, y1, y2, *cutoffs)
print(y_pred.shape)

(61191, 17)


In [83]:
# Turn each row of the 0/1 prediction matrix into a space-separated tag string
# and write the submission file.
labels_np = np.array(labels)
preds = [' '.join(labels_np[row.astype(bool)]) for row in y_pred]
subm = pd.DataFrame({'image_name': df_test.values[:, 0], 'tags': preds})
subm.to_csv('submission_64_3net_5.csv', index=False)
# test set score: 0.92308