In [1]:
import os
from IPython.display import clear_output

import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from keras_preprocessing.image import ImageDataGenerator

import tensorflow as tf
from keras.applications.vgg19 import VGG19
from keras.models import Sequential, Model
from keras.layers import Dense, Flatten, GlobalAveragePooling2D


Using TensorFlow backend.


In [2]:
img_size = 64
classes=['haze','primary','agriculture','clear','water','road','habitation','cultivation',
'slash_burn','cloudy','partly_cloudy','blow_down','conventional_mine','artisinal_mine',
'blooming','selective_logging','bare_ground']

In [3]:
dir_path = "../input/planet-understanding-the-amazon-from-space/"
train_path = "../input/planet-understanding-the-amazon-from-space/train-jpg/"
test_path = "../input/planet-understanding-the-amazon-from-space/test-jpg-v2/"

In [4]:
df = pd.read_csv(dir_path+'train_v2.csv')
df['image_name'] = df['image_name'].apply(lambda x: x+'.jpg')
df['tags'] = df['tags'].map(lambda x: x.split(' '))
df.head()

Unnamed: 0,image_name,tags
0,train_0.jpg,"[haze, primary]"
1,train_1.jpg,"[agriculture, clear, primary, water]"
2,train_2.jpg,"[clear, primary]"
3,train_3.jpg,"[clear, primary]"
4,train_4.jpg,"[agriculture, clear, habitation, primary, road]"


In [5]:
train_labels,validation_labels=train_test_split(df,test_size=0.2,random_state=101)

In [6]:
train_labels.head()

Unnamed: 0,image_name,tags
31473,train_31473.jpg,"[clear, primary]"
11841,train_11841.jpg,"[clear, primary]"
15982,train_15982.jpg,"[agriculture, clear, habitation, primary, road]"
33835,train_33835.jpg,"[agriculture, haze, primary, road]"
19101,train_19101.jpg,"[blooming, clear, primary]"


In [7]:
validation_labels.head()

Unnamed: 0,image_name,tags
18906,train_18906.jpg,"[clear, primary, road, water]"
30561,train_30561.jpg,"[agriculture, clear, primary]"
31870,train_31870.jpg,[cloudy]
37561,train_37561.jpg,"[clear, primary]"
607,train_607.jpg,"[agriculture, clear, primary, road]"


In [8]:
aug_generator = ImageDataGenerator(rescale = 1./255, vertical_flip = True,
                                   horizontal_flip = True, height_shift_range = 0.1,
                                   width_shift_range = 0.1, rotation_range =10)

train_gen = aug_generator.flow_from_dataframe(dataframe=train_labels,directory=train_path,
                                              x_col='image_name',y_col='tags',
                                              target_size=(img_size,img_size),
                                              batch_size=64,class_mode='categorical',
                                              classes=classes)
valid_gen = aug_generator.flow_from_dataframe(dataframe=validation_labels,directory=train_path,
                                              x_col='image_name', y_col='tags',
                                              target_size=(img_size,img_size),
                                              batch_size=64,class_mode='categorical',
                                              classes=classes)

Found 32383 validated image filenames belonging to 17 classes.
Found 8096 validated image filenames belonging to 17 classes.


In [9]:
my_new_model=Sequential()

In [10]:
vgg19 = VGG19(include_top=False,input_shape=(img_size,img_size,3), weights='imagenet')
base_model = vgg19
x = base_model.output
x = Flatten()(x)
predictions = Dense(17, activation = 'sigmoid')(x)
my_new_model = Model(inputs = base_model.input, outputs = predictions)
for i in range(0,17) :
    my_new_model.layers[i].trainable = False
for i, layer in enumerate(my_new_model.layers):
    print(i, layer.name, layer.trainable)

Downloading data from https://github.com/fchollet/deep-learning-models/releases/download/v0.1/vgg19_weights_tf_dim_ordering_tf_kernels_notop.h5
0 input_1 False
1 block1_conv1 False
2 block1_conv2 False
3 block1_pool False
4 block2_conv1 False
5 block2_conv2 False
6 block2_pool False
7 block3_conv1 False
8 block3_conv2 False
9 block3_conv3 False
10 block3_conv4 False
11 block3_pool False
12 block4_conv1 False
13 block4_conv2 False
14 block4_conv3 False
15 block4_conv4 False
16 block4_pool False
17 block5_conv1 True
18 block5_conv2 True
19 block5_conv3 True
20 block5_conv4 True
21 block5_pool True
22 flatten_1 True
23 dense_1 True


In [11]:
my_new_model.compile(optimizer='adam',loss='binary_crossentropy', metrics =['accuracy'])

In [12]:
my_new_model.fit_generator(train_gen,steps_per_epoch=506,epochs=8,verbose=1,
                            validation_data=valid_gen,validation_steps=127,workers=4)

Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8


<keras.callbacks.History at 0x7fbe212e6da0>

In [13]:
# serialize model to JSON
model_json = my_new_model.to_json()
with open("model.json", "w") as json_file:
    json_file.write(model_json)
# serialize weights to HDF5
my_new_model.save_weights("model.h5")
print("Saved model to disk")

Saved model to disk


In [14]:
test_img = pd.DataFrame({'image_name':os.listdir(test_path)})
test_img.head()

Unnamed: 0,image_name
0,test_5605.jpg
1,file_11262.jpg
2,file_11452.jpg
3,file_1295.jpg
4,test_21329.jpg


In [15]:
#Checking on test file
no_aug_generator = ImageDataGenerator(rescale = 1./255)

test_gen = no_aug_generator.flow_from_dataframe(dataframe=test_img,directory=test_path,
                                          x_col='image_name',target_size=(img_size,img_size),
                                          batch_size=1,class_mode=None)

Found 61191 validated image filenames.


In [16]:
predictions = my_new_model.predict_generator(test_gen,steps=len(test_gen),verbose=1)
predictions = np.rint(predictions)
predictions.shape



(61191, 17)

In [17]:
df_pred = pd.DataFrame(predictions)
df_pred.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16
0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0
2,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0


In [18]:
tags = []
for i,row in df_pred.iterrows():
    clear_output(wait=True)
    a = np.where(row==1)
    b = np.array(classes)[a[0]]
    tags.append(' '.join(b))
    print(str(i)+' / '+str(df_pred.shape[0]))

8319 / 61191


In [19]:
df_pred['tags'] = tags
df_pred.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,tags
0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,road conventional_mine bare_ground
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,blow_down conventional_mine
2,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,haze road conventional_mine artisinal_mine
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,partly_cloudy conventional_mine
4,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,road conventional_mine bare_ground


In [20]:
df_test = pd.concat([test_img.image_name.apply(lambda x: x[:-4]),df_pred.tags],axis=1)
df_test.head()

Unnamed: 0,image_name,tags
0,test_5605,road conventional_mine bare_ground
1,file_11262,blow_down conventional_mine
2,file_11452,haze road conventional_mine artisinal_mine
3,file_1295,partly_cloudy conventional_mine
4,test_21329,road conventional_mine bare_ground


In [21]:
df_test.to_csv('sub1.csv', index = False)