In [1]:
import matplotlib
import matplotlib.pyplot as plt

import numpy as np
import os

from skimage.io import imread, imsave

from keras import applications
from keras import optimizers
from keras.models import Sequential, Model
from keras.layers import Activation, Dense, Dropout, Flatten, GlobalAveragePooling2D
from keras.utils import to_categorical
from keras.preprocessing.image import ImageDataGenerator
from keras.optimizers import Adam

from sklearn.metrics import fbeta_score, precision_score
from sklearn.model_selection import train_test_split
import pandas as pd
import cv2

%matplotlib inline

In [2]:
source_dir = os.path.join('train-jpg', 'train-jpg')

In [6]:
x_train = []
y_train = []

train_data = pd.read_csv('train_v2.csv/train_v2.csv')

label_enum = 0
label_map = {}
labels = {}
for tags in train_data['tags']:
    classes = tags.split(" ")
    for label in classes:
        if label not in label_map and label != "cloudy" and label != "partly_cloudy":
            label_map[label] = label_enum
            labels[label_enum] = label
            label_enum = label_enum + 1
    if label_enum == 17:
        break

for image, tags in train_data.values:
    image_path = source_dir+"/"+image+".jpg"
    img = cv2.imread(image_path)
    img = cv2.resize(img, (139, 139))
    one_hot_labels = np.zeros(len(label_map))
    cloudy = False
    for label in tags.split(' '):
        if label == "cloudy" or label == "partly_cloudy":
            cloudy = True
        else:
            one_hot_labels[label_map[label]] = 1
    if not cloudy:
        x_train.append(img/255.0)
        y_train.append(one_hot_labels)
    
y_train = np.array(y_train, np.uint8)
x_train = np.array(x_train, np.float32)

print(x_train.shape)
print(y_train.shape)

['blooming', 'haze', 'clear', 'water', 'primary', 'cultivation', 'selective_logging', 'blow_down', 'agriculture', 'habitation', 'slash_burn', 'bare_ground', 'road', 'conventional_mine', 'artisinal_mine', 'partly_cloudy']
(31129, 139, 139, 3)
(31129, 15)


In [7]:
x_train, x_val, y_train, y_val = train_test_split(x_train, y_train, test_size =0.2)
x_test, x_val, y_test, y_val = train_test_split(x_val, y_val, test_size =0.5)

In [8]:
pretrained_model = applications.InceptionV3(include_top=False, weights='imagenet', input_shape = (139, 139, 3))
pretrained_model.trainable = False

In [9]:
input_layer = pretrained_model.input
model_output = pretrained_model.output
pooling_layer = GlobalAveragePooling2D()(model_output)
dense_layer = Dense(2048, activation='relu')(pooling_layer)
dropout_layer = Dropout(0.5)(dense_layer)
output_layer = Dense(17, activation='sigmoid')(dropout_layer)
model = Model(inputs=input_layer, outputs=output_layer)

In [10]:
optimizer = Adam()
model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])
history = model.fit(x_train, y_train, validation_data=(x_val, y_val), batch_size=128,verbose=2, epochs=10, shuffle=True)

Epoch 1/5
195/195 - 70s - loss: 0.1541 - accuracy: 0.2131 - val_loss: 0.1302 - val_accuracy: 0.0694 - 70s/epoch - 361ms/step
Epoch 2/5
195/195 - 65s - loss: 0.1224 - accuracy: 0.2196 - val_loss: 0.1292 - val_accuracy: 0.5149 - 65s/epoch - 333ms/step
Epoch 3/5
195/195 - 65s - loss: 0.1157 - accuracy: 0.2187 - val_loss: 0.1246 - val_accuracy: 0.1362 - 65s/epoch - 332ms/step
Epoch 4/5
195/195 - 65s - loss: 0.1108 - accuracy: 0.2124 - val_loss: 0.1248 - val_accuracy: 0.1580 - 65s/epoch - 332ms/step
Epoch 5/5
195/195 - 65s - loss: 0.1056 - accuracy: 0.2159 - val_loss: 0.1238 - val_accuracy: 0.1474 - 65s/epoch - 333ms/step


<keras.callbacks.History at 0x2b8e3c66f550>

In [None]:
model.save_weights('model_cloudy_final.h5')

In [None]:
train_accuracy = history.history['accuracy']
val_accuracy = history.history['val_accuracy']
train_loss = history.history['loss']
val_loss = history.history['val_loss']

epochs = range(len(train_accuracy))

plt.plot(epochs, train_accuracy, 'r', label='Training accuracy')
plt.plot(epochs, val_accuracy, 'b', label='Validation accuracy')
plt.title('Training and validation accuracy')
plt.legend(loc=0)
plt.figure()
plt.show()

In [None]:
plt.plot(epochs, train_loss, 'r', label='Training loss')
plt.plot(epochs, val_loss, 'b', label='Validation loss')
plt.title('Training and validation loss')
plt.legend(loc=0)
plt.figure()
plt.show()

In [11]:
y_pred = model.predict(x_test, batch_size = 128, verbose=2)

25/25 - 8s - 8s/epoch - 328ms/step


In [12]:
from sklearn.metrics import fbeta_score, precision_score 
print("F2 score:",fbeta_score(y_test, np.array(y_pred)>0.2, beta=2, average='samples'))

F2 score: 0.918115579677878


In [None]:
y_pred = np.array(y_pred)>0.2
total = np.sum(y_test,axis = 0)
tp = np.sum(y_test*y_pred,axis=0)
tn = np.sum((1-y_test)*(1-y_pred),axis=0)
fp = np.sum((1-y_test)*y_pred,axis=0)
fn = np.sum(y_test*(1-y_pred),axis=0)

d = {'Total':total,'TP':tp,'TN':tn,'FP':fp,'FN':fn}
pd.DataFrame(d, index=label_map)


In [None]:
sum = np.add(tp, fp)
precision = np.divide(tp, sum)
recall = np.divide(tp, np.add(tp, fn))
accuracy = np.divide(np.add(tp, tn), np.add(fp, np.add(fn, np.add(tp, tn))))
f1 = 2*(np.divide(np.multiply(precision, recall), np.add(precision, recall)))
f2 = (np.divide(5*np.multiply(precision, recall), np.add(4*precision, recall)))
evalution_metrics = {'Accuracy':accuracy,'Precision':precision,'Recall':recall,'f1':f1, 'f2': f2}
pd.DataFrame(evalution_metrics, index=label_map)