In [2]:
import matplotlib
import matplotlib.pyplot as plt

import numpy as np
import os

from skimage.io import imread, imsave

from keras import applications
from keras import optimizers
from keras.models import Sequential, Model
from keras.layers import Activation, Dense, Dropout, Flatten, GlobalAveragePooling2D
from keras.utils import to_categorical
from keras.preprocessing.image import ImageDataGenerator
from keras.optimizers import Adam

from sklearn.metrics import fbeta_score, precision_score
from sklearn.model_selection import train_test_split
import pandas as pd
import cv2

%matplotlib inline

In [3]:
source_dir = os.path.join('train-jpg', 'train-jpg')

In [4]:
x_train = []
y_train = []

train_data = pd.read_csv('train_v2.csv/train_v2.csv')

label_enum = 0
label_map = {}
labels = {}
for tags in train_data['tags']:
    classes = tags.split(" ")
    for label in classes:
        if label not in label_map and label != "cloudy" and label != "partly_cloudy":
            label_map[label] = label_enum
            labels[label_enum] = label
            label_enum = label_enum + 1
    if label_enum == 17:
        break

for image, tags in train_data.values:
    image_path = source_dir+"/"+image+".jpg"
    img = cv2.imread(image_path)
    img = cv2.resize(img, (139, 139))
    one_hot_labels = np.zeros(len(label_map))
    cloudy = False
    for label in tags.split(' '):
        if label == "cloudy" or label == "partly_cloudy":
            cloudy = True
        else:
            one_hot_labels[label_map[label]] = 1
    if not cloudy:
        x_train.append(img/255.0)
        y_train.append(one_hot_labels)
    
y_train = np.array(y_train, np.uint8)
x_train = np.array(x_train, np.float32)

print(x_train.shape)
print(y_train.shape)

(31129, 139, 139, 3)
(31129, 16)


In [5]:
x_train, x_val, y_train, y_val = train_test_split(x_train, y_train, test_size =0.2)
x_test, x_val, y_test, y_val = train_test_split(x_val, y_val, test_size =0.5)

In [6]:
pretrained_model = applications.InceptionV3(include_top=False, weights='imagenet', input_shape = (139, 139, 3))
pretrained_model.trainable = False

In [7]:
input_layer = pretrained_model.input
model_output = pretrained_model.output
pooling_layer = GlobalAveragePooling2D()(model_output)
dense_layer = Dense(2048, activation='relu')(pooling_layer)
dropout_layer = Dropout(0.5)(dense_layer)
output_layer = Dense(17, activation='sigmoid')(dropout_layer)
model = Model(inputs=input_layer, outputs=output_layer)

In [8]:
optimizer = Adam()
model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])
history = model.fit(x_train, y_train, validation_data=(x_val, y_val), batch_size=128,verbose=2, epochs=10, shuffle=True)

Epoch 1/20
195/195 - 69s - loss: 0.1454 - accuracy: 0.0673 - val_loss: 0.1174 - val_accuracy: 0.0238 - 69s/epoch - 356ms/step
Epoch 2/20
195/195 - 64s - loss: 0.1150 - accuracy: 0.0545 - val_loss: 0.1121 - val_accuracy: 0.0797 - 64s/epoch - 330ms/step
Epoch 3/20
195/195 - 64s - loss: 0.1085 - accuracy: 0.0573 - val_loss: 0.1114 - val_accuracy: 0.0305 - 64s/epoch - 329ms/step
Epoch 4/20
195/195 - 64s - loss: 0.1036 - accuracy: 0.0620 - val_loss: 0.1102 - val_accuracy: 0.0395 - 64s/epoch - 330ms/step
Epoch 5/20
195/195 - 64s - loss: 0.0998 - accuracy: 0.0744 - val_loss: 0.1108 - val_accuracy: 0.1137 - 64s/epoch - 330ms/step
Epoch 6/20
195/195 - 64s - loss: 0.0953 - accuracy: 0.0899 - val_loss: 0.1102 - val_accuracy: 0.1079 - 64s/epoch - 329ms/step
Epoch 7/20
195/195 - 64s - loss: 0.0909 - accuracy: 0.1111 - val_loss: 0.1122 - val_accuracy: 0.0752 - 64s/epoch - 329ms/step
Epoch 8/20
195/195 - 64s - loss: 0.0866 - accuracy: 0.1198 - val_loss: 0.1134 - val_accuracy: 0.1340 - 64s/epoch - 330

<keras.callbacks.History at 0x2ac3523e3e80>

In [8]:
model.save_weights("model_cloudy_pre_final.h5")

In [9]:
for layer in pretrained_model.layers[:176]:
   layer.trainable = False
for layer in pretrained_model.layers[176:]:
   layer.trainable = True

input_layer = pretrained_model.input
model_output = pretrained_model.output
pooling_layer = GlobalAveragePooling2D()(model_output)
dense_layer = Dense(2048, activation='relu')(pooling_layer)
dropout_layer = Dropout(0.5)(dense_layer)
output_layer = Dense(17, activation='sigmoid')(dropout_layer)
model = Model(inputs=input_layer, outputs=output_layer)
model.load_weights("model_cloudy_pre_final.h5")

adam = Adam()
model.compile(loss='binary_crossentropy', optimizer=adam, metrics=['accuracy'])
history = model.fit(x_train, y_train, validation_data=(x_val, y_val), batch_size=128,verbose=2, epochs=10, shuffle=True)

  super().__init__(name, **kwargs)


Epoch 1/20
195/195 - 150s - loss: 0.1401 - val_loss: 0.2169 - 150s/epoch - 771ms/step
Epoch 2/20
195/195 - 143s - loss: 0.1069 - val_loss: 0.1346 - 143s/epoch - 733ms/step
Epoch 3/20
195/195 - 143s - loss: 0.0982 - val_loss: 0.1013 - 143s/epoch - 733ms/step
Epoch 4/20
195/195 - 144s - loss: 0.0916 - val_loss: 0.1017 - 144s/epoch - 737ms/step
Epoch 5/20
195/195 - 143s - loss: 0.0847 - val_loss: 0.1328 - 143s/epoch - 734ms/step
Epoch 6/20
195/195 - 143s - loss: 0.0802 - val_loss: 0.1169 - 143s/epoch - 732ms/step
Epoch 7/20
195/195 - 143s - loss: 0.0742 - val_loss: 0.1132 - 143s/epoch - 733ms/step
Epoch 8/20
195/195 - 143s - loss: 0.0687 - val_loss: 0.1642 - 143s/epoch - 733ms/step
Epoch 9/20
195/195 - 143s - loss: 0.0707 - val_loss: 0.1320 - 143s/epoch - 736ms/step
Epoch 10/20
195/195 - 142s - loss: 0.0655 - val_loss: 0.1239 - 142s/epoch - 730ms/step
Epoch 11/20
195/195 - 142s - loss: 0.0552 - val_loss: 0.1586 - 142s/epoch - 729ms/step
Epoch 12/20
195/195 - 143s - loss: 0.0489 - val_loss

<keras.callbacks.History at 0x2ac4cd823c10>

In [10]:
model.save_weights('model_cloudy_final_2.h5')

In [None]:
train_accuracy = history.history['accuracy']
val_accuracy = history.history['val_accuracy']
train_loss = history.history['loss']
val_loss = history.history['val_loss']

epochs = range(len(train_accuracy))

plt.plot(epochs, train_accuracy, 'r', label='Training accuracy')
plt.plot(epochs, val_accuracy, 'b', label='Validation accuracy')
plt.title('Training and validation accuracy')
plt.legend(loc=0)
plt.figure()
plt.show()

In [None]:
plt.plot(epochs, train_loss, 'r', label='Training loss')
plt.plot(epochs, val_loss, 'b', label='Validation loss')
plt.title('Training and validation loss')
plt.legend(loc=0)
plt.figure()
plt.show()

In [10]:
y_pred = model.predict(x_test, batch_size = 128, verbose=2)

25/25 - 8s - 8s/epoch - 320ms/step


In [11]:
f2_score = fbeta_score(y_test, np.array(y_pred)>0.2, beta=2, average='samples')
print("F2 score:",f2_score)

F2 score: 0.9036307461192602


In [12]:
y_pred = np.array(y_pred)>0.2
total = np.sum(y_test,axis = 0)
tp = np.sum(y_test*y_pred,axis=0)
tn = np.sum((1-y_test)*(1-y_pred),axis=0)
fp = np.sum((1-y_test)*y_pred,axis=0)
fn = np.sum(y_test*(1-y_pred),axis=0)

d = {'Total':total,'TP':tp,'TN':tn,'FP':fp,'FN':fn}
pd.DataFrame(d, index=label_map)


ValueError: Shape of passed values is (16, 5), indices imply (15, 5)

In [None]:
sum = np.add(tp, fp)
precision = np.divide(tp, sum)
recall = np.divide(tp, np.add(tp, fn))
accuracy = np.divide(np.add(tp, tn), np.add(fp, np.add(fn, np.add(tp, tn))))
f1 = 2*(np.divide(np.multiply(precision, recall), np.add(precision, recall)))
f2 = (np.divide(5*np.multiply(precision, recall), np.add(4*precision, recall)))
evalution_metrics = {'Accuracy':accuracy,'Precision':precision,'Recall':recall,'f1':f1, 'f2': f2}
pd.DataFrame(evalution_metrics, index=label_map)