In [2]:
import numpy as np # linear algebra
import scipy as scipy
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os
import gc
import dill

import keras as k
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D

import cv2
from tqdm import tqdm
from PIL import Image

In [6]:
def load_image_array(filename, size=(64, 64)):
    """
    Read one image file and return it as a float32 RGB array scaled by 1/255.

    :param filename: path of the image file to open
    :param size: bounding box passed to ``Image.thumbnail`` (PIL keeps the aspect ratio)
    :return: array of shape (height, width, 3) with dtype float32
    """
    # The context manager releases the file handle as soon as the pixels are read,
    # instead of leaving tens of thousands of handles to the garbage collector.
    with Image.open(filename) as img:
        img.thumbnail(size)
        return np.asarray(img.convert("RGB"), dtype=np.float32) / 255


# (path template, number of files) for each test directory, in submission order.
test_image_sets = [
    ('../DB/test-jpg/test_{}.jpg', 40669),
    ('../DB/test-jpg-additional/file_{}.jpg', 20522),
]

# Preallocate the final array: appending to a list and converting afterwards
# keeps two full copies of the data in memory at the peak.
x_test = np.empty(
    (sum(count for _, count in test_image_sets), 64, 64, 3),
    dtype=np.float32,
)

row = 0
for template, count in test_image_sets:
    for f in tqdm(range(count), desc=template.format('*')):
        # Raises if an image does not come out as exactly 64x64x3, rather than
        # silently producing a ragged/object array.
        x_test[row] = load_image_array(template.format(f))
        row += 1

print(x_test.shape)


(61191, 64, 64, 3)


In [7]:
import math

def add_unet_block(depth, width, drop_constant, act, previous_layer):
    """
    Attach a U-Net style encoder/decoder stack to ``previous_layer``.

    Encoder: ``depth`` levels, each made of two 3x3 'same'-padded convolutions
    with ``int(width * 2**level)`` filters; every level after the first is
    preceded by 2x2 max pooling.
    Decoder: for each pooled level (deepest first) the tensor is upsampled 2x,
    concatenated on axis 3 with the encoder output of the matching level
    (axis 3 is presumably the channel axis, i.e. channels-last input),
    passed through dropout and two more 3x3 convolutions.

    With ``depth <= 1`` there is no pooling, so no upsampling is applied either
    (previously a stray upsample doubled the output size in that case).

    :param depth: number of encoder levels (``depth - 1`` pool/upsample pairs)
    :param width: number of filters at level 0
    :param drop_constant: rate passed to ``Dropout`` in every decoder step
    :param act: activation passed to every ``Conv2D``
    :param previous_layer: tensor the block is connected to
    :return: output tensor of the last layer of the block
    """
    prev = previous_layer
    skips = []

    # Contracting path: remember each level's output for the skip connections.
    for level in range(depth):
        if level != 0:
            prev = MaxPooling2D(pool_size=(2, 2))(prev)
        filters = int(width * 2 ** level)
        prev = Conv2D(filters, (3, 3), activation=act, padding='same')(prev)
        prev = Conv2D(filters, (3, 3), activation=act, padding='same')(prev)
        skips.append(prev)

    # Expanding path: one upsample per pooling performed above, so the loop
    # body never runs (and nothing is upsampled) when depth <= 1.
    for level in reversed(range(depth - 1)):
        prev = UpSampling2D(size=(2, 2))(prev)
        merged = concatenate([prev, skips[level]], axis=3)
        dropped = Dropout(drop_constant)(merged)
        filters = int(width * 2 ** level)
        prev = Conv2D(filters, (3, 3), activation=act, padding='same')(dropped)
        prev = Conv2D(filters, (3, 3), activation=act, padding='same')(prev)
    return prev


In [8]:
from keras.layers import Input, UpSampling2D
from keras.layers.merge import concatenate
from keras.models import Model

In [9]:
# Classifier: U-Net feature extractor followed by a dense multi-label head.
# Layers are created in the same order as before so that automatically
# generated layer names stay the same.
input_layer = Input(shape=(64, 64, 3))
unet_out = add_unet_block(
    depth=3,
    width=32,
    drop_constant=0.4,
    act='relu',
    previous_layer=input_layer,
)
flat = Flatten()(unet_out)
dense1 = Dense(units=512, activation='relu')(flat)
# 17 independent sigmoid outputs, one per entry of the label map.
dense2 = Dense(units=17, activation='sigmoid')(dense1)
unet = Model(inputs=[input_layer], outputs=dense2)

In [10]:
# Load previously saved weights into the model built above; the path is
# relative to the notebook's working directory.
# NOTE(review): presumably this checkpoint was saved from the same
# architecture (depth 3, width 32) — confirm against the training notebook.
unet.load_weights('weights.best.hdf5')

In [11]:
# Run inference over all test images in batches of 128. The model ends in a
# 17-unit sigmoid layer, so each row holds 17 per-class scores.
predictions = unet.predict(x_test, batch_size=128)

In [12]:
def map_predictions(predictions, labels_map, thresholds):
    """
    Turn per-class scores into lists of label names.

    A label is kept for a sample when its score is strictly greater than the
    threshold stored at the same index.

    :param predictions: iterable of score sequences, e.g. the output of predict()
    :param labels_map: lookup from class index to label name
    :param thresholds: per-class cut-offs, indexed like the scores
    :return: one list of label names per entry of ``predictions``
    """
    def labels_above_threshold(scores):
        selected = []
        for idx, score in enumerate(scores):
            if score > thresholds[idx]:
                selected.append(labels_map[idx])
        return selected

    return [labels_above_threshold(scores) for scores in predictions]

In [13]:
# Class index -> tag name. Index i is looked up for column i of the model
# output, so the order of this list must not change.
# NOTE(review): presumably this matches the label order used at training time — confirm.
unet_label_map = dict(enumerate([
    'agriculture',
    'artisinal_mine',
    'bare_ground',
    'blooming',
    'blow_down',
    'clear',
    'cloudy',
    'conventional_mine',
    'cultivation',
    'habitation',
    'haze',
    'partly_cloudy',
    'primary',
    'road',
    'selective_logging',
    'slash_burn',
    'water',
]))


In [14]:
# Same cut-off (0.2) for every class; a tag is emitted when its score exceeds it.
thresholds = [0.2 for _ in range(len(unet_label_map))]
predicted_labels = map_predictions(
    predictions=predictions,
    labels_map=unet_label_map,
    thresholds=thresholds,
)

In [15]:
# File names in the same order the images were loaded:
# first the 40669 'test_*' files, then the 20522 'file_*' files.
x_test_filename = list(map('test_{}.jpg'.format, range(40669)))
x_test_filename2 = list(map('file_{}.jpg'.format, range(20522)))
x_test_filename = np.concatenate((x_test_filename, x_test_filename2))
print(x_test_filename.shape)

(61191,)


In [16]:
# Submission format: one space-separated tag string per image.
tags_list = [' '.join(map(str, labels)) for labels in predicted_labels]

# Pair each image name (file name up to the first '.') with its tag string.
final_data = [
    [name.split(".")[0], joined_tags]
    for name, joined_tags in zip(x_test_filename, tags_list)
]

In [17]:
# Two-column table built from the [image_name, tags] pairs in final_data.
final_df = pd.DataFrame(final_data, columns=['image_name', 'tags'])
# Bare last expression so the notebook renders the first five rows.
final_df.head()

Unnamed: 0,image_name,tags
0,test_0,clear primary
1,test_1,clear primary
2,test_2,partly_cloudy primary
3,test_3,agriculture clear cultivation primary
4,test_4,cloudy partly_cloudy primary


In [18]:
# Write the submission file; index=False keeps the DataFrame's row index out
# of the CSV so only the image_name and tags columns are written.
final_df.to_csv('ekami_unet_verify2.csv', index=False)

In [19]:
# Sanity check: raw per-class scores for the first test image.
print(predictions[0])

[  3.85089591e-03   5.26798267e-08   6.65377956e-05   8.44099186e-03
   1.05370877e-04   9.99781072e-01   2.66136169e-07   1.36377256e-08
   3.61190410e-04   1.17070286e-03   6.23417727e-04   4.16171351e-05
   9.99996364e-01   5.81461238e-03   4.30325978e-03   7.04003969e-06
   4.65501435e-02]


In [20]:
# Sanity check: the number of images and the number of file names must agree,
# because the two are zipped together positionally to build the submission rows.
print(x_test.shape)
print(x_test_filename.shape)

(61191, 64, 64, 3)
(61191,)
