In [1]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os
import gc
from tqdm import tqdm
import gdal 

import keras as k
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras.layers import Merge
from keras.utils.vis_utils import model_to_dot

import cv2

import cv2
from tqdm import tqdm
from IPython.display import SVG

import tensorflow as tf
import matplotlib.pyplot as plt
import matplotlib.image as mpimg

Using TensorFlow backend.


In [2]:
input_loc = 'input/'
train_files = input_loc + 'train_jpg_features/'
test_files = input_loc + 'test_jpg_features/'
df_train = pd.read_csv(input_loc + 'train_v2.csv')
feature_map = ['rgb', 'nvdi', 'ndwi', 'rgb_edge', 'nvdi_edge', 'ndwi_edge']

In [3]:
#Create a small sample to test out the model
df_train = pd.read_csv(input_loc + 'train.csv')
size_percent = .05
df_train = df_train.sample(n=int(len(df_train)*size_percent))

flatten = lambda l: [item for sublist in l for item in sublist]
labels = list(set(flatten([l.split(' ') for l in df_train['tags'].values])))

label_map = {l: i for i, l in enumerate(labels)}
inv_label_map = {i: l for l, i in label_map.items()}

y_train_tags = []
x_train = {
            'rgb':[],
            'nvdi': [],
            'ndwi': [],
            'rgb_edge': [],
            'nvdi_edge': [],
            'ndwi_edge': []
          }

for img_name, tags in tqdm(df_train.values, miniters=200):
    targets = np.zeros(17)
    for t in tags.split(' '):
        targets[label_map[t]] = 1 
    y_train_tags.append(targets)
    
    for img_type in feature_map:
        img = cv2.imread(train_files + '%s_%s.jpg' %(img_name, feature_map[0]))
        x_train[img_type].append(cv2.resize(img, (128,128)))


100%|██████████| 2023/2023 [00:30<00:00, 66.71it/s]


In [4]:
#Create variables from small sample
split = int((len(df_train)*.4))

y_train_tags = np.array(y_train_tags, np.uint8)
for key in tqdm(x_train, miniters=1):
    x_train[key] = np.array(x_train[key], np.float16)/255.

x_train_rgb, x_valid_rgb = x_train['rgb'][:split], x_train['rgb'][split:]
x_train_nvdi, x_valid_nvdi = x_train['nvdi'][:split], x_train['nvdi'][split:]
x_train_ndwi, x_valid_ndwi = x_train['ndwi'][:split], x_train['ndwi'][split:]
x_train_rgb_edge, x_valid_rgb_edge = x_train['rgb_edge'][:split], x_train['rgb_edge'][split:]
x_train_nvdi_edge, x_valid_nvdi_edge = x_train['nvdi_edge'][:split], x_train['nvdi_edge'][split:]
x_train_ndwi_edge, x_valid_ndwi_edge = x_train['ndwi_edge'][:split], x_train['ndwi_edge'][split:]
y_train, y_valid = y_train_tags[:split], y_train_tags[split:]

print("x_train_rbg: %s, x_valid_rbg: %s" %(len(x_train_rgb), len(x_valid_rgb)))
print("y_train: %s, y_valid: %s" %(len(y_train), len(y_valid)))

100%|██████████| 6/6 [00:09<00:00,  1.60s/it]

x_train_rbg: 809, x_valid_rbg: 1214
y_train: 809, y_valid: 1214





In [6]:
from keras.callbacks import ModelCheckpoint

filepath="weights.best.branch6Model"
checkpoint = ModelCheckpoint(filepath, monitor='val_acc', verbose=1, save_best_only=True)

print("RGB_BRANCH.ADD")
rgb_branch = Sequential()
rgb_branch.add(Conv2D(32, kernel_size=(2,2),
             activation='relu',
             input_shape=(128, 128, 3)
             ))
rgb_branch.add(Conv2D(32, (3, 3), activation='relu'))
rgb_branch.add(MaxPooling2D(pool_size=(2, 2)))
rgb_branch.add(Dropout(0.25))
rgb_branch.add(Dense(128, activation='relu'))
rgb_branch.add(Dropout(0.5))
print(rgb_branch.summary())
print("NVDI_BRANCH.ADD")
nvdi_branch = Sequential()
nvdi_branch.add(Conv2D(32, kernel_size=(2,2),
             activation='relu',
             input_shape=(128, 128, 3)
             ))
nvdi_branch.add(Conv2D(32, (3, 3), activation='relu'))
nvdi_branch.add(MaxPooling2D(pool_size=(2, 2)))
nvdi_branch.add(Dropout(0.25))
nvdi_branch.add(Dense(128, activation='relu'))
nvdi_branch.add(Dropout(0.5))

print("NDWI_BRANCH.ADD")
ndwi_branch = Sequential()
ndwi_branch = Sequential()
ndwi_branch.add(Conv2D(32, kernel_size=(2,2),
             activation='relu',
             input_shape=(128, 128, 3)
             ))
ndwi_branch.add(Conv2D(32, (3, 3), activation='relu'))
ndwi_branch.add(MaxPooling2D(pool_size=(2, 2)))
ndwi_branch.add(Dropout(0.25))
ndwi_branch.add(Dense(128, activation='relu'))
ndwi_branch.add(Dropout(0.5))

print("RGB_EDGE_BRANCH.ADD")
rgb_edge_branch = Sequential()
rgb_edge_branch.add(Conv2D(16, kernel_size=(2,2),
             activation='relu',
             input_shape=(128, 128, 3)
             ))
rgb_edge_branch.add(Conv2D(32, (3, 3), activation='relu'))
rgb_edge_branch.add(MaxPooling2D(pool_size=(2,2)))
rgb_edge_branch.add(Dropout(0.25))
rgb_edge_branch.add(Dense(128, activation='sigmoid'))
rgb_edge_branch.add(Dropout(0.5))

print("NVDI_EDGE_BRANCH.ADD")
nvdi_edge_branch = Sequential()
nvdi_edge_branch.add(Conv2D(16, kernel_size=(2,2),
             activation='relu',
             input_shape=(128, 128, 3)
             ))
nvdi_edge_branch.add(Conv2D(32, (3, 3), activation='relu'))
nvdi_edge_branch.add(MaxPooling2D(pool_size=(2,2)))
nvdi_edge_branch.add(Dropout(0.25))
nvdi_edge_branch.add(Dense(128, activation='relu'))
nvdi_edge_branch.add(Dropout(0.5))

print("NDWI_EDGE_BRANCH.ADD")
ndwi_edge_branch = Sequential()
ndwi_edge_branch.add(Conv2D(16, kernel_size=(2,2),
             activation='relu',
             input_shape=(128, 128, 3)
             ))
ndwi_edge_branch.add(Conv2D(32, (3, 3), activation='relu'))
ndwi_edge_branch.add(MaxPooling2D(pool_size=(2,2)))
ndwi_edge_branch.add(Dropout(0.25))
ndwi_edge_branch.add(Dense(128, activation='sigmoid'))
ndwi_edge_branch.add(Dropout(0.5))

print("MODEL.MERGE")
model = Sequential()
merged = Merge([rgb_branch, nvdi_branch, ndwi_branch, rgb_edge_branch, nvdi_edge_branch, ndwi_edge_branch], mode = 'sum')

model.add(merged)
model.add(Flatten())
model.add(Dense(17, activation='sigmoid'))

print("MODEL.COMPILE")
model.compile(loss='binary_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

print("MODEL.FIT")
model.fit([x_train_rgb, x_train_nvdi, x_train_ndwi, x_train_rgb_edge, x_train_nvdi_edge, x_train_ndwi_edge], 
          y_train,
          batch_size=128,
          epochs=1,
          verbose=1)

RGB_BRANCH.ADD
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_13 (Conv2D)           (None, 127, 127, 32)      416       
_________________________________________________________________
conv2d_14 (Conv2D)           (None, 125, 125, 32)      9248      
_________________________________________________________________
max_pooling2d_7 (MaxPooling2 (None, 62, 62, 32)        0         
_________________________________________________________________
dropout_13 (Dropout)         (None, 62, 62, 32)        0         
_________________________________________________________________
dense_8 (Dense)              (None, 62, 62, 128)       4224      
_________________________________________________________________
dropout_14 (Dropout)         (None, 62, 62, 128)       0         
Total params: 13,888.0
Trainable params: 13,888.0
Non-trainable params: 0.0
___________________________________________________



In [30]:
from sklearn.metrics import fbeta_score

p_valid = model.predict([x_valid_rgb, x_valid_nvdi, x_valid_ndwi, x_valid_rgb_edge, x_valid_nvdi_edge, x_valid_ndwi_edge], batch_size=128)
score = fbeta_score(y_valid, np.array(p_valid) > 0.2, beta=2, average='samples')
print(score)

0.657506315582
