In [None]:
import os
import pandas as pd
from skimage.io import imread
from skimage.morphology import label
from matplotlib import pyplot as plt
import numpy as np
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import train_test_split
from keras.models import *
from keras.layers import *
from keras.optimizers import *
import random

# Root directory of the input dataset and its train/test image folders.
# Paths are built by plain string concatenation, so every value keeps its
# trailing slash and file names can be appended directly.
DATA_PATH   = '../input/airbus-ship-detection/'
TRAIN_PATH  = DATA_PATH+'train_v2/'
TEST_PATH   = DATA_PATH+'test_v2/'

In [None]:
# Load the segmentation table; later cells read its `ImageId` and
# `EncodedPixels` columns.
df = pd.read_csv(DATA_PATH+'train_ship_segmentations_v2.csv')
df.head()

In [None]:
# Size of the freshly loaded table as (rows, columns).
df.shape

In [None]:
# Compare the number of distinct images that have at least one encoded mask
# against the number of distinct images whose mask entry is missing.
no_mask_rows = df['EncodedPixels'].isna()
n_images_with_ships = len(df.loc[~no_mask_rows, 'ImageId'].unique())
n_images_without_ships = len(df.loc[no_mask_rows, 'ImageId'].unique())
plt.bar(['Ships', 'No Ships'], [n_images_with_ships, n_images_without_ships]);
plt.ylabel('Number of Images');

In [None]:
# Exclude every row that belongs to image '6384c3e78.jpg'.
# NOTE(review): presumably this file is corrupt/unreadable — confirm the reason.
df = df[df['ImageId'] != '6384c3e78.jpg']
df.shape

In [None]:
def area_isnull(x):
    """Return 1 when ``x`` is not equal to itself (as NaN is), otherwise 0."""
    # NaN compares unequal to itself, so the self-comparison doubles as a
    # missing-value test that also works for plain strings.
    return 0 if x == x else 1

In [None]:
# Flag each row: 1 when its `EncodedPixels` value is NaN (no mask), else 0,
# then show how many rows fall on each side.
df['isnan'] = df['EncodedPixels'].apply(area_isnull)
df['isnan'].value_counts()

In [None]:
# Sort so rows flagged isnan == 1 come first, then discard the first 100000
# rows. This removes no-mask rows, assuming at least 100000 of them exist
# — TODO confirm against the value counts shown earlier.
# NOTE(review): this cell reassigns `df`; re-running it drops another 100000
# rows, so it is only correct when executed exactly once.
df = df.sort_values('isnan', ascending=False)
df = df.iloc[100000:]

In [None]:
def rle_to_mask(rle_list, SHAPE):
    """Decode a run-length list into a 2-D mask of 0 / 255 values.

    ``rle_list`` alternates 1-based start positions and run lengths (numbers
    or numeric strings). A list with exactly one element is treated as
    "no mask" and yields an all-zero result.

    Runs are painted into a flat buffer of ``SHAPE[0] * SHAPE[1]`` zeros,
    which is then reshaped to ``SHAPE`` and transposed, so the returned array
    has shape ``(SHAPE[1], SHAPE[0])``.
    """
    flat = np.zeros(SHAPE[0]*SHAPE[1])
    if len(rle_list) != 1:
        for start, run in zip(rle_list[::2], rle_list[1::2]):
            first = int(start) - 1  # start positions are 1-based
            flat[first:first + int(run)] = 255
    return np.reshape(flat, SHAPE).T

In [None]:
def calc_area_for_rle(rle_str):
    """Return the total number of pixels encoded by a run-length string.

    The value is converted with ``str`` and split on whitespace. A single
    token (for example the string form of NaN) means "no mask" and gives 0;
    otherwise the tokens at odd positions are the run lengths and their sum
    is returned.
    """
    tokens = str(rle_str).split()
    if len(tokens) == 1:
        return 0
    # Only the run lengths matter for the area; digit strings become ints,
    # anything else is passed through unchanged.
    run_lengths = [int(tok) if tok.isdigit() else tok for tok in tokens[1::2]]
    return np.sum(run_lengths)

In [None]:
# Pixel count of every individual mask row (0 for rows without a mask).
df['area'] = df['EncodedPixels'].apply(calc_area_for_rle)

In [None]:
# Rows whose mask covers at least one pixel.
df_isship = df[df['area'] > 0]
df_isship.shape

In [None]:
# Areas of the masks that cover fewer than 10 pixels.
df_smallarea = df_isship['area'][df_isship['area'] < 10]
df_smallarea.shape

In [None]:
# Fraction of non-empty masks that are smaller than 10 pixels.
df_smallarea.shape[0]/df_isship.shape[0]

In [None]:
# Collapse the per-mask rows into one row per image.
# Only the numeric columns are aggregated: summing the whole frame would also
# "sum" the EncodedPixels strings, which is wasted work and depends on how the
# installed pandas version treats non-numeric columns.
#   area  -> total mask pixels in the image
#   isnan -> sum of the per-row no-mask flags; later cells compare it to 0 / 1
#            (assumes a no-mask image has exactly one row — TODO confirm)
gp = df.groupby('ImageId')[['isnan', 'area']].sum()
gp = gp.reset_index()
gp.head()

In [None]:
def calc_class(area):
    """Bucket a pixel area into a coverage class from 0 to 6.

    ``area`` is first expressed as a fraction of a 768x768 image. A fraction
    of exactly 0 is class 0; classes 1-5 cover fractions below 0.005, 0.015,
    0.025, 0.035 and 0.045 respectively; everything else is class 6.
    """
    fraction = area / (768*768)
    if fraction == 0:
        return 0
    # Exclusive upper bounds of classes 1..5, in ascending order.
    upper_bounds = (0.005, 0.015, 0.025, 0.035, 0.045)
    for cls, bound in enumerate(upper_bounds, start=1):
        if fraction < bound:
            return cls
    return 6

In [None]:
# Coverage class (0-6) of every image, derived from its total mask area.
gp['class'] = gp['area'].apply(calc_class)

In [None]:
# Number of images in each coverage class.
gp['class'].value_counts()

In [None]:
# Hold out 1% of the images for validation, stratified on the coverage class
# so both partitions keep the same class proportions. The fixed random_state
# makes the split identical on every run; without it, each kernel restart
# trains and validates on different images.
train, val = train_test_split(gp, test_size=0.01, stratify=gp['class'].tolist(), random_state=42)

In [None]:
# Image ids of each partition, separated by label:
# isnan == 0 -> image has ships, isnan == 1 -> image has none.
train_isship_list = train.loc[train['isnan'] == 0, 'ImageId'].tolist()
train_nanship_list = train.loc[train['isnan'] == 1, 'ImageId'].tolist()

# Shuffle the training ids; sampling the full length without replacement
# returns a random permutation of the list.
train_isship_list = random.sample(train_isship_list, len(train_isship_list))
train_nanship_list = random.sample(train_nanship_list, len(train_nanship_list))

# Validation ids keep the order produced by the split.
val_isship_list = val.loc[val['isnan'] == 0, 'ImageId'].tolist()
val_nanship_list = val.loc[val['isnan'] == 1, 'ImageId'].tolist()

In [None]:
# Number of training images with ships / without ships.
len(train_isship_list),len(train_nanship_list)

In [None]:
from keras.preprocessing.image import ImageDataGenerator
# Augmentation settings shared by both generators below.
dg_args = {
    'featurewise_center': False,
    'samplewise_center': False,
    'rotation_range': 15,
    'width_shift_range': 0.1,
    'height_shift_range': 0.1,
    'shear_range': 0.01,
    'zoom_range': [0.9, 1.25],
    'horizontal_flip': True,
    'vertical_flip': True,
    'brightness_range': [0.7, 1.3],
    'fill_mode': 'reflect',
    'data_format': 'channels_last',
}

# image_gen augments the batches built by the generator function in this cell;
# label_gen is configured identically but is not referenced there.
image_gen = ImageDataGenerator(**dg_args)
label_gen = ImageDataGenerator(**dg_args)


def mygenerator(isship_list, nanship_list, batch_size, cap_num):
    """Endlessly yield balanced, augmented ``(images, labels)`` batches.

    Every batch contains up to ``batch_size // 2`` images from
    ``nanship_list`` (label 0) and the same number from ``isship_list``
    (label 1). Images are read from ``TRAIN_PATH``, passed through
    ``image_gen`` (which also shuffles the batch) and scaled by 1/255.

    Args:
        isship_list: file names of images labelled 1.
        nanship_list: file names of images labelled 0.
        batch_size: target batch size; half is taken from each list.
        cap_num: maximum number of names used from each list. It is clamped
            to the length of the shorter list so both classes contribute the
            same number of images and no batch is ever empty.

    Yields:
        Tuple ``(images, labels)`` as returned by ``image_gen.flow``, with the
        images divided by 255.0.

    Raises:
        ValueError: on the first ``next()`` call if ``batch_size`` is below 2
            or if there are no usable image names.
    """
    half = batch_size // 2
    if half < 1:
        raise ValueError('batch_size must be at least 2')

    # Never index past the end of either list: an oversized cap would let the
    # window slide beyond the data and produce an empty batch.
    limit = min(cap_num, len(isship_list), len(nanship_list))
    if limit < 1:
        raise ValueError('no images available to build a batch')

    nanship_names = nanship_list[:limit]
    isship_names = isship_list[:limit]

    k = 0
    while True:
        # Wrap around once the next window would run past the end. The
        # comparison is strict so the last full window is still used.
        if k + half > limit:
            k = 0

        images = []
        labels = []
        for name in nanship_names[k:k + half]:
            images.append(imread(TRAIN_PATH + name))
            labels.append(0)
        for name in isship_names[k:k + half]:
            images.append(imread(TRAIN_PATH + name))
            labels.append(1)
        img = np.stack(images, axis=0)
        mask = np.array(labels)

        # A one-shot flow over exactly this batch applies the random
        # augmentation and shuffles images and labels together.
        g_x = image_gen.flow(img, mask,
                             batch_size = img.shape[0],
                             shuffle=True,
                             seed=None)
        imgaug, maskaug = next(g_x)

        k += half

        yield imgaug/ 255.0, maskaug

In [None]:
BATCH_SIZE = 4

# Training generator: cap both lists at the size of the smaller one so the two
# classes contribute the same number of images.
CAP_NUM = min(len(train_isship_list),len(train_nanship_list))
datagen = mygenerator(train_isship_list, train_nanship_list, batch_size=BATCH_SIZE, cap_num=CAP_NUM)

# Validation generator: its cap must be derived from the validation lists.
# Reusing the training cap would exceed the much shorter validation lists.
VAL_CAP_NUM = min(len(val_isship_list), len(val_nanship_list))
valgen = mygenerator(val_isship_list, val_nanship_list, batch_size=50, cap_num=VAL_CAP_NUM)


numvalimages = 100  # not referenced by the code in this cell
# Draw a single batch and keep it as the fixed validation set.
# NOTE(review): the batch goes through the same random augmentation as the
# training data because mygenerator always applies image_gen.
val_x, val_y = next(valgen)
val_y

In [None]:
# Binary ship / no-ship classifier: seven conv + max-pool stages followed by a
# small dense head. Each pooling step halves the spatial size (768 -> 6).
inputs = Input(shape=(768,768,3))

c1 = Conv2D(32, (3, 3), activation='relu', padding='same') (inputs)
p1 = MaxPooling2D((2, 2)) (c1)  # 384x384

c2 = Conv2D(64, (3, 3), activation='relu', padding='same') (p1)
p2 = MaxPooling2D((2, 2)) (c2)  # 192x192

c3 = Conv2D(128, (3, 3), activation='relu', padding='same') (p2)
p3 = MaxPooling2D((2, 2)) (c3)  # 96x96

c4 = Conv2D(256, (3, 3), activation='relu', padding='same') (p3)
p4 = MaxPooling2D(pool_size=(2, 2)) (c4)  # 48x48

c5 = Conv2D(512, (3, 3), activation='relu', padding='same') (p4)
p5 = MaxPooling2D(pool_size=(2, 2)) (c5) # 24x24

c6 = Conv2D(512, (3, 3), activation='relu', padding='same') (p5)
p6 = MaxPooling2D(pool_size=(2, 2)) (c6) # 12x12

c7 = Conv2D(512, (3, 3), activation='relu', padding='same') (p6)
p7 = MaxPooling2D(pool_size=(2, 2)) (c7) # 6x6

# Flatten the output of the LAST pooling stage. Flattening p6 instead would
# leave c7/p7 disconnected from the output and quadruple the size of the
# first dense layer (12*12*512 inputs rather than 6*6*512).
flatp7 = Flatten() (p7)
d1 = Dense(128, activation='relu') (flatp7)
#d2 = Dropout(0.2)(d1)

# Single sigmoid unit: probability that the image contains a ship.
d3 = Dense(1, activation='sigmoid') (d1)

model = Model(inputs=[inputs], outputs=[d3])
model.summary()

In [None]:
from keras.optimizers import Adam
# Adam with a learning rate of 1e-4. The former `decay=0.0` argument is
# omitted: zero decay is a no-op, and the keyword is a legacy option that
# newer Keras optimizers may not accept.
opt=Adam(1e-4)
# Binary cross-entropy matches the single sigmoid output; the metric is named
# 'acc' because the plotting cell reads history keys 'acc' and 'val_acc'.
model.compile(loss='binary_crossentropy',optimizer=opt,metrics=['acc'])

In [None]:
# Train for `epochs` epochs of 100 generator batches each and evaluate on the
# fixed (val_x, val_y) batch after every epoch. The returned History object
# is plotted in the next cell.
# NOTE(review): fit_generator is a legacy Keras API; newer releases expect
# model.fit to be called with the generator — confirm against the installed
# Keras version before changing.
epochs=20
history=model.fit_generator(datagen,
                            steps_per_epoch=100,
                            epochs=epochs,
                            validation_data=(val_x, val_y),
                            verbose=1)

In [None]:
fig, axs = plt.subplots(2, 1, figsize=(15,15))

# One panel per metric: (training key, validation key, panel title).
panels = [
    ('loss', 'val_loss', 'Training Loss vs Validation Loss'),
    ('acc', 'val_acc', 'Training Accuracy vs Validation Accuracy'),
]
for ax, (train_key, val_key, title) in zip(axs, panels):
    ax.plot(history.history[train_key])
    ax.plot(history.history[val_key])
    ax.title.set_text(title)
    ax.legend(['Train', 'Validation'])

In [None]:
# Persist the trained classifier to an HDF5 file in the working directory.
model.save('seg_model_ship_classifier.h5')

In [None]:
# Plain lists of all image ids in each partition (ship and no-ship together).
val_list = val['ImageId'].tolist()
train_list = train['ImageId'].tolist()

In [None]:
def _rle_has_pixels(rle):
    """Return True when an ``EncodedPixels`` value encodes at least one pixel."""
    tokens = str(rle).split()
    # A single token is the string form of a missing value (no mask);
    # otherwise the tokens at odd positions are the run lengths.
    return len(tokens) != 1 and any(int(run) > 0 for run in tokens[1::2])


def create_data(image_list):
    """Load images and their binary ship / no-ship labels.

    The label is taken directly from the run-length strings stored in ``df``:
    an image is labelled 1 when any of its rows encodes at least one pixel.
    This avoids decoding a full 768x768 mask per row just to check whether it
    is empty.

    Args:
        image_list: image file names located under ``TRAIN_PATH``.

    Returns:
        Tuple ``(img, mask)``: ``img`` stacks the images along a new first
        axis and is scaled by 1/255; ``mask`` is a 1-D array holding one
        0/1 label per image.

    Raises:
        ValueError: if ``image_list`` is empty (nothing to stack).
    """
    images = []
    labels = []
    for name in image_list:
        images.append(imread(TRAIN_PATH + name))
        rles = df.loc[df['ImageId'] == name, 'EncodedPixels']
        labels.append(1 if any(_rle_has_pixels(rle) for rle in rles) else 0)
    img = np.stack(images, axis=0) / 255.0
    mask = np.array(labels)
    return img, mask

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
image_list = val_list[0:10]

# Show each of the first ten validation images with the model output as the
# x label and the ground-truth label as the y label.
for name in image_list:
    img = imread(TRAIN_PATH + name)
    input_img, gt_mask = create_data([name])
    pred_mask = model.predict(input_img)

    fig = plt.figure(figsize=(10,10))
    plt.imshow(img)
    plt.xlabel(pred_mask)
    plt.ylabel(gt_mask)