In [1]:
# -*- coding: utf-8 -*-
from os import path, mkdir
import numpy as np
np.random.seed(1)
import random
random.seed(1)
import timeit
import cv2
from tqdm import tqdm
from skimage import measure
from multiprocessing import Pool
import lightgbm as lgb
from sklearn.model_selection import KFold
from sklearn.neighbors import KDTree
from skimage.morphology import watershed
from skimage.morphology import square, dilation
import pandas as pd
import math
import matplotlib.pyplot as plt
from tqdm import tqdm_notebook
from lgb_features import *

In [2]:
def get_inputs(imageid, pre_df, seg_df = None):
    inputs = []    
    
    pred_msk = get_mask(imageid, pre_df)
    y_pred = measure.label(pred_msk, neighbors=8, background=0)
    ship_pro = pre_df.loc[imageid, 'p_ship'].mean()
    
    props = measure.regionprops(y_pred)
    pred_labels = measure.label(y_pred, neighbors=8, background=0)
    pred_props = measure.regionprops(y_pred)
    init_count = len(pred_props)

    coords = [pr.centroid for pr in pred_props]
#     print('len(coords)', len(coords))
    if len(coords) > 0:
#         print('make neighbors')
        t = KDTree(coords)
        neighbors100 = t.query_radius(coords, r=50)
        neighbors200 = t.query_radius(coords, r=100)
        neighbors300 = t.query_radius(coords, r=150)
        neighbors400 = t.query_radius(coords, r=200)
        areas = np.asarray([pr.area for pr in props])
        med_area = np.median(areas)
        max_area = np.max(areas)
    
    for i in range(len(pred_props)):
        cur_prop = pred_props[i]
        is_on_border = 1 * ((cur_prop.bbox[0] <= 1) | (cur_prop.bbox[1] <= 1) | (cur_prop.bbox[2] >= y_pred.shape[0] - 1) | (cur_prop.bbox[3] >= y_pred.shape[1] - 1))
  
        msk_reg = pred_labels[cur_prop.bbox[0]:cur_prop.bbox[2], cur_prop.bbox[1]:cur_prop.bbox[3]] == i+1
        pred_reg = y_pred[cur_prop.bbox[0]:cur_prop.bbox[2], cur_prop.bbox[1]:cur_prop.bbox[3]]
        
        contours = cv2.findContours((msk_reg * 255).astype(dtype=np.uint8), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
        if len(contours[1]) > 0:
            cnt = contours[1][0]
            min_area_rect = cv2.minAreaRect(cnt)
        
        inp = []
        inp.append(ship_pro)
        inp.append(cur_prop.area)
        inp.append(cur_prop.area / med_area)
        inp.append(cur_prop.area / max_area)
        if len(contours[1]) > 0:
            inp.append(cv2.isContourConvex(cnt) * 1.0)
            inp.append(min(min_area_rect[1]))
            inp.append(max(min_area_rect[1]))
            if max(min_area_rect[1]) > 0:
                inp.append(min(min_area_rect[1]) / max(min_area_rect[1]))
            else:
                inp.append(0)
            inp.append(min_area_rect[2])
        else:
            inp.append(0)
            inp.append(0)
            inp.append(0)
            inp.append(0)
            inp.append(0)
        inp.append(cur_prop.convex_area)
        inp.append(cur_prop.solidity)
        inp.append(cur_prop.eccentricity)
        inp.append(cur_prop.extent)
        inp.append(cur_prop.perimeter)
        inp.append(cur_prop.major_axis_length)
        inp.append(cur_prop.minor_axis_length)
        if(cur_prop.minor_axis_length > 0):
            inp.append(cur_prop.minor_axis_length / cur_prop.major_axis_length)
        else:
            inp.append(0)
            
        inp.append(cur_prop.euler_number)
        inp.append(cur_prop.equivalent_diameter)
        inp.append(cur_prop.perimeter ** 2 / (4 * cur_prop.area * math.pi))
        
        inp.append(is_on_border)        
        inp.append(init_count)
        inp.append(med_area)
        inp.append(cur_prop.area / med_area)

        inp = extend_neighbor_features(inp, cur_prop, pred_props, neighbors100[i], med_area, max_area)
        inp = extend_neighbor_features(inp, cur_prop, pred_props, neighbors200[i], med_area, max_area)
        inp = extend_neighbor_features(inp, cur_prop, pred_props, neighbors300[i], med_area, max_area)
        inp = extend_neighbor_features(inp, cur_prop, pred_props, neighbors400[i], med_area, max_area)
        
        
        inputs.append(np.asarray(inp))
        
    inputs = np.asarray(inputs)
    if seg_df is None:
        return inputs, pred_labels
    else:
        outputs = []
        truth_labels = get_mask(imageid, seg_df)
        truth_labels = measure.label(truth_labels, neighbors=8, background=0)
        truth_props = measure.regionprops(truth_labels)
        
        m = np.zeros((len(pred_props), len(truth_props)))
        
        for x in range(pred_labels.shape[1]):
            for y in range(pred_labels.shape[0]):
                if pred_labels[y, x] > 0 and truth_labels[y, x] > 0:
                    m[pred_labels[y, x]-1, truth_labels[y, x]-1] += 1
                    
        truth_used = set([])
        for i in range(len(pred_props)): 
            max_iou = 0
            for j in range(len(truth_props)):
                if m[i, j] > 0:
                    iou = m[i, j] / (pred_props[i].area + truth_props[j].area - m[i, j])
                    if iou > max_iou:
                        max_iou = iou
                    if iou > 0.5:
                        truth_used.add(j)
            if max_iou <= 0.5:
                max_iou = 0
            outputs.append(max_iou)
            
        outputs = np.asarray(outputs)
        fn = len(truth_props) - len(truth_used)
        
        return inputs, pred_labels, outputs, fn

In [3]:
def decode_mask(mask, shape=(768, 768)):
    pixels = mask.T.flatten()
    pixels = np.concatenate([[0], pixels, [0]])
    runs = np.where(pixels[1:] != pixels[:-1])[0] + 1
    runs[1::2] -= runs[::2]
    return ' '.join(str(x) for x in runs)

def split_label(label):
    n_objs = np.max(label)
    result = []
    for i in range(n_objs):
        obj = (label == i + 1).astype(int)
        result.append(obj)
    return result

def enc_test(yp, name, ship_list_dict):
    masks = split_label(yp)
    if(len(masks) == 0): 
        ship_list_dict.append({'ImageId':name,'EncodedPixels':np.nan})
    for mask in masks:
        mask = (cv2.resize(mask.astype(np.float), (768,768)) > 0.5)
        ship_list_dict.append({'ImageId':name,'EncodedPixels':decode_mask(mask)})
        
def lgb_filter(testid, pre_ships_det, threshold = 0.0, plot = False):
    tmp_in, tmp_label = get_inputs(testid, pre_ships_det)
    ship_list_dict = []
    rle = enc_test(tmp_label, testid, ship_list_dict)

    res_tot = None
    for bst in models:
        res = bst.predict(tmp_in)
        if res_tot is None:
            res_tot = res
        else:
            res_tot += res

    scores = res_tot / len(model_paths)
    final_dict = []

    show_label = tmp_label.copy()
    for idx, score in enumerate(scores):
        if score > threshold:
            if plot:
                print(idx, score)
            tmp_dic = ship_list_dict[idx]
            tmp_dic['score'] = score
            final_dict.append(tmp_dic)
        else:
            show_label[show_label == (idx + 1)] = 0

    if plot:
        fig, axis = plt.subplots(1, 3, figsize=[15,10])
        axis[0].imshow(cv2.imread('../input/test_v2/{}'.format(testid)))
        axis[1].imshow(tmp_label)
        axis[2].imshow(show_label)
        
    if len(final_dict) == 0:
        final_dict.append({'ImageId':testid,'EncodedPixels':np.nan, 'score':0})
        
    return final_dict

In [4]:
# pre_df = pd.read_csv('../result/fastai_resunet768_labest_all.csv',index_col='ImageId')
pre_df = pd.read_csv('../result/fastai_resunet768_labest_2.csv',index_col='ImageId')
pre_df.fillna('', inplace = True)

In [5]:
pre_none = pre_df.loc[pre_df.EncodedPixels.str.len() == 0]
pre_ships = pre_df.loc[pre_df.EncodedPixels.str.len() > 0]

In [6]:
ship_detect = pd.read_csv('../result/ship_detection.csv')

In [7]:
ship_detect.rename({'id':'ImageId'}, axis=1, inplace=True)

In [8]:
pre_ships.shape

(6216, 1)

In [9]:
pre_ships_det = pd.merge(pre_ships, ship_detect, on='ImageId', how='left')

In [10]:
pre_ships_det.set_index('ImageId', inplace=True)

In [11]:
img_ids = pre_ships_det.index

In [12]:
import glob

model_paths = glob.glob('lgb_models/*.txt')
models = []
for path in model_paths:
    bst = lgb.Booster(model_file=path)
    models.append(bst)

In [13]:
tot_final_dict = []
for imgid in tqdm_notebook(img_ids):
    final_dict = lgb_filter(imgid, pre_ships_det, 0.15, False)
    tot_final_dict += final_dict

HBox(children=(IntProgress(value=0, max=6216), HTML(value='')))




In [14]:
tot_ship_df = pd.DataFrame(tot_final_dict)

In [15]:
tot_ship_df.drop_duplicates(inplace=True)

In [16]:
to_compile = tot_ship_df.copy()
to_compile = to_compile[['ImageId', 'EncodedPixels']]

# threshold = 0.2
# to_compile.loc[to_compile.score < threshold, 'EncodedPixels'] = ''
# to_compile.loc[to_compile.score < threshold, 'score'] = 0
# to_compile.drop_duplicates(inplace=True)
# to_compile = to_compile[['ImageId', 'EncodedPixels']]

In [17]:
to_compile.head()

Unnamed: 0,ImageId,EncodedPixels
0,000367c13.jpg,312769 2 313537 4 314305 4 315071 7 315839 6 3...
1,000367c13.jpg,274371 2 275137 6 275905 7 276673 6 277441 6 2...
2,000367c13.jpg,282819 6 283586 7 284353 8 285122 7 285889 8 2...
24,0010551d9.jpg,180731 6 181497 11 182265 12 183032 16 183799 ...
25,002a943bf.jpg,403967 2 404733 4 405499 6 406264 9 407030 11 ...


In [18]:
pre_none.reset_index(inplace=True)
pre_none.head()

Unnamed: 0,ImageId,EncodedPixels
0,00002bd58.jpg,
1,00015efb6.jpg,
2,00023d5fc.jpg,
3,0008ca6e9.jpg,
4,000a4635f.jpg,


In [19]:
df_sub = pd.concat([pre_none, to_compile])

In [20]:
import kaggle_util
kaggle_util.save_result(df_sub, '../result/lgb_filter_deeper.csv', 
                        competition = 'airbus-ship-detection', 
                        send = True, index = False)

save result
upload result
cmd: kaggle competitions submit -c airbus-ship-detection -f ../result/lgb_filter_deeper.csv.7z -m "submit"
