In [273]:
import pandas as pd
import numpy as np
from ship_mrcnn import *
import gc
import matplotlib.pyplot as plt
from tqdm import tqdm_notebook
from skimage import measure
from sklearn.neighbors import KDTree
from multiprocessing import Pool
import lightgbm as lgb
from sklearn.metrics import roc_auc_score, roc_curve
from sklearn.model_selection import KFold, StratifiedKFold

In [116]:
def masks_as_image(rle, IMG_WIDTH = 768, IMG_HEIGHT = 768):
    # Take the individual ship masks and create a single mask array for all ships
    cnt = len(rle)
    all_masks = np.zeros((IMG_WIDTH, IMG_HEIGHT, cnt), dtype = np.int8)
    for idx, mask in enumerate(rle):
        if isinstance(mask, str):
            all_masks[:,:,idx] += rle_decode(mask)
    return all_masks

def make_total_img(masks, IMG_WIDTH = 768, IMG_HEIGHT = 768):
    tot_mask = np.zeros((IMG_WIDTH, IMG_HEIGHT), dtype = np.int8)
    for i in range(masks.shape[-1]):
        tot_mask += masks[...,i]
    tot_mask = (tot_mask > 0).astype(np.int8)
    return tot_mask

def masks_as_image_idx(rle, IMG_WIDTH = 768, IMG_HEIGHT = 768):
    all_masks = np.zeros((IMG_WIDTH, IMG_HEIGHT), dtype = np.int8)
    for idx, mask in enumerate(rle):
        if isinstance(mask, str):
            submask = rle_decode(mask)
            all_masks[submask > 0] = idx + 1
    return all_masks

In [103]:
def get_best_iou(mask, truth_masks):
    max_iou = 0
    best_idx = -1
    for j in range(truth_masks.shape[-1]):
        overlap = np.logical_and(mask, truth_masks[...,j])
        iou = overlap.sum() / mask.sum()
        if iou > max_iou:
            max_iou = iou
            best_idx = j
    return max_iou, best_idx

def get_mask_target(pre_masks, truth_masks):
    dict_target = {}
    for i in range(pre_masks.shape[-1]):
        max_iou, best_idx = get_best_iou(pre_masks[...,i], truth_masks)
        dict_target[i] = best_idx if max_iou > 0.5 else -1
    return dict_target

def check_target(rle, truth_masks):
    pre_mask = rle_decode(rle)
    max_iou, best_idx = get_best_iou(pre_mask, truth_masks)
    return max_iou > 0.5

In [2]:
train_truth = pd.read_csv('../input/train_ship_segmentations.csv.zip')
train_truth.fillna('', inplace=True)
train_truth['hasship'] = train_truth['EncodedPixels'].apply(lambda x:len(x) > 0)

oof_0 = pd.read_csv('../result/oof_0_shiponly_0.csv')
oof_1 = pd.read_csv('../result/oof_1_shiponly_1.csv')
oof_2 = pd.read_csv('../result/oof_2_shiponly_2.csv')
oof_all = pd.concat([oof_0, oof_1, oof_2])
del oof_0, oof_1, oof_2
gc.collect()
oof_all.fillna('', inplace=True)
oof_all['hasship'] = oof_all['EncodedPixels'].apply(lambda x:len(x) > 0)
checkids = oof_all.loc[oof_all.hasship, 'ImageId'].unique()

In [6]:
oof_agg = oof_all.groupby('ImageId')['hasship'].agg('sum')
truth_agg = train_truth.groupby('ImageId')['hasship'].agg('sum')

agg_all = pd.DataFrame({'oof':oof_agg, 'truth':truth_agg})
agg_all['oof_moreship'] = agg_all.oof > agg_all.truth
agg_all['oof_lessship'] = agg_all.oof < agg_all.truth
agg_all['oof_right'] = agg_all.oof == agg_all.truth

In [69]:
df_train = oof_all.loc[oof_all.hasship > 0]

In [104]:
true_ship = []
df_train['true_ship'] = False
truth_img = ''
truth_masks = None
prog = tqdm_notebook(total = df_train.shape[0])
for idx, row in df_train.iterrows():
#     print(row)
    if row.ImageId != truth_img:
        truth_img = row.ImageId
        truth = train_truth.loc[train_truth.ImageId == row.ImageId]
        if truth.shape[0] == 1 and truth.iloc[0].hasship == False:
            truth_masks = None
        else:
            truth_masks = masks_as_image(truth.EncodedPixels)
        
    if truth_masks is None:
        true_ship.append(False)
    else:
        true_ship.append(check_target(row.EncodedPixels, truth_masks))
    
    prog.update(1)
df_train['true_ship'] = true_ship
#     if idx > 10:
#         break

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


HBox(children=(IntProgress(value=0, max=259745), HTML(value='')))

In [214]:
df_train = df_train.set_index('ImageId')
df_train = pd.merge(df_train, 
                    df_train.groupby('ImageId')['hasship'].agg(['cumsum']).reset_index(), on='ImageId', how='left')

In [230]:
def get_inputs(imageid, df_train):
    sep_count = 3
    sep_thresholds = [0.6, 0.7, 0.8]
    
    inputs = []    
    sub_df = df_train.loc[df_train.ImageId == imageid, 'EncodedPixels']
    guids = df_train.loc[df_train.ImageId == imageid, 'guid']
    pred = pred_labels = masks_as_image_idx(sub_df)
    pred_props = measure.regionprops(pred_labels)
    
    init_count = len(pred_props)
    coords = [pr.centroid for pr in pred_props]
    if len(coords) > 0:
        t = KDTree(coords)
        neighbors100 = t.query_radius(coords, r=50)
        neighbors200 = t.query_radius(coords, r=100)
        neighbors300 = t.query_radius(coords, r=150)
        neighbors400 = t.query_radius(coords, r=200)
        med_area = np.median(np.asarray([pr.area for pr in pred_props]))
        max_area = np.max(np.asarray([pr.area for pr in pred_props]))
    
    for i in range(len(pred_props)):
        is_on_border = 1 * ((pred_props[i].bbox[0] <= 1) | (pred_props[i].bbox[1] <= 1) | (pred_props[i].bbox[2] >= pred_labels.shape[0] - 1) | (pred_props[i].bbox[3] >= pred_labels.shape[1] - 1))
        
        msk_reg = pred_labels[pred_props[i].bbox[0]:pred_props[i].bbox[2], pred_props[i].bbox[1]:pred_props[i].bbox[3]] == i+1
        pred_reg = pred[pred_props[i].bbox[0]:pred_props[i].bbox[2], pred_props[i].bbox[1]:pred_props[i].bbox[3]]
        
        contours = cv2.findContours((msk_reg * 255).astype(dtype=np.uint8), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
        if len(contours[1]) > 0:
            cnt = contours[1][0]
            min_area_rect = cv2.minAreaRect(cnt)

        inp = {}
        inp['guid'] = guids.iloc[i]
        inp['area'] = pred_props[i].area
        inp['tot_med_area'] = med_area
        inp['tot_med_area_ratio'] = pred_props[i].area / med_area
        inp['tot_max_area'] = max_area
        inp['tot_max_area_ratio'] = pred_props[i].area / max_area
        
        if len(contours[1]) > 0:
            inp['contours_cnt'] = cv2.isContourConvex(cnt) * 1.0
            inp['min_area_rect'] = min(min_area_rect[1])
            inp['max_area_rect'] = max(min_area_rect[1])
            inp['area_rect_ratio'] = inp['min_area_rect'] / inp['max_area_rect'] if inp['max_area_rect'] > 0 else 0
            inp['min_area_rect2'] = min_area_rect[2]
        else:
            inp['contours_cnt'] = 0
            inp['min_area_rect'] = 0
            inp['max_area_rect'] = 0
            inp['area_rect_ratio'] = 0
            inp['min_area_rect2'] = 0
            
        inp['convex_area'] = pred_props[i].convex_area
        inp['solidity'] = pred_props[i].solidity
        inp['eccentricity'] = pred_props[i].eccentricity
        inp['extent'] = pred_props[i].extent
        inp['perimeter'] = pred_props[i].perimeter
        inp['major_axis_length'] = pred_props[i].major_axis_length
        inp['minor_axis_length'] = pred_props[i].minor_axis_length
        inp['axis_length_ratio'] = pred_props[i].minor_axis_length / pred_props[i].major_axis_length if pred_props[i].minor_axis_length > 0 else 0

        inp['euler_number'] = pred_props[i].euler_number
        inp['equivalent_diameter'] = pred_props[i].equivalent_diameter
        inp['round_ratio'] = pred_props[i].perimeter ** 2 / (4 * pred_props[i].area * math.pi)
        
        inp['is_border'] = is_on_border 
        inp['init_count'] = init_count
        
        def calc_neighbors(inp, neighbors, flex):
            inp['neighbors{}'.format(flex)] = neighbors[i].shape[0]
            if neighbors[i].shape[0] > 0:
                neighbors_areas = np.asarray([pred_props[j].area for j in neighbors100[i]])
                median_area = np.median(neighbors_areas)
                maximum_area = np.max(neighbors_areas)
                inp['nbr{}_median_area'.format(flex)] = median_area
                inp['nbr{}_median_area_ratio'.format(flex)] = pred_props[i].area / median_area
                inp['nbr{}_max_area'.format(flex)] = maximum_area
                inp['nbr{}_max_area_ratio'.format(flex)] = pred_props[i].area / maximum_area
            else:
                inp['nbr{}_median_area'.format(flex)] = 0
                inp['nbr{}_median_area_ratio'.format(flex)] = 0
                inp['nbr{}_max_area'.format(flex)] = 0
                inp['nbr{}_max_area_ratio'.format(flex)] = 0
                
            return inp
        
        inp = calc_neighbors(inp,neighbors100, 100)
        inp = calc_neighbors(inp,neighbors200, 200)
        inp = calc_neighbors(inp,neighbors300, 300)
        inp = calc_neighbors(inp,neighbors400, 400)
        
        inputs.append(inp)
    return inputs

In [236]:
params = []
for imageid in tqdm_notebook(df_train.ImageId.unique()):
    params.append((imageid, df_train))

HBox(children=(IntProgress(value=0, max=61507), HTML(value='')))

In [241]:
with Pool(processes=8) as pool:
    results = pool.starmap(get_inputs, params)

In [260]:
df_input = pd.DataFrame()
for subres in tqdm_notebook(results):
    df_input = pd.concat([df_input, pd.DataFrame(subres)])

HBox(children=(IntProgress(value=0, max=61507), HTML(value='')))

In [277]:
df_tot = pd.merge(df_train, df_input, on = 'guid', how = 'left')
cols =[col for col in df_tot.columns if col not in ['EncodedPixels','hasship', 'guid','cumsum']]
df_tot = df_tot[cols]
df_tot['true_ship'] = df_tot['true_ship'].astype(np.uint8)

In [278]:
df_tot.to_csv('../result/oof_stat.csv', index=False)

In [285]:
inception_unet = pd.read_csv('../result/inception_unet.csv')

def count_ships(df):
    df.fillna('', inplace=True)
    df['cnt'] = df.EncodedPixels.apply(lambda x:len(x) > 0)
    df_agg = df.groupby('ImageId')['cnt'].agg('sum')
    return df_agg.astype(np.uint)

incept_agg = count_ships(inception_unet)
incept_agg = incept_agg.reset_index()

In [286]:
df_tot = pd.merge(df_tot, incept_agg, on = 'ImageId', how = 'left')

In [292]:
incept_agg.head()

Unnamed: 0,ImageId,cnt
0,0001124c7.jpg,0
1,000194a2d.jpg,1
2,0001b1832.jpg,0
3,00052ed46.jpg,0
4,000532683.jpg,2


In [293]:
df_tot[['ImageId','cnt']]

Unnamed: 0,ImageId,cnt
0,0002756f7.jpg,
1,00053c6ba.jpg,
2,0006c52e8.jpg,
3,0006c52e8.jpg,
4,0006c52e8.jpg,
5,0006c52e8.jpg,
6,000e37fc6.jpg,
7,000fd9827.jpg,
8,00113a75c.jpg,
9,00113a75c.jpg,
