In [1]:
import pandas as pd
import numpy as np
from tqdm import tqdm_notebook
from kaggle_util import *
from ship_mrcnn import *
from sklearn.metrics import roc_auc_score, roc_curve, log_loss, f1_score

%matplotlib inline

Using TensorFlow backend.
  from numpy.core.umath_tests import inner1d


In [6]:
# Model outputs in submission format (ImageId / EncodedPixels columns).
mrcnn = pd.read_csv('../result/sub_shiponly_0.csv')
inception_unet = pd.read_csv('../result/inception_unet_256.csv')
stack = pd.read_csv('../result/isship_stack.csv')

# Sample submission (later extended with a 'vote' column) and the test-set
# segmentation file that later cells use as the reference ('act_ship', 'truth').
sample_sub = pd.read_csv('../input/sample_submission.csv.zip')
test_result = pd.read_csv('../input/test_ship_segmentations.csv.zip')
# Per-image area statistics; only the 'max' column goes into the lookup below.
area_agg = pd.read_csv('../result/suball_area_stat.csv')
# NOTE(review): DataFrame.to_dict() on a one-column frame returns the nested
# mapping {'max': {ImageId: value}}, not a flat {ImageId: value} dict — confirm
# that this is what consumers of `dict_max_area` expect.
dict_max_area = area_agg[['ImageId', 'max']].set_index('ImageId').to_dict()

In [50]:
stack.head()

Unnamed: 0,ImageId,EncodedPixels
0,0001124c7.jpg,
1,000194a2d.jpg,7691 748 8459 748 9227 748 9995 748 10763 748 ...
2,0001b1832.jpg,
3,00052ed46.jpg,
4,000532683.jpg,


In [22]:
def print_pick_stat(test_fact, pick_col, name):
    """Summarise how a pick column agrees with the `act_ship` column.

    Despite the name, nothing is printed: a one-row DataFrame is returned.

    Parameters
    ----------
    test_fact : DataFrame
        Must contain `pick_col` and an `act_ship` column.
    pick_col : str
        Column holding the predicted pick.
    name : hashable
        Used as the index label of the returned row.

    Returns
    -------
    DataFrame
        Columns: miss_ship, miss_empty, miss_total, log_loss, f1.
    """
    picked = test_fact[pick_col]
    actual = test_fact.act_ship

    # act_ship is positive but the pick is not.
    n_missed_ships = int(((picked <= 0) & (actual > 0)).sum())
    # The pick is positive but act_ship is not.
    n_missed_empty = int(((picked > 0) & (actual <= 0)).sum())

    summary = {
        'miss_ship': n_missed_ships,
        'miss_empty': n_missed_empty,
        'miss_total': n_missed_ships + n_missed_empty,
        'log_loss': log_loss(actual, picked),
        'f1': f1_score(actual, picked),
    }
    return pd.DataFrame(summary, index=[name])
    

In [4]:
# (label, csv_path) pairs of classifier prediction files used by the vote in
# stat_combination, which reads a 'pre' score column from each CSV.
# Commented-out entries are excluded from the vote.
pres = [
    #('xception', '../result/Xception_pre_0.801.csv'),
    #('inception', '../result/InceptionV3_pre_0.824.csv'),
    #('resent', '../result/RESNET52_pre_0.836.csv'),
    ('vgg', '../result/VGG16_pre_0.791.csv'),
    ('densenet', '../result/DenseNet169_pre_0.843.csv'),
    #('stackloss', '../result/stack_loss.csv'),
    #('stackunbalance', '../result/stack_unbalance.csv'),
    #('stackpoly', '../result/stack_poly.csv'),
    #('stackpolyunbalance', '../result/stack_poly_unbalance.csv'),
    ('stackpolystat', '../result/stack_poly_stat.csv'),
]

In [5]:
def stat_combination(pres):
    """Vote the given classifiers per image and score the resulting pick.

    Parameters
    ----------
    pres : sequence of (label, csv_path)
        Each CSV must provide a 'pre' score column; a score above 0.5 counts
        as one vote.

    Returns
    -------
    (DataFrame, DataFrame)
        The one-row statistics from print_pick_stat and the merged per-image
        frame including the 'vote_pick' column.

    Notes
    -----
    Writes a 'vote' column onto the notebook-level `sample_sub` frame and
    reads the notebook-level `test_result` and `area_agg` frames.
    """
    # Start every run from a zeroed vote counter on the shared frame.
    sample_sub['vote'] = 0
    for _label, csv_path in pres:
        positive = pd.read_csv(csv_path)['pre'] > 0.5
        sample_sub['vote'] += positive

    test_fact = merge_ship(sample_sub, test_result, 'act_ship')
    # NOTE(review): `suball` is not defined in the visible cells — presumably
    # it comes from a star import or an earlier cell; confirm.
    test_fact = merge_ship(test_fact, suball, 'mrcnn_ship')
    test_fact = pd.merge(test_fact, area_agg, on='ImageId', how='left')

    combo_label = '_'.join(label for label, _path in pres)

    # Fewer than three voters: one vote is enough; otherwise two are required.
    if len(pres) < 3:
        vote_limit = 0
    else:
        vote_limit = 1
    test_fact['vote_pick'] = (test_fact.vote > vote_limit).astype(np.int8)

    stats = print_pick_stat(test_fact, 'vote_pick', combo_label)
    return stats, test_fact


In [6]:
# Vote with the currently enabled entries of `pres`: `st` is the one-row
# statistics frame, `fact` the per-image frame returned alongside it.
st, fact = stat_combination(pres)

In [8]:
# Persist only the per-image vote decision (ImageId, vote_pick).
fact[['ImageId','vote_pick']].to_csv('../result/vote.csv', index=False)

In [2]:
# Reload the saved vote from disk.
# NOTE(review): this rebinds `fact`; if the file is the one written by the
# to_csv cell, only ImageId and vote_pick are available from here on.
fact = pd.read_csv('../result/vote.csv')

In [3]:
# ImageIds whose vote_pick is 0.
empty_img = fact.loc[fact['vote_pick']==0, 'ImageId']

In [4]:
len(empty_img)

75504

In [None]:
from itertools import combinations

# Score every non-empty subset of `pres` and gather the one-row statistics.
# NOTE: `fact` is rebound on every iteration, so after this cell it holds the
# per-image frame of the last combination only.
stat_rows = []
for cnt in tqdm_notebook(range(1, len(pres) + 1)):
    for row in tqdm_notebook(combinations(pres, cnt)):
        sub_pres = list(row)
        st, fact = stat_combination(sub_pres)
        stat_rows.append(st)

# DataFrame.append was removed in pandas 2.0 and re-copied the whole frame on
# every call; concatenating the collected rows once gives the same table.
stat = pd.concat(stat_rows) if stat_rows else pd.DataFrame()

# print_pick_stat already provides miss_total; recomputed to keep the column
# guaranteed consistent with the two miss counts.
stat['miss_total'] = stat['miss_ship'] + stat['miss_empty']

In [None]:
stat.sort_values('log_loss', ascending=True)

In [None]:
# ImageIds whose vote_pick is positive.
# NOTE(review): in the visible cells `test_fact` exists only as a local inside
# stat_combination (its returned frame is bound to `fact`) — confirm which
# name this cell is meant to use.
img_valid = test_fact.loc[test_fact.vote_pick > 0, 'ImageId']

In [None]:
test_fact.head()

In [None]:
# ImageIds where mrcnn_ship_cnt differs from act_ship_cnt.
# NOTE(review): the variable is called `right` but the filter keeps the
# mismatching rows — confirm the intended comparison.
right = test_fact.loc[test_fact.mrcnn_ship_cnt != test_fact.act_ship_cnt, 'ImageId']

In [None]:
# Number of ImageIds in `right` divided by the number of rows in `suball`.
# NOTE(review): `suball` is not defined in the visible cells — confirm where
# it comes from.
right.shape[0] / suball.shape[0]

In [None]:
test_fact.loc[np.logical_and(test_fact.mrcnn_ship == 0, test_fact.act_ship <= 0), 'ImageId'].shape

In [None]:
def add_area(df):
    """Add an `area` column with the summed run lengths of each RLE string.

    `EncodedPixels` is read as whitespace-separated (start, length) pairs and
    the lengths are added up; an empty string gives an area of 0.

    The frame is modified in place (missing values anywhere in `df` become ''
    and the `area` column is added) and the same object is returned.
    """
    df.fillna('', inplace=True)
    df['split'] = df['EncodedPixels'].str.split()

    def _run_length_total(tokens):
        # Lengths sit at the odd positions: start, length, start, length, ...
        return sum(int(tokens[pos]) for pos in range(1, len(tokens) + 1, 2))

    df['area'] = df['split'].apply(_run_length_total)
    del df['split']
    return df
# NOTE(review): in the visible source `sub_check` is not assigned at notebook
# scope before this cell (it is a local inside ensemble_noship) — confirm
# which frame is meant here.
sub_check = add_area(sub_check)
# ImageIds of rows whose summed RLE run length exceeds 4000.
big_ships = sub_check.loc[sub_check.area > 4000, 'ImageId']


In [4]:
def count_ships(df):
    """Count the non-empty `EncodedPixels` rows per `ImageId`.

    Side effects on `df` (in place): missing values are replaced by '' and a
    boolean `cnt` column is added.

    Returns a Series indexed by ImageId with dtype np.uint.
    """
    df.fillna('', inplace=True)
    df['cnt'] = df['EncodedPixels'].map(lambda rle: len(rle) > 0)
    per_image = df.groupby('ImageId')['cnt'].sum()
    return per_image.astype(np.uint)

In [7]:
# (RLE frame, column label) for each prediction source plus the reference.
agg_list = [
    (inception_unet, 'incept_unet'),
    (mrcnn, 'mrcnn'),
    (test_result, 'truth'),
]

# One column of per-image ship counts per source, aligned on ImageId.
df_all = pd.DataFrame()
for rle, name in agg_list:
    agg = count_ships(rle)
    df_all[name] = agg

# Flag, for every model column, where it disagrees with the 'truth' column.
model_cols = [c for c in df_all.columns if 'wrong' not in c and 'truth' not in c]
for col in model_cols:
    # Model reports ships where the reference has none.
    df_all['wrong_ship_{}'.format(col)] = (df_all[col] > 0) & (df_all.truth == 0)
    # Model reports nothing where the reference has ships.
    df_all['wrong_empty_{}'.format(col)] = (df_all[col] == 0) & (df_all.truth > 0)
    # Any difference in the ship count.
    df_all['wrong_cnt_{}'.format(col)] = df_all[col] != df_all.truth
df_all.sum()

incept_unet                 40464.0
mrcnn                      163174.0
truth                       25693.0
wrong_ship_incept_unet       4713.0
wrong_empty_incept_unet      3028.0
wrong_cnt_incept_unet       13160.0
wrong_ship_mrcnn            28459.0
wrong_empty_mrcnn             488.0
wrong_cnt_mrcnn             34566.0
dtype: float64

In [78]:
def ensemble_noship(df_mrcnn, savefile, send=True):
    """Blank MRCNN masks on images inception_unet counts as empty, then save.

    Parameters
    ----------
    df_mrcnn : DataFrame
        Needs `ImageId` and `EncodedPixels` columns. A `cnt` helper column
        (as added by count_ships) is dropped if present. The caller's frame is
        left unmodified, so the function can be called repeatedly.
    savefile : str
        File stem; the result is written to '../result/{savefile}.csv'.
    send : bool, default True
        Forwarded to kaggle_util.save_result (presumably controls the Kaggle
        upload — confirm against save_result).

    Notes
    -----
    Reads the notebook-level `df_all` frame, which must be indexed by ImageId
    and contain an `incept_unet` count column.
    """
    # Images for which the inception_unet count is zero.
    empty_img = df_all.index[df_all.incept_unet == 0]

    # drop() returns a new frame, so the caller's data keeps its columns;
    # errors='ignore' tolerates a frame without the helper column.
    sub_check = df_mrcnn.drop(columns='cnt', errors='ignore')
    print(sub_check.head())

    # One vectorised membership test instead of scanning the whole frame once
    # per empty image.
    is_empty = sub_check['ImageId'].isin(empty_img)
    sub_check.loc[is_empty, 'EncodedPixels'] = ''
    sub_check = sub_check.fillna('')
    # A blanked image may now own several identical rows; keep a single one.
    sub_check = sub_check.drop_duplicates()

    print(sub_check.head())

    kaggle_util.save_result(sub_check, '../result/{}.csv'.format(savefile),
                            competition = 'airbus-ship-detection',
                            send = send, index = False)
    
# Build the inception_unet + MRCNN ensemble; the result is handed to
# save_result with send=True (the recorded output shows a Kaggle submit command).
ensemble_noship(mrcnn, 'inceptunet_mrcnn')

         ImageId                                      EncodedPixels
0  0001124c7.jpg                                                   
1  000194a2d.jpg  556637 14 557393 33 558142 75 558906 82 559674...
2  000194a2d.jpg  94833 3 95596 11 96362 15 97128 18 97895 19 98...
3  0001b1832.jpg                                                   
4  00052ed46.jpg  408106 6 408873 8 409640 10 410408 10 411175 1...


HBox(children=(IntProgress(value=0, max=78474), HTML(value='')))

         ImageId                                      EncodedPixels
0  0001124c7.jpg                                                   
1  000194a2d.jpg  556637 14 557393 33 558142 75 558906 82 559674...
2  000194a2d.jpg  94833 3 95596 11 96362 15 97128 18 97895 19 98...
3  0001b1832.jpg                                                   
4  00052ed46.jpg                                                   
save result
upload result
cmd: kaggle competitions submit -c airbus-ship-detection -f ../result/inceptunet_mrcnn.csv.7z -m "submit"


HBox(children=(IntProgress(value=0, max=78474), HTML(value='')))

In [34]:
sub_check.head()

Unnamed: 0,ImageId,EncodedPixels
0,0001124c7.jpg,
2,000194a2d.jpg,17457 25 18225 25 18993 25 19761 25 20529 25 2...
3,000194a2d.jpg,94833 6 95597 11 96362 15 97128 17 97896 17 98...
4,0001b1832.jpg,
5,00052ed46.jpg,


In [35]:
# Rows with an empty EncodedPixels string divided by the number of distinct ImageIds.
(sub_check.EncodedPixels=='').sum() / sub_check.ImageId.nunique()

0.8879209039548023

In [36]:
# Save the notebook-level `sub_check` under a new file name with send = True
# (the recorded output shows a Kaggle submit command).
# NOTE(review): the visible cells do not show where the notebook-level
# `sub_check` used here is built — confirm its origin before re-running.
kaggle_util.save_result(sub_check, '../result/inceptunet_mrcnnres101.csv', 
                        competition = 'airbus-ship-detection', 
                        send = True, index = False)

save result
upload result
cmd: kaggle competitions submit -c airbus-ship-detection -f ../result/inceptunet_mrcnnres101.csv.7z -m "submit"


In [None]:
sub_old = pd.read_csv('../result/sub_noship_all_0.874.csv')

In [2]:
folds = pd.read_csv('../input/folds.csv')

In [3]:
folds

Unnamed: 0,ImageId,ships,has_ship,fold,holdout
0,00003e153.jpg,0,0,2,0
1,000155de5.jpg,1,1,1,1
2,00021ddc3.jpg,9,1,1,0
3,0002756f7.jpg,2,1,0,0
4,0002d0f32.jpg,0,0,1,0
5,000303d4d.jpg,0,0,1,0
6,00031f145.jpg,1,1,2,1
7,00053c6ba.jpg,1,1,0,0
8,00057a50d.jpg,0,0,0,1
9,0005d01c8.jpg,2,1,1,0
