In [22]:
import pandas as pd
from tqdm import tqdm_notebook
import numpy as np
import cv2
import matplotlib.pyplot as plt
import os
from dataset import sigmoid

In [23]:
def post_process(probability, threshold, min_size):
    """
    Post processing of each predicted mask, components with lesser number of pixels
    than `min_size` are ignored
    """
    # don't remember where I saw it
    mask = cv2.threshold(probability, threshold, 1, cv2.THRESH_BINARY)[1]
    num_component, component = cv2.connectedComponents(mask.astype(np.uint8))
    predictions = np.zeros((350, 525), np.float32)
    num = 0
    for c in range(1, num_component):
        p = (component == c)
        if p.sum() > min_size:
            predictions[p] = 1
            num += 1
    return predictions, num

def rle_decode(mask_rle: str = '', shape: tuple = (350, 525)):
    '''
    Decode rle encoded mask.
    
    :param mask_rle: run-length as string formatted (start length)
    :param shape: (height, width) of array to return 
    Returns numpy array, 1 - mask, 0 - background
    '''
    s = mask_rle.split()
    starts, lengths = [np.asarray(x, dtype=int) for x in (s[0:][::2], s[1:][::2])]
    starts -= 1
    ends = starts + lengths
    img = np.zeros(shape[0] * shape[1], dtype=np.uint8)
    for lo, hi in zip(starts, ends):
        img[lo:hi] = 1
    return img.reshape(shape, order='F')

def mask2rle(img):
    '''
    Convert mask to rle.
    img: numpy array, 1 - mask, 0 - background
    Returns run length as string formated
    '''
    pixels= img.T.flatten()
    pixels = np.concatenate([[0], pixels, [0]])
    runs = np.where(pixels[1:] != pixels[:-1])[0] + 1
    runs[1::2] -= runs[::2]
    return ' '.join(str(x) for x in runs)
def dice(img1, img2):
    img1 = np.asarray(img1).astype(np.bool)
    img2 = np.asarray(img2).astype(np.bool)

    intersection = np.logical_and(img1, img2)

    return 2. * intersection.sum() / (img1.sum() + img2.sum())

def check_null(sub):
    count = 0
    name_list = []
    null_list = []
    mask_list = 0
    total_null = 0
    for i in tqdm_notebook(range(len(sub))):
        name, mask = sub.iloc[i]
        name = name.split('_')[0]
        if name not in name_list:
            if not mask_list:
                total_null += 1
                null_list.append(sub.iloc[i-1][0].split('_')[0])
            mask_list = 0
            name_list.append(name)
            
        if mask:
            mask_list += 1
        
    return total_null, null_list

def refine(null_list, original_out, encoded, threshold, msize):
    resign = []
    for i in tqdm_notebook(range(len(dummy_df))):
        if dummy_df.iloc[i].iat[0].split('_')[0] in null_list:
            predict, num_predict = post_process(original_out[i], threshold, msize)
            if num_predict == 0:
                resign.append('')
            else:
                resign.append(mask2rle(predict))
        else:
            resign.append('')
    # merge
    final_out = []
    for i in range(len(original_out)):
        if encoded[i] != '':
            final_out.append(encoded[i])
        elif resign[i] != '':
            final_out.append(resign[i])
        else:
            final_out.append('')
    checkc = pd.read_csv('./sample_submission.csv')
    checkc['EncodedPixels'] = final_out
    # sub.to_csv('ALL_post_ensemble_msize5000.csv', columns=['Image_Label', 'EncodedPixels'], index=False)
    num,new_list = check_null(checkc)
    print(num)
    return final_out, new_list

In [24]:
path = "./csv"
dummy_df = pd.read_csv(f"{path}/sub_effib2.csv")

for i, csv in enumerate(os.listdir(path)):
#     if csv.startswith("Uneteffi"):
#     if "effib2_" in csv:
    if csv.endswith(".csv"):
        print("loading: ",csv)
        df = pd.read_csv(f"{path}/"+csv)
        df = df.rename(columns={"EncodedPixels":i})
        dummy_df = pd.merge(dummy_df,df,on=['Image_Label'])
if 1:
    dummy_df = dummy_df.drop(dummy_df.columns[1],axis=1)

loading:  UNetresnet34_best_x1mask.csv
loading:  Uneteffib2_640.csv
loading:  sub_effib2.csv
loading:  Unetresnet34_last_640.csv
loading:  Uneteffib2_256.csv
loading:  FPNeffib2_384.csv
loading:  FPNresnet34_best_384.csv
loading:  FPNeffib2_640.csv
loading:  UNetresnet34_last_x2mask.csv
loading:  PSPNetresnet34_640.csv
loading:  UNetresnet34_best_x2mask.csv
loading:  UNetresnet34_last_x1mask.csv
loading:  Unetresnet34_best_640.csv
loading:  FPNresnet34_best_320.csv
loading:  FPNeffib2_256.csv
loading:  PSPNetresnet34_384.csv
loading:  PSPNetresnet34_256.csv
loading:  FPNresnet34_last_640.csv
loading:  Uneteffib2_384.csv
loading:  FPNresnet34_best_640.csv
loading:  FPNeffib2_320.csv
loading:  FPNresnet34_last_384.csv
loading:  FPNeffib2_64.csv
loading:  FPNresnet34_last_320.csv


In [25]:
dummy_df.head()

Unnamed: 0,Image_Label,0,EncodedPixels_best_x,EncodedPixels_last_x,2,3,EncodedPixels_best_y,EncodedPixels_last_y,EncodedPixels_best_x.1,EncodedPixels_last_x.1,...,17,EncodedPixels_best_x.2,EncodedPixels_last_x.2,19,EncodedPixels_best_y.1,EncodedPixels_last_y.1,21,EncodedPixels_best,EncodedPixels_last,23
0,002f507.jpg_Fish,,,,,,,,,,...,,,,,,,,,,
1,002f507.jpg_Flower,,,,,,,,,,...,,,,,,,,,,
2,002f507.jpg_Gravel,23 3 83 13 97 3 101 7 115 41 159 2 162 3 168 4...,11 226 240 40 282 9 295 3 303 18 356 338 705 3...,8 319 355 339 704 342 1053 344 1402 346 1752 3...,5 337 353 343 702 346 1052 347 1401 348 1751 3...,65 7 104 4 115 2 140 1 146 5 354 330 703 335 1...,95 2 98 42 160 1 164 17 360 300 706 326 1054 3...,363 234 606 48 706 321 1054 333 1404 336 1753 ...,6 325 355 329 704 334 1053 337 1402 340 1751 3...,7 323 356 330 704 336 1053 340 1402 342 1751 3...,...,7 320 355 324 703 329 1053 332 1402 337 1752 3...,9 230 243 4 256 54 353 340 702 343 1052 345 14...,7 310 322 3 352 340 702 343 1052 345 1402 346 ...,10 317 358 321 706 325 1055 329 1404 333 1753 ...,10 312 359 323 708 326 1057 329 1406 332 1755 ...,4 311 353 322 702 327 1052 332 1401 336 1751 3...,3 236 244 82 353 326 702 330 1051 335 1401 338...,1 59846 59851 345 60201 344 60551 342 60901 34...,1 59496 59501 346 59851 345 60201 344 60551 34...,6 319 355 325 704 329 1053 332 1402 335 1752 3...
3,002f507.jpg_Sugar,,,,,,,,,,...,,,,,,,,,1238 5 1583 24 1930 32 2277 37 2624 42 2971 47...,
4,0035ae9.jpg_Fish,,,,,,61389 3 61737 7 62085 18 62431 30 62780 34 631...,53309 4 53642 24 53990 28 54339 31 54687 34 55...,62441 2 62782 3 62790 5 63128 19 63473 26 6382...,104771 3 105118 10 105467 17 105815 36 106165 ...,...,,,,,95702 10 96050 15 96072 2 96399 26 96748 28 97...,,,45222 5 45569 10 45918 13 46266 17 46615 20 46...,,


In [26]:
output_list = []
for i in tqdm_notebook(range(len(dummy_df))):
    index = np.where(dummy_df.iloc[i].isnull()==False,1,0)
    tol = sum(index) - 1
    ave = np.zeros((350, 525))
    for j in range(len(index[1:])):
        if index[1:][j]:
#             ave += rle_decode(dummy_df.iat[i,j+1])
            ave += rle_decode(dummy_df.iloc[i].iat[j+1])
        else:
            continue
    ave /= len(index[1:])
    output_list.append(ave)

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  


HBox(children=(IntProgress(value=0, max=14792), HTML(value='')))




In [31]:
encoded = []
for i in tqdm_notebook(range(len(output_list))):
    predict, num_predict = post_process(output_list[i], 0.65, 15000)
    if num_predict == 0:
        encoded.append('')
    else:
        encoded.append(mask2rle(predict))

sub = pd.read_csv('./sample_submission.csv')
sub['EncodedPixels'] = encoded
# sub.to_csv('effi_0.7_14000.csv', columns=['Image_Label', 'EncodedPixels'], index=False)
num, list_ = check_null(sub)
print(num)

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  


HBox(children=(IntProgress(value=0, max=14792), HTML(value='')))




Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`


HBox(children=(IntProgress(value=0, max=14792), HTML(value='')))


159


In [21]:
sub.to_csv('all_0.65_14000.csv', columns=['Image_Label', 'EncodedPixels'], index=False)

In [33]:
# reassigned null name
out, new_list = refine(list_,output_list,encoded, 0.6,13000)
# out, new_list = refine(new_list,output_list,out, 0.35,15000)

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`


HBox(children=(IntProgress(value=0, max=14792), HTML(value='')))




Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`


HBox(children=(IntProgress(value=0, max=14792), HTML(value='')))


120


In [35]:
sub = pd.read_csv('./sample_submission.csv')
sub['EncodedPixels'] = out
sub.to_csv('alal.csv', columns=['Image_Label', 'EncodedPixels'], index=False)
check_null(sub)[0]

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`


HBox(children=(IntProgress(value=0, max=14792), HTML(value='')))




120

In [10]:
import matplotlib.pyplot as plt
def check_mask(x,y=""):

    mask_encoded = final_out[x]
    assert mask_encoded != ''
    mask2 = rle_decode(mask_encoded)
    img = cv2.imread("./test_images/"+sub.iloc[x][0].split('_')[0])
    img = cv2.resize(img, dsize=(525, 350), interpolation=cv2.INTER_LINEAR)
    
    kernel = np.ones((5,5), np.uint8) 
#     mask1 = cv2.dilate(mask1,kernel, iterations=1)
#     mask1 = cv2.blur(mask1)
    
    plt.figure(figsize=(16,8))
    plt.subplot(1,2,1)
    plt.imshow(img)
    plt.subplot(1,2,2)
    plt.imshow(mask2)
    plt.show()

In [11]:
for i in range(25):
    n = np.random.randint(14792)
    try:
        check_mask(n)
    except:
        continue