In [None]:
import os, json, random, cv2
import numpy as np, pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf, re, math
from tqdm import tqdm

from PIL import Image
import torch
from torchvision import transforms

# Intro

Thank you for visiting.   
In this notebook we try to remove the background from whale and dolphin images using [DeepLabV3](https://pytorch.org/hub/pytorch_vision_deeplabv3_resnet101/).


As a reference for image cropping and plotting, I used [this notebook](https://www.kaggle.com/lextoumbourou/happywhale-tfrecords-with-bounding-boxes) created by [Lex Toumbourou](https://www.kaggle.com/lextoumbourou).  
The bounding boxes come from the [Detic bounding box predictions](https://www.kaggle.com/c/happy-whale-and-dolphin/discussion/305503) created by [phalanx](https://www.kaggle.com/phalanx).

In [None]:
# Training metadata comes from the happywhale-splits fold file.
train_df = pd.read_csv('../input/happywhale-splits/skf_species_10folds.csv')
# The sample submission doubles as the test frame; each row gets a
# round-robin split id (0-9) derived from its row index.
test_df = pd.read_csv('../input/happy-whale-and-dolphin/sample_submission.csv').assign(
    split=lambda frame: frame.index % 10
)

In [None]:
# Preview the first rows of the training frame.
train_df.head()

In [None]:
def read_bbox(bbox):
    """Parse a whitespace-separated bounding-box string into a NumPy array of ints."""
    coords = list(map(int, bbox.split()))
    return np.array(coords)

In [None]:
def show_crop(row, crops=None, dataset='train'):
    """Plot the original image next to its requested bounding-box crops.

    Args:
        row: Record exposing ``image`` (file name) and ``detic_bbox``
            (whitespace-separated box string) attributes.
        crops: Set of crop names to draw. Defaults to ``{'yolov5', 'detic'}``.
            Only ``'detic'`` is drawn by this function, so panels reserved
            for any other name are hidden instead of being left as empty axes.
        dataset: Prefix of the image folder (``'{dataset}_images'``).
    """
    crops = crops or {'yolov5', 'detic'}
    _, axarr = plt.subplots(1, len(crops) + 1, figsize=(18, 6))

    # Do all reading/plotting while the file is open, then release the handle.
    with Image.open(f'../input/happy-whale-and-dolphin/{dataset}_images/{row.image}') as img:
        arr_num = 0
        axarr[arr_num].imshow(img)

        if 'detic' in crops:
            arr_num += 1
            box = read_bbox(row.detic_bbox)
            # An empty string and the '-1 -1 -1 -1' sentinel both mean
            # "no detection"; cropping with them would fail or give a 0x0 image.
            if box.size == 4 and not (box == -1).all():
                axarr[arr_num].set_title('Detic')
                axarr[arr_num].imshow(img.crop(tuple(box.tolist())))
            else:
                axarr[arr_num].set_title('Detic (no bbox)')
                axarr[arr_num].axis('off')

        # Hide panels that were reserved for crop names this function does not draw.
        for unused_ax in axarr[arr_num + 1:]:
            unused_ax.axis('off')

        plt.show()

# Detic

Load the [Detic bounding box predictions](https://www.kaggle.com/c/happy-whale-and-dolphin/discussion/305503) created by [phalanx](https://www.kaggle.com/phalanx).

In [None]:
# Detic bounding-box predictions for the train and test images.
detic_train_df = pd.read_csv('../input/whale2-cropped-dataset/train2.csv')
detic_test_df = pd.read_csv('../input/whale2-cropped-dataset/test2.csv')

In [None]:
# Preview the first rows of the Detic training predictions.
detic_train_df.head()

In [None]:
# Represent missing boxes as empty strings instead of NaN.
detic_train_df['box'] = detic_train_df['box'].fillna('')
detic_test_df['box'] = detic_test_df['box'].fillna('')

In [None]:
# Copy the Detic boxes onto the main frames.
# NOTE(review): this assignment aligns on the DataFrame index, not on the image
# name. It assumes both CSVs list the images in the same row order -- TODO
# confirm, or merge on the image column instead.
train_df['detic_bbox'] = detic_train_df.box
test_df['detic_bbox'] = detic_test_df.box

In [None]:
# Pick one arbitrary training row (position 1610) as a visual example.
row = train_df.iloc[1610]
row

In [None]:
# Show the example image next to its Detic crop.
show_crop(row, {'detic'})

Rows with a missing bounding box use `-1 -1 -1 -1` as the null value.

In [None]:
# Swap the empty-string placeholder for the explicit null box '-1 -1 -1 -1'.
train_df['detic_bbox'] = train_df['detic_bbox'].replace('', '-1 -1 -1 -1')
test_df['detic_bbox'] = test_df['detic_bbox'].replace('', '-1 -1 -1 -1')

# DEEPLABV3

In [None]:
def load_model(name='fcn_resnet101'):
    """Load a pretrained segmentation model from PyTorch Hub and put it in eval mode.

    Args:
        name: Entry point inside ``pytorch/vision:v0.10.0``. The default,
            ``'fcn_resnet101'``, is the model this notebook currently uses;
            note that it is an FCN, not a DeepLabV3 variant. Alternatives
            previously listed here were ``'deeplabv3_resnet101'``,
            ``'deeplabv3_resnet50'``, ``'deeplabv3_mobilenet_v3_large'`` and
            ``'fcn_resnet50'``.

    Returns:
        The loaded model after ``model.eval()`` has been called.
    """
    model = torch.hub.load('pytorch/vision:v0.10.0', name, pretrained=True)
    model.eval()
    return model

def make_transparent_foreground(pic, mask):
    """Return an RGBA copy of ``pic`` where every pixel outside ``mask`` is transparent.

    Args:
        pic: Image convertible with ``np.array`` (e.g. a PIL image). Grayscale
            input is expanded to three identical channels and an existing
            alpha channel is dropped, so non-RGB images no longer crash.
        mask: 2-D array with the same height and width as ``pic``; non-zero
            entries mark foreground pixels.

    Returns:
        ``uint8`` array of shape ``(H, W, 4)``. Foreground pixels keep their
        colour with alpha 255; all other pixels are ``(0, 0, 0, 0)``.

    Raises:
        ValueError: If ``pic`` does not have 1, 3 or 4 channels.
    """
    mask = np.asarray(mask)
    pixels = np.array(pic).astype('uint8')

    # Normalise the input to exactly three colour channels.
    if pixels.ndim == 2:
        pixels = pixels[:, :, np.newaxis]
    channels = pixels.shape[2]
    if channels == 1:
        pixels = np.repeat(pixels, 3, axis=2)
    elif channels in (3, 4):
        pixels = pixels[:, :, :3]
    else:
        raise ValueError(f'expected an image with 1, 3 or 4 channels, got {channels}')

    # Start fully opaque (alpha 255) and append the alpha plane as a 4th channel.
    alpha = np.full(mask.shape, 255, dtype='uint8')
    rgba = np.dstack([pixels, alpha])

    # Broadcast the mask over all four channels; background becomes all zeros,
    # which also zeroes alpha and therefore makes it transparent.
    keep = mask[:, :, np.newaxis].astype(bool)
    return np.where(keep, rgba, 0).astype(np.uint8)

In [None]:
# Instantiate the segmentation model once and reuse it below.
# NOTE: despite the variable name, load_model() currently returns 'fcn_resnet101'.
deeplab_model = load_model()

In [None]:
def remove_background(model, input_image):
    """Segment ``input_image`` with ``model`` and cut out everything predicted as class 0.

    Args:
        model: Segmentation network that is called as ``model(batch)['out']``
            and yields per-class scores for each pixel.
        input_image: PIL image to segment. Images in a mode other than RGB
            (e.g. grayscale or RGBA) are converted to RGB first, because the
            normalisation below uses three-channel statistics.

    Returns:
        Tuple ``(foreground, bin_mask)``: ``foreground`` is the RGBA array
        produced by ``make_transparent_foreground``; ``bin_mask`` is a
        ``uint8`` array that is 255 where the predicted class index is
        non-zero and 0 elsewhere.
    """
    # Without this, a non-RGB crop fails inside Normalize (channel mismatch).
    if isinstance(input_image, Image.Image) and input_image.mode != 'RGB':
        input_image = input_image.convert('RGB')

    preprocess = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])

    # Add a leading batch dimension: the model expects a mini-batch.
    input_batch = preprocess(input_image).unsqueeze(0)

    # Move the input and model to GPU for speed if available.
    if torch.cuda.is_available():
        input_batch = input_batch.to('cuda')
        model.to('cuda')

    with torch.no_grad():
        output = model(input_batch)['out'][0]
    # Per-pixel index of the highest-scoring class.
    output_predictions = output.argmax(0)

    # Binary mask: any class index other than 0 counts as foreground.
    mask = output_predictions.byte().cpu().numpy()
    bin_mask = np.where(mask, 255, 0).astype(np.uint8)

    foreground = make_transparent_foreground(input_image, bin_mask)

    return foreground, bin_mask

In [None]:
def show_image(deeplab_model, row, crops=None, dataset='train'):
    """Plot an image, its Detic crop, the extracted foreground and the binary mask.

    Args:
        deeplab_model: Segmentation model passed through to ``remove_background``.
        row: Record exposing ``image`` (file name) and ``detic_bbox``
            (whitespace-separated box string) attributes.
        crops: Set of crop names. Defaults to ``{'yolov5', 'detic'}``; only
            ``'detic'`` is handled. When ``'detic'`` is absent, or the row has
            no usable box, the whole image is segmented instead (previously
            this path raised ``NameError``).
        dataset: Prefix of the image folder (``'{dataset}_images'``).
    """
    crops = crops or {'yolov5', 'detic'}
    _, axarr = plt.subplots(1, 4, figsize=(18, 6))
    print(row.image)

    # Read, crop and segment while the file is open, then release the handle.
    with Image.open(f'../input/happy-whale-and-dolphin/{dataset}_images/{row.image}') as img:
        arr_num = 0
        axarr[arr_num].set_title(row.image)
        axarr[arr_num].imshow(img)

        # Segment the full image unless a usable Detic crop was requested.
        target = img
        if 'detic' in crops:
            box = read_bbox(row.detic_bbox)
            # An empty string and the '-1 -1 -1 -1' sentinel both mean "no detection".
            if box.size == 4 and not (box == -1).all():
                target = img.crop(tuple(box.tolist()))
                arr_num += 1
                axarr[arr_num].set_title('Detic')
                axarr[arr_num].imshow(target)

        foreground, bin_mask = remove_background(deeplab_model, target)

    arr_num += 1
    axarr[arr_num].set_title('foreground')
    axarr[arr_num].imshow(foreground)

    arr_num += 1
    axarr[arr_num].set_title('mask')
    axarr[arr_num].imshow(bin_mask)
    print(np.count_nonzero(bin_mask >1))

    # Hide any panel that ended up unused (e.g. when no crop was drawn).
    for unused_ax in axarr[arr_num + 1:]:
        unused_ax.axis('off')

    plt.show()

In [None]:
# Run the full pipeline on a single training row (position 161) as a visual check.
row = train_df.iloc[161]
show_image(deeplab_model, row, {'detic'})

It looks like it's working.  
However, for many images it does not work.  

In [None]:
def extract_mask_percentage(deeplab_model, row, crops=None, dataset='train'):
    """Return the fraction of the Detic crop that the model marks as foreground.

    Args:
        deeplab_model: Segmentation model passed through to ``remove_background``.
        row: Record exposing ``image`` (file name) and ``detic_bbox``
            (whitespace-separated box string) attributes.
        crops: Unused; kept so existing calls keep working.
        dataset: Prefix of the image folder (``'{dataset}_images'``).

    Returns:
        Foreground pixels divided by all pixels of the crop, as a value in
        ``[0, 1]`` (a fraction, despite the function name). Returns ``0.0``
        when segmentation fails or the crop is empty.
    """
    # Crop while the file is open, then release the handle.
    with Image.open(f'../input/happy-whale-and-dolphin/{dataset}_images/{row.image}') as img:
        detic_crop = img.crop(read_bbox(row.detic_bbox))

    try:
        _, bin_mask = remove_background(deeplab_model, detic_crop)
    except Exception:
        # Best effort: a crop the model cannot process counts as "no foreground".
        # ``Exception`` (not a bare except) lets KeyboardInterrupt through.
        return 0.0

    if bin_mask.size == 0:
        return 0.0

    # Divide by the real pixel count (height * width); the previous
    # height * height denominator was wrong for non-square crops.
    return np.count_nonzero(bin_mask > 0) / bin_mask.size

In [None]:
# Scan the first 500 training rows and collect the positions whose reported
# mask fraction exceeds 0.6.
success_remove_index = []
for i in tqdm(range(500)):
    row = train_df.iloc[i]
    percentage = extract_mask_percentage(deeplab_model, row, {'detic'})
    if not percentage > 0.6:
        continue
    print(i)
    success_remove_index.append(i)
    

In [None]:
# Row positions that passed the 0.6 mask-fraction threshold.
success_remove_index

In [None]:
# Plot every row that passed the threshold for visual inspection.
for i in success_remove_index:
    row = train_df.iloc[i]
    show_image(deeplab_model, row, {'detic'})

As you can see, removing the background is not working.   
Further improvements will be needed.