# 03 Semantic Segementation

In [1]:
!pip install wget #For colab



In [2]:
import wget, argparse, zipfile, tarfile, sys
from pathlib import Path

def simple_progress_bar(current, total, width=80):
    progress_message = "Downloading: %d%% [%d / %d] bytes" % (current / total * 100, current, total)
    sys.stdout.write("\r" + progress_message); sys.stdout.flush()

def get_and_unpack(url, location='data/'):
    Path(location).mkdir(exist_ok=True)
    filename = wget.download(url, out=location, bar=simple_progress_bar)
    zip_ref = zipfile.ZipFile(filename, 'r')
    zip_ref.extractall(path=location)
    zip_ref.close()

In [3]:
get_and_unpack(url='http://www.welchlabs.io/unccv/deep_learning/bbc_train.zip')

Downloading: 66% [60432384 / 90591239] bytes

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



- But wait, there's more! 
- Let's explore one more type of problem we can solve using a very similar architecture: **semantic segmentation**. 
- Our goal in semantic segmentation is to assign each pixel to a specific class. 
- Now is a good time to start thinking about what type of dimension we need for the output of our nueral network in order to solve this problem.

In [4]:
%pylab inline
from fastai.vision import *
from pathlib import Path
import pandas as pd
import json

### 1. Data

In [None]:
path=Path('data/bbc_train')

In [None]:
path.ls()

In [None]:
fnames = get_image_files(path/'images')
fnames[:3]

In [None]:
lbl_names = get_image_files(path/'masks')
lbl_names[:3]

In [None]:
img_f = fnames[0]
img = open_image(img_f)
img.show(figsize=(5,5))

In [None]:
def get_y_fn(x): return path/'masks'/(x.stem + '.png')
classes = array(['background', 'brick', 'ball', 'cylinder'])

In [None]:
mask = open_mask(get_y_fn(img_f))
mask.show(figsize=(5,5), alpha=1)

In [None]:
unique(mask.data)

In [None]:
src = (SegmentationItemList.from_folder(path/'images')
       .split_by_rand_pct(0.2)
       .label_from_func(get_y_fn, classes=classes))

In [None]:
data = (src.transform(get_transforms(), tfm_y=True, size=(256,256))
        .databunch(bs=16)
        .normalize(imagenet_stats))

In [None]:
data

In [None]:
data.show_batch(3, figsize=(6,6))

### 2. Model

- Now, as you may have guessed we need to implement another custom head. 
- However, this time we need a slightly more complicated architecture for this portion of our network. 
- The challenge here is that we need to **upsample**, or make our tensors larger as they pass through the network. 
- We can achieve this using a special operation called a **transposed convoulation** - we'll dig more into the details of this operation later. 
- The thing to know for knwo is that transposed convulutional layers in our neural network allow us to learn an "upsampling" or mapping form smaller to larger tensors. 

In [None]:
def conv_trans(ni, nf, ks = 4, stride = 2, padding = 1):
    return nn.Sequential(
        nn.ConvTranspose2d(ni, nf, kernel_size=ks, bias=False, stride=stride, padding = padding), 
        nn.ReLU(inplace = True), 
        nn.BatchNorm2d(nf))

In [None]:
custom_head=nn.Sequential(conv_trans(512, 256), 
                          conv_trans(256, 128),
                          conv_trans(128, 64),
                          conv_trans(64, 32), 
                          nn.ConvTranspose2d(32, 4, kernel_size=4, bias=False, stride=2, padding = 1))

- We need to intorduce a slightly specialized performance measure here, the accuracy evaluate pixel by pixel (like we're solving a classification problem for each pixel!

In [None]:
def pixel_accuracy(yhat, y):
    y_=y.squeeze(dim=1)
    yhat_=yhat.argmax(dim=1)
    return (y_==yhat_).sum().float()/y.numel()

In [None]:
learn=cnn_learner(data, models.resnet18, custom_head=custom_head, metrics=pixel_accuracy)

In [None]:
learn.freeze()

In [None]:
learn.lr_find()

In [None]:
learn.recorder.plot()

In [None]:
lr=1e-2
learn.fit_one_cycle(4, slice(lr/10, lr))

In [None]:
learn.unfreeze()

In [None]:
learn.lr_find()

In [None]:
learn.recorder.plot()

In [None]:
lr=1e-4
learn.fit_one_cycle(8, slice(lr/10, lr))

In [None]:
learn.save('basic-segmentation')

### 3. Results

In [None]:
def blend_image_mask(im, mask, alpha=0.7):
    im_mask=np.zeros((im.shape[1], im.shape[2], 3), dtype='float32')
    for j in range(3): im_mask[:,:,j]=im.mean(axis=0)
    for j in range(1, 4):
        im_mask[mask==j] = alpha*im_mask[mask==j] + (1-alpha)*np.array([float(i==(j-1)) for i in range(3)])
    return im_mask

In [None]:
def make_legend():
    custom_lines = [Line2D([0], [0], color='r', lw=2, alpha=0.5),
                    Line2D([0], [0], color='g', lw=2, alpha=0.5),
                    Line2D([0], [0], color='b', lw=2, alpha=0.5)]

    legend(custom_lines, ['Brick', 'Ball', 'Cylinder'])

In [None]:
denorm=learn.data.denorm

In [None]:
with torch.no_grad():
    x,y = next(iter(learn.data.valid_dl))
    yhat = to_np(learn.model(x))
    x=denorm(x)

In [None]:
x.shape, y.shape, yhat.shape

In [None]:
yhat=yhat.argmax(1)

In [None]:
fig=figure(0, (20, 10))

for i in range(8):
    fig.add_subplot(2,4,i+1)
    im_mask=blend_image_mask(to_np(x[i]), yhat[i])
    imshow(im_mask); axis('off'); make_legend()

## Save Model Weights

In [None]:
learn.export('/home/bbc-semantic-segmentation.pkl')