## Problem:

- In out problem, need to detect the nearest pedestrian road to see if the car is violating or not 
- need to know which part is the pedestrian road and which part is the car road  

In [1]:
import os
import sys
import cv2
import time
import mmcv
import torch
import numpy as np
import ffmpeg
import matplotlib.pyplot as plt
from pathlib import Path
from tqdm import tqdm

repo_path = Path(".").absolute().parent

if os.system == "nt":
    data_path = Path("D:\Datas\parking_violation")
else:
    data_path = repo_path.parent / "data" / "parking_violation"
sys.path.append(str(repo_path))

package repo tree

```
├── data
│   ├── mmseg
│   │   └── checkpoints
│   └── parking_violation
├── mmsegmentation
│   └── configs
│       └── resnest
└── parking_violation
    ├── utils.py
    └── notebooks
        └── 04_Segmentation.ipynb
```

don't forget to download the weight first

**backbone: resnest**

```
!wget https://download.openmmlab.com/mmsegmentation/v0.5/resnest/deeplabv3plus_s101-d8_512x1024_80k_cityscapes/deeplabv3plus_s101-d8_512x1024_80k_cityscapes_20200807_144429-1239eb43.pth -P ~/code/data/mmseg/checkpoints/
```

**backbone: R-18-D8**

```
!wget https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r18-d8_512x1024_80k_cityscapes/deeplabv3plus_r18-d8_512x1024_80k_cityscapes_20201226_080942-cff257fe.pth -P ~/code/data/mmseg/checkpoints/
```

In [2]:
import mmseg
from mmseg.apis import inference_segmentor, init_segmentor, show_result_pyplot
from mmseg.core.evaluation import get_palette
from mmseg.datasets import CityscapesDataset
from utils import imsaver, correct_rgb, resize_image

model_config = "deeplabv3plus"
backbone_config = "resnest" # "r18-d8" 
backbone_dict = {
    "r18-d8": {
        "config_dir": "deeplabv3plus",
        "config": "deeplabv3plus_r18-d8_512x1024_80k_cityscapes.py",
        "checkpoint": "deeplabv3plus_r18-d8_512x1024_80k_cityscapes_20201226_080942-cff257fe.pth"
    },
    "resnest": {
        "config_dir": "resnest",
        "config": "deeplabv3plus_s101-d8_512x1024_80k_cityscapes.py",
        "checkpoint": "deeplabv3plus_s101-d8_512x1024_80k_cityscapes_20200807_144429-1239eb43.pth"
    } 
}


config_path = repo_path.parent / "mmsegmentation" / "configs" / backbone_dict[backbone_config]["config_dir"]
checkpoint_path = repo_path.parent / "data" / "mmseg" / "checkpoints"
if not checkpoint_path.exists():
    checkpoint_path.mkdir(parents=True)

config_file = str(config_path / backbone_dict[backbone_config]["config"])
checkpoint_file = str(checkpoint_path / backbone_dict[backbone_config]["checkpoint"])

In [3]:
# Preprocess video: Origin size is 720x1280
height, width = 480, 640
# height, width = 720, 1280
video_path = str(data_path / "origin" / "sample1.mp4")
resized_video_path = str(data_path / f"sample1_{height}x{width}.mp4")
resized_frames_path = data_path / f"sample1_{height}x{width}"
if not resized_frames_path.exists():
    resized_frames_path.mkdir()

frames_path = resized_frames_path / "img_dir"
# uncomment underline to resize video
# mmcv.resize_video(video_path, resized_video_path, (width, height))
# video = mmcv.VideoReader(resized_video_path)
# video.cvt2frames(frames_path)

## Load Model

In [4]:
model = init_segmentor(config_file, checkpoint_file, device="cuda:0")

In [5]:
from mmseg.datasets import build_dataloader, CustomDataset
from mmseg.apis.inference import LoadImage
from mmseg.datasets.pipelines import Compose
from mmcv.parallel import collate, scatter

In [6]:
def batch_inference_segmentor(model, imgs, test_pipeline):
    """Inference image(s) with the segmentor.

    Args:
        model (nn.Module): The loaded segmentor.
        imgs (list[str/ndarray]): Either image files or loaded images.
        
    Returns:
        (list[Tensor]): The segmentation result.
    """
    
    assert isinstance(imgs, list), "Must be a list type, contains images"
    device = next(model.parameters()).device  # model device
    
    # prepare data
    res = []
    for img in imgs:
        data = dict(img=img)
        data = test_pipeline(data)
        res.append(data)
    data = collate(res, samples_per_gpu=len(imgs))
    if next(model.parameters()).is_cuda:
        # scatter to specified GPU
        data = scatter(data, [device])[0]
    else:
        data['img_metas'] = [i.data[0] for i in data['img_metas']]
        
    # TODO: apply CRF in batch inference
    with torch.no_grad():
        result = model(return_loss=False, rescale=True, **data)
        
    return result

In [7]:
cfg = model.cfg
cfg.data.test.type = "Cityscapes"
cfg.data.workers_per_gpu = 1
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(2048, 1024),
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),            
        ]
    )
]
cfg.data.test.pipeline = test_pipeline
# dataset = CustomDataset(
#     cfg.data.test.pipeline, test_path, 
#     classes=CityscapesDataset.CLASSES, 
#     palette=CityscapesDataset.PALETTE,
#     test_mode=True)
# data_loader = build_dataloader(dataset, samples_per_gpu=3, workers_per_gpu=1, dist=False, shuffle=False, 
#                                dataloader_type="DataLoader")

---

In [8]:
def batch_frame_loader(video, batch_size=4, preprocess=None, *args):
    # TODO: change video --> available for str files or numpy arrays
    n = len(video) // batch_size
    for i in range(n):
        if i == n-1:
            batches = video[i:n]
        else:
            batches = video[i*batch_size:(i*batch_size+batch_size)]
        if preprocess:
            batches = list(map(preprocess, batches))
        yield batches
        
def preprocessor(x, ratio):
    # keep the certain height of image
    h, w, _ = x.shape
    assert ratio > 0.0 and ratio <= 1.0, "ratio must between (0.0, 1.0]"
    return x[:int(ratio*h)]

def post_process(x, pleft=0, ptop=30, pright=200, pbottom=30):
    h, w, _ = x.shape
    img = mmcv.impad(x, padding=(pleft, ptop, pright, pbottom), pad_val=0)
    # pts: lu, ld, rd, ru & each point = (x, y)
    pts = [np.array([(w,ptop),(w,ptop+h),(w+pright,ptop+h),(w+pright,ptop)])]
    cv2.fillPoly(img, pts=pts, color=(255,255,255))
    for i, (cls, clr) in enumerate(class2color.items()):
        percent = i/len(class2color)
        pos_x = int(w + 0.1*pright)
        pos_y = int(percent*h + ptop) + int(h*0.025)
        cv2.circle(img, (pos_x, pos_y), int(h*0.01), clr, thickness=-1)
        cv2.putText(img, cls, (pos_x+int(h*0.025), pos_y+3), cv2.FONT_HERSHEY_SIMPLEX, h*0.001, color=(0,0,0))

    return img

output_dir = resized_frames_path / "output" / backbone_config
if not output_dir.exists():
    output_dir.mkdir()
video = mmcv.VideoReader(resized_video_path)

batch_size = 4
ratio = 0.65
test_pipeline = [LoadImage()] + cfg.data.test.pipeline[1:]
test_pipeline = Compose(test_pipeline)
class2color = dict(zip(*[CityscapesDataset.CLASSES, CityscapesDataset.PALETTE]))
pleft, ptop, pright, pbottom = 0, 50, 150, 50
bgr_palette = np.array(CityscapesDataset.PALETTE)[..., ::-1]  # need to convert to bgr

In [9]:
data_loader = batch_frame_loader(video, batch_size=batch_size, preprocess=lambda x: preprocessor(x, ratio))

idx = 1
for frames in tqdm(data_loader, total=(len(video)//batch_size + 1), desc="Processing"):
    # TODO: apply CRF in batch inference
    result = batch_inference_segmentor(model, frames, test_pipeline)
    torch.cuda.empty_cache()
    for frame, res in zip(frames, result):
        overlay_img = model.show_result(frame, [res], palette=bgr_palette, show=False)
        img = post_process(overlay_img)
        imsaver(img, str(output_dir / f"sample1-{idx:06d}.jpg"))
        idx += 1

Processing: 100%|█████████▉| 905/906 [49:05<00:03,  3.25s/it]


In [10]:
from moviepy.editor import concatenate_videoclips, ImageClip
imgs = sorted(list(map(str, output_dir.glob("*.jpg"))))

clips = [ImageClip(m).set_duration(1/30) for m in imgs]
video_output_path = str(data_path / f"sample1_{height}x{width}_{backbone_config}_result.mp4")

concat_clip = concatenate_videoclips(clips, method="compose")
concat_clip.write_videofile(video_output_path, fps=30) #int(video.fps/step)

t:   0%|          | 0/3617 [00:00<?, ?it/s, now=None]

Moviepy - Building video /home/simonjisu/code/data/parking_violation/sample1_480x640_resnest_result.mp4.
Moviepy - Writing video /home/simonjisu/code/data/parking_violation/sample1_480x640_resnest_result.mp4



                                                                

Moviepy - Done !
Moviepy - video ready /home/simonjisu/code/data/parking_violation/sample1_480x640_resnest_result.mp4


In [58]:
import ipywidgets as wd
from IPython.display import display, HTML

In [60]:
HTML("""
<video width="640" height="300" controls>
  <source src="{}" type="video/mp4">
</video>
""".format(video_output_path))

---

**To be updated**

## Process Segmentation on Video

need to post process using CRF, apply it when on batch outcomes to classify the final output

### Conditional Random Field

CRF(Conditional Random Field) is a softmax

For example, we have images $X_i: (H, W, C)$ and segmented mask $Y_i: (H, W, K)$, $K$ is the number of classes to classify

### Maximum Entropy Markov Model (MEMM)

In [None]:
import torch
import torch.nn as nn
from crfseg import CRF

def freeeze(model):
    for param in model.parameters():
        param.requires_grad = False

model = init_segmentor(config_file, checkpoint_file, device="cuda:0")
cfg = model.cfg

In [None]:
cityspaces_path = repo_path.parent / "data" / "cityscapes"
train_dataset = CityscapesDataset(data_root=cityspaces_path, pipeline=cfg.train_pipeline, img_dir="leftImg8bit/train", test_mode=False)
val_dataset = CityscapesDataset(data_root=cityspaces_path, pipeline=cfg.test_pipeline, img_dir="leftImg8bit/val", test_mode=True)
test_dataset = CityscapesDataset(data_root=cityspaces_path, pipeline=cfg.test_pipeline, img_dir="leftImg8bit/test", test_mode=True)

In [None]:
CRFmodel = CRF(n_spatial_dims=len(train_dataset.CLASSES))

In [None]:
from mmseg.datasets import build_dataloader, build_dataset

In [None]:
seg_logit = model.inference(data['img'][0], data['img_metas'][0], rescale=True)