In [21]:
import torch
from torch.autograd import Variable as V
import torchvision.models as models
from torchvision import transforms as trn
from torch.nn import functional as F
import os
import numpy as np
import json
import csv
from PIL import Image
from tqdm import tqdm
import string
import time
import random
import cv2

In [22]:
root_path = './demo'
video_path = os.path.join(root_path,'basketball_01.mp4')
# Video name without its directory or extension, e.g. 'basketball_01'.
file_name = os.path.splitext(os.path.basename(video_path))[0]

# Save every frame of the video as a zero-padded JPEG (00000.jpg, 00001.jpg, ...).
# The frames go into the 'raw' sub-folder of the per-video directory because
# that is the folder the segmentation configuration reads its images from
# (img_path = './demo/basketball_01/raw').
raw_path = os.path.join(root_path, file_name, 'raw')
os.makedirs(raw_path, exist_ok=True)

# Open the video file and fail loudly instead of silently extracting nothing.
cap = cv2.VideoCapture(video_path)
if not cap.isOpened():
    raise IOError('Cannot open video file: {}'.format(video_path))
try:
    i = 0
    while True:
        ret, frame = cap.read()
        if not ret:
            # No more frames could be read.
            break
        frame_path = os.path.join(raw_path, str(i).zfill(5) + '.jpg')
        # cv2.imwrite reports failure through its return value, not an exception.
        if not cv2.imwrite(frame_path, frame):
            raise IOError('Cannot write frame: {}'.format(frame_path))
        i += 1
finally:
    # Release the capture even if reading or writing a frame failed.
    cap.release()

# Segmentation

In [23]:
import argparse
from distutils.version import LooseVersion
# Numerical libs
import numpy as np
import torch
import torch.nn as nn
from scipy.io import loadmat
import csv
# Our libs
from mit_semseg.dataset import TestDataset
from mit_semseg.models import ModelBuilder, SegmentationModule
from mit_semseg.utils import colorEncode, find_recursive, setup_logger
from mit_semseg.lib.nn import user_scattered_collate, async_copy_to
from mit_semseg.lib.utils import as_numpy
from PIL import Image
from tqdm import tqdm
from mit_semseg.config import cfg


In [24]:
# Color table read from the 'colors' entry of color150.mat.
colors = loadmat('./semantic-segmentation-pytorch/data/color150.mat')['colors']

# Map the integer in the first CSV column to the first ';'-separated entry of
# the sixth column (presumably class index -> primary class name; verify
# against the CSV header).
with open('./semantic-segmentation-pytorch/data/object150_info.csv') as f:
    reader = csv.reader(f)
    next(reader)  # skip the header row
    names = {int(record[0]): record[5].split(";")[0] for record in reader}

In [25]:
def test(segmentation_module, loader, gpu):
    """Run inference on every item of ``loader`` and save the results.

    For each batch only the first element (``batch_data[0]``) is processed.
    The module is evaluated once per entry of ``batch_data['img_data']``; each
    prediction is scaled by ``1 / len(cfg.DATASET.imgSizes)`` and accumulated
    into a single score tensor, which is then passed to ``visualize_result``
    together with the per-pixel arg-max prediction.

    Args:
        segmentation_module: module called as
            ``segmentation_module(feed_dict, segSize=segSize)``.
        loader: iterable of batches; each batch element is a dict with the
            keys ``'img_ori'``, ``'img_data'`` and ``'info'``.
        gpu: device identifier forwarded to ``async_copy_to``.
    """
    segmentation_module.eval()

    # The context manager closes the progress bar when the loop ends or raises;
    # an unclosed bar leaks and makes later bars render as nested lines.
    with tqdm(total=len(loader)) as pbar:
        for batch_data in loader:
            # process data
            batch_data = batch_data[0]
            # Output size is taken from the first two dimensions of the
            # original image.
            segSize = (batch_data['img_ori'].shape[0],
                       batch_data['img_ori'].shape[1])
            img_resized_list = batch_data['img_data']

            with torch.no_grad():
                scores = torch.zeros(1, cfg.DATASET.num_class, segSize[0], segSize[1])
                scores = async_copy_to(scores, gpu)

                for img in img_resized_list:
                    # Feed one resized image at a time; the original image and
                    # the info entry are not inputs of the network.
                    feed_dict = batch_data.copy()
                    feed_dict['img_data'] = img
                    del feed_dict['img_ori']
                    del feed_dict['info']
                    feed_dict = async_copy_to(feed_dict, gpu)

                    # forward pass
                    pred_tmp = segmentation_module(feed_dict, segSize=segSize)
                    scores = scores + pred_tmp / len(cfg.DATASET.imgSizes)

                # Highest-scoring class index along the class dimension.
                _, pred = torch.max(scores, dim=1)
                pred = as_numpy(pred.squeeze(0).cpu())

            # visualization
            visualize_result(
                (batch_data['img_ori'], batch_data['info']),
                pred,
                cfg, scores
            )
            pbar.update(1)

In [26]:
def visualize_result(data, pred, cfg, scores):
    """Save the top-3 class scores and class labels of one image as PNG files.

    Two 3-channel images are written below ``cfg.TEST.result``:

    * ``sem_score/<name>.png`` - the three highest scores per pixel, multiplied
      by 255 and truncated to ``uint8``;
    * ``sem_label/<name>.png`` - the class indices of those scores as ``uint8``.

    In both images the channels are stored in reversed rank order, i.e. the
    last channel holds the best-scoring class.

    Args:
        data: ``(img, info)`` pair. Only ``info`` is used: it is the path of
            the source image and its base name (without extension) names the
            output files.
        pred: unused; kept so that existing callers keep working.
        cfg: configuration object; ``cfg.TEST.result`` is the output root.
        scores: tensor indexed as ``[0, class, row, col]`` with at least three
            classes.
    """
    _, info = data
    # Drop the extension, whatever it is, so the output is always a real PNG
    # (a plain '.jpg' -> '.png' replacement would leave e.g. '.jpeg' untouched).
    img_stem = os.path.splitext(os.path.basename(info))[0]

    sem_scores, sem_labels = torch.topk(scores[0], 3, dim=0)

    # (3, H, W) -> (H, W, 3), then reverse the channel order.
    sem_scores = np.transpose(sem_scores.cpu().numpy(), (1, 2, 0))[..., [2, 1, 0]]
    sem_labels = np.transpose(sem_labels.cpu().numpy(), (1, 2, 0))[..., [2, 1, 0]]

    outputs = (
        ('sem_score', (sem_scores * 255).astype(np.uint8)),
        # Labels are cast straight from integers: a float /255 then *255
        # round-trip followed by truncation can be off by one.
        ('sem_label', sem_labels.astype(np.uint8)),
    )
    for sub_dir, array in outputs:
        out_dir = os.path.join(cfg.TEST.result, sub_dir)
        # The sub-folders are not created anywhere else; saving would fail
        # without them.
        os.makedirs(out_dir, exist_ok=True)
        Image.fromarray(array).save(os.path.join(out_dir, img_stem + '.png'))

In [27]:
def main(cfg, gpu):
    """Build the segmentation network and run inference over ``cfg.list_test``.

    Args:
        cfg: configuration object providing the ``MODEL``, ``DATASET`` and
            ``TEST`` sections plus ``list_test``.
        gpu: CUDA device index selected before the model is built.
    """
    torch.cuda.set_device(gpu)

    model_cfg = cfg.MODEL

    # Encoder / decoder construction from the configured architectures and weights.
    encoder = ModelBuilder.build_encoder(
        arch=model_cfg.arch_encoder,
        fc_dim=model_cfg.fc_dim,
        weights=model_cfg.weights_encoder,
    )
    decoder = ModelBuilder.build_decoder(
        arch=model_cfg.arch_decoder,
        fc_dim=model_cfg.fc_dim,
        num_class=cfg.DATASET.num_class,
        weights=model_cfg.weights_decoder,
        use_softmax=True,
    )

    # SegmentationModule takes a criterion as its third argument.
    segmentation_module = SegmentationModule(
        encoder, decoder, nn.NLLLoss(ignore_index=-1))

    # Test dataset wrapped in a loader that keeps the input order.
    loader_options = dict(
        batch_size=cfg.TEST.batch_size,
        shuffle=False,
        collate_fn=user_scattered_collate,
        num_workers=5,
        drop_last=True,
    )
    loader_test = torch.utils.data.DataLoader(
        TestDataset(cfg.list_test, cfg.DATASET), **loader_options)

    segmentation_module.cuda()

    # Run inference over the whole loader.
    test(segmentation_module, loader_test, gpu)

    print('Inference done!')

In [28]:
# Load the base configuration, then override the entries specific to this run.
config_path = './semantic-segmentation-pytorch/config/ade20k-resnet50dilated-ppm_deepsup.yaml'
cfg.merge_from_file(config_path)

cfg.DIR = './semantic-segmentation-pytorch/ckpt/ade20k-resnet50dilated-ppm_deepsup'
cfg.TEST.checkpoint = 'epoch_20.pth'
cfg.TEST.result = './demo/basketball_01/'

cfg.MODEL.arch_encoder = cfg.MODEL.arch_encoder.lower()
cfg.MODEL.arch_decoder = cfg.MODEL.arch_decoder.lower()

# paths of the model weights, built from the checkpoint directory
# (relative to the working directory, like cfg.DIR)
cfg.MODEL.weights_encoder = os.path.join(
    cfg.DIR, 'encoder_' + cfg.TEST.checkpoint)
cfg.MODEL.weights_decoder = os.path.join(
    cfg.DIR, 'decoder_' + cfg.TEST.checkpoint)

# Check each file separately so the message names the missing checkpoint.
for weights_path in (cfg.MODEL.weights_encoder, cfg.MODEL.weights_decoder):
    assert os.path.exists(weights_path), \
        "checkpoint does not exist: {}".format(weights_path)


# read data: either every image found under a directory, or a single image path
img_path = './demo/basketball_01/raw'
if os.path.isdir(img_path):
    imgs = find_recursive(img_path)
else:
    imgs = [img_path]
assert len(imgs), "imgs should be a path to image (.jpg) or directory."

cfg.list_test = [{'fpath_img': x} for x in imgs]

# Create the output directory; exist_ok avoids a separate existence check.
os.makedirs(cfg.TEST.result, exist_ok=True)

In [29]:
# Time the whole inference run on GPU 0. perf_counter is a monotonic clock,
# so the measured interval is not affected by system clock adjustments.
time_start = time.perf_counter()
main(cfg, 0)
time_end = time.perf_counter()
print('Running time: {}'.format(time_end - time_start))

Loading weights for net_encoder





  0%|          | 0/90 [00:00<?, ?it/s][A[A[A

Loading weights for net_decoder
# samples: 90





  1%|          | 1/90 [00:02<03:40,  2.48s/it][A[A[A


  2%|▏         | 2/90 [00:04<03:16,  2.24s/it][A[A[A


  3%|▎         | 3/90 [00:05<03:01,  2.08s/it][A[A[A


  4%|▍         | 4/90 [00:07<02:49,  1.97s/it][A[A[A


  6%|▌         | 5/90 [00:09<02:42,  1.91s/it][A[A[A


  7%|▋         | 6/90 [00:11<02:36,  1.86s/it][A[A[A


  8%|▊         | 7/90 [00:12<02:31,  1.83s/it][A[A[A


  9%|▉         | 8/90 [00:14<02:28,  1.81s/it][A[A[A


 10%|█         | 9/90 [00:16<02:23,  1.77s/it][A[A[A


 11%|█         | 10/90 [00:18<02:22,  1.78s/it][A[A[A


 12%|█▏        | 11/90 [00:19<02:20,  1.77s/it][A[A[A


 13%|█▎        | 12/90 [00:21<02:16,  1.75s/it][A[A[A


 14%|█▍        | 13/90 [00:23<02:14,  1.75s/it][A[A[A


 16%|█▌        | 14/90 [00:24<02:11,  1.73s/it][A[A[A


 17%|█▋        | 15/90 [00:26<02:10,  1.75s/it][A[A[A


 18%|█▊        | 16/90 [00:28<02:11,  1.77s/it][A[A[A


 19%|█▉        | 17/90 [00:30<02:08,  1.75s/it][A[A[A


 20

Inference done!
Runing time: 159.04933881759644
