<a href="https://colab.research.google.com/github/kywch/deeplabv3-semantic-scene-segmentation/blob/master/Google-colab_scene_segmentation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
# check the pytorch installation
import torch
print(torch.cuda.is_available())       # Should be True
print(torch.backends.cudnn.enabled)    # Should be True

True
True


In [0]:
import torch.utils.data
import torch.nn as nn
from torch.autograd import Variable
import torch.optim as optim
import torch.nn.functional as F
import os
import csv
import numpy as np
import pickle
import matplotlib
matplotlib.use("Agg")
import matplotlib.pyplot as plt
import cv2

### Cloning the github repository to get the necessary files


In [0]:
!git clone https://github.com/kywch/deeplabv3-semantic-scene-segmentation.git

fatal: destination path 'deeplabv3-semantic-scene-segmentation' already exists and is not an empty directory.


In [0]:
# git repo files are placed under `deeplabv3-semantic-scene-segmentation
!ls

deeplabv3-semantic-scene-segmentation  images307.zip  sample_data
images307			       output


In [0]:
# loading the files from git repo
import sys

sys.path.append("deeplabv3-semantic-scene-segmentation/deeplabv3")
sys.path.append("deeplabv3-semantic-scene-segmentation/deeplabv3/model")
from deeplabv3 import DeepLabV3

sys.path.append("deeplabv3-semantic-scene-segmentation/deeplabv3/utils")
from utils import label_img_to_color


### Download the images to process

I am using the example images for this co-lab.

In [0]:
#!wget https://www.dropbox.com/s/s5pmkr2gqh8dx5p/images307.zip

--2020-05-29 02:37:01--  https://www.dropbox.com/s/s5pmkr2gqh8dx5p/images307.zip
Resolving www.dropbox.com (www.dropbox.com)... 162.125.1.1, 2620:100:6016:1::a27d:101
Connecting to www.dropbox.com (www.dropbox.com)|162.125.1.1|:443... connected.
HTTP request sent, awaiting response... 301 Moved Permanently
Location: /s/raw/s5pmkr2gqh8dx5p/images307.zip [following]
--2020-05-29 02:37:01--  https://www.dropbox.com/s/raw/s5pmkr2gqh8dx5p/images307.zip
Reusing existing connection to www.dropbox.com:443.
HTTP request sent, awaiting response... 302 Found
Location: https://uc93ef772b23135463505e21f901.dl.dropboxusercontent.com/cd/0/inline/A4kBcD1Pj6tPUDi7ynK_u5XfuRP6BIoGe5AkLdbKIPQQbyIVYS5jSUxoW747scNidE1UfkHCfoga42ustxOtaOvnjGk_cJZ9BZzKKBB2yfzM6uSeH_FCQlsmTinppieLrgk/file# [following]
--2020-05-29 02:37:02--  https://uc93ef772b23135463505e21f901.dl.dropboxusercontent.com/cd/0/inline/A4kBcD1Pj6tPUDi7ynK_u5XfuRP6BIoGe5AkLdbKIPQQbyIVYS5jSUxoW747scNidE1UfkHCfoga42ustxOtaOvnjGk_cJZ9BZzKKBB2yfzM6uS

In [0]:
#!ls
#!unzip -q images307.zip

In [0]:
lib_dir = "deeplabv3-semantic-scene-segmentation/deeplabv3/"

# Specify your output directory
#input_dir = "images307" # the image folder from the downloaded file
input_dir = "deeplabv3-semantic-scene-segmentation/input"
output_dir = "output"

In [0]:
network = DeepLabV3("eval_val", project_dir="deeplabv3-semantic-scene-segmentation/deeplabv3", 
                    pretrained_file=lib_dir+"resnet18-5c106cde.pth").cuda()

pretrained resnet, 18


In [0]:
network.load_state_dict(torch.load(lib_dir+"model_13_2_2_2_epoch_580.pth"))

<All keys matched successfully>

In [0]:
class DatasetVal(torch.utils.data.Dataset):
    def __init__(self, input_path, output_path):
        self.img_dir = input_path
        self.img_h = 1024
        self.img_w = 2048
        self.output_dir = output_path
        if not os.path.exists(output_path):
            os.makedirs(output_path)
        self.debug_dir = output_path + '/debug/'
        if not os.path.exists(self.debug_dir):
            os.makedirs(self.debug_dir)
        
        self.examples = []
        file_names = os.listdir(self.img_dir)
        for file_name in file_names:
            if file_name.lower().endswith(('.jpg', '.png', '.jpeg', '.BMP')):
                img_id = file_name.split('.')[0]
                img_path = self.img_dir + '/' + file_name
                example = {}
                example["img_path"] = img_path
                example["img_id"] = img_id
                self.examples.append(example)
        #print(self.examples)

        self.num_examples = len(self.examples)
        print(str(self.num_examples) + " images were identified from the input directory.")
        
    def __getitem__(self, index):
        example = self.examples[index]

        img_id = example["img_id"]
        img_path = example["img_path"]
        img = cv2.imread(img_path, -1) # (shape: (1024, 2048, 3))

        # normalize the img (with the mean and std for the pretrained ResNet):
        img = img/255.0
        img = img - np.array([0.485, 0.456, 0.406])
        img = img/np.array([0.229, 0.224, 0.225]) # (shape: (512, 1024, 3))
        img = np.transpose(img, (2, 0, 1)) # (shape: (3, 512, 1024))
        img = img.astype(np.float32)

        # convert numpy -> torch:
        img = torch.from_numpy(img) # (shape: (3, 512, 1024))
        #label_img = torch.from_numpy(label_img) # (shape: (512, 1024))
        print(img_id)
        return (img, img_id)

    def __len__(self):
        return self.num_examples


In [0]:
val_dataset = DatasetVal(input_path=input_dir, output_path=output_dir)

2 images were identified from the input directory.


In [0]:
val_dataset.examples

[{'img_id': 'street2',
  'img_path': 'deeplabv3-semantic-scene-segmentation/input/street2.jpg'},
 {'img_id': 'street1',
  'img_path': 'deeplabv3-semantic-scene-segmentation/input/street1.jpg'}]

In [0]:
val_loader = torch.utils.data.DataLoader(dataset=val_dataset,
                                         batch_size=1, shuffle=False,
                                         num_workers=1)

In [0]:
network.eval()

DeepLabV3(
  (resnet): ResNet_BasicBlock_OS8(
    (resnet): Sequential(
      (0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU(inplace=True)
      (3): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (4): Sequential(
        (0): BasicBlock(
          (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (relu): ReLU(inplace=True)
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        )
        (1): BasicBlock(
          (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn1): BatchNorm2d

In [0]:
count = 0

#produce labeled image
for step, (imgs, img_ids) in enumerate(val_loader):
    #print(img_ids)
    with torch.no_grad(): # (corresponds to setting volatile=True in all variables, this is done during inference to reduce memory consumption)
        imgs = Variable(imgs).cuda() # (shape: (batch_size, 3, img_h, img_w))
        outputs = network(imgs) # (shape: (batch_size, num_classes, img_h, img_w))
  
        ########################################################################
        # save data for visualization:
        ########################################################################
        
        outputs = outputs.data.cpu().numpy() # (shape: (batch_size, num_classes, img_h, img_w))
        pred_label_imgs = np.argmax(outputs, axis=1) # (shape: (batch_size, img_h, img_w))
        pred_label_imgs = pred_label_imgs.astype(np.uint8)

        for i in range(pred_label_imgs.shape[0]):
            if i == 0:
                pred_label_img = pred_label_imgs[i] # (shape: (img_h, img_w))
                img_id = img_ids[i]
                img = imgs[i] # (shape: (3, img_h, img_w))

                img = img.data.cpu().numpy()
                img = np.transpose(img, (1, 2, 0)) # (shape: (img_h, img_w, 3))
                img = img*np.array([0.229, 0.224, 0.225])
                img = img + np.array([0.485, 0.456, 0.406])
                img = img*255.0
                img = img.astype(np.uint8)

                pred_label_img_color = label_img_to_color(pred_label_img)
                overlayed_img = 0.35*img + 0.65*pred_label_img_color
                overlayed_img = overlayed_img.astype(np.uint8)
                #producing original predicted images
                cv2.imwrite(val_dataset.output_dir + "/" + img_id + "_pred_label.png", pred_label_img )
                
                # produce debugging info for the first 10 images
                if count < 100:
                    #producing colored predicted images
                    cv2.imwrite(val_dataset.debug_dir + "/" + img_id + "_pred_label_colorimg.png", pred_label_img_color )
                    #producing overlayed images
                    cv2.imwrite(val_dataset.debug_dir + "/" + img_id + "_overlayed.png", overlayed_img)
                

        ########################################################################
        # quick counting
        ########################################################################
        count += 1
                

street2
street1


  "See the documentation of nn.Upsample for details.".format(mode))


In [0]:
# generate the summary csv file from the labeled files
import glob
output_files = glob.glob(val_dataset.output_dir + "/*pred_label.png")

In [0]:
print(val_dataset.output_dir)
print(output_files)

output
['output/street2_pred_label.png', 'output/street1_pred_label.png']


In [0]:
# deeplabv3 - cityscape labels

class_names = ['pcnt_road', 'pcnt_sidewalk', 'pcnt_building', 'pcnt_wall', 'pcnt_fence',
    'pcnt_pole', 'pcnt_traffic_light', 'pcnt_traffic_sign', 'pcnt_vegetation', 'pcnt_terrain',
    'pcnt_sky', 'pcnt_person', 'pcnt_rider', 'pcnt_car', 'pcnt_truck',
    'pcnt_bus', 'pcnt_train', 'pcnt_motorcycle', 'pcnt_bicycle', 'pcnt_unlabeled']
num_class = len(class_names)


In [0]:
# quick check

file = output_files[0]

print(os.path.basename(file).split('_pred_label.png')[0])

pred_label_img = cv2.imread(file, -1)
# count how many pixels in pred_label_img which are of object class trainId:
pixel_ratio = np.zeros(num_class)
for cate_id in range(num_class):
    pixel_ratio[cate_id] = np.sum(np.equal(pred_label_img, cate_id))/pred_label_img.size

print(pixel_ratio.tolist())

street2
[0.14567489801864802, 0.06896124708624708, 0.434604458041958, 0.0, 0.01633158508158508, 0.037807765151515155, 0.001757357226107226, 0.011531177156177156, 0.2095807837995338, 0.0036276223776223774, 0.02861487470862471, 0.014213650932400932, 0.000628277972027972, 0.001065340909090909, 0.00044434731934731936, 0.0, 0.0, 0.00030048076923076925, 0.002576850233100233, 0.022279283216783217]


In [0]:
#save category ratios to csv file
with open('00_object_ratios.csv', 'w') as output:
    writer = csv.writer(output)
    writer.writerow(['image_name'] + class_names)
    
    for file in output_files:
        img_name = os.path.basename(file).split('_pred_label.png')[0]
        pred_label_img = cv2.imread(file, -1)
        
        # count how many pixels in pred_label_img which are of object class trainId:
        pixel_ratio = np.zeros(num_class)
        for cate_id in range(num_class):
            pixel_ratio[cate_id] = np.sum(np.equal(pred_label_img, cate_id))/pred_label_img.size
        
        # write to the csv file
        writer.writerow([img_name] + pixel_ratio.tolist())


In [0]:
!ls

00_object_ratios.csv		       images307      output
deeplabv3-semantic-scene-segmentation  images307.zip  sample_data


In [0]:
from google.colab import files
files.download('00_object_ratios.csv')