In [None]:
# Make the repository root the working directory so the relative paths used
# by the rest of the notebook ("src/...", "data/...", "out/...") resolve.
import os
import sys

# Only step up when the root has not been reached yet. A bare os.chdir("..")
# climbs one more level every time this cell is re-run, which silently breaks
# every relative path below. "src" is used as the marker because later cells
# import `src.cmx...` relative to the working directory.
if not os.path.isdir("src"):
    os.chdir("..")

# Register the cmx package directory once, as an absolute path, so the entry
# keeps working if the working directory changes later and is not duplicated
# when the cell is executed again.
_cmx_path = os.path.abspath(os.path.join("src", "cmx"))
if _cmx_path not in sys.path:
    sys.path.append(_cmx_path)

In [7]:
# function definition
import numpy as np
def get_class_colors(*args):
    """Build the fixed 41-entry class color palette.

    Each class id is consumed three bits at a time (lowest bits first). Within
    every 3-bit group, bit 0 feeds the red channel, bit 1 the green channel and
    bit 2 the blue channel. The first group lands on channel bit 7, the next on
    bit 6, and so on for seven rounds.

    Args:
        *args: accepted for call compatibility and ignored.

    Returns:
        A list of 41 ``[r, g, b]`` lists of Python ints in the range 0-255,
        indexed by class id.
    """
    num_classes = 41
    palette = []
    for class_id in range(num_classes):
        channels = [0, 0, 0]  # r, g, b
        remaining = class_id
        # Seven rounds: target bit positions 7, 6, ..., 1.
        for bit_position in range(7, 0, -1):
            for channel in range(3):
                channels[channel] |= ((remaining >> channel) & 1) << bit_position
            remaining >>= 3
        palette.append(channels)
    return palette

def color_mask(mask, colors):
    """
    Assign colors to a mask image.

    Args:
        mask: a 2-D array-like of class ids with shape (H, W), e.g. a NumPy
            array, a nested list, or a single-channel PIL Image. It is
            converted with ``np.asarray`` before use.
        colors: a sequence of RGB colors; ``colors[i]`` is painted on every
            pixel whose mask value equals ``i``.

    Returns:
        A NumPy array of shape (H, W, 3) and dtype uint8. Pixels whose value
        has no entry in ``colors`` are left black (0, 0, 0).
    """
    # Coerce first: PIL Images and nested lists have no ``.shape`` attribute,
    # so reading it directly only works for NumPy arrays.
    mask = np.asarray(mask)
    colored_mask = np.zeros((mask.shape[0], mask.shape[1], 3), dtype=np.uint8)
    for class_id, color in enumerate(colors):
        colored_mask[mask == class_id] = color
    return colored_mask

In [2]:
# Build the segmentation network, move it to the available device, load the
# fine-tuned checkpoint and switch it to inference mode.
import torch
import torch.nn as nn
from src.cmx.config import config
from src.cmx.models.builder import EncoderDecoder as segmodel

# The loss is only handed to the model constructor here; targets equal to
# config.background are excluded from it via ignore_index.
criterion = nn.CrossEntropyLoss(reduction='mean', ignore_index=config.background)
BatchNorm2d = nn.BatchNorm2d
model = segmodel(cfg=config, criterion=criterion, norm_layer=BatchNorm2d)

# Pick the GPU when one is available, otherwise fall back to the CPU, and
# move the model there. `device` is reused by the inference cell.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)
# Load the weights of the trained run from its last-epoch checkpoint.
# The path is relative, so the working directory must be the repository root.
# NOTE(review): this happens after .to(device); presumably the loaded tensors
# are copied into the existing parameters and stay on `device` - confirm.
model.init_weights(config, pretrained='out/log_lindenthal-camera-traps_mit_b0_pretrained_jet_dropout/checkpoint/epoch-last.pth')
# Switch to eval mode. Because this call is the cell's last expression, the
# notebook also prints the whole module tree that it returns.
model.eval()

loading annotations into memory...
Done (t=0.15s)
creating index...
index created!
loading annotations into memory...


[32m22 01:40:20 [0mUsing backbone: Segformer-B0


Done (t=0.11s)
creating index...
index created!


  from .autonotebook import tqdm as notebook_tqdm
[32m22 01:40:22 [0mUsing MLP Decoder
[32m22 01:40:22 [0mLoading pretrained model: /media/jorge/HDD/TFG/pretrained/segformers/mit_b0.pth


mit_b0 has 10795720 parameters.


  raw_state_dict = torch.load(model_file, map_location=torch.device('cpu'))
[32m22 01:40:23 [0mLoad model, Time usage:
	IO: 0.7322523593902588, initialize parameters: 0.014470100402832031
[32m22 01:40:23 [0mIniting weights ...
[32m22 01:40:23 [0mLoading pretrained model: out/log_lindenthal-camera-traps_mit_b0_pretrained_jet_dropout/checkpoint/epoch-last.pth
[32m22 01:40:30 [0mLoad model, Time usage:
	IO: 7.257330894470215, initialize parameters: 0.004538536071777344
[32m22 01:40:30 [0mIniting weights ...


EncoderDecoder(
  (backbone): mit_b0(
    (patch_embed1): OverlapPatchEmbed(
      (proj): Conv2d(3, 32, kernel_size=(7, 7), stride=(4, 4), padding=(3, 3))
      (norm): LayerNorm((32,), eps=1e-05, elementwise_affine=True)
    )
    (patch_embed2): OverlapPatchEmbed(
      (proj): Conv2d(32, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
      (norm): LayerNorm((64,), eps=1e-05, elementwise_affine=True)
    )
    (patch_embed3): OverlapPatchEmbed(
      (proj): Conv2d(64, 160, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
      (norm): LayerNorm((160,), eps=1e-05, elementwise_affine=True)
    )
    (patch_embed4): OverlapPatchEmbed(
      (proj): Conv2d(160, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
      (norm): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
    )
    (extra_patch_embed1): OverlapPatchEmbed(
      (proj): Conv2d(3, 32, kernel_size=(7, 7), stride=(4, 4), padding=(3, 3))
      (norm): LayerNorm((32,), eps=1e-05, elementwise_affine=True)

In [None]:
# Run the model on selected frames of one recording and write the RGB frame,
# the color-mapped depth and the colorized prediction as PNGs into the current
# working directory.
import os
# Ask OpenCV to enable its OpenEXR codec, which is needed to read the .exr
# depth maps. Set before cv2 is imported so the setting is in place when the
# library initializes.
os.environ["OPENCV_IO_ENABLE_OPENEXR"]="1"
import os.path as osp
import cv2 as cv
import numpy as np

from src.utils.postprocessing import depth_to_colormap
from src.cmx.engine.evaluator import Evaluator

# set the directory
VIDEO_ID = 'bag_20210802015851'
rgb_dir = 'color'
depth_dir = 'depth'
video_dir = osp.join('data', 'lindenthal-camera-traps', 'lindenthal_coco', 'images', VIDEO_ID)

# frame selection: keep ids that are multiples of FRAME_STEP, starting at
# FIRST_FRAME, and stop after MAX_FRAMES processed frames (None = no limit)
FRAME_STEP = 10
FIRST_FRAME = 30
MAX_FRAMES = 1

# get the class colors for the mask
colors = get_class_colors()
# TODO: Evaluator is imported above but no Evaluator object is created yet.

processed_frames = 0
# for every image in the color folder in alphabetical order
for rgb_fn in sorted(os.listdir(osp.join(video_dir, rgb_dir))):
    # frames are named by their numeric id; ignore anything else in the folder
    # (hidden files, thumbnails, ...) instead of crashing in int()
    frame_stem = rgb_fn.split('.')[0]
    if not frame_stem.isdecimal():
        continue
    image_id = int(frame_stem)
    if image_id % FRAME_STEP != 0 or image_id < FIRST_FRAME:
        continue

    # load the images; cv.imread returns None instead of raising on failure
    depth_fn = f'{image_id:06d}.exr'
    rgb_path = osp.join(video_dir, rgb_dir, rgb_fn)
    depth_path = osp.join(video_dir, depth_dir, depth_fn)
    rgb_image = cv.imread(rgb_path)
    if rgb_image is None:
        raise FileNotFoundError(f'could not read RGB image: {rgb_path}')
    depth_image = cv.imread(depth_path, cv.IMREAD_UNCHANGED)
    if depth_image is None:
        raise FileNotFoundError(f'could not read depth image: {depth_path}')

    # save the RGB frame as loaded (still in OpenCV's BGR order)
    cv.imwrite(f'{image_id:06d}_rgb.png', rgb_image)
    # normalize the images to 0-1 in float32
    rgb_image = cv.cvtColor(rgb_image, cv.COLOR_BGR2RGB) / 255.0
    depth_image = depth_to_colormap(depth_image, 'jet', equalize=False)
    cv.imwrite(f'{image_id:06d}_depth.png', depth_image)
    depth_image = depth_image / 255.0
    # convert to tensor: HWC -> 1xCxHxW float32
    rgb_image = torch.from_numpy(np.ascontiguousarray(rgb_image)).permute(2, 0, 1).unsqueeze(0).float()
    depth_image = torch.from_numpy(np.ascontiguousarray(depth_image)).permute(2, 0, 1).unsqueeze(0).float()
    print(rgb_image.shape, depth_image.shape)

    # forward pass
    with torch.no_grad():
        pred = model(rgb_image.to(device), depth_image.to(device))
    # per-pixel class id: argmax over the class dimension
    pred = torch.argmax(pred, dim=1).cpu().numpy().squeeze()

    # save the segmentation: write the colorized mask, not the raw class-id
    # map. color_mask produces RGB, while cv.imwrite expects BGR.
    pred_fn = f'{image_id:06d}_pred.png'
    result_img = color_mask(pred.astype(np.uint8), colors)
    cv.imwrite(pred_fn, cv.cvtColor(result_img, cv.COLOR_RGB2BGR))

    processed_frames += 1
    if MAX_FRAMES is not None and processed_frames >= MAX_FRAMES:
        break


torch.Size([1, 3, 480, 848]) torch.Size([1, 3, 480, 848])
