In [1]:
from turtle import color
%env RSN_HOME=./
%env PYTHONPATH=/env/python:./
!mkdir $RSN_HOME/model_logs

env: RSN_HOME=./
env: PYTHONPATH=/env/python:./
mkdir: .//model_logs: File exists


In [2]:
import os
import numpy as np
import cv2
import glob
import torchvision.transforms as transforms
from tqdm import tqdm_notebook

import torch

In [3]:
from config import cfg
from network import RSN

from lib.utils.transforms import flip_back

import matplotlib as mplt
import matplotlib.pyplot as plt
import matplotlib.image as mpimg

from natsort import natsorted

In [4]:
data_txt  = np.loadtxt('./test_images.txt', dtype='str')
# cfg
normalize = transforms.Normalize(mean=cfg.INPUT.MEANS, std=cfg.INPUT.STDS)
transform = transforms.Compose([transforms.ToTensor(), normalize])
color_rgb = False
border = 10
kernel = 5
shifts = [0.25]
model_file = "trained_model.pth" #os.path.join('./4XRSN18.coco', "iter-{}.pth".format(0))
pairs = [[16, 14], [14, 12], [17, 15], [15, 13], [12, 13], [6, 12],
         [7, 13], [6, 7], [6, 8], [7, 9], [8, 10], [9, 11], [2, 3],
         [1, 2], [1, 3], [2, 4], [3, 5], [4, 6], [5, 7]]
color = np.random.randint(0, 256, (cfg.DATASET.KEYPOINT.NUM, 3)).tolist()

In [5]:
model = RSN(cfg)
if os.path.exists(model_file):
    state_dict = torch.load(
            model_file, map_location=lambda storage, loc: storage)
    state_dict = state_dict['model']
    model.load_state_dict(state_dict)

# device = torch.device("cuda", 0)
device = torch.device("cpu")
model.to(device)
cpu_device =  torch.device("cpu")

model.eval()

RSN(
  (top): ResNet_top(
    (conv): conv_bn_relu(
      (conv): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3))
      (bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
    )
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  )
  (stage0): Single_stage_module(
    (downsample): ResNet_downsample_module(
      (layer1): Sequential(
        (0): Bottleneck(
          (conv_bn_relu1): conv_bn_relu(
            (conv): Conv2d(64, 104, kernel_size=(1, 1), stride=(1, 1))
            (bn): BatchNorm2d(104, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
            (relu): ReLU(inplace=True)
          )
          (conv_bn_relu2_1_1): conv_bn_relu(
            (conv): Conv2d(26, 26, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
            (bn): BatchNorm2d(26, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
            (relu): ReL

In [11]:
for (iteration, image_path) in tqdm_notebook(enumerate(data_txt)):

    image = cv2.imread(image_path, cv2.IMREAD_COLOR)
    ori_image = image.copy()

    height = image.shape[0]
    width = image.shape[1]

    if color_rgb:
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image = cv2.resize(image,(cfg.INPUT_SHAPE[1], cfg.INPUT_SHAPE[0]))
    image = transform(image)
    image = image.unsqueeze(0)
    image = image.to(device)
    with torch.no_grad():
        output = model(image)
        output = output.to(cpu_device).numpy()

        if cfg.TEST.FLIP:
            images_flipped = np.flip(image.to(cpu_device).numpy(), 3).copy()
            images_flipped = torch.from_numpy(images_flipped).to(device)
            output_flipped = model(images_flipped)
            output_flipped = output_flipped.to(cpu_device).numpy()
            output_flipped = flip_back(output_flipped, cfg.DATASET.KEYPOINT.FLIP_PAIRS)

    output = (output + output_flipped) * 0.5

    pred = np.zeros((cfg.DATASET.KEYPOINT.NUM, 2))
    score = np.zeros((cfg.DATASET.KEYPOINT.NUM, 1))

    score_map = output[0].copy()
    score_map = score_map / 255 + 0.5

    dr = np.zeros((cfg.DATASET.KEYPOINT.NUM,
                       cfg.OUTPUT_SHAPE[0] + 2 * border, cfg.OUTPUT_SHAPE[1] + 2 * border))
    dr[:, border: -border, border: -border] = output[0].copy()
    for w in range(cfg.DATASET.KEYPOINT.NUM):
        dr[w] = cv2.GaussianBlur(dr[w], (kernel, kernel), 0)
        # a = np.mat(dr[w])
        # cv2.imshow('a',a)
        # cv2.waitKey(0)
    for w in range(cfg.DATASET.KEYPOINT.NUM):
        for j in range(len(shifts)):
            if j == 0:
                lb = dr[w].argmax()
                y, x = np.unravel_index(lb, dr[w].shape)
                dr[w, y, x] = 0
                x -= border
                y -= border
            lb = dr[w].argmax()
            py, px = np.unravel_index(lb, dr[w].shape)
            dr[w, py, px] = 0
            px -= border + x
            py -= border + y
            ln = (px ** 2 + py ** 2) ** 0.5
            if ln > 1e-3:
                x += shifts[j] * px / ln
                y += shifts[j] * py / ln
        x = max(0, min(x, cfg.OUTPUT_SHAPE[1] - 1))
        y = max(0, min(y, cfg.OUTPUT_SHAPE[0] - 1))

        pred[w] = np.array([x * 4 + 2, y * 4 + 2])
        score[w, 0] = score_map[w, int(round(y) + 1e-9), \
                                int(round(x) + 1e-9)]

    # aligned or not ...pred[:, 1] * h / cfg.INPUT_SHAPE[0]
    pred[:, 0] = pred[:, 0] * width / cfg.INPUT_SHAPE[1]
    pred[:, 1] = pred[:, 1] * height / cfg.INPUT_SHAPE[0]

    pred = pred.astype(int)

    joints = pred.copy()
    show_img = mpimg.imread(image_path)
    plt.imshow(show_img)
    plt.scatter(joints[:,0], joints[:,1], marker="o", color="red", s=10)
    plt.savefig("results/{}".format(image_path))
    plt.close()

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  for (iteration, image_path) in tqdm_notebook(enumerate(data_txt)):


0it [00:00, ?it/s]

Demo on one image

In [116]:
def fill_dataset_file(images_dir):
    test_images = natsorted(os.listdir(images_dir))

    with open("test_images.txt", "w") as data:
        for name in test_images:
            data.write("{}/{}\n".format(images_dir, name))
        data.flush()

In [117]:
fill_dataset_file("images")

In [12]:
def make_video(images_dir, result_path):
    file_names = natsorted(os.listdir(images_dir))
    img_array = []
    for filename in file_names:
        img = cv2.imread(images_dir + "/" + filename)
        height, width, layers = img.shape
        size = (width,height)
        img_array.append(img)


    out = cv2.VideoWriter(result_path, cv2.VideoWriter_fourcc(*'DIVX'), 15, size)

    for i in range(len(img_array)):
        out.write(img_array[i])
    out.release()

In [13]:
make_video("results/images", "results/result.mp4")

OpenCV: FFMPEG: tag 0x58564944/'DIVX' is not supported with codec id 12 and format 'mp4 / MP4 (MPEG-4 Part 14)'
OpenCV: FFMPEG: fallback to use tag 0x7634706d/'mp4v'
