In [8]:
import sys
sys.path.append("/home/rchoudhu/research/voxelpose-pytorch/lib")

import os
os.environ["CUDA_VISIBLE_DEVICES"] = "7"
import numpy as np
import cv2
import torch
import torch.nn as nn
import torch.optim as optim
import torch.backends.cudnn as cudnn
import torch.utils.data
import torchvision.transforms as transforms
from tensorboardX import SummaryWriter
import argparse
import pprint
import logging
import json
import time
import matplotlib.pyplot as plt

from tqdm import tqdm

#import _init_paths
from core.config import config
from core.config import update_config
from core.function import train_3d, validate_3d
from utils.utils import create_logger
from utils.utils import save_checkpoint, load_checkpoint, load_model_state
from utils.utils import load_backbone_panoptic
from utils.vis import save_debug_3d_images
import dataset
import models
import json
# Directory name for visualisation output.
# NOTE(review): not referenced by any of the visible cells — confirm it is still needed.
viz_test_dir = "video_viz"

# Pairs of joint indices (values range over 0-13). Presumably the limbs to draw between
# joints of the 14-joint campus pose — TODO confirm against the plotting code.
CAMPUS_LIMBS = [[0, 1], [1, 2], [3, 4], [4, 5], [6, 7], [7, 8], [9, 10], [10, 11], [12, 13]]


In [10]:
#%%
# Load the per-frame dump (predictions, ground truth and MPJPEs) and list the
# frames from worst to best according to their largest per-person MPJPE.
json_data_path = "./test_dump_panoptics.json"
zero_frames = []
with open(json_data_path) as dump_file:
    json_data = json.load(dump_file)
print(json_data[0].keys())


def _max_mpjpe(entry):
    """Sort key: the largest MPJPE stored for a frame, or 0 when it has none."""
    errors = entry['mpjpes']
    return np.max(errors) if len(errors) != 0 else 0


sorted_data = sorted(json_data, key=_max_mpjpe)
# Walk the ascending order backwards so the worst frames are printed first.
for entry in reversed(sorted_data):
    if len(entry['mpjpes']) == 0:
        # Frames without any MPJPE are only collected, not printed.
        zero_frames.append(entry['frame_idx'])
        continue
    print("Frame: %d MPJP#Es %s" % (entry['frame_idx'], str(np.max(entry['mpjpes']))))
print(zero_frames)

# Optional: scatter plot of the worst MPJPE per frame.
#xs = [data['frame_idx'] for data in json_data]
#ys = []
#for data in json_data:
#    ys.append(0 if len(data['mpjpes']) == 0 else np.max(data['mpjpes']))

#plt.scatter(xs, ys)
#plt.show()

dict_keys(['frame_idx', 'image_paths', 'pred', 'gt', 'mpjpes'])
Frame: 1357 MPJP#Es 1382.9923309299556
Frame: 1494 MPJP#Es 1183.7893427575154
Frame: 1055 MPJP#Es 930.9267422891947
Frame: 1514 MPJP#Es 851.7826406540119
Frame: 726 MPJP#Es 795.7312084431226
Frame: 727 MPJP#Es 702.8845306586446
Frame: 880 MPJP#Es 651.0914492671513
Frame: 896 MPJP#Es 201.6863887589663
Frame: 888 MPJP#Es 169.45452395121296
Frame: 2282 MPJP#Es 137.0177443136166
Frame: 1362 MPJP#Es 122.19517075561953
Frame: 2260 MPJP#Es 112.67996834129598
Frame: 1054 MPJP#Es 109.37081354384719
Frame: 554 MPJP#Es 108.5898695674894
Frame: 1216 MPJP#Es 108.18639559251478
Frame: 1020 MPJP#Es 104.85477431094634
Frame: 1019 MPJP#Es 102.44086731647158
Frame: 1629 MPJP#Es 99.59386533147675
Frame: 1081 MPJP#Es 97.83112483313674
Frame: 1748 MPJP#Es 94.0084220533041
Frame: 732 MPJP#Es 90.78861655761132
Frame: 549 MPJP#Es 90.13648242089384
Frame: 1021 MPJP#Es 89.05029982334962
Frame: 219 MPJP#Es 86.90500610539466
Frame: 1066 MPJP#Es 84.78

In [3]:
def coco2campus3D(coco_pose):
    """
    Convert a 3D pose from COCO joint order to the 14-joint campus order.

    The first 12 campus joints are copied from COCO joints; the two head
    joints are interpolated from the shoulders and ears.

    :param coco_pose: np.array with shape 17x3 (COCO joint order)
    :return: np.array with shape 14x3 (campus joint order)
    """
    # COCO index that supplies each of the first 12 campus joints.
    body_sources = np.array([16, 14, 12, 11, 13, 15, 10, 8, 6, 5, 7, 9])

    ear_mid = (coco_pose[3] + coco_pose[4]) / 2  # midpoint of the two ears
    shoulder_mid = (coco_pose[5] + coco_pose[6]) / 2  # midpoint of L and R shoulder

    # Bottom of the head sits halfway between shoulders and ears; the top is
    # the bottom pushed through the ear midpoint by the same distance again.
    bottom_of_head = (shoulder_mid + ear_mid) / 2
    top_of_head = bottom_of_head + (ear_mid - bottom_of_head) * 2

    result = np.zeros((14, 3))
    result[:12] += coco_pose[body_sources]
    result[12] += bottom_of_head
    result[13] += top_of_head
    return result

def compute_per_frame_metrics(db_entry):
    """
    Compute the per-person MPJPE for a single frame.

    Every ground-truth person is compared against all valid predicted poses
    and is assigned the smallest mean per-joint position error among them
    (ground-truth identities are not used for the matching).

    :param db_entry: dict with the keys
        'joints_3d'   - ground-truth poses, one per person; each must be
                        broadcastable against the 14x3 poses returned by
                        coco2campus3D
        'pred_pose2d' - predicted poses indexed as [person, joint, channel];
                        channels 0-2 are used as the 3D coordinates and a
                        prediction is kept only when channel 3 of its first
                        joint is >= 0
    :return: list with one MPJPE per ground-truth person. The list is empty
        when the frame has no ground-truth person or no valid prediction.
        PCP3D is not computed here.
    """
    db_gt = np.array(db_entry['joints_3d'])
    pred_coco = np.array(db_entry['pred_pose2d'])
    # An empty prediction list cannot be indexed as [person, joint, channel].
    if pred_coco.size == 0:
        return []

    # Keep only the valid predictions and drop everything but the coordinates.
    pred_coco = pred_coco[pred_coco[:, 0, 3] >= 0, :, :3]
    # np.stack raises on an empty sequence, so bail out when nothing is valid.
    if pred_coco.shape[0] == 0:
        return []
    pred = np.stack([coco2campus3D(p) for p in pred_coco])

    all_mpjpes = []
    for gt in db_gt:
        # Mean joint distance between this GT person and every prediction.
        mpjpes = np.mean(np.sqrt(np.sum((gt[np.newaxis] - pred) ** 2, axis=-1)), axis=-1)
        all_mpjpes.append(np.min(mpjpes))

    return all_mpjpes

In [4]:
# NOTE(review): this call raises KeyError: 'joints_3d'. Entries of json_data expose the
# keys 'frame_idx', 'image_paths', 'pred', 'gt' and 'mpjpes', whereas
# compute_per_frame_metrics reads 'joints_3d' and 'pred_pose2d'.
# TODO: pass a db entry with the expected keys, or remap the dump's keys first.
compute_per_frame_metrics(json_data[0])

KeyError: 'joints_3d'

In [5]:
# Assemble the rendered frames found in image_dir_path into a single video file.
image_dir_path = "video_viz_campus"
video_output_path = "./test_campus_cam2.avi"
# Frames are named "image_<index>.<ext>"; sort on the numeric index so that
# image_10 follows image_9 instead of image_1. splitext is used so the
# extension does not have to be exactly four characters long.
image_list = sorted(
    os.listdir(image_dir_path),
    key=lambda x: int(os.path.splitext(x)[0][len("image_"):]),
)
# Read the first image and check its shape to initialize video
# writer.
read_img = cv2.imread(os.path.join(image_dir_path, image_list[0]))
if read_img is None:
    # cv2.imread signals an unreadable file by returning None instead of raising.
    raise IOError("Could not read image %s" % os.path.join(image_dir_path, image_list[0]))
# Slice instead of unpacking into `_`, which would clobber IPython's
# last-output variable.
height, width = read_img.shape[:2]
size = (width, height)  # cv2.VideoWriter takes the frame size as (width, height)
video_writer = cv2.VideoWriter(video_output_path, cv2.VideoWriter_fourcc(*'DIVX'), 15, size)

try:
    # The constructor does not raise when the file or codec cannot be opened.
    if not video_writer.isOpened():
        raise IOError("Could not open video writer for %s" % video_output_path)

    for image in tqdm(image_list):
        img = cv2.imread(os.path.join(image_dir_path, image))
        if img is None:
            raise IOError("Could not read image %s" % os.path.join(image_dir_path, image))
        video_writer.write(img)
finally:
    # Always finalize the file, even when a frame fails to load.
    video_writer.release()

100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 222/222 [00:03<00:00, 71.95it/s]


In [None]:
# Inspect the frame order used for the video (image_list is built in the video-assembly cell).
# NOTE(review): this displays the entire list — consider image_list[:10] to keep the output short.
image_list

['image_0.png',
 'image_1.png',
 'image_2.png',
 'image_3.png',
 'image_4.png',
 'image_5.png',
 'image_6.png',
 'image_7.png',
 'image_8.png',
 'image_9.png',
 'image_10.png',
 'image_11.png',
 'image_12.png',
 'image_13.png',
 'image_14.png',
 'image_15.png',
 'image_16.png',
 'image_17.png',
 'image_18.png',
 'image_19.png',
 'image_20.png',
 'image_21.png',
 'image_22.png',
 'image_23.png',
 'image_24.png',
 'image_25.png',
 'image_26.png',
 'image_27.png',
 'image_28.png',
 'image_29.png',
 'image_30.png',
 'image_31.png',
 'image_32.png',
 'image_33.png',
 'image_34.png',
 'image_35.png',
 'image_36.png',
 'image_37.png',
 'image_38.png',
 'image_39.png',
 'image_40.png',
 'image_41.png',
 'image_42.png',
 'image_43.png',
 'image_44.png',
 'image_45.png',
 'image_46.png',
 'image_47.png',
 'image_48.png',
 'image_49.png',
 'image_50.png',
 'image_51.png',
 'image_52.png',
 'image_53.png',
 'image_54.png',
 'image_55.png',
 'image_56.png',
 'image_57.png',
 'image_58.png',
 'image

In [None]:
# Well we technically don't need 