In [1]:
# Notebook setup: extend the import path, pick the CUDA device, import
# third-party and project-local modules, and define the frame output directory.
import sys
# Add the voxelpose-pytorch `lib` directory to the import path before the
# project-local imports further down (presumably where core/utils/dataset/models
# live — TODO confirm). NOTE(review): machine-specific absolute path.
sys.path.append("/home/rchoudhu/research/voxelpose-pytorch/lib")

import os
# Set before `torch` is imported below so the device restriction is already in
# the environment when torch loads.
os.environ["CUDA_VISIBLE_DEVICES"] = "6"
import cv2
import torch
import torch.nn as nn
import torch.optim as optim
import torch.backends.cudnn as cudnn
import torch.utils.data
import torchvision.transforms as transforms
from tensorboardX import SummaryWriter
import argparse
import pprint
import logging
import json
import time
# Use the Agg backend: figures in this notebook are written to disk with
# savefig rather than displayed.
%matplotlib agg
import matplotlib.pyplot as plt

from tqdm import tqdm

#import _init_paths
from core.config import config
from core.config import update_config
from core.function import train_3d, validate_3d
from utils.utils import create_logger
from utils.utils import save_checkpoint, load_checkpoint, load_model_state
from utils.utils import load_backbone_panoptic
from utils.vis import save_debug_3d_images
import dataset
import models

# Directory the rendering cell writes per-frame PNGs into and the video cell
# reads them back from.
viz_test_dir = "video_viz"

In [2]:
# Load the experiment configuration, then build the test dataset and its loader.
cfg = "configs/shelf/prn64_cpn80x80x20.yaml"
update_config(cfg)
logger, final_output_dir, tb_log_dir = create_logger(config, cfg, 'validate')

# config.GPUS is split on commas and parsed into a list of integer device ids.
gpus = [int(i) for i in config.GPUS.split(',')]
print('=> Loading data ..')
normalize = transforms.Normalize(
    mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
print(config.DATASET.TEST_SUBSET)

# Resolve the dataset class by attribute name. getattr performs the same lookup
# the previous eval('dataset.' + name) did, without executing the configured
# string as code.
dataset_cls = getattr(dataset, config.DATASET.TEST_DATASET)
test_dataset = dataset_cls(
    config,
    config.DATASET.TEST_SUBSET,
    False,  # presumably the is_train flag — TODO confirm against the dataset class
    transforms.Compose([
        transforms.ToTensor(),
        normalize,
    ]))

print(test_dataset.image_set)

# shuffle=False keeps the loader in dataset order, which the frame-by-frame
# rendering later in the notebook relies on for its image numbering.
test_loader = torch.utils.data.DataLoader(
    test_dataset,
    batch_size=config.TEST.BATCH_SIZE * len(gpus),
    shuffle=False,
    num_workers=config.WORKERS,
    pin_memory=True)

print('=> Done loading data.')

=> load /home/rchoudhu/research/voxelpose-pytorch/data/Shelf/pred_shelf_maskrcnn_hrnet_coco.pkl


=> creating /home/rchoudhu/research/voxelpose-pytorch/output/shelf_synthetic/multi_person_posenet_50/prn64_cpn80x80x20
=> creating /home/rchoudhu/research/voxelpose-pytorch/log/shelf_synthetic/multi_person_posenet_50/prn64_cpn80x80x202022-02-01-03-24
=> Loading data ..


  actor_3d = np.array(np.array(data['actor3D'].tolist()).tolist()).squeeze()  # num_person * num_frame


validation
=> Done loading data.


In [3]:
# Build the multi-person pose network in inference mode and wrap it in DataParallel.
print('=> Constructing models ..')
# Look the model module up by attribute name. getattr replaces the previous
# eval('models.' + name + '.get_multi_person_pose_net') without executing the
# configured string as code.
model_module = getattr(models, config.MODEL)
model = model_module.get_multi_person_pose_net(config, is_train=False)
print("Done constructing models.")

# Re-index the devices as 0..N-1 before handing them to DataParallel.
# NOTE(review): presumably needed because CUDA_VISIBLE_DEVICES renumbers the
# visible devices from 0 — confirm.
gpus = list(range(len(gpus)))
print("Setting data parallel with gpus: " + str(gpus))
start_time = time.time()
with torch.no_grad():
    model = torch.nn.DataParallel(model, device_ids=gpus).cuda()
print("Took %.3f to set up data parallel" % (time.time() - start_time))

=> Constructing models ..
Done constructing models.
Setting data parallel with gpus: [0]
Took 2.823 to set up data parallel


In [4]:
# Restore the trained weights from the experiment output directory, then run
# the 3D validation pass over the test loader.
test_model_file = os.path.join(final_output_dir, config.TEST.MODEL_FILE)

# Fail early when no model file is configured or the file is missing on disk.
if not (config.TEST.MODEL_FILE and os.path.isfile(test_model_file)):
    raise ValueError('Check the model file for testing!')

logger.info('=> load models state {}'.format(test_model_file))
# The model is wrapped in DataParallel, so the weights go into `model.module`.
state_dict = torch.load(test_model_file)
model.module.load_state_dict(state_dict)

# Kept as the last expression so the cell displays whatever validate_3d returns.
validate_3d(config, model, test_loader, final_output_dir)

=> load models state /home/rchoudhu/research/voxelpose-pytorch/output/shelf_synthetic/multi_person_posenet_50/prn64_cpn80x80x20/model_best.pth.tar
  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]
  indices_x = (indices // (shape[1] * shape[2])).reshape(batch_size, num_people, -1)
  indices_y = ((indices % (shape[1] * shape[2])) // shape[2]).reshape(batch_size, num_people, -1)
Test: [0/76]	Time: 6.175s (6.175s)	Speed: 3.2 samples/s	Data: 4.382s (4.382s)	Memory 68077056.0
Test: [75/76]	Time: 0.103s (0.863s)	Speed: 48.6 samples/s	Data: 0.001s (0.199s)	Memory 37667840.0
  actor_3d = np.array(np.array(data['actor3D'].tolist()).tolist()).squeeze()  # num_person * num_frame
     | Actor 1 | Actor 2 | Actor 3 | Average | 
 PCP |  93.33  |  0.00  |  80.00  |  57.78  |	 Recall@500mm: 1.0000


num person :  4 
gt shape : (14, 3)
gt shape : (14, 3)
num person :  4 
gt shape : (14, 3)
gt shape : (14, 3)
num person :  4 
gt shape : (14, 3)
gt shape : (14, 3)


0.5777777775851852

In [46]:
# Render the ground-truth 3D skeletons to one PNG per loader batch. The PNGs
# are written to `viz_test_dir` and are stitched into a video by a later cell.

# Limb connectivity (pairs of joint indices) for the 14-joint skeleton.
# Shelf / Campus use a different skeleton format than the other datasets.
LIMBS14 = [[0, 1], [1, 2], [3, 4], [4, 5], [2, 3], [6, 7], [7, 8], [9, 10],
          [10, 11], [2, 8], [3, 9], [8, 12], [9, 12], [12, 13]]
# Joint count -> limb list; replaces the former eval("LIMBS<n>") name lookup.
LIMBS_BY_NUM_JOINTS = {14: LIMBS14}

colors = ['r', 'g', 'b', 'black']
plot_calls = 0
cur_num_person = 0

# savefig does not create missing directories, so make sure the target exists.
os.makedirs(viz_test_dir, exist_ok=True)

for i, (inputs, target_2d, weights_2d, targets_3d, meta, input_heatmap) in enumerate(test_loader):
    fig = plt.figure()
    try:
        ax = fig.add_subplot(111, projection='3d')

        # Only the first entry of `meta` is used.
        # NOTE(review): presumably one entry per camera view — confirm.
        first_meta = meta[0]

        # NOTE(review): only index [0] of each field is drawn; if that axis is
        # the batch axis, the other samples in the batch are never rendered.
        # Confirm this is intended.
        num_person = int(first_meta['num_person'][0])
        if num_person != cur_num_person:
            # Log only when the number of people changes between frames.
            print("frame %d : %d " % (i, num_person))
            cur_num_person = num_person

        joints_3d = first_meta['joints_3d'][0]
        for n in range(num_person):
            joint = joints_3d[n]
            limbs = LIMBS_BY_NUM_JOINTS.get(len(joint))
            if limbs is None:
                raise ValueError(
                    "No limb definition for skeletons with %d joints" % len(joint))
            # Wrap around the palette so more people than colours cannot
            # raise an IndexError.
            color = colors[n % len(colors)]
            for start, end in limbs:
                x = [float(joint[start, 0]), float(joint[end, 0])]
                y = [float(joint[start, 1]), float(joint[end, 1])]
                z = [float(joint[start, 2]), float(joint[end, 2])]
                ax.plot(x, y, z, c=color, ls='--', lw=1.5, marker='o',
                        markerfacecolor='w', markersize=2, markeredgewidth=1)

        fig.savefig(os.path.join(viz_test_dir, "image_%d.png" % i))
    finally:
        # Always release the figure, even if rendering failed part-way, so a
        # long loop does not accumulate open figures.
        plt.close(fig)

2
1
2
1
0
1
3
2
1
2
3
2
0


In [47]:
# Stitch the per-frame PNGs in `viz_test_dir` into a single video file.
image_dir_path = viz_test_dir
video_output_path = "./test.avi"


def _frame_index(file_name):
    """Return the frame number N for a name of the form ``image_N.png``, else None."""
    stem, ext = os.path.splitext(file_name)
    prefix, _sep, number = stem.rpartition("_")
    if ext.lower() != ".png" or prefix != "image" or not number.isdecimal():
        return None
    return int(number)


# Sort by the numeric frame index. A plain string sort would place
# "image_10.png" before "image_2.png" and scramble the frame order.
# Files that are not frame images are ignored.
image_list = sorted(
    (name for name in os.listdir(image_dir_path) if _frame_index(name) is not None),
    key=_frame_index)
if not image_list:
    raise RuntimeError("No image_<N>.png frames found in %r" % image_dir_path)

# Read the first image and use its shape to initialize the video writer.
read_img = cv2.imread(os.path.join(image_dir_path, image_list[0]))
if read_img is None:
    raise IOError("Could not read frame %r" % image_list[0])
height, width = read_img.shape[:2]
size = (width, height)  # VideoWriter takes (width, height)

video_writer = cv2.VideoWriter(video_output_path, cv2.VideoWriter_fourcc(*'DIVX'), 15, size)
try:
    if not video_writer.isOpened():
        raise IOError("Could not open video writer for %r" % video_output_path)
    for image in tqdm(image_list):
        img = cv2.imread(os.path.join(image_dir_path, image))
        if img is None:
            # imread returns None instead of raising for unreadable files.
            raise IOError("Could not read frame %r" % image)
        video_writer.write(img)
finally:
    # Release the writer even on failure so the output file is closed.
    video_writer.release()

100%|██████████| 76/76 [00:00<00:00, 147.88it/s]


In [4]:
# We can't re-use their viz code; it's useless for this.
# We will need to write our own. The key here is getting the right data from the
# dataloader, in order. Then, once we have the preds, we can also try to visualize
# them. The batch dimension is sort of meaningless here.


In [None]:
# Well we technically don't need 