In [33]:
import numpy as np
import os
import torch
from torch.utils.data import DataLoader
import wandb

from camera import world_to_camera, normalize_screen_coordinates
from humaneva_dataset import HumanEvaDataset
from loss import mpjpe
from model import FrameModel
from run import run
from preprocessed_dataset import PreprocessedDataset
from main import Args, fetch
from visualization import visualize

In [35]:
args = Args()
he_dataset = HumanEvaDataset(args.dataset_path)

# Re-express every sequence's world-space 3D positions once per camera and
# store the per-camera list back on the sequence under 'positions_3d'.
for subject in he_dataset.subjects():
    for action in he_dataset[subject].keys():
        anim = he_dataset[subject][action]
        if 'positions' not in anim:
            continue  # nothing to convert for this sequence

        positions_3d = []
        for cam in anim['cameras']:
            pos_3d = world_to_camera(
                anim['positions'],
                R=cam['orientation'],
                t=cam['translation'],
            )
            # Make entries 1.. along axis 1 relative to entry 0 (removes the
            # global offset); entry 0 is left untouched so the trajectory is
            # still available in the first position.
            pos_3d[:, 1:] -= pos_3d[:, :1]
            positions_3d.append(pos_3d)
        anim['positions_3d'] = positions_3d

# get 2D keypoints
# NOTE(security): allow_pickle=True unpickles Python objects stored in the
# archive, which can execute arbitrary code -- only load dataset files from a
# trusted source.
# The archive is opened in a `with` block so its file handle is closed as soon
# as the two entries have been read, and it gets its own name so `keypoints`
# only ever refers to the positions mapping.
with np.load(args.dataset_2d_path, allow_pickle=True) as keypoints_file:
    keypoints_metadata = keypoints_file['metadata'].item()
    keypoints = keypoints_file['positions_2d'].item()

# Left/right index lists for the 2D keypoints (from the archive metadata) and
# for the 3D skeleton joints (from the dataset's skeleton).
keypoints_symmetry = keypoints_metadata['keypoints_symmetry']
kps_left, kps_right = list(keypoints_symmetry[0]), list(keypoints_symmetry[1])
joints_left, joints_right = list(he_dataset.skeleton().joints_left()), list(he_dataset.skeleton().joints_right())

# Normalize the x/y components of every 2D keypoint array using the
# resolution of the camera that produced it.
for subject in keypoints.keys():
    for action in keypoints[subject]:
        views = keypoints[subject][action]
        for cam_idx, kps in enumerate(views):
            # Camera parameters for this subject / view index
            cam = he_dataset.cameras()[subject][cam_idx]
            width, height = cam['res_w'], cam['res_h']
            # Only the first two components are rewritten; any further
            # components of the array are left as they are.
            kps[..., :2] = normalize_screen_coordinates(kps[..., :2], w=width, h=height)
            views[cam_idx] = kps

# Gather 3D poses, 2D poses and cameras for the training and validation splits.
train_split = fetch(args.subjects_train, keypoints, he_dataset, args.actions_train)
poses_train_3d, poses_train_2d, cameras_train = train_split
val_split = fetch(args.subjects_val, keypoints, he_dataset, args.actions_val)
poses_val_3d, poses_val_2d, cameras_val = val_split

# Training data: shuffled mini-batches of args.batch_size samples.
train_dataset = PreprocessedDataset(
    poses_train_2d, poses_train_3d, cameras_train,
    keypoints_metadata, he_dataset.skeleton(), he_dataset.fps(),
)
train_dataloader = DataLoader(train_dataset, batch_size=args.batch_size, shuffle=True, num_workers=0)

# Validation data: one sample per batch, in dataset order, so a batch index
# from this loader is also an index into val_dataset.
val_dataset = PreprocessedDataset(
    poses_val_2d, poses_val_3d, cameras_val,
    keypoints_metadata, he_dataset.skeleton(), he_dataset.fps(),
)
val_dataloader = DataLoader(val_dataset, batch_size=1, shuffle=False, num_workers=0)

In [38]:
# Run on the GPU when one is available, otherwise fall back to the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
saved_model_fp = 'data/pleasant-shape/model.pth'

# map_location remaps the checkpoint's tensors onto `device` while loading.
# Without it, a checkpoint saved from a GPU run fails to deserialize on a
# CPU-only machine, which defeats the fallback chosen above.
# NOTE(security): torch.load unpickles the whole model object -- only load
# checkpoints from a trusted source.
# NOTE(review): recent PyTorch releases default to weights_only=True, which
# rejects fully pickled modules; if this load fails there, pass
# weights_only=False -- confirm against the installed version.
model = torch.load(saved_model_fp, map_location=device).to(device)
_ = model.eval()  # switch to inference mode; assignment hides the cell output

# Loss used to score predictions against the 3D targets.
criterion = mpjpe

In [40]:
# Scan the validation loader for the batch with the highest loss.
# mx_idx stays at -1 if no batch ever scores above the initial mx_loss of 0.
mx_loss = 0
mx_idx = -1
with torch.no_grad():  # inference only, so no autograd bookkeeping is needed
    for idx, batch in enumerate(val_dataloader):
        # batch[0] is fed to the model, batch[1] is what the loss compares against
        model_inp = batch[0].to(device)
        target = batch[1].to(device)
        preds = model(model_inp)
        loss = criterion(preds, target).cpu().item()
        if loss > mx_loss:
            mx_loss, mx_idx = loss, idx

In [51]:
# Visualize the validation sample that produced the highest loss.
# The sample is read from the dataset exactly once, so the 2D input shown,
# the input fed to the model, the 3D target and the camera are guaranteed to
# come from the same dataset read.
worst_sample = val_dataset[mx_idx]

# Add a leading batch axis of size 1 directly on the numpy arrays.
data_2d = worst_sample[0][None]
targ_3d = worst_sample[1][None]

with torch.no_grad():
    preds = model(torch.from_numpy(data_2d).to(device))
pred_3d = preds.cpu().numpy()

# Shallow copy of the sample's camera parameters; they are passed through to
# visualize() unchanged.
cam = dict(worst_sample[2])

# Copies are handed over so visualize() cannot modify the arrays kept here.
visualize(data_2d.copy(), targ_3d.copy(), pred_3d.copy(),
          val_dataset.keypoints_metadata, cam, val_dataset.skeleton,
          val_dataset.fps, output_fp='tmp.gif')

MovieWriter imagemagick unavailable; using Pillow instead.


0/1      

In [13]:
# Visualize up to the first 15 items of a batch together with its predictions.
# NOTE(review): `batch` and `preds` are not defined in this cell; it uses
# whatever earlier cells last bound to those names, so the result depends on
# execution order -- confirm they belong to the same batch before trusting
# the output.
data_2d = batch[0][:15].numpy()
targ_3d = batch[1][:15].numpy()
pred_3d = preds[:15].cpu().numpy()

# Take the first entry of every camera field in the batch, then unwrap the
# fields used below into numpy arrays / plain Python scalars.
cam = {key: val[0] for key, val in batch[2].items()}
cam.update({name: cam[name].numpy() for name in ('orientation', 'translation')})
cam.update({name: cam[name].item() for name in ('res_w', 'res_h', 'azimuth')})

visualize(
    data_2d.copy(), targ_3d.copy(), pred_3d.copy(),
    val_dataset.keypoints_metadata, cam, val_dataset.skeleton,
    val_dataset.fps, output_fp='tmp.gif',
)

MovieWriter imagemagick unavailable; using Pillow instead.


14/15      

In [27]:
# Reload the 2D keypoint archive from disk.
# NOTE(review): this rebinds `keypoints` to the object returned by np.load,
# replacing the 'positions_2d' mapping the data-preparation cell stored under
# the same name; re-run that cell before anything that expects the mapping.
# allow_pickle=True unpickles stored objects -- only use with trusted files.
keypoints = np.load(args.dataset_2d_path, allow_pickle=True)