In [1]:
import numpy as np

#from common.arguments import parse_args
import torch

import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import os
import sys
import errno

from common.camera import *
from common.model import *
from common.loss import *
from common.generators import ChunkedGenerator, UnchunkedGenerator
from time import time
from common.utils import deterministic_random

from common.h36m_dataset import Human36mDataset


In [2]:
class Args:
    """Fixed configuration used in place of command-line arguments.

    The `parse_args` import at the top of the notebook is commented out, so
    every option the notebook reads is declared here as a class attribute.
    """
    # Data selection
    dataset = 'h36m'                              # used to build the 3D/2D archive file names
    keypoints = 'gt'                              # suffix of the 2D detections archive
    subjects_train = ['S1','S5','S6','S7','S8']
    subjects_test = ['S9','S11']
    subjects_unlabeled = ''                       # comma-separated; empty disables the semi-supervised path
    actions = '*'                                 # '*' keeps every action, otherwise comma-separated prefixes
    checkpoint = 'checkpoint243'                  # directory the training loop saves into
    checkpoint_frequency = 10                     # save every N epochs
    render = False
    # Read as `args.viz_subject` when `render` is True; without this attribute
    # that branch raises AttributeError. None means no subject was chosen.
    viz_subject = None
    data_augmentation = True                      # forwarded as `augment` to the training generator

    #Model
    stride = 1
    epochs = 80
    batch_size = 1024
    dropout = 0.25
    learning_rate = 0.001
    lr_decay = 0.95                               # multiplied into the learning rate after every epoch
    architecture = '3,3,3,3,3'                    # comma-separated filter widths
    causal = True
    channels = 1024
    resume = False                                # only referenced by the commented-out checkpoint cell
    evaluate = ''                                 # only referenced by the commented-out checkpoint cell

    #Experimental
    subset = 1                                    # values < 1 make fetch() keep a fraction of each sequence
    downsample = 1                                # frame step applied by fetch()
    warmup = 1                                    # NOTE(review): not read anywhere in the visible notebook
    disable_optimizations = False
    dense = False
    no_eval = False                               # True skips the per-epoch evaluation passes

args = Args()
print(args.dataset)
    

h36m


In [3]:
# Location of the 3D pose archive for the configured dataset.
dataset_path = 'data/data_3d_{}.npz'.format(args.dataset)
print(dataset_path)
dataset = Human36mDataset(dataset_path)

data/data_3d_h36m.npz


In [4]:
print('Preparing 3d data...')
# For every sequence that has world-space positions, store one camera-space
# copy per camera under the 'positions_3d' key.
for subject in dataset.subjects():
    for action in dataset[subject].keys():
        anim = dataset[subject][action]
        if 'positions' not in anim:
            continue

        per_camera = []
        for cam in anim['cameras']:
            cam_space = world_to_camera(anim['positions'], R=cam['orientation'], t=cam['translation'])
            # Subtract entry 0 from every other entry along axis 1 (removes the
            # global offset); entry 0 itself keeps the trajectory.
            cam_space[:, 1:] -= cam_space[:, :1]
            per_camera.append(cam_space)
        anim['positions_3d'] = per_camera

Preparing 3d data...


In [5]:
print('Loading 2D detections...')
# The archive gets its own name and is closed as soon as both entries have been
# read, instead of rebinding `keypoints` from archive to dict and leaving the
# file handle open.
# NOTE(review): allow_pickle=True unpickles object arrays, which can execute
# arbitrary code - only load archives from a trusted source.
with np.load('data/data_2d_' + args.dataset + '_' + args.keypoints + '.npz', allow_pickle=True) as keypoints_archive:
    keypoints_metadata = keypoints_archive['metadata'].item()
    keypoints = keypoints_archive['positions_2d'].item()

# Left/right index lists for the 2D keypoints (from the archive metadata) and
# for the 3D skeleton (from the dataset); both are passed to the generators.
keypoints_symmetry = keypoints_metadata['keypoints_symmetry']
kps_left, kps_right = list(keypoints_symmetry[0]), list(keypoints_symmetry[1])
joints_left, joints_right = list(dataset.skeleton().joints_left()), list(dataset.skeleton().joints_right())

Loading 2D detections...


In [6]:
# Check that every subject/action in the 3D dataset has 2D detections, and trim
# detections that are longer than the corresponding mocap sequence.
for subject in dataset.subjects():
    assert subject in keypoints, 'Subject {} is missing from the 2D detections dataset'.format(subject)
    for action in dataset[subject].keys():
        assert action in keypoints[subject], 'Action {} of subject {} is missing from the 2D detections dataset'.format(action, subject)
        if 'positions_3d' not in dataset[subject][action]:
            continue

        detections = keypoints[subject][action]            # same object, so edits below persist
        mocap = dataset[subject][action]['positions_3d']
        for cam_idx in range(len(detections)):
            # We check for >= instead of == because some videos in H3.6M contain extra frames
            mocap_length = mocap[cam_idx].shape[0]
            assert detections[cam_idx].shape[0] >= mocap_length

            if detections[cam_idx].shape[0] > mocap_length:
                # Drop the surplus trailing frames
                detections[cam_idx] = detections[cam_idx][:mocap_length]

        assert len(detections) == len(mocap)

In [7]:
# Map the first two coordinates of every 2D keypoint through
# normalize_screen_coordinates using the resolution of the matching camera.
# NOTE(review): `kps` is modified in place, so running this cell a second time
# normalizes coordinates that are already normalized.
for subject in keypoints.keys():
    for action in keypoints[subject]:
        views = keypoints[subject][action]
        for cam_idx in range(len(views)):
            kps = views[cam_idx]
            cam = dataset.cameras()[subject][cam_idx]
            kps[..., :2] = normalize_screen_coordinates(kps[..., :2], w=cam['res_w'], h=cam['res_h'])
            views[cam_idx] = kps

# Subject splits
subjects_train = args.subjects_train
subjects_semi = args.subjects_unlabeled.split(',') if args.subjects_unlabeled else []
# NOTE(review): the render branch reads args.viz_subject, which must exist on `args`.
subjects_test = [args.viz_subject] if args.render else args.subjects_test

semi_supervised = len(subjects_semi) > 0

In [8]:
def fetch(subjects, action_filter=None, subset=1, parse_3d_poses=True):
    """Gather per-camera sequences for `subjects`.

    Reads the notebook-level `keypoints`, `dataset` and `args` objects.

    Args:
        subjects: subject names to collect.
        action_filter: optional list of prefixes; an action is kept only when
            its name starts with one of them. None keeps every action.
        subset: when < 1, only a window covering this fraction of each
            sequence is kept (start offset comes from `deterministic_random`).
        parse_3d_poses: when False, no 3D poses are collected.

    Returns:
        Tuple `(out_camera_params, out_poses_3d, out_poses_2d)`. The first two
        are None when nothing was collected for them.
    """
    out_poses_3d, out_poses_2d, out_camera_params = [], [], []

    for subject in subjects:
        for action in keypoints[subject].keys():
            # Skip actions whose name matches none of the requested prefixes
            if action_filter is not None and not any(action.startswith(prefix) for prefix in action_filter):
                continue

            # Append the sequences of all camera views one after another
            poses_2d = keypoints[subject][action]
            out_poses_2d.extend(poses_2d[i] for i in range(len(poses_2d)))

            # Camera intrinsic parameters
            if subject in dataset.cameras():
                cams = dataset.cameras()[subject]
                assert len(cams) == len(poses_2d), 'Camera count mismatch'
                out_camera_params.extend(cam['intrinsic'] for cam in cams if 'intrinsic' in cam)

            if parse_3d_poses and 'positions_3d' in dataset[subject][action]:
                poses_3d = dataset[subject][action]['positions_3d']
                assert len(poses_3d) == len(poses_2d), 'Camera count mismatch'
                out_poses_3d.extend(poses_3d[i] for i in range(len(poses_3d)))

    if not out_camera_params:
        out_camera_params = None
    if not out_poses_3d:
        out_poses_3d = None

    stride = args.downsample
    if subset < 1:
        for i, seq_2d in enumerate(out_poses_2d):
            total = len(seq_2d)
            n_frames = int(round(total//stride * subset)*stride)
            start = deterministic_random(0, total - n_frames + 1, str(total))
            window = slice(start, start + n_frames, stride)
            out_poses_2d[i] = seq_2d[window]
            if out_poses_3d is not None:
                out_poses_3d[i] = out_poses_3d[i][window]
    elif stride > 1:
        # Downsample as requested
        every_nth = slice(None, None, stride)
        for i in range(len(out_poses_2d)):
            out_poses_2d[i] = out_poses_2d[i][every_nth]
            if out_poses_3d is not None:
                out_poses_3d[i] = out_poses_3d[i][every_nth]

    return out_camera_params, out_poses_3d, out_poses_2d

In [9]:
# '*' keeps every action; anything else is a comma-separated list of prefixes.
if args.actions == '*':
    action_filter = None
else:
    action_filter = args.actions.split(',')
    print('Selected actions:', action_filter)


In [10]:
# Collect camera parameters, 3D poses and 2D keypoints for the test subjects
# (`subset` and `parse_3d_poses` keep their defaults).
cameras_valid, poses_valid, poses_valid_2d = fetch(subjects_test, action_filter)

In [11]:
# Parse the comma-separated architecture string into integer filter widths.
filter_widths = list(map(int, args.architecture.split(',')))

In [12]:
# Create the model used for training
shared_model_kwargs = dict(filter_widths=filter_widths, causal=args.causal,
                           dropout=args.dropout, channels=args.channels)
use_optimized_model = not (args.disable_optimizations or args.dense) and args.stride == 1

if use_optimized_model:
    # Use optimized model for single-frame predictions
    model_pos_train = TemporalModelOptimized1f(poses_valid_2d[0].shape[-2], poses_valid_2d[0].shape[-1],
                                               dataset.skeleton().num_joints(), **shared_model_kwargs)
else:
    # When incompatible settings are detected (stride > 1, dense filters, or disabled optimization) fall back to normal model
    model_pos_train = TemporalModel(poses_valid_2d[0].shape[-2], poses_valid_2d[0].shape[-1],
                                    dataset.skeleton().num_joints(), dense=args.dense, **shared_model_kwargs)
    

In [13]:
# Second network instance; the training loop copies the trained weights into it
# before each evaluation pass.
model_pos = TemporalModel(poses_valid_2d[0].shape[-2], poses_valid_2d[0].shape[-1], dataset.skeleton().num_joints(),
                            filter_widths=filter_widths, causal=args.causal, dropout=args.dropout, channels=args.channels,
                            dense=args.dense)
# Only move to the GPU when one exists, matching the guards used by the rest of
# the notebook; an unconditional .cuda() fails on CPU-only machines.
if torch.cuda.is_available():
    model_pos.cuda()
receptive_field = model_pos.receptive_field()
print('INFO: Receptive field: {} frames'.format(receptive_field))

INFO: Receptive field: 243 frames


In [14]:
# Padding on each side of a sequence
pad = (receptive_field - 1) // 2
# With causal convolutions the window is shifted by the full padding amount
causal_shift = pad if args.causal else 0
if args.causal:
    print('INFO: Using causal convolutions')

INFO: Using causal convolutions


In [15]:
import torchsummary

# Derive the summary input shape from the model and the data instead of the
# literal (243, 17, 2), so it stays correct when the architecture or the
# keypoint layout changes.
torchsummary.summary(model_pos,
                     input_size=(receptive_field, poses_valid_2d[0].shape[-2], poses_valid_2d[0].shape[-1]))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv1d-1            [-1, 1024, 241]         104,448
       BatchNorm1d-2            [-1, 1024, 241]           2,048
              ReLU-3            [-1, 1024, 241]               0
           Dropout-4            [-1, 1024, 241]               0
            Conv1d-5            [-1, 1024, 235]       3,145,728
       BatchNorm1d-6            [-1, 1024, 235]           2,048
              ReLU-7            [-1, 1024, 235]               0
           Dropout-8            [-1, 1024, 235]               0
            Conv1d-9            [-1, 1024, 235]       1,048,576
      BatchNorm1d-10            [-1, 1024, 235]           2,048
             ReLU-11            [-1, 1024, 235]               0
          Dropout-12            [-1, 1024, 235]               0
           Conv1d-13            [-1, 1024, 217]       3,145,728
      BatchNorm1d-14            [-1, 10

In [16]:
# Total number of elements across all parameters of model_pos.
model_params = sum(parameter.numel() for parameter in model_pos.parameters())
print('INFO: Trainable parameter count:', model_params)

INFO: Trainable parameter count: 16952371


In [17]:
# Move both networks to the GPU when one is available; on CPU-only machines
# they are left where they are.
if torch.cuda.is_available():
    model_pos = model_pos.cuda()
    model_pos_train = model_pos_train.cuda()

In [18]:
# if args.resume or args.evaluate:
#     chk_filename = os.path.join(args.checkpoint, args.resume if args.resume else args.evaluate)
#     print('Loading checkpoint', chk_filename)
#     checkpoint = torch.load(chk_filename, map_location=lambda storage, loc: storage)
#     print('This model was trained for {} epochs'.format(checkpoint['epoch']))
#     model_pos_train.load_state_dict(checkpoint['model_pos'])
#     model_pos.load_state_dict(checkpoint['model_pos'])

In [19]:
# Generator over the test subjects' data, with augmentation switched off.
test_generator = UnchunkedGenerator(
    cameras_valid, poses_valid, poses_valid_2d,
    pad=pad,
    causal_shift=causal_shift,
    augment=False,
    kps_left=kps_left,
    kps_right=kps_right,
    joints_left=joints_left,
    joints_right=joints_right,
)
print('INFO: Testing on {} frames'.format(test_generator.num_frames()))

INFO: Testing on 543344 frames


In [20]:
# Training case: data, optimizer, bookkeeping and generators

# Training data
cameras_train, poses_train, poses_train_2d = fetch(subjects_train, action_filter, subset=args.subset)

# Optimizer and learning-rate schedule
lr = args.learning_rate
lr_decay = args.lr_decay
optimizer = optim.Adam(model_pos_train.parameters(), lr=lr, amsgrad=True)

# Per-epoch loss history
losses_3d_train, losses_3d_train_eval, losses_3d_valid = [], [], []

epoch = 0
# End points of the BatchNorm momentum decay applied by the training loop
initial_momentum, final_momentum = 0.1, 0.001

# Shuffled generator used for optimisation
train_generator = ChunkedGenerator(
    args.batch_size//args.stride, cameras_train, poses_train, poses_train_2d, args.stride,
    pad=pad, causal_shift=causal_shift,
    shuffle=True, augment=args.data_augmentation,
    kps_left=kps_left, kps_right=kps_right,
    joints_left=joints_left, joints_right=joints_right,
)
# Non-augmented generator over the same data, used to score the training set
train_generator_eval = UnchunkedGenerator(
    cameras_train, poses_train, poses_train_2d,
    pad=pad, causal_shift=causal_shift, augment=False,
)
print('INFO: Training on {} frames'.format(train_generator_eval.num_frames()))

INFO: Training on 1559752 frames


In [21]:
from tqdm.auto import tqdm


def _batch_to_tensor(batch):
    """Convert a numpy batch to a float32 tensor, on the GPU when one is available."""
    tensor = torch.from_numpy(batch.astype('float32'))
    if torch.cuda.is_available():
        tensor = tensor.cuda()
    return tensor


def _mean_mpjpe(generator):
    """Return the frame-weighted mean MPJPE of `model_pos` over one pass of `generator`."""
    loss_sum = 0
    n_frames = 0
    for _, batch, batch_2d in generator.next_epoch():
        if batch_2d.shape[1] == 0:
            # This can only happen when downsampling the dataset
            continue
        inputs_3d = _batch_to_tensor(batch)
        inputs_2d = _batch_to_tensor(batch_2d)
        # Zero the entry at index 0 of axis 2 before scoring
        inputs_3d[:, :, 0] = 0

        predicted_3d_pos = model_pos(inputs_2d)
        loss_3d_pos = mpjpe(predicted_3d_pos, inputs_3d)
        weight = inputs_3d.shape[0]*inputs_3d.shape[1]
        loss_sum += weight * loss_3d_pos.item()
        n_frames += weight
    return loss_sum / n_frames


# Create the checkpoint directory up front: torch.save does not create missing
# directories, and the first save only happens after `checkpoint_frequency`
# epochs of training.
os.makedirs(args.checkpoint, exist_ok=True)

while epoch < args.epochs:
    start_time = time()
    epoch_loss_3d_train = 0
    N = 0
    model_pos_train.train()

    for _, batch_3d, batch_2d in tqdm(train_generator.next_epoch()):
        inputs_3d = _batch_to_tensor(batch_3d)
        inputs_2d = _batch_to_tensor(batch_2d)
        # Zero the entry at index 0 of axis 2 of the target
        inputs_3d[:, :, 0] = 0

        optimizer.zero_grad()

        # Predict 3D poses
        predicted_3d_pos = model_pos_train(inputs_2d)
        loss_3d_pos = mpjpe(predicted_3d_pos, inputs_3d)
        epoch_loss_3d_train += inputs_3d.shape[0]*inputs_3d.shape[1] * loss_3d_pos.item()
        N += inputs_3d.shape[0]*inputs_3d.shape[1]

        loss_3d_pos.backward()
        optimizer.step()

    losses_3d_train.append(epoch_loss_3d_train / N)

    # End-of-epoch evaluation
    with torch.no_grad():
        model_pos.load_state_dict(model_pos_train.state_dict())
        model_pos.eval()

        if not args.no_eval:
            # Evaluate on test set
            losses_3d_valid.append(_mean_mpjpe(test_generator))
            # Evaluate on training set, this time in evaluation mode
            losses_3d_train_eval.append(_mean_mpjpe(train_generator_eval))

    elapsed = (time() - start_time)/60

    if args.no_eval:
        print('[%d] time %.2f lr %f 3d_train %f' % (
                epoch + 1,
                elapsed,
                lr,
                losses_3d_train[-1] * 1000))
    else:
        print('[%d] time %.2f lr %f 3d_train %f 3d_eval %f 3d_valid %f' % (
                epoch + 1,
                elapsed,
                lr,
                losses_3d_train[-1] * 1000,
                losses_3d_train_eval[-1] * 1000,
                losses_3d_valid[-1]  *1000))

    # Decay learning rate exponentially
    lr *= lr_decay
    for param_group in optimizer.param_groups:
        param_group['lr'] *= lr_decay
    epoch += 1

    # Decay BatchNorm momentum: equals initial_momentum at epoch 0 and
    # final_momentum once epoch == args.epochs
    momentum = initial_momentum * np.exp(-epoch/args.epochs * np.log(initial_momentum/final_momentum))
    model_pos_train.set_bn_momentum(momentum)

    # Save checkpoint if necessary
    if epoch % args.checkpoint_frequency == 0:
        chk_path = os.path.join(args.checkpoint, 'epoch_{}.pth'.format(epoch))
        print('Saving checkpoint to', chk_path)

        # NOTE(review): model_traj_train and semi_generator are not defined in
        # this notebook; they are only evaluated when semi_supervised is True.
        torch.save({
            'epoch': epoch,
            'lr': lr,
            'random_state': train_generator.random_state(),
            'optimizer': optimizer.state_dict(),
            'model_pos': model_pos_train.state_dict(),
            'model_traj': model_traj_train.state_dict() if semi_supervised else None,
            'random_state_semi': semi_generator.random_state() if semi_supervised else None,
        }, chk_path)

0it [00:00, ?it/s]

[1] time 5.33 lr 0.001000 3d_train 116.890960 3d_eval 355.191790 3d_valid 337.576410


0it [00:00, ?it/s]

[2] time 5.34 lr 0.000950 3d_train 50.087647 3d_eval 33.756590 3d_valid 45.796654


0it [00:00, ?it/s]

[3] time 5.36 lr 0.000902 3d_train 39.621540 3d_eval 27.870326 3d_valid 43.891596


0it [00:00, ?it/s]

[4] time 5.36 lr 0.000857 3d_train 35.218264 3d_eval 24.933068 3d_valid 41.749663


0it [00:00, ?it/s]

[5] time 5.35 lr 0.000815 3d_train 32.677296 3d_eval 21.873658 3d_valid 40.598365


0it [00:00, ?it/s]

[6] time 5.35 lr 0.000774 3d_train 30.973289 3d_eval 20.972903 3d_valid 41.063472


0it [00:00, ?it/s]

[7] time 5.33 lr 0.000735 3d_train 29.544377 3d_eval 20.902796 3d_valid 39.909230


0it [00:00, ?it/s]

[8] time 5.42 lr 0.000698 3d_train 28.502274 3d_eval 20.422116 3d_valid 40.493350


0it [00:00, ?it/s]

[9] time 5.38 lr 0.000663 3d_train 27.637595 3d_eval 19.873062 3d_valid 40.219244


0it [00:00, ?it/s]

[10] time 5.37 lr 0.000630 3d_train 26.923166 3d_eval 18.379285 3d_valid 40.168619
Saving checkpoint to checkpoint243/epoch_10.pth


0it [00:00, ?it/s]

[11] time 5.38 lr 0.000599 3d_train 26.337419 3d_eval 16.829786 3d_valid 41.033299


0it [00:00, ?it/s]

[12] time 5.37 lr 0.000569 3d_train 25.838789 3d_eval 16.603109 3d_valid 40.920560


0it [00:00, ?it/s]

[13] time 5.37 lr 0.000540 3d_train 25.361602 3d_eval 16.203176 3d_valid 40.471323


0it [00:00, ?it/s]

[14] time 5.37 lr 0.000513 3d_train 24.984937 3d_eval 15.897033 3d_valid 38.831563


0it [00:00, ?it/s]

[15] time 5.40 lr 0.000488 3d_train 24.637322 3d_eval 15.702632 3d_valid 39.562539


0it [00:00, ?it/s]

[16] time 5.35 lr 0.000463 3d_train 24.289378 3d_eval 14.946837 3d_valid 39.089323


0it [00:00, ?it/s]

[17] time 5.36 lr 0.000440 3d_train 24.005707 3d_eval 14.765918 3d_valid 39.064628


0it [00:00, ?it/s]

[18] time 5.36 lr 0.000418 3d_train 23.743726 3d_eval 14.632770 3d_valid 38.254508


0it [00:00, ?it/s]

[19] time 5.36 lr 0.000397 3d_train 23.518460 3d_eval 14.511460 3d_valid 38.564502


0it [00:00, ?it/s]

[20] time 5.37 lr 0.000377 3d_train 23.287615 3d_eval 14.670372 3d_valid 39.904604
Saving checkpoint to checkpoint243/epoch_20.pth


0it [00:00, ?it/s]

[21] time 5.28 lr 0.000358 3d_train 23.138689 3d_eval 14.120953 3d_valid 38.629822


0it [00:00, ?it/s]

[22] time 5.21 lr 0.000341 3d_train 22.920165 3d_eval 13.671640 3d_valid 38.131200


0it [00:00, ?it/s]

[23] time 5.21 lr 0.000324 3d_train 22.761301 3d_eval 13.874411 3d_valid 38.702158


0it [00:00, ?it/s]

[24] time 5.21 lr 0.000307 3d_train 22.598818 3d_eval 13.508005 3d_valid 38.757681


0it [00:00, ?it/s]

[25] time 5.21 lr 0.000292 3d_train 22.445936 3d_eval 13.809196 3d_valid 38.888877


0it [00:00, ?it/s]

[26] time 5.21 lr 0.000277 3d_train 22.309744 3d_eval 13.631745 3d_valid 39.026564


0it [00:00, ?it/s]

[27] time 5.21 lr 0.000264 3d_train 22.181499 3d_eval 13.106290 3d_valid 38.725325


0it [00:00, ?it/s]

[28] time 5.21 lr 0.000250 3d_train 22.041159 3d_eval 13.289114 3d_valid 38.272492


0it [00:00, ?it/s]

[29] time 5.21 lr 0.000238 3d_train 21.950886 3d_eval 13.305452 3d_valid 38.613943


0it [00:00, ?it/s]

[30] time 5.21 lr 0.000226 3d_train 21.856167 3d_eval 12.961989 3d_valid 38.703318
Saving checkpoint to checkpoint243/epoch_30.pth


0it [00:00, ?it/s]

[31] time 5.21 lr 0.000215 3d_train 21.743349 3d_eval 12.861254 3d_valid 38.828291


0it [00:00, ?it/s]

[32] time 5.21 lr 0.000204 3d_train 21.650394 3d_eval 13.683862 3d_valid 39.981729


0it [00:00, ?it/s]

[33] time 5.21 lr 0.000194 3d_train 21.598281 3d_eval 12.467966 3d_valid 38.944511


0it [00:00, ?it/s]

[34] time 5.21 lr 0.000184 3d_train 21.475128 3d_eval 12.600511 3d_valid 38.748769


0it [00:00, ?it/s]

[35] time 5.21 lr 0.000175 3d_train 21.411590 3d_eval 12.603541 3d_valid 39.492674


0it [00:00, ?it/s]

[36] time 5.21 lr 0.000166 3d_train 21.325329 3d_eval 12.644999 3d_valid 39.355841


0it [00:00, ?it/s]

[37] time 5.21 lr 0.000158 3d_train 21.254103 3d_eval 12.128708 3d_valid 39.050177


0it [00:00, ?it/s]

[38] time 5.21 lr 0.000150 3d_train 21.206333 3d_eval 12.452605 3d_valid 38.842064


0it [00:00, ?it/s]

[39] time 5.21 lr 0.000142 3d_train 21.133921 3d_eval 12.542133 3d_valid 39.021029


0it [00:00, ?it/s]

[40] time 5.21 lr 0.000135 3d_train 21.078054 3d_eval 12.055871 3d_valid 38.923244
Saving checkpoint to checkpoint243/epoch_40.pth


0it [00:00, ?it/s]

[41] time 5.21 lr 0.000129 3d_train 21.020990 3d_eval 12.022316 3d_valid 38.674885


0it [00:00, ?it/s]

[42] time 5.21 lr 0.000122 3d_train 20.962173 3d_eval 12.227107 3d_valid 39.297169


0it [00:00, ?it/s]

[43] time 5.21 lr 0.000116 3d_train 20.899207 3d_eval 12.257128 3d_valid 39.874805


0it [00:00, ?it/s]

[44] time 5.21 lr 0.000110 3d_train 20.869627 3d_eval 11.819356 3d_valid 38.402316


0it [00:00, ?it/s]

[45] time 5.21 lr 0.000105 3d_train 20.824658 3d_eval 11.845611 3d_valid 38.260401


0it [00:00, ?it/s]

[46] time 5.21 lr 0.000099 3d_train 20.780243 3d_eval 11.799086 3d_valid 38.457384


0it [00:00, ?it/s]

[47] time 5.21 lr 0.000094 3d_train 20.732103 3d_eval 11.756694 3d_valid 38.620203


0it [00:00, ?it/s]

[48] time 5.21 lr 0.000090 3d_train 20.693376 3d_eval 11.829856 3d_valid 38.899253


0it [00:00, ?it/s]

[49] time 5.21 lr 0.000085 3d_train 20.657083 3d_eval 11.794895 3d_valid 39.045839


0it [00:00, ?it/s]

[50] time 5.21 lr 0.000081 3d_train 20.630733 3d_eval 11.516406 3d_valid 38.703443
Saving checkpoint to checkpoint243/epoch_50.pth


0it [00:00, ?it/s]

[51] time 5.21 lr 0.000077 3d_train 20.599391 3d_eval 11.596773 3d_valid 38.847996


0it [00:00, ?it/s]

[52] time 5.21 lr 0.000073 3d_train 20.563764 3d_eval 11.600436 3d_valid 39.102986


0it [00:00, ?it/s]

[53] time 5.21 lr 0.000069 3d_train 20.539040 3d_eval 11.503220 3d_valid 38.499212


0it [00:00, ?it/s]

[54] time 5.21 lr 0.000066 3d_train 20.506021 3d_eval 11.550083 3d_valid 38.752591


0it [00:00, ?it/s]

[55] time 5.21 lr 0.000063 3d_train 20.477454 3d_eval 11.549540 3d_valid 38.384964


0it [00:00, ?it/s]

[56] time 5.21 lr 0.000060 3d_train 20.452519 3d_eval 11.563563 3d_valid 38.824467


0it [00:00, ?it/s]

[57] time 5.21 lr 0.000057 3d_train 20.425400 3d_eval 11.531846 3d_valid 38.541515


0it [00:00, ?it/s]

[58] time 5.21 lr 0.000054 3d_train 20.399459 3d_eval 11.503964 3d_valid 38.636926


0it [00:00, ?it/s]

[59] time 5.21 lr 0.000051 3d_train 20.381361 3d_eval 11.263473 3d_valid 38.651220


0it [00:00, ?it/s]

[60] time 5.21 lr 0.000048 3d_train 20.362182 3d_eval 11.310853 3d_valid 38.806888
Saving checkpoint to checkpoint243/epoch_60.pth


0it [00:00, ?it/s]

[61] time 5.21 lr 0.000046 3d_train 20.344863 3d_eval 11.215829 3d_valid 38.456331


0it [00:00, ?it/s]

[62] time 5.21 lr 0.000044 3d_train 20.320562 3d_eval 11.336555 3d_valid 38.884721


0it [00:00, ?it/s]

[63] time 5.21 lr 0.000042 3d_train 20.304228 3d_eval 11.284233 3d_valid 38.413521


0it [00:00, ?it/s]

[64] time 5.21 lr 0.000039 3d_train 20.290248 3d_eval 11.295044 3d_valid 38.609171


0it [00:00, ?it/s]

[65] time 5.21 lr 0.000038 3d_train 20.267818 3d_eval 11.124909 3d_valid 38.983074


0it [00:00, ?it/s]

[66] time 5.21 lr 0.000036 3d_train 20.255464 3d_eval 11.318809 3d_valid 39.383180


0it [00:00, ?it/s]

[67] time 5.21 lr 0.000034 3d_train 20.240259 3d_eval 11.228617 3d_valid 38.616482


0it [00:00, ?it/s]

[68] time 5.21 lr 0.000032 3d_train 20.232035 3d_eval 11.264135 3d_valid 38.441254


0it [00:00, ?it/s]

[69] time 5.21 lr 0.000031 3d_train 20.209382 3d_eval 11.302608 3d_valid 38.769610


0it [00:00, ?it/s]

[70] time 5.21 lr 0.000029 3d_train 20.201851 3d_eval 11.286225 3d_valid 38.574142
Saving checkpoint to checkpoint243/epoch_70.pth


0it [00:00, ?it/s]

[71] time 5.21 lr 0.000028 3d_train 20.190093 3d_eval 11.160279 3d_valid 38.566375


0it [00:00, ?it/s]

[72] time 5.21 lr 0.000026 3d_train 20.177460 3d_eval 11.191695 3d_valid 38.807778


0it [00:00, ?it/s]

[73] time 5.20 lr 0.000025 3d_train 20.161025 3d_eval 11.125308 3d_valid 38.732066


0it [00:00, ?it/s]

[74] time 5.20 lr 0.000024 3d_train 20.150553 3d_eval 11.220531 3d_valid 38.751868


0it [00:00, ?it/s]

[75] time 5.20 lr 0.000022 3d_train 20.148501 3d_eval 11.271598 3d_valid 38.607541


0it [00:00, ?it/s]

[76] time 5.20 lr 0.000021 3d_train 20.137316 3d_eval 11.155431 3d_valid 38.451292


0it [00:00, ?it/s]

[77] time 5.20 lr 0.000020 3d_train 20.128599 3d_eval 11.144589 3d_valid 38.638817


0it [00:00, ?it/s]

[78] time 5.20 lr 0.000019 3d_train 20.123188 3d_eval 11.128915 3d_valid 38.574748


0it [00:00, ?it/s]

[79] time 5.20 lr 0.000018 3d_train 20.113758 3d_eval 11.063289 3d_valid 38.741394


0it [00:00, ?it/s]

[80] time 5.20 lr 0.000017 3d_train 20.102735 3d_eval 11.175815 3d_valid 38.593270
Saving checkpoint to checkpoint243/epoch_80.pth


In [22]:
def evaluate(test_generator, action=None, return_predictions=False, use_trajectory_model=False):
    """Run a network over `test_generator` and print/return its error metrics.

    Args:
        test_generator: generator providing `next_epoch()` and `augment_enabled()`.
        action: optional label printed in the report header.
        return_predictions: when True, return the predictions for the first
            batch as a numpy array instead of computing metrics.
        use_trajectory_model: when True, use the global `model_traj` instead
            of `model_pos`.
            NOTE(review): `model_traj` is not defined in the visible notebook.

    Returns:
        `(e1, e2, e3, ev)`: the MPJPE, P-MPJPE, N-MPJPE and velocity error,
        each a frame-weighted mean multiplied by 1000 - or the prediction
        array when `return_predictions` is True.
    """
    sum_mpjpe = 0
    sum_p_mpjpe = 0
    sum_n_mpjpe = 0
    sum_velocity = 0
    with torch.no_grad():
        network = model_traj if use_trajectory_model else model_pos
        network.eval()
        N = 0
        for _, batch, batch_2d in test_generator.next_epoch():
            inputs_2d = torch.from_numpy(batch_2d.astype('float32'))
            if torch.cuda.is_available():
                inputs_2d = inputs_2d.cuda()

            predicted_3d_pos = network(inputs_2d)

            # Test-time augmentation (if enabled)
            if test_generator.augment_enabled():
                # Undo flipping and take average with non-flipped version
                predicted_3d_pos[1, :, :, 0] *= -1
                if not use_trajectory_model:
                    predicted_3d_pos[1, :, joints_left + joints_right] = predicted_3d_pos[1, :, joints_right + joints_left]
                predicted_3d_pos = torch.mean(predicted_3d_pos, dim=0, keepdim=True)

            if return_predictions:
                # Exits on the first batch
                return predicted_3d_pos.squeeze(0).cpu().numpy()

            inputs_3d = torch.from_numpy(batch.astype('float32'))
            if torch.cuda.is_available():
                inputs_3d = inputs_3d.cuda()
            inputs_3d[:, :, 0] = 0
            if test_generator.augment_enabled():
                # Keep only the first entry of the batch axis
                inputs_3d = inputs_3d[:1]

            error = mpjpe(predicted_3d_pos, inputs_3d)
            # Each batch contributes proportionally to shape[0] * shape[1]
            weight = inputs_3d.shape[0]*inputs_3d.shape[1]
            sum_n_mpjpe += weight * n_mpjpe(predicted_3d_pos, inputs_3d).item()
            sum_mpjpe += weight * error.item()
            N += weight

            # The remaining metrics are computed on flattened numpy arrays
            flat_shape = (-1, inputs_3d.shape[-2], inputs_3d.shape[-1])
            inputs = inputs_3d.cpu().numpy().reshape(*flat_shape)
            predicted_3d_pos = predicted_3d_pos.cpu().numpy().reshape(*flat_shape)

            sum_p_mpjpe += weight * p_mpjpe(predicted_3d_pos, inputs)

            # Compute velocity error
            sum_velocity += weight * mean_velocity_error(predicted_3d_pos, inputs)

    header = '----------' if action is None else '----'+action+'----'
    print(header)
    e1 = (sum_mpjpe / N)*1000
    e2 = (sum_p_mpjpe / N)*1000
    e3 = (sum_n_mpjpe / N)*1000
    ev = (sum_velocity / N)*1000
    print('Test time augmentation:', test_generator.augment_enabled())
    print('Protocol #1 Error (MPJPE):', e1, 'mm')
    print('Protocol #2 Error (P-MPJPE):', e2, 'mm')
    print('Protocol #3 Error (N-MPJPE):', e3, 'mm')
    print('Velocity Error (MPJVE):', ev, 'mm')
    print('----------')

    return e1, e2, e3, ev

In [26]:
# Rebuild the network and restore the weights written by the training loop.
model = TemporalModel(poses_valid_2d[0].shape[-2], poses_valid_2d[0].shape[-1], dataset.skeleton().num_joints(),
                            filter_widths=filter_widths, causal=args.causal, dropout=args.dropout, channels=args.channels,
                            dense=args.dense)
# The training loop saves every `checkpoint_frequency` epochs into
# args.checkpoint, so locate the last file it wrote from the configuration
# instead of through a machine-specific absolute path.
last_saved_epoch = (args.epochs // args.checkpoint_frequency) * args.checkpoint_frequency
final_chk_path = os.path.join(args.checkpoint, 'epoch_{}.pth'.format(last_saved_epoch))
checkpoint_tmp = torch.load(final_chk_path, map_location=lambda storage, loc: storage)
model.load_state_dict(checkpoint_tmp['model_pos'])

print(model)
# Only move to the GPU when one exists; an unconditional move fails on
# CPU-only machines.
if torch.cuda.is_available():
    model.to('cuda')
# Input shape derived from the model and data rather than the literal (243, 17, 2)
torchsummary.summary(model,
                     input_size=(model.receptive_field(), poses_valid_2d[0].shape[-2], poses_valid_2d[0].shape[-1]))


TemporalModel(
  (drop): Dropout(p=0.25, inplace=False)
  (relu): ReLU(inplace=True)
  (expand_bn): BatchNorm1d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (shrink): Conv1d(1024, 51, kernel_size=(1,), stride=(1,))
  (expand_conv): Conv1d(34, 1024, kernel_size=(3,), stride=(1,), bias=False)
  (layers_conv): ModuleList(
    (0): Conv1d(1024, 1024, kernel_size=(3,), stride=(1,), dilation=(3,), bias=False)
    (1): Conv1d(1024, 1024, kernel_size=(1,), stride=(1,), bias=False)
    (2): Conv1d(1024, 1024, kernel_size=(3,), stride=(1,), dilation=(9,), bias=False)
    (3): Conv1d(1024, 1024, kernel_size=(1,), stride=(1,), bias=False)
    (4): Conv1d(1024, 1024, kernel_size=(3,), stride=(1,), dilation=(27,), bias=False)
    (5): Conv1d(1024, 1024, kernel_size=(1,), stride=(1,), bias=False)
    (6): Conv1d(1024, 1024, kernel_size=(3,), stride=(1,), dilation=(81,), bias=False)
    (7): Conv1d(1024, 1024, kernel_size=(1,), stride=(1,), bias=False)
  )
  (layers_bn): Mod

In [27]:
import torch.onnx


In [33]:
# Export the restored network to ONNX on whichever device is available.
export_device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(export_device)
model.eval()
# Dummy input of one sequence whose length equals the model's receptive field;
# the trailing dimensions are taken from the 2D keypoint data.
dummy_input = torch.randn(1, model.receptive_field(),
                          poses_valid_2d[0].shape[-2], poses_valid_2d[0].shape[-1],
                          requires_grad=True).to(export_device)
torch.onnx.export(model,
                  dummy_input,
                  "videopose243.onnx",
                  verbose=False,
                  input_names=['input'],
                  output_names=['output'],
                  export_params=True,
                  )

  assert x.shape[-2] == self.num_joints_in
  assert x.shape[-1] == self.in_features


In [None]:
# torch.save({
#                 'epoch': epoch,
#                 'lr': lr,
#                 'random_state': train_generator.random_state(),
#                 'optimizer': optimizer.state_dict(),
#                 'model_pos': model_pos_train.state_dict(),
#                 'model_traj': model_traj_train.state_dict() if semi_supervised else None,
#                 'random_state_semi': semi_generator.random_state() if semi_supervised else None,
#             }, chk_path)