In [1]:
import os
import sys

# Notebook bootstrap: step one directory up and make /workspace/src importable.
# Both steps are guarded so that re-running this cell in a live kernel does not
# climb a further directory level or add a duplicate sys.path entry.
if "_NOTEBOOK_CWD_ADJUSTED" not in globals():
    os.chdir('..')
    _NOTEBOOK_CWD_ADJUSTED = True
if "/workspace/src" not in sys.path:
    sys.path.append("/workspace/src")

In [2]:
from databases.datasets import (
    Mpi3dTestDataset,
    Mpi3dTrainDataset,
    PersonStackedMucoTempDataset,
    ConcatPoseDataset,
)
from model.videopose import TemporalModel, TemporalModelOptimized1f
from training.preprocess import *
from training.loaders import ChunkedGenerator

In [3]:
_config = {
        "num_epochs": 80,
        "preprocess_2d": "DepthposeNormalize2D",
        "preprocess_3d": "SplitToRelativeAbsAndMeanNormalize3D",
        # training
        "optimiser": "adam",
        "adam_amsgrad": True,
        "learning_rate": 1e-3,
        "sgd_momentum": 0,
        "batch_size": 1024,
        "train_time_flip": True,
        "test_time_flip": True,
        "lr_scheduler": {"type": "multiplicative", "multiplier": 0.95, "step_size": 1,},
        # dataset
        "train_data": "mpii_train",
        "pose2d_type": "hrnet",
        "pose3d_scaling": "normal",
        "megadepth_type": "megadepth_at_hrnet",
        "cap_25fps": True,
        "stride": 2,
        "simple_aug": True,  # augments data by duplicating each frame
        "model": {
            "loss": "l1",
            "channels": 1024,
            "dropout": 0.25,
            "filter_widths": [3, 3, 3, 3],
            "layernorm": False,
        },
    }

In [4]:
# Build the train and test datasets; the listed _config entries are passed
# positionally, in this order.
train_data = Mpi3dTrainDataset(
    *(_config[key] for key in ("pose2d_type", "pose3d_scaling", "cap_25fps", "stride"))
)
# The test set takes only the pose type and scaling, plus eval_frames_only=True.
test_data = Mpi3dTestDataset(
    *(_config[key] for key in ("pose2d_type", "pose3d_scaling")),
    eval_frames_only=True,
)

In [5]:
# Index 2 along the last axis for every entry of sample 9 in the raw 2D poses.
# NOTE(review): presumably the per-joint detection confidence — confirm.
train_data.poses2d[9, :, 2]

array([0.9142475 , 0.8918443 , 0.9529049 , 0.55906343, 0.94791263,
       0.7756213 , 0.8251141 , 0.4155107 , 0.67124367, 0.25609845,
       0.8118482 , 0.63151073, 0.7830368 , 0.7801824 , 0.81855637,
       0.6617112 , 0.8385383 , 0.6991591 , 0.79960364], dtype=float32)

In [6]:
# Shape of the 'pose2d' entry that prepare_sample returns for sample 9.
train_data.prepare_sample(9)['pose2d'].shape

(19, 3)

In [7]:
# Clear any existing transform on the training set before the preprocessing
# steps are constructed from it.
# NOTE(review): presumably so the normalisers see untransformed samples — confirm.
train_data.transform = None

preprocess_keys = ("preprocess_2d", "preprocess_3d")

# decode_trfrm is given the configured name together with globals()
# (presumably to look up the transform class of that name — confirm).
transforms_train = [
    decode_trfrm(_config[key], globals())(train_data, cache=False)
    for key in preprocess_keys
]

# The normalisers from the training transforms are handed to the test transforms.
normalizer2d, normalizer3d = (step.normalizer for step in transforms_train)

transforms_test = [
    decode_trfrm(_config[key], globals())(test_data, normalizer)
    for key, normalizer in zip(preprocess_keys, (normalizer2d, normalizer3d))
]

# Both pipelines end with a RemoveIndex step.
for pipeline in (transforms_train, transforms_test):
    pipeline.append(RemoveIndex())

train_data.transform = SaveableCompose(transforms_train)
test_data.transform = SaveableCompose(transforms_test)

In [8]:
# The two networks receive identical constructor arguments; only the class
# differs (TemporalModelOptimized1f for `model`, TemporalModel for `test_model`).
in_features = train_data[[0]]["pose2d"].shape[-1]  # last-axis size of a transformed sample
model_cfg = _config["model"]
shared_kwargs = dict(
    dropout=model_cfg["dropout"],
    channels=model_cfg["channels"],
    layernorm=model_cfg["layernorm"],
)

model = TemporalModelOptimized1f(
    in_features, MuPoTSJoints.NUM_JOINTS, model_cfg["filter_widths"], **shared_kwargs
)
test_model = TemporalModel(
    in_features, MuPoTSJoints.NUM_JOINTS, model_cfg["filter_widths"], **shared_kwargs
)

# Move both networks to the GPU; the last call's return value is the cell output.
model.cuda()
test_model.cuda()

TemporalModel(
  (drop): Dropout(p=0.25, inplace=False)
  (relu): ReLU(inplace=True)
  (shrink): Conv1d(1024, 51, kernel_size=(1,), stride=(1,))
  (expand_conv): Conv1d(42, 1024, kernel_size=(3,), stride=(1,), bias=False)
  (expand_bn): BatchNorm1d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (layers_conv): ModuleList(
    (0): Conv1d(1024, 1024, kernel_size=(3,), stride=(1,), dilation=(3,), bias=False)
    (1): Conv1d(1024, 1024, kernel_size=(1,), stride=(1,), bias=False)
    (2): Conv1d(1024, 1024, kernel_size=(3,), stride=(1,), dilation=(9,), bias=False)
    (3): Conv1d(1024, 1024, kernel_size=(1,), stride=(1,), bias=False)
    (4): Conv1d(1024, 1024, kernel_size=(3,), stride=(1,), dilation=(27,), bias=False)
    (5): Conv1d(1024, 1024, kernel_size=(1,), stride=(1,), bias=False)
  )
  (layers_bn): ModuleList(
    (0): BatchNorm1d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (1): BatchNorm1d(1024, eps=1e-05, momentum=0.1, affine

In [9]:
# Padding is derived from the model's receptive field: (field - 1) // 2.
receptive_field = model.receptive_field()
pad = (receptive_field - 1) // 2

train_loader = ChunkedGenerator(
    train_data,
    _config["batch_size"],
    pad,
    _config["train_time_flip"],
    shuffle=True,
)

In [10]:
# Take the first batch from a fresh iterator over the training loader.
data = next(iter(train_loader))

In [11]:
# Mean of the batch's 'valid_pose' entries.
# NOTE(review): reads as the fraction of valid poses if the flag is 0/1 — confirm.
data['valid_pose'].numpy().mean()

0.9423828125

In [12]:
# Shape of every entry in the batch, keyed by entry name.
{name: tensor.shape for name, tensor in data.items()}

{'temporal_pose2d': torch.Size([1024, 81, 42]),
 'pose3d': torch.Size([1024, 1, 51]),
 'valid_pose': torch.Size([1024])}

In [13]:
# Run the training model once on the batch's 2D input, moved to the GPU.
# NOTE(review): no torch.no_grad() / model.eval() here, so autograd is active
# and the model runs in whatever train/eval mode it is currently in.
pred_3d = model(data["temporal_pose2d"].to("cuda"))

In [14]:
# Map every third feature (starting at index 2) of the first frame of the first
# batch element back through the 2D normaliser: value * std + mean.
data['temporal_pose2d'][0, 0, 2::3] * normalizer2d.std[2::3] + normalizer2d.mean[2::3]

tensor([0.8227, 0.9570, 0.9473, 0.7910, 0.9579, 0.9527, 0.9626, 0.9731, 0.9577,
        0.9540, 0.9647, 0.9597, 0.9681, 0.7999])

In [35]:
# Number of values picked by the stride-3 slice (offset 2) of one batch frame.
data['temporal_pose2d'][0, 0, 2::3].shape

torch.Size([14])

In [34]:
# Number of entries along the middle axis of the raw 2D poses, for comparison
# with the per-frame count seen in the batch.
train_data.poses2d[0,:,2].shape

(19,)

In [54]:
# For every batch the loader yields, record whether all values of the stride-3
# slice (offset 2) of 'temporal_pose2d' lie within [0, 1].
is_probs = [
    np.all(((conf >= 0) & (conf <= 1)).numpy())
    for conf in (batch['temporal_pose2d'][:, :, 2::3] for batch in train_loader)
]

In [56]:
# Range-check only the first batch the loader yields.
# The original evaluated np.all(...) as a bare expression inside the loop body,
# so the result was neither stored nor displayed. It is now kept in a variable
# and shown as the cell's last expression.
# `data` is still rebound to that first batch, exactly as before.
first_batch_in_unit_range = None  # stays None if the loader yields nothing
for data in train_loader:
    conf = data['temporal_pose2d'][:, :, 2::3]
    first_batch_in_unit_range = np.all(((conf >= 0) & (conf <= 1)).numpy())
    break
first_batch_in_unit_range

In [59]:

# Upper-bound half of the range check: are all sliced values <= 1?
np.all((data['temporal_pose2d'][:, :, 2::3] <= 1).numpy())

False

In [69]:
# Take the stride-3 slice (offset 2) of the batch and list every element
# that is greater than 1 (boolean-mask indexing returns them as a flat tensor).
arr = data['temporal_pose2d'][:, :, 2::3]
arr[arr > 1]

tensor([1.0553, 1.0014, 1.0082, 1.0124, 1.0116, 1.0231, 1.0001, 1.0168, 1.0074,
        1.0004, 1.0051, 1.0141, 1.0183, 1.0208, 1.0180, 1.0122, 1.0060, 1.0039,
        1.0130, 1.0086, 1.0208, 1.0096, 1.0082, 1.0054, 1.0246, 1.0054, 1.0139,
        1.0007, 1.0305, 1.0597, 1.0226, 1.0431, 1.0040, 1.0130, 1.0012, 1.0005,
        1.0110, 1.0138, 1.0211, 1.0126, 1.0113, 1.0249, 1.0023, 1.0121, 1.0007,
        1.0217, 1.0284, 1.0114, 1.0209, 1.0377, 1.0416, 1.0058, 1.0078, 1.0236,
        1.0193, 1.0000, 1.0100, 1.0079, 1.0189, 1.0044, 1.0187, 1.0090, 1.0191,
        1.0426, 1.0362, 1.0352, 1.0169, 1.0000, 1.0017, 1.0075, 1.0048, 1.0018,
        1.0018, 1.0039, 1.0021, 1.0256, 1.0029, 1.0145, 1.0087, 1.0054, 1.0250,
        1.0294, 1.0149, 1.0041, 1.0058, 1.0014, 1.0204, 1.0010, 1.0129, 1.0235,
        1.0292, 1.0129, 1.0063, 1.0028, 1.0007, 1.0083, 1.0021, 1.0018, 1.0113,
        1.0019, 1.0318, 1.0023, 1.0121, 1.0054, 1.0250, 1.0294, 1.0029, 1.0036,
        1.0112, 1.0195, 1.0045, 1.0064, 

In [16]:
# Full shape of the batch's 'temporal_pose2d' tensor.
data['temporal_pose2d'].shape

torch.Size([1024, 81, 42])