In [1]:
# Change this line if you're using Colab to something like '/content/drive/MyDrive/TeamX/'
# where TeamX is just the clone of repository on your Google Drive
# and you have mounted the drive at /content/drive  
# See the Tutorial Slides for more detail.

# Works on your local machine but not on Colab!
PROJECT_ROOT = '../..' 

# Fix this path and use this one on Colab
# PROJECT_ROOT = '/content/drive/MyDrive/TeamX'

import os
import sys
import torch
import torch.nn as nn

from os.path import join as ospj
sys.path.append(ospj(PROJECT_ROOT, 'src'))
%load_ext autoreload
%autoreload 2

In [2]:
!wget "https://download.pytorch.org/models/vit_b_16-c867db91.pth"

# Change the relative paths below to absolute if running on Colab
!mkdir -p ../../saved/models/vit
!mv vit_b_16-c867db91.pth ../../saved/models/vit/vit.pth

'wget' is not recognized as an internal or external command,
operable program or batch file.
The syntax of the command is incorrect.
'mv' is not recognized as an internal or external command,
operable program or batch file.


In [2]:
""" Setup input pipeline
"""
from utils.config_parser import ConfigParser
import data.threeDPW as module_data 

%aimport -ConfigParser # Due to an issue of pickle and auto_reload

config = ConfigParser.wo_args(config=ospj(PROJECT_ROOT,'cfgs/project/hppw-config.json'))

datamodule = config.init_obj('train_loader', module_data)
train_loader = datamodule.get_loader()

datamodule = config.init_obj('validation_loader', module_data)
val_loader = datamodule.get_loader()

datamodule = config.init_obj('test_loader', module_data)
test_loader = datamodule.get_loader()


In [3]:
""" Visualize any loader
"""
from utils.viz import visualize_tfrecord_dataloader

# Press q multiple times to exit. Maybe a better way possible.
# Reduce batch size in the cfgs/project/*.json for the respective loader. 
visualize_tfrecord_dataloader(val_loader)



In [3]:
from models import *
from models.vit.model import VisionTransformer
print(os.getcwd())

weights = torch.load(ospj(PROJECT_ROOT,'saved/models/vit.pth'))

model = VisionTransformer(
            image_size=224, # Input image size (width and height)
            patch_size=16,  # Image broken into (16 x 16) non-overlaping batches
            num_layers=4,  # Number of blocks in the Encoder
            num_heads=12,   # Number of heads in each Multi-"head" attention
            hidden_dim=768, # Token size (length of a single token)
            mlp_dim=3072,   # Hidden layer size of each MLP layer,
            total_layers=12,
            global_pool="avg"
        )

model.load_state_dict(weights, strict=True)

d:\Saarland\Research\human-pose-prediction-in-the-wild\src\notebooks


<All keys matched successfully>

In [5]:
""" Initialize the trainer and model
"""
import matplotlib.pyplot as plt

from trainers.hppw_trainer import HPPWTrainer
from utils.config_parser import ConfigParser
import data.threeDPW as module_data 

%aimport -ConfigParser # Due to an issue of pickle and auto_reload

config = ConfigParser.wo_args(config=ospj(PROJECT_ROOT,'cfgs/project/asim-config.json'))
trainer = HPPWTrainer(config=config, train_loader=train_loader, eval_loader=val_loader)
# stats = trainer.train()
# plt.plot(stats['loss']['train'], label='train')
# plt.plot(stats['loss']['val'], label='val')
# plt.title('Classification loss history')
# plt.xlabel('Epoch')
# plt.ylabel('Classification loss')
# plt.legend()
# plt.show()



In [44]:
from prettytable import PrettyTable
def count_parameters(model):
    table = PrettyTable(["Modules", "Parameters"])
    total_params = 0
    for name, parameter in model.named_parameters():
        if not parameter.requires_grad: continue
        params = parameter.numel()
        table.add_row([name, params])
        total_params+=params
    print(table)
    print(f"Total Trainable Params: {total_params}")
    return total_params

In [14]:
from torchsummary import summary
for i, (history, future) in enumerate(test_loader):
    img_seq = history[0]
    history_pose_seq = history[1]
    history_root_seq = history[2]
    history_mask = history[3]
    output = trainer.model(img_seq, history_pose_seq, history_root_seq, history_mask)
    print(output.shape)
    break

RuntimeError: Given normalized_shape=[256], expected input with shape [*, 256], but got input of size[16, 197, 768]

In [None]:
import torchinfo
torchinfo.summary(trainer.model)

Layer (type:depth-idx)                                                      Param #
HumanPosePredictorModel                                                     --
├─PoseEncoder: 1-1                                                          --
│    └─Sequential: 2-1                                                      --
│    │    └─PoseEncoderBlock: 3-1                                           527,104
│    │    └─PoseEncoderBlock: 3-2                                           527,104
├─VisionTransformer: 1-2                                                    768
│    └─Conv2d: 2-2                                                          590,592
│    └─Encoder: 2-3                                                         151,296
│    │    └─MultiInputSequential: 3-3                                       28,351,488
│    │    └─LayerNorm: 3-4                                                  1,536
│    └─Sequential: 2-4                                                      --
│    │    └─Lin

In [None]:
""" Perform test
"""

checkpoint_dir = '0604_104809' 
path = ospj(PROJECT_ROOT, f'saved/models/hppw/{checkpoint_dir}/best_val_model.pth')

trainer.load_model(path=path)

result = trainer.evaluate(loader=test_loader)

print(result)


In [31]:
from models.temporal.encoder import TemporalEncoder, LocalTemporalEncoderBlock

model = TemporalEncoder(
    num_layers=3,
    num_heads=8,
    hidden_dim=256,
    mlp_dim=512
)

In [34]:
local_feat = torch.randn(size=(32, 15, 197, 256))
global_feat = torch.randn(size=(32, 15, 256))

local_out, global_out = model(local_feat, global_feat)

In [35]:
local_out

tensor([[[ 0.7680,  0.1498,  2.5715,  ..., -1.7233, -7.7865,  8.6335],
         [ 0.7680,  0.1498,  2.5715,  ..., -1.7233, -7.7865,  8.6336],
         [ 0.7680,  0.1498,  2.5715,  ..., -1.7233, -7.7865,  8.6336],
         ...,
         [ 0.7680,  0.1498,  2.5715,  ..., -1.7233, -7.7865,  8.6335],
         [ 0.7680,  0.1498,  2.5715,  ..., -1.7233, -7.7865,  8.6335],
         [ 0.7680,  0.1498,  2.5715,  ..., -1.7233, -7.7865,  8.6336]],

        [[-1.2756, -7.0257, -1.1117,  ...,  4.7989, -0.6455,  3.4401],
         [-1.2756, -7.0257, -1.1117,  ...,  4.7989, -0.6455,  3.4401],
         [-1.2756, -7.0257, -1.1117,  ...,  4.7989, -0.6455,  3.4401],
         ...,
         [-1.2756, -7.0257, -1.1117,  ...,  4.7989, -0.6455,  3.4401],
         [-1.2756, -7.0257, -1.1117,  ...,  4.7989, -0.6455,  3.4401],
         [-1.2756, -7.0257, -1.1117,  ...,  4.7989, -0.6455,  3.4401]],

        [[-1.1115, -3.7773, -9.5381,  ..., -0.7923,  2.3916, -3.7397],
         [-1.1115, -3.7773, -9.5381,  ..., -0

In [33]:
from torchsummary import summary

summary(model, input_data=[local_feat, global_feat])

Layer (type:depth-idx)                             Output Shape              Param #
├─MultiInputSequential: 1-1                        [-1, 197, 256]            --
|    └─TemporalEncoderBlock: 2-1                   [-1, 14, 197, 256]        --
|    |    └─LocalTemporalEncoderBlock: 3-1         [-1, 14, 197, 256]        528,128
|    |    └─GlobalTemporalEncoderBlock: 3-2        [-1, 15, 256]             527,104
|    └─TemporalEncoderBlock: 2-2                   [-1, 13, 197, 256]        --
|    |    └─LocalTemporalEncoderBlock: 3-3         [-1, 13, 197, 256]        528,128
|    |    └─GlobalTemporalEncoderBlock: 3-4        [-1, 15, 256]             527,104
|    └─TemporalEncoderBlock: 2-3                   [-1, 197, 256]            --
|    |    └─LocalTemporalEncoderBlock: 3-5         [-1, 197, 256]            528,128
|    |    └─GlobalTemporalEncoderBlock: 3-6        [-1, 15, 256]             527,104
Total params: 3,165,696
Trainable params: 3,165,696
Non-trainable params: 0
Total mul

Layer (type:depth-idx)                             Output Shape              Param #
├─MultiInputSequential: 1-1                        [-1, 197, 256]            --
|    └─TemporalEncoderBlock: 2-1                   [-1, 14, 197, 256]        --
|    |    └─LocalTemporalEncoderBlock: 3-1         [-1, 14, 197, 256]        528,128
|    |    └─GlobalTemporalEncoderBlock: 3-2        [-1, 15, 256]             527,104
|    └─TemporalEncoderBlock: 2-2                   [-1, 13, 197, 256]        --
|    |    └─LocalTemporalEncoderBlock: 3-3         [-1, 13, 197, 256]        528,128
|    |    └─GlobalTemporalEncoderBlock: 3-4        [-1, 15, 256]             527,104
|    └─TemporalEncoderBlock: 2-3                   [-1, 197, 256]            --
|    |    └─LocalTemporalEncoderBlock: 3-5         [-1, 197, 256]            528,128
|    |    └─GlobalTemporalEncoderBlock: 3-6        [-1, 15, 256]             527,104
Total params: 3,165,696
Trainable params: 3,165,696
Non-trainable params: 0
Total mul

In [12]:

""" Initialize the trainer and model
"""
import matplotlib.pyplot as plt

from trainers.hppw_trainer import HPPWTrainer

trainer = HPPWTrainer(config=config, train_loader=train_loader, eval_loader=val_loader)
# stats = trainer.train()

plt.plot(stats['loss']['train'], label='train')
plt.plot(stats['loss']['val'], label='val')
plt.title('Classification loss history')
plt.xlabel('Epoch')
plt.ylabel('Classification loss')
plt.legend()
plt.show()




KeyError: 'wandb'