In [1]:
# Pick the compute device: the GPU when CUDA is available, otherwise the CPU.
import torch

_device_kind = 'cuda' if torch.cuda.is_available() else 'cpu'
device = torch.device(_device_kind)
print('Using device:', device)
print()

# The extra diagnostics below only exist for CUDA devices.
if device.type == 'cuda':
    _bytes_per_gb = 1024**3
    _allocated_gb = round(torch.cuda.memory_allocated(0)/_bytes_per_gb,1)
    _reserved_gb = round(torch.cuda.memory_reserved(0)/_bytes_per_gb,1)
    print(torch.cuda.get_device_name(0))
    print('Memory Usage:')
    print('Allocated:', _allocated_gb, 'GB')
    print('Cached:   ', _reserved_gb, 'GB')


Using device: cuda

NVIDIA GeForce RTX 2080 Ti
Memory Usage:
Allocated: 0.0 GB
Cached:    0.0 GB


In [None]:
# Notebook setup: enable the autoreload extension in mode 2 and render
# matplotlib figures inline in the notebook output.
%load_ext autoreload
%autoreload 2
%matplotlib inline

In [None]:
import os
import random
from typing import Callable, Dict, List, Optional, Tuple

import numpy as np
import torch
from torch.utils import data
from tqdm import tqdm



In [4]:
from configs.config import get_cfg_defaults
from core.datasets.dataset_loading_utils import load_dataset
from core.datasets.vq_dataset import DATALoader
from utils.vis_utils import plot_3d_global
from core.models.conformer_vqvae import ConformerVQMotionModel, Encoder
from torch.utils import data
from core.datasets.vq_dataset import DATALoader, MotionCollator
from einops import pack, rearrange, reduce, repeat, unpack

def pack_one(t, pattern):
    """Pack the single tensor `t` with einops `pack` according to `pattern`.

    Returns exactly what `pack` returns for the one-element list `[t]`.
    """
    single = [t]
    return pack(single, pattern)


In [5]:
import utils.vis_utils.plot_3d_global as plot_3d
from utils.motion_processing.hml_process import recover_from_ric


## Dataset creation

In [6]:
# Build the project renderer on the device selected at the top of the notebook.
from utils.vis_utils.render_final import Renderer
renderer = Renderer(device)

In [7]:
# Files of the trained conformer VQ-VAE run: its YAML config and its weights.
path = "/srv/hays-lab/scratch/sanisetty3/music_motion/TGM3D/checkpoints/conformer_768_1024_hmlvec/conformer_768_1024_hmlvec.yaml"
ckpt_path = "/srv/hays-lab/scratch/sanisetty3/music_motion/TGM3D/checkpoints/conformer_768_1024_hmlvec/vqvae_motion.pt"

# Config = get_cfg_defaults() overridden by the run's YAML file, then frozen.
cfg = get_cfg_defaults()
print("loading config from:", path)
cfg.merge_from_file(path)
cfg.freeze()

# Load the raw checkpoint on CPU and print its "steps" entry.
ckpt = torch.load(ckpt_path, map_location="cpu")
print(ckpt["steps"])

from core.models.conformer_vqvae import ConformerVQMotionModel, Encoder

# Instantiate the model from cfg.vqvae on `device` in eval mode, then restore
# its weights through the model's own `load` method.
convvq = ConformerVQMotionModel(cfg.vqvae).to(device).eval()
convvq.load(ckpt_path)


loading config from: /srv/hays-lab/scratch/sanisetty3/music_motion/TGM3D/checkpoints/conformer_768_1024_hmlvec/conformer_768_1024_hmlvec.yaml
tensor([200000.])
Sync is turned on False


In [12]:
# cd ../motion_vqvae/

In [13]:
# from configs.config import get_cfg_defaults

# path = "/srv/hays-lab/scratch/sanisetty3/music_motion/motion_vqvae/checkpoints/conv_vq/convq_512_512/convq_512_512.yaml"
# cfg = get_cfg_defaults()
# print("loading config from:", path)
# cfg.merge_from_file(path)
# cfg.freeze()

# ckpt = torch.load("/srv/hays-lab/scratch/sanisetty3/music_motion/motion_vqvae/checkpoints/conv_vq/convq_512_512/vqvae_motion.pt" , map_location="cpu")
# print(ckpt["steps"])

# from motion_vqvae.core.models.conv_vqvae import ConvVQMotionModel
# convvq = ConvVQMotionModel(cfg.vqvae).to(device).eval()

# convvq.load_state_dict(ckpt["model"])

In [32]:
from glob import glob
# All AIST feature files. Sorted so the name list written from `alll` comes out
# in a stable order, matching how the HumanML3D paths are collected further down.
alll = sorted(glob("/srv/hays-lab/scratch/sanisetty3/music_motion/HumanMotion/AIST/new_joint_vecs/*"))

In [43]:
# Write one line per globbed file: its basename up to the first ".".
all_txt_path = "/srv/hays-lab/scratch/sanisetty3/music_motion/HumanMotionSMPL/AIST_SMPL/all.txt"
with open(all_txt_path , "w") as f:
    f.writelines(f'{entry.split("/")[-1].split(".")[0]}\n' for entry in alll)

In [8]:
from core.datasets.vq_dataset import VQMotionDataset


In [55]:
# Dataset "cm" under the HumanMotion root, split "all", with window_size=-1.
train_ds = VQMotionDataset(
    "cm",
    "/srv/hays-lab/scratch/sanisetty3/music_motion/HumanMotion",
    window_size=-1,
    split="all",
)

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 72/72 [00:00<00:00, 496.87it/s]

Total number of motions 72





In [56]:
# One motion per batch, shuffled, using the loader's default collation.
train_dl = DATALoader(
    train_ds,
    batch_size=1,
    shuffle=True,
    collate_fn=None,
)

In [17]:
# for batch in train_dl:
#     break

In [12]:
# Output directory for the saved index arrays; created if it does not exist yet.
dest = "/srv/hays-lab/scratch/sanisetty3/music_motion/HumanMotionIndices/HumanML3D/joint_indices"
os.makedirs(dest, exist_ok=True)

In [45]:
# Encode every motion in `train_dl` to VQ code indices and save one .npy per
# motion in `dest`, named after the first entry of the batch's "names".

# Frames passed to the encoder per call; longer motions go window by window.
ENCODE_WINDOW = 100

# Inference only: no_grad stops autograd graphs being built for every window.
with torch.no_grad():
    for i, batch in enumerate(tqdm(train_dl)):
        # To resume an interrupted run, skip already-processed batches here,
        # e.g. `if i < 12300: continue`.

        gt_motion = batch["motion"].to(device)
        num_frames = gt_motion.shape[1]
        if num_frames > ENCODE_WINDOW:
            # Encode each window separately, keep the first (only) batch row
            # of each result, then stitch them and restore the batch axis.
            ind = [
                convvq.encode(gt_motion[:, m:m + ENCODE_WINDOW])[0]
                for m in range(0, num_frames, ENCODE_WINDOW)
            ]
            indices = torch.cat(ind)[None]
        else:
            indices = convvq.encode(gt_motion)
        np.save(os.path.join(dest , batch["names"][0]+".npy") , indices.detach().cpu().numpy())
        # Drop GPU references before the next motion and release cached blocks.
        del indices
        del gt_motion
        torch.cuda.empty_cache()

100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2726/2726 [02:14<00:00, 20.30it/s]


In [None]:
# Scratch cell: a bare string literal (path stem of one AIST joint-index file).
# Nothing is assigned; the notebook would only echo the string.
"/srv/hays-lab/scratch/sanisetty3/music_motion/HumanMotionSMPLIndices/AIST/joint_indices/M_gJS_sBM_cAll_d03_mJS3_ch02"

In [None]:
# Path of the HumanML3D feature file named after the first entry of the current
# `batch`. NOTE(review): relies on whatever `batch` was left by an earlier cell.
og = f"/srv/hays-lab/scratch/sanisetty3/music_motion/HumanMotion/HumanML3D/new_joint_vecs/{batch['names'][0]}.npy"

In [57]:
# Take exactly one batch. next(iter(...)) raises on an empty loader instead of
# silently leaving `batch` bound to a value from an earlier cell.
batch = next(iter(train_dl))
# Work on at most the first 1000 frames of the motion.
gt_motion = batch["motion"][:,:1000]
batch["motion"].shape

torch.Size([1, 3205, 263])

In [58]:
# Encode `gt_motion` to VQ code indices. Clips longer than 200 frames are
# encoded in 80-frame windows; shorter clips are encoded in one call so that
# `indices` is always defined by this cell.
window = 80
with torch.no_grad():  # inference only, no autograd graph needed
    if gt_motion.shape[1] > 200:
        ind = [
            convvq.encode(gt_motion[:, m:m + window].to(device))[0]
            for m in range(0, gt_motion.shape[1], window)
        ]
        # Stitch the per-window codes and restore the leading batch axis.
        indices = torch.cat(ind)[None]
    else:
        indices = convvq.encode(gt_motion.to(device))

In [59]:
# Shape of the stitched code indices.
indices.shape

torch.Size([1, 250])

In [60]:
# Decode the code indices back to motion features. This is inference only, so
# run it without autograd; the result can then be moved to numpy directly.
with torch.no_grad():
    quantized, decoded_motion_features = convvq.decode(indices.long())
decoded_motion_features.shape

torch.Size([1, 1000, 263])

In [61]:
def _to_joint_positions(motion_features):
    """Undo the dataset normalisation and recover joint positions.

    Applies `train_ds.inv_transform` to the CPU copy of `motion_features`,
    squeezes singleton axes, casts to float and feeds the result to
    `recover_from_ric(..., 22)` (22 is presumably the joint count - confirm).

    Returns:
        (denormalised features, recovered positions)
    """
    # detach() so tensors that still carry grad can be converted to numpy later.
    denorm = train_ds.inv_transform(motion_features.detach().cpu()).squeeze().float()
    return denorm, recover_from_ric(denorm, 22)


def _draw_gif(joint_xyz, gif_path):
    """Render one motion to `gif_path` via `plot_3d.draw_to_batch`."""
    return plot_3d.draw_to_batch(joint_xyz.numpy().squeeze()[None], None, [gif_path])


save_file = "/srv/hays-lab/scratch/sanisetty3/music_motion/TGM3D/renders"
# Make sure the output directory exists before anything is written into it.
os.makedirs(save_file, exist_ok=True)

gt_motion1, gt_motion_xyz = _to_joint_positions(gt_motion)
pred_motion, pred_motion_xyz = _to_joint_positions(decoded_motion_features)

# Ground truth and reconstruction are written side by side for comparison.
gt_pose_vis = _draw_gif(gt_motion_xyz, os.path.join(save_file, "t_gt.gif"))
pred_pose_vis = _draw_gif(pred_motion_xyz, os.path.join(save_file, "t_pred.gif"))

In [14]:
from core.models.evaluator_wrapper import EvaluatorModelWrapper
from utils.word_vectorizer import WordVectorizer
from core.datasets import dataset_TM_eval


In [15]:
# Evaluation helpers: word vectorizer, evaluator model wrapper and eval loader.
glove_dir = "/srv/hays-lab/scratch/sanisetty3/music_motion/T2M-GPT/glove"
w_vectorizer = WordVectorizer(glove_dir, "our_vab")

eval_wrapper = EvaluatorModelWrapper(cfg.eval_model)

# First positional argument is 32, as in the original call.
tm_eval = dataset_TM_eval.DATALoader(32, w_vectorizer, unit_length=4)

Loading Evaluation Model Wrapper (Epoch 28) Completed!!


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4384/4384 [00:04<00:00, 1066.12it/s]

4248 4248
Pointer Pointing at 0





In [16]:
from utils.eval_trans import calculate_R_precision, calculate_multimodality, calculate_diversity, calculate_frechet_distance, calculate_activation_statistics

In [115]:
# Render the first 1000 rows and first 135 columns of `ogg`.
# NOTE(review): `ogg` is not assigned in any cell visible here; it must come
# from a deleted or out-of-order cell. Define it before this cell.
renderer.render(
    motion_vec=ogg[:1000,:135],
    outdir="./renders/",
    step=0,
    name="00_000007_og",  # plain string: the f-prefix had no placeholders
)

In [3]:
import numpy as np

In [13]:
# Per-dataset Mean.npy files: HumanML3D, AIST and Choreomaster.
hml, hml2, hml3 = (
    "/srv/hays-lab/scratch/sanisetty3/music_motion/HumanMotion/HumanML3D/Mean.npy",
    "/srv/hays-lab/scratch/sanisetty3/music_motion/HumanMotion/AIST/Mean.npy",
    "/srv/hays-lab/scratch/sanisetty3/music_motion/HumanMotion/Choreomaster/Mean.npy",
)

In [14]:
# Combined Mean.npy: element-wise average of the three per-dataset arrays.
# They must be separate list entries; summing them first leaves np.mean a
# one-element list, so it would return the sum rather than the mean.
# NOTE(review): this is an unweighted mean of means - confirm that is intended
# if the datasets differ in size.
np.save(
    "/srv/hays-lab/scratch/sanisetty3/music_motion/HumanMotion/Mean.npy",
    np.mean([np.load(hml), np.load(hml2), np.load(hml3)], axis=0),
)

In [138]:
from glob import glob

In [139]:
# Every HumanML3D feature file, in sorted order.
pths = list(glob("/srv/hays-lab/scratch/sanisetty3/music_motion/HumanMotion/HumanML3D/new_joint_vecs/*"))
pths.sort()

In [145]:
# Peek at one entry of the sorted path list.
pths[25000]

'/srv/hays-lab/scratch/sanisetty3/music_motion/HumanMotion/HumanML3D/new_joint_vecs/M008676.npy'

In [144]:
# Numeric id of that file: basename up to the first ".", last six characters as int.
int(pths[25000].split("/")[-1].split(".")[0][-6:])

8676

In [156]:
# Collect the name (basename up to the first ".") of every file whose trailing
# six-character numeric id is greater than 14616.
add = [
    p.split("/")[-1].split(".")[0]
    for p in tqdm(pths)
    if int(p.split("/")[-1].split(".")[0][-6:]) > 14616
]
        

100%|███████████████████████████████████████████████████████████████████████████████████████████████| 32648/32648 [00:00<00:00, 964613.99it/s]


In [158]:
# Append the collected names to the HumanML3D train split. Names already in the
# file are skipped, so re-running this cell does not duplicate entries.
train_split_path = r'/srv/hays-lab/scratch/sanisetty3/music_motion/HumanMotion/HumanML3D/train.txt'

existing_text = ""
if os.path.exists(train_split_path):
    with open(train_split_path, 'r') as fp:
        existing_text = fp.read()
already_listed = {ln.strip() for ln in existing_text.splitlines()}

# dict.fromkeys removes repeats inside `add` while keeping the original order.
to_append = [item for item in dict.fromkeys(add) if item not in already_listed]

with open(train_split_path, 'a') as fp:
    # Avoid gluing the first new name onto an unterminated last line.
    if existing_text and not existing_text.endswith("\n"):
        fp.write("\n")
    for item in to_append:
        fp.write("%s\n" % item)
    print('Done')

Done


In [157]:
# Number of names collected in `add`.
len(add)

3418

In [153]:
# Name (basename up to the first ".") of every path in `pths`, in order.
add = [p.split("/")[-1].split(".")[0] for p in tqdm(pths)]

100%|██████████████████████████████████████████████████████████████████████████████████████████████| 32648/32648 [00:00<00:00, 1130149.03it/s]


Done


# BERT

In [22]:
from core.datasets.dataset_loading_utils import load_dataset_bert
from core.datasets.motion_bert_dataset import BERTMotionDataset, DATALoader, BERTMotionDatasetSimplified
from core.models.BERT import BERT, BERTParams
from core.optimizer import get_optimizer
from torch.utils.data import DataLoader


In [23]:
# Load the BERT run's config: get_cfg_defaults() overridden by the YAML file,
# then frozen. This rebinds `path` and `cfg` used by the VQ-VAE section above.
path = "/srv/hays-lab/scratch/sanisetty3/music_motion/TGM3D/checkpoints/bert_12_768/bert_12_768.yaml"
cfg = get_cfg_defaults()
print("loading config from:", path)
cfg.merge_from_file(path)
cfg.freeze()

loading config from: /srv/hays-lab/scratch/sanisetty3/music_motion/TGM3D/checkpoints/bert_12_768/bert_12_768.yaml


In [24]:
# NOTE: despite the `train_` names this loads the "test" split, and it rebinds
# the `train_ds` created in the VQ-VAE section.
train_ds, sampler_train, weights_train = load_dataset_bert(
    dataset_names=["t2m"],
    args=cfg,
    split="test",
    weight_scale=[1],
)

100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4384/4384 [00:02<00:00, 1603.35it/s]

Total number of motions 3516





In [25]:
# `DATALoader` here is the one imported from motion_bert_dataset in this
# section, which replaced the vq_dataset import of the same name.
dl = DATALoader(
    train_ds,
    batch_size=2,
    shuffle=False,
)

In [26]:
# Build BERT with default parameters on the notebook's selected `device`, so
# the cell also runs on the CPU fallback instead of requiring CUDA.
params = BERTParams()
bert = BERT(params).to(device)

In [27]:
# Take exactly one batch; next(iter(...)) fails loudly on an empty loader
# instead of leaving `data` bound to an older value.
# NOTE: the name `data` shadows the `torch.utils.data` module imported earlier;
# it is kept because later cells index `data[...]`.
data = next(iter(dl))

In [28]:
# List the fields present in the batch.
data.keys()

dict_keys(['bert_input', 'bert_label'])

In [14]:
# First 27 label entries of the first sample.
data["bert_label"][0 , :27]

tensor([1024,   -1,  611,  242,   -1,   -1,  193,   -1,   -1,  736,  839,   -1,
          -1,   -1,  922,   -1,   -1,   -1,  845,   -1,   -1,   -1,  552,  358,
          -1,   -1, 1024])

In [15]:
# NOTE(review): "motion_lengths" is not among the keys reported by data.keys()
# in this notebook; this cell looks like it ran against a different batch.
data["motion_lengths"]

tensor([[25, 25],
        [17, 45]])

In [29]:
# Masked-token loss: cross-entropy that skips label -1.
# NOTE(review): labels equal to 1024 (seen in the label dumps) are not ignored
# and therefore contribute to the loss - confirm that is intended.
mlm_loss_fnc = torch.nn.CrossEntropyLoss(ignore_index=-1)
# Loss for the next-sentence head: NLL that skips label 0.
nsp_loss_fnc = torch.nn.NLLLoss(ignore_index=0)

In [31]:
# Run the model through __call__ (not .forward) so registered module hooks are
# honoured, with the input moved to whichever device the model lives on.
bert_device = next(bert.parameters()).device
mask_lm_output = bert(data["bert_input"].to(bert_device))

In [41]:
# Full label row of the first sample.
data["bert_label"][0]

tensor([1024,   -1,   -1,   -1,  370,   -1,   -1,   -1,  888,   -1,   -1,  699,
          -1,   -1,   -1,   -1,   -1,  699,  845,   -1,   -1,   -1,   -1,   -1,
         148,   -1,   -1,   -1,   -1,  197,   -1,   -1,   -1,   -1,   -1,   -1,
          -1,  699,  447,   -1,  177,  591,   -1,   -1,   -1,  999,   -1,  142,
          -1,  627,   -1, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024,
        1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024,
        1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024,
        1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024,
        1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024,
        1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024,
        1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024,
        1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024,
        1024, 1024, 1024, 1024, 1024, 10

In [55]:
# Shape probe only. NOTE(review): reshaping to (-1, 1027) after swapping axes
# 1 and 2 would interleave the two axes if 1027 is the class axis - do not
# reuse this expression to flatten logits without checking the layout.
mask_lm_output.transpose(1, 2).reshape(-1,1027).shape

torch.Size([1024, 1027])

In [41]:
# NOTE(review): `next_sent_output` is not assigned in any cell visible here
# (the forward cell only binds `mask_lm_output`); this depends on hidden state.
next_sent_output.shape

torch.Size([2, 2])

In [43]:
# NOTE(review): "is_next" is not among the keys reported by data.keys() in this
# notebook; this cell looks like it ran against a different batch.
data["is_next"]

tensor([[1],
        [1]])

In [33]:
# next_loss = nsp_loss_fnc(next_sent_output, data["is_next"].cuda().reshape(-1))

# Masked-token loss: CrossEntropyLoss (labels of -1 are ignored) over the model
# output with axes 1 and 2 swapped. Labels follow the logits' device rather
# than a hard-coded .cuda(), so the cell also works on CPU.
mask_loss = mlm_loss_fnc(
    mask_lm_output.transpose(1, 2),
    data["bert_label"].to(mask_lm_output.device),
)

In [39]:
# Display the masked-token loss value.
mask_loss

tensor(7.0331, device='cuda:0', grad_fn=<NllLoss2DBackward0>)

In [58]:
# Number of correct next-sentence predictions in the batch.
# argmax yields shape (batch,) while data["is_next"] is (batch, 1). Without
# flattening the target, eq() broadcasts to (batch, batch) and over-counts.
correct = (
    next_sent_output.argmax(dim=-1)
    .eq(data["is_next"].to(next_sent_output.device).reshape(-1))
    .sum()
    .item()
)

In [64]:
# Predicted class per sample (index of the largest value along the last axis).
next_sent_output.argmax(dim=-1)

tensor([0, 0], device='cuda:0')

In [65]:
# Shape of the next-sentence targets, for comparison with the argmax output.
data["is_next"].shape

torch.Size([2, 1])

In [62]:
# Shape of the next-sentence head output (same probe as an earlier cell).
next_sent_output.shape

torch.Size([2, 2])

In [42]:
def prob_mask_like(t, prob):
    """Return a boolean tensor shaped like `t`, True where a uniform draw is below `prob`.

    Each element is drawn independently with `uniform_(0, 1)`; the result is on
    the same device as `t`.
    """
    noise = torch.zeros_like(t, dtype=torch.float32)
    noise.uniform_(0, 1)
    return noise < prob


def mask_with_tokens(t, token_ids):
    """Return a boolean mask that is True wherever `t` equals any id in `token_ids`.

    Args:
        t: tensor of token ids.
        token_ids: iterable of ids to look for; may be empty.

    Returns:
        Bool tensor with the shape of `t`; all False when `token_ids` is empty.
    """
    # Plain loop instead of `reduce`: in this notebook the name `reduce` is
    # bound to einops.reduce (tensor reduction), not functools.reduce, so the
    # fold-style call would fail.
    mask = torch.full_like(t, False, dtype=torch.bool)
    for token_id in token_ids:
        mask = mask | (t == token_id)
    return mask


def get_mask_subset_with_prob(mask, prob):
    """Randomly pick a subset of the True positions of a 2-D boolean `mask`.

    Args:
        mask: bool tensor of shape (batch, seq_len); True marks eligible positions.
        prob: fraction of positions to select.

    Returns:
        Bool tensor of shape (batch, seq_len) with the selected positions True.
    """
    # Local import: `math` is not imported anywhere else in the notebook.
    import math

    batch, seq_len, device = *mask.shape, mask.device
    # Upper bound on selections per row, based on the full sequence length.
    max_masked = math.ceil(prob * seq_len)

    # Per-row budget is ceil(prob * eligible count). Candidate slots beyond
    # that budget are flagged as excess.
    # NOTE(review): the excess flags come from a cumsum over sequence positions
    # truncated to `max_masked` columns, so the budget looks exact only when
    # the eligible positions sit at the start of each row - confirm for inputs
    # with leading ineligible tokens.
    num_tokens = mask.sum(dim=-1, keepdim=True)
    mask_excess = mask.cumsum(dim=-1) > (num_tokens * prob).ceil()
    mask_excess = mask_excess[:, :max_masked]

    # Random scores; ineligible positions get -1e9 so top-k ranks them last.
    rand = torch.rand((batch, seq_len), device=device).masked_fill(~mask, -1e9)
    _, sampled_indices = rand.topk(max_masked, dim=-1)
    # Shift indices by one so index 0 can act as a discard slot for excess picks.
    sampled_indices = (sampled_indices + 1).masked_fill_(mask_excess, 0)

    new_mask = torch.zeros((batch, seq_len + 1), device=device)
    new_mask.scatter_(-1, sampled_indices, 1)
    # Drop the discard slot again.
    return new_mask[:, 1:].bool()

In [44]:
# Token ids that must never be selected for masking.
mask_ignore_token_ids = [
    1025,
    1026,
    1027,
]

In [None]:
# NOTE(review): unfinished cell - the assignment has no right-hand side and
# will not parse. Presumably `seq` should be a batch of token ids (for example
# data["bert_input"]); confirm and complete before running.
seq = 

In [None]:
# Positions holding an ignored token id are excluded; from the remaining
# positions a random subset is chosen with probability 0.3.
# `mask_ignore_token_ids` is the notebook-level list: there is no `self` at
# cell scope, so the attribute lookup would raise NameError.
no_mask = mask_with_tokens(seq, mask_ignore_token_ids)
mask = get_mask_subset_with_prob(~no_mask, 0.3)