In [1]:
# setting device on GPU if available, else CPU
import torch
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print('Using device:', device)
print()

#Additional Info when using cuda
if device.type == 'cuda':
    print(torch.cuda.get_device_name(0))
    print('Memory Usage:')
    print('Allocated:', round(torch.cuda.memory_allocated(0)/1024**3,1), 'GB')
    print('Cached:   ', round(torch.cuda.memory_reserved(0)/1024**3,1), 'GB')


Using device: cuda

GeForce RTX 2080 Ti
Memory Usage:
Allocated: 0.0 GB
Cached:    0.0 GB


In [2]:
%load_ext autoreload
%autoreload 2
%matplotlib inline

In [3]:
import wandb

import torch
import torch.nn.functional as F
import torch.nn as nn
from torch.utils.data import DataLoader

from torch.utils import data


import copy
import os
import random
import cv2
import numpy as np
from PIL import Image
import pandas as pd
import matplotlib.pyplot as plt
from glob import glob
import functools
from tqdm import tqdm
from datetime import datetime
import numpy as np
from core.datasets.vqa_motion_dataset import VQMotionDataset,DATALoader,VQVarLenMotionDataset,MotionCollator,VQFullMotionDataset
from einops import rearrange, reduce, pack, unpack
import librosa

In [4]:
from utils.motion_process import recover_from_ric
import visualize.plot_3d_global as plot_3d
from glob import glob
def to_xyz(motion, mean ,std , j = 22):
    motion_xyz = recover_from_ric(motion.cpu().float()*std+mean, j)
    motion_xyz = motion_xyz.reshape(motion.shape[0],-1, j, 3)
    return motion_xyz

            
def sample_render(motion_xyz , name , save_path):
    print(f"render start")
    
    gt_pose_vis = plot_3d.draw_to_batch(motion_xyz.numpy(),None, [os.path.join(save_path,name + ".gif")])



In [5]:
from configs.config import cfg, get_cfg_defaults
from core.models.vqvae import VQMotionModel
from core.models.motion_regressor import MotionRegressorModel


cfg_vq = get_cfg_defaults()
cfg_vq.merge_from_file("/srv/scratch/sanisetty3/music_motion/motion_vqvae/configs/var_len_768_768_aist_vq.yaml")




In [170]:
vqvae_model = VQMotionModel(cfg_vq.vqvae).eval()
pkg = torch.load(f"/srv/scratch/sanisetty3/music_motion/motion_vqvae/checkpoints/var_len/vq_768_768_mix/vqvae_motion_best_fid.pt", map_location = 'cpu')
print(pkg["steps"])
vqvae_model.load_state_dict(pkg["model"])
vqvae_model =vqvae_model.cuda()


tensor([295000.])


In [8]:
collate_fn = MotionCollator()


In [9]:
train_ds = VQVarLenMotionDataset("t2m", split = "render" , max_length_seconds = 10, data_root = "/srv/scratch/sanisetty3/music_motion/HumanML3D/HumanML3D")
train_loader = DATALoader(train_ds,1,collate_fn=collate_fn)

changing range to: 60 - 60


100%|██████████| 10/10 [00:00<00:00, 37.82it/s]

Total number of motions 10





In [10]:
aist_ds = VQVarLenMotionDataset("aist", split = "render" , data_root = "/srv/scratch/sanisetty3/music_motion/AIST" , num_stages = 6 ,min_length_seconds=20, max_length_seconds=30)
aist_loader = DATALoader(aist_ds,1,collate_fn=collate_fn)

changing range to: 400 - 400


100%|██████████| 8/8 [00:00<00:00, 39.72it/s]

Total number of motions 8





In [11]:
aist_loader.dataset.set_stage(5)

changing range to: 400 - 600


In [12]:
for aist_batch in aist_loader:
    break
aist_batch["motion"].shape

torch.Size([1, 159, 263])

## Trans model

In [168]:
from configs.config import cfg, get_cfg_defaults
from core.models.motion_regressor import MotionRegressorModel


cfg_trans = get_cfg_defaults()
cfg_trans.merge_from_file("/srv/scratch/sanisetty3/music_motion/motion_vqvae/configs/var_len_768_768_aist.yaml")



In [169]:
trans_model = MotionRegressorModel(args = cfg_trans.motion_trans,pad_value=1025 ).eval()
pkg_trans = torch.load(f"/srv/scratch/sanisetty3/music_motion/motion_vqvae/checkpoints/generator/var_len/trans_768_768_albi_aist/trans_motion_best_fid.pt", map_location = 'cpu')
print(pkg_trans["steps"])
trans_model.load_state_dict(pkg_trans["model"])
trans_model =trans_model.cuda()


tensor([195000.])


## Encode Decode

In [52]:
ind = vqvae_model.encode(aist_batch["motion"].cuda())
print(ind.shape)
quant , out_motion = vqvae_model.decode(ind)

torch.Size([1, 191])


In [39]:
out = torch.empty(aist_batch["motion"].shape)

In [27]:
ind = vqvae_model.encode(aist_batch["motion"][:,:400].cuda())
quant , out_motion = vqvae_model.decode(ind)

In [53]:
quant , out_motion = vqvae_model.decode(ind[:,400:].to(torch.long).cuda())

In [54]:
out[:,400:] = out_motion


In [53]:
sample_render(to_xyz(aist_batch["motion"][0:1].detach().cpu(),mean = aist_ds.mean , std = aist_ds.std), "rnd_og_motion" , "/srv/scratch/sanisetty3/music_motion/motion_vqvae/evals/decode_test")

render start


In [57]:
sample_render(to_xyz(out[0:1].detach().cpu(),mean = aist_ds.mean , std = aist_ds.std), "rnd_motion_ind_400" , "/srv/scratch/sanisetty3/music_motion/motion_vqvae/evals/decode_test")

render start


In [47]:
indices = torch.randint(0,1024,(1,60))
quant , out_motion = vqvae_model.decode(indices.cuda())

In [54]:
sample_render(to_xyz(out_motion[0:1].detach().cpu(),mean = aist_ds.mean , std = aist_ds.std), "rnd_motion" , "/srv/scratch/sanisetty3/music_motion/motion_vqvae/evals/decode_test")

render start


## Music Eval stuff

In [7]:
from utils.motion_process import recover_from_ric
from utils.aist_metrics import calculate_fid_scores
from utils.aist_metrics.calculate_fid_scores import calculate_avg_distance, extract_feature,calculate_frechet_feature_distance,calculate_frechet_distance
from utils.aist_metrics.features import kinetic,manual
from utils.aist_metrics.calculate_beat_scores import motion_peak_onehot,alignment_score

In [8]:
from core.datasets.vqa_motion_dataset import MotionCollatorConditional, TransMotionDatasetConditional,VQMotionDataset,DATALoader,VQVarLenMotionDataset,MotionCollator,VQFullMotionDataset


In [9]:
from utils.eval_music import evaluate_music_motion_vqvae, evaluate_music_motion_generative,evaluate_music_motion_trans

In [14]:
for aist_batch in tqdm(aist_loader):
    break

  0%|          | 0/40 [00:00<?, ?it/s]


### Const len trained transformer

In [15]:
from core.models.motion_regressor import MotionRegressorModel

trans_model = MotionRegressorModel(args = cfg_trans.motion_trans , ignore_index=1025 ,pad_value=1025 ).eval()
pkg_trans = torch.load(f"/srv/scratch/sanisetty3/music_motion/motion_vqvae/checkpoints/const_len/trans_768_768_aist/vqvae_motion.pt", map_location = 'cpu')
print(pkg_trans["steps"])
trans_model.load_state_dict(pkg_trans["model"])
trans_model =trans_model.cuda()


tensor([85000.])


In [39]:
0.23/0.243 * 0.292

0.2763786008230453

## Evaluate Music Motion transformer

In [165]:
encodec = "/srv/scratch/sanisetty3/music_motion/motion_vqvae/checkpoints/generator/var_len/trans_768_768_albi_aist/var_len_768_768_aist.yaml"
encodec_sine = "/srv/scratch/sanisetty3/music_motion/motion_vqvae/checkpoints/generator/var_len/trans_768_768_sine_aist/var_len_768_768_sine_aist.yaml"
librosa = "/srv/scratch/sanisetty3/music_motion/motion_vqvae/checkpoints/generator/var_len/trans_768_768_albi_aist_35/var_len_768_768_aist_35.yaml"
encodec_prob50 = "/srv/scratch/sanisetty3/music_motion/motion_vqvae/checkpoints/generator/var_len/trans_768_768_albi_aist_mask_prob50/trans_768_768_albi_aist_mask_prob50.yaml"


In [166]:
from configs.config import cfg, get_cfg_defaults
from core.models.motion_regressor import MotionRegressorModel


trans_option = "encodec"

cfg_trans = get_cfg_defaults()
cfg_trans.merge_from_file(encodec)


trans_model = MotionRegressorModel(args = cfg_trans.motion_trans,pad_value=1025 ).eval()
pkg_trans = torch.load(f"{os.path.dirname(encodec)}/trans_motion_best_fid.pt", map_location = 'cpu')
print(pkg_trans["steps"])
trans_model.load_state_dict(pkg_trans["model"])
trans_model =trans_model.cuda()


tensor([195000.])


In [167]:
audio_encoding_dir = "/srv/scratch/sanisetty3/music_motion/AIST/music"
audio_features_dir = "/srv/scratch/sanisetty3/music_motion/AIST/audio_features/"
use35 = False
if trans_option == "librosa":
    audio_encoding_dir = audio_features_dir
    use35 = True

In [19]:
use35

False

## Evaluate Music Motion Generative

In [12]:
from utils.eval_music import evaluate_music_motion_vqvae, evaluate_music_motion_generative,evaluate_music_motion_generative2,evaluate_music_motion_trans

In [40]:
aist_ds = VQFullMotionDataset("aist", split = "test" , data_root = "/srv/scratch/sanisetty3/music_motion/AIST" , window_size = -1)
aist_loader = DATALoader(aist_ds,1,collate_fn=None)

100%|██████████| 40/40 [00:00<00:00, 1700.32it/s]

Total number of motions 40





In [49]:
from utils.eval_music import evaluate_music_motion_generative
print("pretrained mix")
best_fid_k = []
best_fid_g = []
best_div_k = []
best_div_g = []
best_beat_align = []

for i in range(1):

    a,b,c,d,e = evaluate_music_motion_generative(aist_loader , vqvae_model= vqvae_model ,net = trans_model,use35=use35)
    best_fid_k.append(a)
    best_fid_g.append(b)
    best_div_k.append(c)
    best_div_g.append(d)
    best_beat_align.append(e)

    
print("best_fid_k" , np.mean(best_fid_k))
print("best_fid_g" , np.mean(best_fid_g))
print("best_div_k" , np.mean(best_div_k))
print("best_div_g" , np.mean(best_div_g))
print("best_beat_align" , np.mean(best_beat_align))



  2%|▏         | 7/400 [00:00<00:06, 64.18it/s]

pretrained mix


100%|██████████| 400/400 [00:06<00:00, 60.42it/s]
100%|██████████| 400/400 [00:06<00:00, 60.73it/s]
100%|██████████| 400/400 [00:06<00:00, 60.44it/s]
100%|██████████| 400/400 [00:06<00:00, 60.62it/s]
100%|██████████| 400/400 [00:06<00:00, 60.56it/s]
100%|██████████| 400/400 [00:06<00:00, 60.68it/s]
100%|██████████| 400/400 [00:06<00:00, 60.08it/s]
100%|██████████| 400/400 [00:06<00:00, 59.90it/s]
100%|██████████| 400/400 [00:06<00:00, 60.57it/s]
100%|██████████| 400/400 [00:06<00:00, 60.68it/s]
100%|██████████| 400/400 [00:06<00:00, 60.56it/s]
100%|██████████| 400/400 [00:06<00:00, 60.67it/s]
100%|██████████| 400/400 [00:06<00:00, 60.68it/s]
100%|██████████| 400/400 [00:06<00:00, 60.73it/s]
100%|██████████| 400/400 [00:06<00:00, 60.83it/s]
100%|██████████| 400/400 [00:06<00:00, 60.69it/s]
100%|██████████| 400/400 [00:06<00:00, 60.64it/s]
100%|██████████| 400/400 [00:06<00:00, 60.73it/s]
100%|██████████| 400/400 [00:06<00:00, 60.77it/s]
100%|██████████| 400/400 [00:06<00:00, 60.50it/s]


FID_k:  6.403103264620739 Diversity_k: 10.598105856088491
FID_g:  11.891712829203748 Diversity_g: 7.263086085441785

Beat score on real data: 0.244


Beat score on generated data: 0.212

\PFC score on real data: 2.113

\PFC score on generated data: 2.808

best_fid_k 6.403103264620739
best_fid_g 11.891712829203748
best_div_k 10.598105856088491
best_div_g 7.263086085441785
best_beat_align 0.24413006020223502





In [None]:
FID_k:  5.848359006888529 Diversity_k: 9.930449794347469
FID_g:  10.19790933574938 Diversity_g: 7.301737963236295

Beat score on real data: 0.244


Beat score on generated data: 0.233

\PFC score on real data: 2.113

\PFC score on generated data: 1.169

In [13]:
aist_ds_train = VQFullMotionDataset("aist", split = "train" , data_root = "/srv/scratch/sanisetty3/music_motion/AIST" , window_size = -1)
aist_loader_train = DATALoader(aist_ds_train,1,collate_fn=None)

100%|██████████| 1910/1910 [00:01<00:00, 1568.45it/s]

Total number of motions 1910





In [20]:

print("pretrained mix")
best_fid_k = []
best_fid_g = []
best_div_k = []
best_div_g = []
best_beat_align = []
seq_len = 900
for i in range(1):

    a,b,c,d,e,f,g = evaluate_music_motion_generative2(aist_loader_train , vqvae_model= vqvae_model ,net = trans_model, use35 = use35,seq_len = seq_len)
    best_fid_k.append(a)
    best_fid_g.append(b)
    best_div_k.append(c)
    best_div_g.append(d)
    best_beat_align.append(e)

    
print("best_fid_k" , np.mean(best_fid_k))
print("best_fid_g" , np.mean(best_fid_g))
print("best_div_k" , np.mean(best_div_k))
print("best_div_g" , np.mean(best_div_g))
print("best_beat_align" , np.mean(best_beat_align))



  1%|          | 7/900 [00:00<00:14, 62.63it/s]

pretrained mix


100%|██████████| 900/900 [00:20<00:00, 42.96it/s]
100%|██████████| 900/900 [00:20<00:00, 43.03it/s]]
100%|██████████| 900/900 [00:21<00:00, 42.70it/s]]
100%|██████████| 900/900 [00:21<00:00, 42.74it/s]]
100%|██████████| 900/900 [00:21<00:00, 42.69it/s]it]
100%|██████████| 900/900 [00:21<00:00, 42.81it/s]it]
100%|██████████| 900/900 [00:21<00:00, 42.37it/s]]  
100%|██████████| 900/900 [00:21<00:00, 42.73it/s]]
100%|██████████| 900/900 [00:21<00:00, 42.74it/s]]
100%|██████████| 900/900 [00:21<00:00, 42.77it/s]]
100%|██████████| 900/900 [00:21<00:00, 42.46it/s]]
100%|██████████| 900/900 [00:21<00:00, 42.69it/s]]
100%|██████████| 900/900 [00:21<00:00, 42.79it/s]]
100%|██████████| 900/900 [00:21<00:00, 42.13it/s]]
100%|██████████| 900/900 [00:21<00:00, 42.60it/s]]
100%|██████████| 900/900 [00:21<00:00, 42.56it/s]]
100%|██████████| 900/900 [00:21<00:00, 42.75it/s]]
100%|██████████| 900/900 [00:21<00:00, 42.85it/s]]
100%|██████████| 900/900 [00:21<00:00, 42.64it/s]t]
100%|██████████| 900/900 

FID_k:  6.394353653410235 Diversity_k: 10.558077923262992
FID_g:  8.632308692625102 Diversity_g: 7.294414699949869
\PFC score on real data: 1.677

\PFC score on generated data: 1.239


Beat score on real data: 0.170


Beat score on generated data: 0.164

best_fid_k 6.394353653410235
best_fid_g 8.632308692625102
best_div_k 10.558077923262992
best_div_g 7.294414699949869
best_beat_align 0.17011690074139812





## AICHOREO

In [89]:
from utils.motion_process import recover_from_ric
from utils.aist_metrics.calculate_fid_scores import calculate_avg_distance, extract_feature,calculate_frechet_feature_distance,calculate_frechet_distance
from utils.aist_metrics.features import kinetic,manual
from utils.aist_metrics.calculate_beat_scores import motion_peak_onehot,alignment_score


In [97]:


result_features = {"kinetic": [], "manual": []}
real_features = {"kinetic": [], "manual": []}

mean = aist_loader.dataset.mean
std = aist_loader.dataset.std

beat_scores_real = []
beat_scores_pred = []

real_pfc = []
pred_pfc = []

seq_len =800

audio_dir = audio_encoding_dir = "/srv/scratch/sanisetty3/music_motion/AIST/music"
audio_feature_dir = "/srv/scratch/sanisetty3/music_motion/AIST/audio_features/"


# smpl_motions_aist = glob("/srv/scratch/sanisetty3/clean/mint/evals/eval60/*.npy")
# for i in glob("/srv/scratch/sanisetty3/clean/mint/evals/eval60/hml/*.npy"):
#     smpl_motion = np.load(i)[120: , 6:]
# #     print(smpl_motion.shape)
#     seq_name = os.path.basename(i).split(".")[0]
#     smpl_motions_aist.append({"motion":smpl_motion , "name" : seq_name })



for i,path in enumerate(tqdm(smpl_motions_aist)):
    
    motion_name = os.path.basename(path)
    
    gt_motion = torch.Tensor(np.load(f"/srv/scratch/sanisetty3/music_motion/AIST/new_joint_vecs/{motion_name[:-9]}.npy")[None,...])
#     print(gt_motion.shape)
    out_motion = torch.Tensor(np.load(f"/srv/scratch/sanisetty3/clean/mint/evals/eval60/hml/{motion_name}")[None,...])
#     print(out_motion.shape)

    keypoints3d_gt = recover_from_ric(gt_motion[0,:seq_len] , 22).detach().cpu().numpy()
    keypoints3d_pred = recover_from_ric(out_motion[0,:seq_len] , 22).detach().cpu().numpy()

    real_features["kinetic"].append(extract_feature(keypoints3d_gt, "kinetic"))
    real_features["manual"].append(extract_feature(keypoints3d_gt, "manual"))

    result_features["kinetic"].append(extract_feature(keypoints3d_pred, "kinetic"))
    result_features["manual"].append(extract_feature(keypoints3d_pred, "manual"))

#     real_pfc.append(calc_physical_score(keypoints3d_gt))
#     pred_pfc.append(calc_physical_score(keypoints3d_pred))






    motion_beats = motion_peak_onehot(keypoints3d_gt[:seq_len])
    # get real data music beats
    audio_name = motion_name.split("_")[-3]

    audio_feature = np.load(os.path.join(audio_feature_dir, f"{audio_name}.npy"))
    audio_beats = audio_feature[:seq_len, -1] # last dim is the music beats
    # get beat alignment scores
    beat_score = alignment_score(audio_beats, motion_beats, sigma=1)
    beat_scores_real.append(beat_score)


    motion_beats = motion_peak_onehot(keypoints3d_pred[:seq_len])
    beat_score_pred = alignment_score(audio_beats, motion_beats, sigma=1)
    beat_scores_pred.append(beat_score_pred)


FID_k, Dist_k = calculate_frechet_feature_distance(real_features["kinetic"], result_features["kinetic"])
FID_g, Dist_g = calculate_frechet_feature_distance(real_features["manual"], result_features["manual"])


print("FID_k: ",FID_k,"Diversity_k:",Dist_k)
print("FID_g: ",FID_g,"Diversity_g:",Dist_g)

# print ("\PFC score on real data: %.3f\n" % (np.mean(real_pfc)))
# print ("\PFC score on generated data: %.3f\n" % (np.mean(pred_pfc)))


print ("\nBeat score on real data: %.3f\n" % (np.mean(beat_scores_real)))
print ("\nBeat score on generated data: %.3f\n" % (np.mean(beat_scores_pred)))



best_fid_k = FID_k if FID_k < best_fid_k else best_fid_k
best_fid_g = FID_g if FID_g < best_fid_g else best_fid_g
best_div_k = Dist_k if Dist_k > best_div_k else best_div_k
best_div_g = Dist_g if Dist_g > best_div_g else best_div_g

best_beat_align = np.mean(beat_scores_real) if np.mean(beat_scores_real) > best_beat_align else best_beat_align 



# return best_fid_k, best_fid_g,best_div_k,best_div_g,best_beat_align , np.mean(real_pfc), np.mean(pred_pfc)



100%|██████████| 44/44 [01:47<00:00,  2.44s/it]

FID_k:  7.528682831845714 Diversity_k: 10.279027991012589
FID_g:  7.848392292979781 Diversity_g: 7.421904608008726

Beat score on real data: 0.310


Beat score on generated data: 0.225






NameError: name 'best_fid_k' is not defined

## Transformed HML

In [143]:
keypoints3d_hml = np.load("/srv/scratch/sanisetty3/clean/mint/evals/eval60/keypoints3D_hml60.npy").reshape(-1,22,3)
keypoints3d_aist = np.load("/srv/scratch/sanisetty3/clean/mint/evals/eval60/keypoints3D_aist.npy")[:52756,:22,:]

from pycpd import RigidRegistration, DeformableRegistration
import numpy as np


In [149]:
reg = RigidRegistration(X=keypoints3d_aist[:100].reshape(-1,3), Y=keypoints3d_hml[:100].reshape(-1,3))

In [150]:
TY, (s_reg, R_reg, t_reg) = reg.register()

In [151]:
np.mean(TY - keypoints3d_aist[:100].reshape(-1,3))

6.646927100911472e-05

NameError: name 'points_to_transform' is not defined

In [162]:
aist_ds = VQFullMotionDataset("aist", split = "test" , data_root = "/srv/scratch/sanisetty3/music_motion/AIST" , window_size = -1)
val_loader = DATALoader(aist_ds,1,collate_fn=None)

100%|██████████| 40/40 [00:00<00:00, 1387.74it/s]

Total number of motions 40





In [152]:
for batch in aist_loader:
    break

In [174]:
keypoints3d_gt = recover_from_ric(batch["motion"][0,:100] , 22).detach().cpu().numpy()

In [176]:
np.save("/srv/scratch/sanisetty3/music_motion/motion_vqvae/paper_renders/smpl/keypoints3d_gt.npy" , reg.transform_point_cloud(Y=keypoints3d_gt.reshape(-1,3)).reshape(-1,22,3))

In [172]:
result_features = {"kinetic": [], "manual": []}
real_features = {"kinetic": [], "manual": []}

mean = val_loader.dataset.mean
std = val_loader.dataset.std

beat_scores_real = []
beat_scores_pred = []

for i,aist_batch in enumerate(tqdm(val_loader)):


    mot_len = aist_batch["motion_lengths"][0]
    motion_name = aist_batch["names"][0]

    music_name = motion_name.split('_')[-2]
    music_encoding=  np.load(os.path.join(audio_encoding_dir , music_name + ".npy"))

    gen_motion_indices = torch.randint(0 , 1024 , (1,1))

    while gen_motion_indices.shape[1]<=seq_len:

       
        gen_motion_indices = trans_model.generate(start_tokens =gen_motion_indices.cuda(),\
                                                    seq_len=400 , \
                                                    context = torch.Tensor(music_encoding)[None,...].cuda(), \
                                                    context_mask=torch.ones((1 ,music_encoding.shape[0]) , dtype = torch.bool).cuda(),\
                                                    )

        gen_motion_indices = gen_motion_indices[gen_motion_indices<1024][None,...]

    try:
        out_motion = torch.zeros((aist_batch["motion"].shape[0] ,gen_motion_indices.shape[-1] , aist_batch["motion"].shape[-1]))
        for i in range(0 , seq_len, 200):
            quant , out_motion_= vqvae_model.decode(gen_motion_indices[:,i:i+200])
            out_motion[:,i:i+200] = out_motion_

        # quant , out_motion = vqvae_model.module.decode(gen_motion_indices[:,:mot_len])
    except:
        # quant , out_motion = vqvae_model.decode(gen_motion_indices[:,:mot_len])
        out_motion = torch.zeros((aist_batch["motion"].shape[0] ,gen_motion_indices.shape[-1] , aist_batch["motion"].shape[-1]))
        for i in range(0 , seq_len, 200):
            quant , out_motion_= vqvae_model.decode(gen_motion_indices[:,i:i+200])
            out_motion[:,i:i+200] = out_motion_


    keypoints3d_gt = recover_from_ric(aist_batch["motion"][0,:mot_len] , 22).detach().cpu().numpy()
    keypoints3d_gt = reg.transform_point_cloud(Y=keypoints3d_gt.reshape(-1,3)).reshape(-1,22,3)
    
    
    keypoints3d_pred = recover_from_ric(out_motion[0,:mot_len] , 22).detach().cpu().numpy()
    keypoints3d_pred = reg.transform_point_cloud(Y=keypoints3d_pred.reshape(-1,3)).reshape(-1,22,3)
    
    
    real_features["kinetic"].append(extract_feature(keypoints3d_gt, "kinetic"))
    real_features["manual"].append(extract_feature(keypoints3d_gt, "manual"))

    result_features["kinetic"].append(extract_feature(keypoints3d_pred, "kinetic"))
    result_features["manual"].append(extract_feature(keypoints3d_pred, "manual"))




    motion_beats = motion_peak_onehot(keypoints3d_gt[:mot_len])
    # get real data music beats
    audio_name = motion_name.split("_")[-2]

    audio_feature = np.load(os.path.join(audio_feature_dir, f"{audio_name}.npy"))
    audio_beats = audio_feature[:mot_len, -1] # last dim is the music beats
    # get beat alignment scores
    beat_score = alignment_score(audio_beats, motion_beats, sigma=1)
    beat_scores_real.append(beat_score)


    motion_beats = motion_peak_onehot(keypoints3d_pred[:mot_len])
    beat_score_pred = alignment_score(audio_beats, motion_beats, sigma=1)
    beat_scores_pred.append(beat_score_pred)


FID_k, Dist_k = calculate_frechet_feature_distance(real_features["kinetic"], result_features["kinetic"])
FID_g, Dist_g = calculate_frechet_feature_distance(real_features["manual"], result_features["manual"])


print("FID_k: ",FID_k,"Diversity_k:",Dist_k)
print("FID_g: ",FID_g,"Diversity_g:",Dist_g)


print ("\nBeat score on real data: %.3f\n" % (np.mean(beat_scores_real)))
print ("\nBeat score on generated data: %.3f\n" % (np.mean(beat_scores_pred)))

best_fid_k = FID_k if FID_k < best_fid_k else best_fid_k
best_fid_g = FID_g if FID_g < best_fid_g else best_fid_g
best_div_k = Dist_k if Dist_k > best_div_k else best_div_k
best_div_g = Dist_g if Dist_g > best_div_g else best_div_g

best_beat_align = np.mean(beat_scores_real) if np.mean(beat_scores_real) > best_beat_align else best_beat_align 



return best_fid_k, best_fid_g,best_div_k,best_div_g,best_beat_align


100%|██████████| 400/400 [00:08<00:00, 45.42it/s]
100%|██████████| 400/400 [00:10<00:00, 39.73it/s]
100%|██████████| 400/400 [00:08<00:00, 45.42it/s]
100%|██████████| 400/400 [00:10<00:00, 39.57it/s]
100%|██████████| 400/400 [00:08<00:00, 45.31it/s]
100%|██████████| 400/400 [00:10<00:00, 39.58it/s]
100%|██████████| 400/400 [00:08<00:00, 45.29it/s]
100%|██████████| 400/400 [00:10<00:00, 39.51it/s]
100%|██████████| 400/400 [00:08<00:00, 45.41it/s]
100%|██████████| 400/400 [00:10<00:00, 39.47it/s]
100%|██████████| 400/400 [00:08<00:00, 45.03it/s]
100%|██████████| 400/400 [00:10<00:00, 39.45it/s]
100%|██████████| 400/400 [00:08<00:00, 45.43it/s]
100%|██████████| 400/400 [00:10<00:00, 39.43it/s]
100%|██████████| 400/400 [00:08<00:00, 45.43it/s]
100%|██████████| 400/400 [00:10<00:00, 39.43it/s]
100%|██████████| 400/400 [00:08<00:00, 45.32it/s]
100%|██████████| 400/400 [00:10<00:00, 39.39it/s]
100%|██████████| 400/400 [00:08<00:00, 45.39it/s]
100%|██████████| 400/400 [00:10<00:00, 39.35it/s]


FID_k:  6.9577679600177476 Diversity_k: 10.06756729009824
FID_g:  11.167501952145543 Diversity_g: 7.1934799310488575

Beat score on real data: 0.244


Beat score on generated data: 0.263






NameError: name 'best_fid_k' is not defined

## Sinusoidal pos emb

FID_k:  3.8717503038434415 Diversity_k: 10.580599220288105
FID_g:  14.323117971786502 Diversity_g: 7.208645957555526

Beat score on real data: 0.244


Beat score on generated data: 0.196

### Albi

FID_k:  5.974822501678858 Diversity_k: 9.894211104435799
FID_g:  10.945797702730552 Diversity_g: 7.33098736114991

Beat score on real data: 0.244


Beat score on generated data: 0.207

## Style motion generative

In [79]:
from utils.eval_music import evaluate_music_motion_generative_style, evaluate_music_motion_generative_style2

In [None]:
aist_ds = VQFullMotionDataset("aist", split = "train" , data_root = "/srv/scratch/sanisetty3/music_motion/AIST" , window_size = -1)


In [74]:
aist_ds = VQFullMotionDataset("aist", split = "train" , data_root = "/srv/scratch/sanisetty3/music_motion/AIST" , window_size = -1)
aist_loader = DATALoader(aist_ds,1,collate_fn=None)

100%|██████████| 1910/1910 [00:01<00:00, 1390.05it/s]

Total number of motions 1910





In [75]:
import clip
clip_model, clip_preprocess = clip.load("ViT-B/32", device=torch.device('cuda'), jit=False)  # Must set jit=False for training
clip_model.eval()
for p in clip_model.parameters():
    p.requires_grad = False


In [76]:
encodec_style = "/srv/scratch/sanisetty3/music_motion/motion_vqvae/checkpoints/generator/var_len/trans_768_768_albi_aist_style/var_len_768_768_aist_style.yaml"


In [77]:
from configs.config import cfg, get_cfg_defaults
from core.models.motion_regressor import MotionRegressorModel


cfg_trans = get_cfg_defaults()
cfg_trans.merge_from_file(encodec_style)



trans_model = MotionRegressorModel(args = cfg_trans.motion_trans,pad_value=1025 ).eval()
pkg_trans = torch.load(f"{os.path.dirname(encodec_style)}/trans_motion.pt", map_location = 'cpu')
print(pkg_trans["steps"])
trans_model.load_state_dict(pkg_trans["model"])
trans_model =trans_model.cuda()


tensor([95000.])


In [78]:
evaluate_music_motion_generative_style(aist_loader , vqvae_model= vqvae_model ,net = trans_model,clip_model=clip_model,seq_len = 800)


NameError: name 'evaluate_music_motion_generative_style' is not defined

In [60]:
aist_ds = VQFullMotionDataset("aist", split = "train" , data_root = "/srv/scratch/sanisetty3/music_motion/AIST" , window_size = -1)
aist_loader = DATALoader(aist_ds,1,collate_fn=None)

100%|██████████| 1910/1910 [00:22<00:00, 84.98it/s] 

Total number of motions 1910





In [80]:
evaluate_music_motion_generative_style2(aist_loader , vqvae_model= vqvae_model ,net = trans_model,clip_model=clip_model,seq_len = 800)


100%|██████████| 800/800 [00:21<00:00, 37.43it/s]
100%|██████████| 800/800 [00:21<00:00, 37.46it/s]]
100%|██████████| 800/800 [00:21<00:00, 37.45it/s]t]
100%|██████████| 800/800 [00:21<00:00, 37.42it/s]t]
100%|██████████| 800/800 [00:21<00:00, 37.37it/s]t]
100%|██████████| 800/800 [00:21<00:00, 37.33it/s]t]
100%|██████████| 800/800 [00:21<00:00, 37.35it/s]it]
100%|██████████| 800/800 [00:21<00:00, 37.35it/s]it]
100%|██████████| 800/800 [00:21<00:00, 37.30it/s]it]
100%|██████████| 800/800 [00:21<00:00, 37.34it/s]it]
100%|██████████| 800/800 [00:21<00:00, 37.30it/s]it]
100%|██████████| 800/800 [00:21<00:00, 37.31it/s]it]
100%|██████████| 800/800 [00:21<00:00, 37.32it/s]it]
100%|██████████| 800/800 [00:21<00:00, 37.32it/s]it]
100%|██████████| 800/800 [00:21<00:00, 37.30it/s]it]
100%|██████████| 800/800 [00:21<00:00, 37.31it/s]it]
100%|██████████| 800/800 [00:21<00:00, 37.25it/s]]  
100%|██████████| 800/800 [00:21<00:00, 37.31it/s]]
100%|██████████| 800/800 [00:21<00:00, 37.30it/s]]
100%|█

FID_k:  3.522845989252062 Diversity_k: 10.215643210963504
FID_g:  8.57888238870914 Diversity_g: 7.215440534382331

Beat score on real data: 0.172


Beat score on generated data: 0.184






(3.522845989252062,
 8.57888238870914,
 10.215643210963504,
 7.215440534382331,
 0.1715415597282555)

In [81]:
0.243*0.184 / 0.172

0.25995348837209303

## Render

In [7]:
audio_encoding_dir = "/srv/scratch/sanisetty3/music_motion/AIST/music"

genre_dict = {
"mBR" : "Break",
"mPO" : "Pop",
"mLO" : "Lock",
"mMH" : "Middle Hip-hop",
"mLH" : "LA style Hip-hop",
"mHO" : "House",    
"mWA" : "Waack",
"mKR" : "Krump",
"mJS" : "Street Jazz",
"mJB" : "Ballet Jazz",
}



In [42]:
style = None

In [49]:
for i,aist_batch in enumerate(tqdm(aist_loader)):
    break
motion_name = aist_batch["names"][0]

music_name = motion_name.split('_')[-2]
music_encoding=  np.load(os.path.join(audio_encoding_dir , music_name + ".npy"))

print(genre_dict.get(music_name[:3]))

  0%|          | 0/40 [00:00<?, ?it/s]

Ballet Jazz





In [50]:


mot_len = aist_batch["motion_lengths"][0]
motion_name = aist_batch["names"][0]

music_name = motion_name.split('_')[-2]
music_encoding=  np.load(os.path.join(audio_encoding_dir , music_name + ".npy"))

print(genre_dict.get(music_name[:3]))

genre = (genre_dict.get(music_name[:3])) if style is None else style

text = clip.tokenize([genre], truncate=True).cuda()
style_embeddings = clip_model.encode_text(text).cpu().float().reshape(-1) if clip_model is not None else None
gen_motion_indices = torch.randint(0 , 1024 , (1,1))
gen_motion_indices = trans_model.generate(start_tokens =gen_motion_indices.cuda(),\
                                        seq_len=400 , \
                                        context = torch.Tensor(music_encoding)[None,...].cuda(), \
                                        context_mask=torch.ones((1 ,music_encoding.shape[0]) , dtype = torch.bool).cuda(),\
                                        style_context = torch.Tensor(style_embeddings.reshape(-1))[None,...].cuda(),
                                        )
gen_motion_indices = gen_motion_indices[gen_motion_indices<1024][None,...]

quant , out_motion = vqvae_model.decode(gen_motion_indices)

Ballet Jazz


100%|██████████| 400/400 [00:07<00:00, 56.84it/s]


In [52]:
aist_batch["motion"].shape

torch.Size([1, 147, 263])

In [44]:
genre

'Slow'

In [19]:
sample_render(to_xyz(aist_batch["motion"][0:1].detach().cpu(),mean = aist_ds.mean , std = aist_ds.std), "style_gt" , "/srv/scratch/sanisetty3/music_motion/motion_vqvae/evals/decode_test")

render start


In [51]:
sample_render(to_xyz(out_motion[:,:mot_len].detach().cpu(),mean = aist_ds.mean , std = aist_ds.std), "style_none" , "/srv/scratch/sanisetty3/music_motion/motion_vqvae/evals/style/")

render start


### Music VQ

In [13]:

load_path_mix = "/srv/scratch/sanisetty3/music_motion/motion_vqvae/checkpoints/var_len/vq_768_768_mix/checkpoints/vqvae_motion.295000.pt"
load_path_hml = "/srv/scratch/sanisetty3/music_motion/motion_vqvae/checkpoints/var_len/vq_768_768/vqvae_motion.pt"
load_path_aist = "/srv/scratch/sanisetty3/music_motion/motion_vqvae/checkpoints/var_len/vq_768_768_aist/vqvae_motion.pt"


In [14]:
from configs.config import cfg, get_cfg_defaults
from core.models.vqvae import VQMotionModel
from utils.eval_music import evaluate_music_motion_vqvae

cfg_vq = get_cfg_defaults()
cfg_vq.merge_from_file("/srv/scratch/sanisetty3/music_motion/motion_vqvae/checkpoints/var_len/vq_768_768_mix/var_len_768_768_aist_vq.yaml")

load_path = "/srv/scratch/sanisetty3/music_motion/motion_vqvae/checkpoints/var_len/vq_768_768_mix/vqvae_motion_best_fid.pt"


In [15]:

vqvae_model = VQMotionModel(cfg_vq.vqvae).eval()
pkg = torch.load(f"{load_path_mix}", map_location = 'cpu')
print(pkg["steps"])
vqvae_model.load_state_dict(pkg["model"])
vqvae_model =vqvae_model.cuda()



tensor([295000.])


In [17]:
aist_ds = VQFullMotionDataset("aist", split = "test" , data_root = "/srv/scratch/sanisetty3/music_motion/AIST" , window_size = -1)
aist_loader = DATALoader(aist_ds,1,collate_fn=None)

100%|██████████| 40/40 [00:00<00:00, 1496.39it/s]

Total number of motions 40





In [21]:
print("pretrained only t2m")
evaluate_music_motion_vqvae(aist_loader,vqvae_model)

pretrained only t2m


100%|██████████| 40/40 [00:41<00:00,  1.04s/it]

FID_k:  3.2996737750179364 Diversity_k: 10.26604298215646
FID_g:  10.78302279919913 Diversity_g: 7.181474344852643
FID_k_real:  -7.86550347697812e-06 Diversity_k_real: 10.195780532558759
FID_g_real:  -1.9184653865522705e-13 Diversity_g_real: 7.348854861503992

Beat score on real data: 0.245


Beat score on generated data: 0.176






(3.2996737750179364,
 10.78302279919913,
 10.26604298215646,
 7.181474344852643,
 0.24494051462936942)

In [18]:

# print("pretrained mix")
# best_fid_k = []
# best_fid_g = []
# best_div_k = []
# best_div_g = []
# best_beat_align = []

# for i in range(20):

#     a,b,c,d,e = evaluate_music_motion_vqvae(aist_loader,vqvae_model)
#     best_fid_k.append(a)
#     best_fid_g.append(b)
#     best_div_k.append(c)
#     best_div_g.append(d)
#     best_beat_align.append(e)

    
# print("best_fid_k" , np.mean(best_fid_k))
# print("best_fid_g" , np.mean(best_fid_g))
# print("best_div_k" , np.mean(best_div_k))
# print("best_div_g" , np.mean(best_div_g))
# print("best_beat_align" , np.mean(best_beat_align))

In [19]:
print("mix")
evaluate_music_motion_vqvae(aist_loader,vqvae_model)


mix


100%|██████████| 40/40 [00:39<00:00,  1.00it/s]

FID_k:  2.635362356342995 Diversity_k: 10.163608189500295
FID_g:  7.295345718653849 Diversity_g: 7.234946262225127
FID_k_real:  -7.757004908626186e-06 Diversity_k_real: 10.205963216454554
FID_g_real:  -1.903529778246593e-09 Diversity_g_real: 7.344472836225461

Beat score on real data: 0.244


Beat score on generated data: 0.234






(2.635362356342995,
 7.295345718653849,
 10.163608189500295,
 7.234946262225127,
 0.244130060202235)

In [100]:
print("pretrained only t2m, finetuned only aist")
evaluate_music_motion_vqvae(aist_loader,vqvae_model)

pretrained only t2m, finetuned only aist


  5%|▌         | 2/40 [00:01<00:37,  1.01it/s]


KeyboardInterrupt: 

In [28]:
### Mixture
evaluate_music_motion_vqvae(aist_loader,vqvae_model)

100%|██████████| 1910/1910 [29:56<00:00,  1.06it/s]


FID_k:  0.010750366559051372 Diversity_k: 9.172959109891172
FID_g:  1.2350136226828567 Diversity_g: 7.381343867734843

Beat score on real data: 0.249


Beat score on generated data: 0.250



(0.010750366559051372,
 1.2350136226828567,
 9.172959109891172,
 7.381343867734843,
 0.24940255611332512)

## Generating token dataset

In [10]:
(0.11*255, 0.53*255, 0.8*255, 0.5*255)

(28.05, 135.15, 204.0, 127.5)

In [64]:
aist_ds = VQFullMotionDataset("aist", split = "train" , data_root = "/srv/scratch/sanisetty3/music_motion/AIST" , window_size = -1)
aist_loader = DATALoader(aist_ds,1,collate_fn=None)

100%|██████████| 1910/1910 [00:01<00:00, 1244.08it/s]

Total number of motions 1910





In [67]:
for batch in tqdm(aist_loader):
    
    n = int(batch["motion_lengths"])
    name = str(batch["names"][0])
    if n< 400:
        ind = vqvae_model.encode(batch["motion"].cuda())
    else:
#         ind = vqvae_model.encode(batch["motion"][:,:400].cuda())
        inds = []
        for i in range(0 , n, 200):
            ii = vqvae_model.encode(batch["motion"][:,i:i+200].cuda())
            inds.append(ii[0])
#             print(ii.shape)
        
        ind = torch.concatenate(inds)[None,...]
        
#     print(ind.shape)
    
    np.save(os.path.join("/srv/scratch/sanisetty3/music_motion/AIST/joint_indices" , name+".npy"),ind.cpu().numpy()[0])
        
#         quant , out_motion = vqvae_model.decode(ind)
    

100%|██████████| 1910/1910 [01:47<00:00, 17.82it/s]


In [68]:
mot_list = glob("/srv/scratch/sanisetty3/music_motion/AIST/joint_indices/*.npy")

In [69]:
np.load(mot_list[0]).shape[0]

174

In [71]:
lens = []
for i in mot_list:
    lens.append(np.load(i).shape[0])

In [72]:
max(lens)

959

In [11]:
hlm_ds = VQFullMotionDataset("t2m", split = "train" , data_root = "/srv/scratch/sanisetty3/music_motion/HumanML3D/HumanML3D/" , window_size = -1)
hlm_loader = DATALoader(hlm_ds,1,collate_fn=None)

100%|██████████| 23384/23384 [08:53<00:00, 43.85it/s] 

Total number of motions 23384





In [12]:
for batch in tqdm(hlm_loader):
    break

  0%|          | 0/23384 [00:00<?, ?it/s]


In [13]:
n = int(batch["motion_lengths"])
name = str(batch["names"][0])
print(n,name)

199 M003397


In [14]:
ind = vqvae_model.encode(batch["motion"].cuda())

In [15]:
ind.shape

torch.Size([1, 199])

In [16]:
for batch in tqdm(hlm_loader):
    
    n = int(batch["motion_lengths"])
    name = str(batch["names"][0])
    if n< 400:
        ind = vqvae_model.encode(batch["motion"].cuda())
    else:
        #ind = vqvae_model.encode(batch["motion"][:,:400].cuda())
#         out_motion = torch.zeros((batch["motion"].shape[0] ,gen_motion_indices.shape[-1] , aist_batch["motion"].shape[-1]))
        inds = []
        for i in range(0 , n, 200):
            inds.append(vqvae_model.encode(batch["motion"].cuda()))
        
        ind = torch.stack(inds)
    
    np.save(os.path.join("/srv/scratch/sanisetty3/music_motion/HumanML3D/HumanML3D/joint_indices" , name+".npy"), ind.cpu().numpy()[0])
    
        
    

  0%|          | 0/23384 [00:00<?, ?it/s]


FileNotFoundError: [Errno 2] No such file or directory: '/srv/scratch/sanisetty3/music_motion/HumanML3D/HumanML3D/joint_indices/007648.npy'

In [20]:
sample_render(to_xyz(batch["motion"][0:1].detach().cpu(),mean = hlm_ds.mean , std = hlm_ds.std), "rnd_motion" , "/srv/scratch/sanisetty3/music_motion/motion_vqvae/evals/decode_test")

render start


## T2M Eval

In [6]:
import utils.utils_model as utils_model
from core.datasets import dataset_TM_eval
import utils.eval_trans as eval_trans
from core.models.evaluator_wrapper import EvaluatorModelWrapper
from utils.word_vectorizer import WordVectorizer
from utils.eval_trans import evaluation_vqvae_loss,evaluation_vqvae
from utils.eval_trans import calculate_R_precision,calculate_activation_statistics,calculate_diversity,calculate_frechet_distance
from tqdm import tqdm

In [7]:
w_vectorizer = WordVectorizer('/srv/scratch/sanisetty3/music_motion/T2M-GPT/glove', 'our_vab')
eval_wrapper = EvaluatorModelWrapper(cfg.eval_model)
tm_eval = dataset_TM_eval.DATALoader("t2m", True, 20, w_vectorizer, unit_length=4)


Loading Evaluation Model Wrapper (Epoch 28) Completed!!


100%|██████████| 4384/4384 [02:23<00:00, 30.56it/s]

4648 4648
Pointer Pointing at 0





In [15]:
from configs.config import cfg, get_cfg_defaults
from core.models.vqvae import VQMotionModel
from core.models.motion_regressor import MotionRegressorModel

load_path = "/srv/scratch/sanisetty3/music_motion/motion_vqvae/checkpoints/var_len/vq_768_768_aist/vqvae_motion.pt"
cfg_vq = get_cfg_defaults()
cfg_vq.merge_from_file("/srv/scratch/sanisetty3/music_motion/motion_vqvae/configs/var_len_768_768_aist_vq.yaml")

vqvae_model = VQMotionModel(cfg_vq.vqvae).eval()
pkg = torch.load(f"{load_path}", map_location = 'cpu')
print(pkg["steps"])
vqvae_model.load_state_dict(pkg["model"])
vqvae_model =vqvae_model.cuda()



tensor([275000.])


In [13]:
### Pretrained on t2m only
metrics = evaluation_vqvae_loss(val_loader = tm_eval, net= vqvae_model,nb_iter= 0, eval_wrapper = eval_wrapper,save = False,)

100%|██████████| 232/232 [00:43<00:00,  5.31it/s]


--> 	 Eva. Iter 0 :, FID. 0.0668, Diversity Real. 9.5584, Diversity. 9.9187, R_precision_real. [0.60193966 0.78189655 0.86810345], R_precision. [0.59439655 0.77801724 0.85991379], matching_score_real. 2.9862875124503825, matching_score_pred. 3.028119134902954


In [16]:
print("pretrained on t2m only and finetuned on aist")
metrics = evaluation_vqvae_loss(val_loader = tm_eval, net= vqvae_model,nb_iter= 0, eval_wrapper = eval_wrapper,save = False,)

pretrained on t2m only and finetuned on aist


100%|██████████| 232/232 [00:43<00:00,  5.30it/s]


--> 	 Eva. Iter 0 :, FID. 3.2204, Diversity Real. 9.3818, Diversity. 7.4288, R_precision_real. [0.59698276 0.78728448 0.86551724], R_precision. [0.43512931 0.64073276 0.75625   ], matching_score_real. 2.9870332890543443, matching_score_pred. 4.043809700012207


In [37]:
## Pretrained on a mix of aist and t2m
metrics = evaluation_vqvae_loss(val_loader = tm_eval, net= vqvae_model,nb_iter= 0, eval_wrapper = eval_wrapper,save = False,)

100%|██████████| 232/232 [00:42<00:00,  5.50it/s]


--> 	 Eva. Iter 0 :, FID. 0.0637, Diversity Real. 9.4620, Diversity. 9.4266, R_precision_real. [0.61616379 0.79482759 0.86982759], R_precision. [0.60172414 0.78405172 0.86077586], matching_score_real. 2.9635313979510602, matching_score_pred. 3.0240239735307366


## Render

In [115]:
from render_final import render, saveSMPL
from core.datasets.vqa_motion_dataset import TransMotionDatasetConditionalFull
from glob import glob

In [136]:
motions_list = glob("/srv/scratch/sanisetty3/clean/mint/evals/eval60/hml/*.npy" , recursive=False)


In [140]:
std = np.load("/srv/scratch/sanisetty3/music_motion/AIST/Std.npy")
mean = np.load("/srv/scratch/sanisetty3/music_motion/AIST/Mean.npy")

In [138]:
os.path.dirname(motions_list[0])

'/srv/scratch/sanisetty3/clean/mint/evals/eval60/hml'

In [141]:
for i in tqdm(motions_list):
    name =os.path.basename(i).split(".")[0]
    print(name)
    motion = torch.Tensor(np.load(i))
    motion_xyz = to_xyz(torch.Tensor(motion) , mean= mean , std = std)
    saveSMPL(motion_xyz[0].numpy(), outdir=os.path.dirname(i), name=name+"smpl", pred=True)

  0%|          | 0/44 [00:00<?, ?it/s]

gJS_sBM_cAll_d02_mJS0_ch09_mJS0
cuda:0
./body_models/
Running SMPLify, it may take a few minutes.


  2%|▏         | 1/44 [00:31<22:33, 31.48s/it]

torch.Size([1, 25, 6, 1]) dict_keys(['pose', 'betas', 'cam'])
torch.Size([1, 6890, 3, 1])
gMH_sBM_cAll_d24_mMH5_ch09_mMH5
cuda:0
./body_models/
Running SMPLify, it may take a few minutes.


  5%|▍         | 2/44 [01:00<21:25, 30.61s/it]

torch.Size([1, 25, 6, 1]) dict_keys(['pose', 'betas', 'cam'])
torch.Size([1, 6890, 3, 1])
gBR_sBM_cAll_d06_mBR5_ch04_mBR5
cuda:0
./body_models/
Running SMPLify, it may take a few minutes.


  7%|▋         | 3/44 [01:31<20:59, 30.72s/it]

torch.Size([1, 25, 6, 1]) dict_keys(['pose', 'betas', 'cam'])
torch.Size([1, 6890, 3, 1])
gWA_sFM_cAll_d27_mWA5_ch20_mWA5
cuda:0
./body_models/
Running SMPLify, it may take a few minutes.


  9%|▉         | 4/44 [02:01<20:31, 30.78s/it]

torch.Size([1, 25, 6, 1]) dict_keys(['pose', 'betas', 'cam'])
torch.Size([1, 6890, 3, 1])
gLO_sFM_cAll_d15_mLO4_ch21_mLO4
cuda:0
./body_models/
Running SMPLify, it may take a few minutes.


 11%|█▏        | 5/44 [02:30<19:37, 30.18s/it]

torch.Size([1, 25, 6, 1]) dict_keys(['pose', 'betas', 'cam'])
torch.Size([1, 6890, 3, 1])
gWA_sBM_cAll_d27_mWA2_ch04_mWA2
cuda:0
./body_models/
Running SMPLify, it may take a few minutes.


 14%|█▎        | 6/44 [02:57<18:30, 29.21s/it]

torch.Size([1, 25, 6, 1]) dict_keys(['pose', 'betas', 'cam'])
torch.Size([1, 6890, 3, 1])
gJS_sBM_cAll_d03_mJS2_ch06_mJS2
cuda:0
./body_models/
Running SMPLify, it may take a few minutes.


 16%|█▌        | 7/44 [03:28<18:18, 29.70s/it]

torch.Size([1, 25, 6, 1]) dict_keys(['pose', 'betas', 'cam'])
torch.Size([1, 6890, 3, 1])
gPO_sBM_cAll_d12_mPO5_ch06_mPO5
cuda:0
./body_models/
Running SMPLify, it may take a few minutes.


 18%|█▊        | 8/44 [03:56<17:25, 29.04s/it]

torch.Size([1, 25, 6, 1]) dict_keys(['pose', 'betas', 'cam'])
torch.Size([1, 6890, 3, 1])
gLH_sBM_cAll_d16_mLH1_ch07_mLH1
cuda:0
./body_models/
Running SMPLify, it may take a few minutes.


 20%|██        | 9/44 [04:23<16:39, 28.57s/it]

torch.Size([1, 25, 6, 1]) dict_keys(['pose', 'betas', 'cam'])
torch.Size([1, 6890, 3, 1])
gJS_sBM_cAll_d03_mJS5_ch10_mJS5
cuda:0
./body_models/
Running SMPLify, it may take a few minutes.


 23%|██▎       | 10/44 [04:49<15:44, 27.79s/it]

torch.Size([1, 25, 6, 1]) dict_keys(['pose', 'betas', 'cam'])
torch.Size([1, 6890, 3, 1])
gPO_sBM_cAll_d12_mPO2_ch10_mPO2
cuda:0
./body_models/
Running SMPLify, it may take a few minutes.


 25%|██▌       | 11/44 [05:16<15:05, 27.43s/it]

torch.Size([1, 25, 6, 1]) dict_keys(['pose', 'betas', 'cam'])
torch.Size([1, 6890, 3, 1])
gMH_sBM_cAll_d22_mMH0_ch07_mMH0
cuda:0
./body_models/
Running SMPLify, it may take a few minutes.


 27%|██▋       | 12/44 [05:44<14:44, 27.64s/it]

torch.Size([1, 25, 6, 1]) dict_keys(['pose', 'betas', 'cam'])
torch.Size([1, 6890, 3, 1])
gHO_sBM_cAll_d19_mHO1_ch09_mHO1
cuda:0
./body_models/
Running SMPLify, it may take a few minutes.


 30%|██▉       | 13/44 [06:12<14:19, 27.72s/it]

torch.Size([1, 25, 6, 1]) dict_keys(['pose', 'betas', 'cam'])
torch.Size([1, 6890, 3, 1])
gHO_sBM_cAll_d21_mHO3_ch10_mHO3
cuda:0
./body_models/
Running SMPLify, it may take a few minutes.


 32%|███▏      | 14/44 [06:38<13:43, 27.46s/it]

torch.Size([1, 25, 6, 1]) dict_keys(['pose', 'betas', 'cam'])
torch.Size([1, 6890, 3, 1])
gLH_sBM_cAll_d18_mLH2_ch09_mLH2
cuda:0
./body_models/
Running SMPLify, it may take a few minutes.


 34%|███▍      | 15/44 [07:08<13:38, 28.22s/it]

torch.Size([1, 25, 6, 1]) dict_keys(['pose', 'betas', 'cam'])
torch.Size([1, 6890, 3, 1])
gHO_sBM_cAll_d20_mHO4_ch05_mHO4
cuda:0
./body_models/
Running SMPLify, it may take a few minutes.


 36%|███▋      | 16/44 [07:37<13:10, 28.22s/it]

torch.Size([1, 25, 6, 1]) dict_keys(['pose', 'betas', 'cam'])
torch.Size([1, 6890, 3, 1])
gMH_sBM_cAll_d22_mMH2_ch05_mMH2
cuda:0
./body_models/
Running SMPLify, it may take a few minutes.


 39%|███▊      | 17/44 [08:06<12:53, 28.65s/it]

torch.Size([1, 25, 6, 1]) dict_keys(['pose', 'betas', 'cam'])
torch.Size([1, 6890, 3, 1])
gLO_sFM_cAll_d13_mLO5_ch06_mLO5
cuda:0
./body_models/
Running SMPLify, it may take a few minutes.


 41%|████      | 18/44 [08:35<12:27, 28.75s/it]

torch.Size([1, 25, 6, 1]) dict_keys(['pose', 'betas', 'cam'])
torch.Size([1, 6890, 3, 1])
gHO_sFM_cAll_d19_mHO2_ch03_mHO2
cuda:0
./body_models/
Running SMPLify, it may take a few minutes.


 43%|████▎     | 19/44 [09:03<11:49, 28.37s/it]

torch.Size([1, 25, 6, 1]) dict_keys(['pose', 'betas', 'cam'])
torch.Size([1, 6890, 3, 1])
gLO_sBM_cAll_d14_mLO1_ch04_mLO1
cuda:0
./body_models/
Running SMPLify, it may take a few minutes.


 45%|████▌     | 20/44 [09:28<11:00, 27.51s/it]

torch.Size([1, 25, 6, 1]) dict_keys(['pose', 'betas', 'cam'])
torch.Size([1, 6890, 3, 1])
gKR_sBM_cAll_d28_mKR1_ch09_mKR1
cuda:0
./body_models/
Running SMPLify, it may take a few minutes.


 48%|████▊     | 21/44 [10:00<11:03, 28.84s/it]

torch.Size([1, 25, 6, 1]) dict_keys(['pose', 'betas', 'cam'])
torch.Size([1, 6890, 3, 1])
gKR_sFM_cAll_d30_mKR3_ch18_mKR3
cuda:0
./body_models/
Running SMPLify, it may take a few minutes.


 50%|█████     | 22/44 [10:27<10:22, 28.31s/it]

torch.Size([1, 25, 6, 1]) dict_keys(['pose', 'betas', 'cam'])
torch.Size([1, 6890, 3, 1])
gJB_sBM_cAll_d08_mJB1_ch03_mJB1
cuda:0
./body_models/
Running SMPLify, it may take a few minutes.


 52%|█████▏    | 23/44 [11:01<10:26, 29.81s/it]

torch.Size([1, 25, 6, 1]) dict_keys(['pose', 'betas', 'cam'])
torch.Size([1, 6890, 3, 1])
gKR_sBM_cAll_d29_mKR4_ch08_mKR4
cuda:0
./body_models/
Running SMPLify, it may take a few minutes.


 55%|█████▍    | 24/44 [11:32<10:07, 30.38s/it]

torch.Size([1, 25, 6, 1]) dict_keys(['pose', 'betas', 'cam'])
torch.Size([1, 6890, 3, 1])
gBR_sBM_cAll_d04_mBR2_ch05_mBR2
cuda:0
./body_models/
Running SMPLify, it may take a few minutes.


 57%|█████▋    | 25/44 [12:01<09:25, 29.75s/it]

torch.Size([1, 25, 6, 1]) dict_keys(['pose', 'betas', 'cam'])
torch.Size([1, 6890, 3, 1])
gBR_sBM_cAll_d04_mBR3_ch04_mBR3
cuda:0
./body_models/
Running SMPLify, it may take a few minutes.


 59%|█████▉    | 26/44 [12:35<09:18, 31.02s/it]

torch.Size([1, 25, 6, 1]) dict_keys(['pose', 'betas', 'cam'])
torch.Size([1, 6890, 3, 1])
gPO_sBM_cAll_d12_mPO4_ch09_mPO4
cuda:0
./body_models/
Running SMPLify, it may take a few minutes.


 61%|██████▏   | 27/44 [13:03<08:33, 30.20s/it]

torch.Size([1, 25, 6, 1]) dict_keys(['pose', 'betas', 'cam'])
torch.Size([1, 6890, 3, 1])
gLH_sBM_cAll_d17_mLH5_ch09_mLH5
cuda:0
./body_models/
Running SMPLify, it may take a few minutes.


 64%|██████▎   | 28/44 [13:38<08:28, 31.75s/it]

torch.Size([1, 25, 6, 1]) dict_keys(['pose', 'betas', 'cam'])
torch.Size([1, 6890, 3, 1])
gMH_sBM_cAll_d22_mMH1_ch08_mMH1
cuda:0
./body_models/
Running SMPLify, it may take a few minutes.


 66%|██████▌   | 29/44 [14:10<07:55, 31.70s/it]

torch.Size([1, 25, 6, 1]) dict_keys(['pose', 'betas', 'cam'])
torch.Size([1, 6890, 3, 1])
gLO_sBM_cAll_d14_mLO0_ch05_mLO0
cuda:0
./body_models/
Running SMPLify, it may take a few minutes.


 68%|██████▊   | 30/44 [14:40<07:18, 31.32s/it]

torch.Size([1, 25, 6, 1]) dict_keys(['pose', 'betas', 'cam'])
torch.Size([1, 6890, 3, 1])
gJS_sBM_cAll_d02_mJS4_ch04_mJS4
cuda:0
./body_models/
Running SMPLify, it may take a few minutes.


 70%|███████   | 31/44 [15:07<06:27, 29.84s/it]

torch.Size([1, 25, 6, 1]) dict_keys(['pose', 'betas', 'cam'])
torch.Size([1, 6890, 3, 1])
gJB_sBM_cAll_d08_mJB0_ch05_mJB0
cuda:0
./body_models/
Running SMPLify, it may take a few minutes.


 73%|███████▎  | 32/44 [15:37<05:58, 29.89s/it]

torch.Size([1, 25, 6, 1]) dict_keys(['pose', 'betas', 'cam'])
torch.Size([1, 6890, 3, 1])
gMH_sBM_cAll_d24_mMH4_ch06_mMH4
cuda:0
./body_models/
Running SMPLify, it may take a few minutes.


 75%|███████▌  | 33/44 [16:05<05:23, 29.38s/it]

torch.Size([1, 25, 6, 1]) dict_keys(['pose', 'betas', 'cam'])
torch.Size([1, 6890, 3, 1])
gKR_sFM_cAll_d30_mKR0_ch15_mKR0
cuda:0
./body_models/
Running SMPLify, it may take a few minutes.


 77%|███████▋  | 34/44 [16:36<04:58, 29.84s/it]

torch.Size([1, 25, 6, 1]) dict_keys(['pose', 'betas', 'cam'])
torch.Size([1, 6890, 3, 1])
gWA_sBM_cAll_d26_mWA1_ch03_mWA1
cuda:0
./body_models/
Running SMPLify, it may take a few minutes.


 80%|███████▉  | 35/44 [17:03<04:21, 29.02s/it]

torch.Size([1, 25, 6, 1]) dict_keys(['pose', 'betas', 'cam'])
torch.Size([1, 6890, 3, 1])
gPO_sFM_cAll_d12_mPO0_ch15_mPO0
cuda:0
./body_models/
Running SMPLify, it may take a few minutes.


 82%|████████▏ | 36/44 [17:33<03:54, 29.31s/it]

torch.Size([1, 25, 6, 1]) dict_keys(['pose', 'betas', 'cam'])
torch.Size([1, 6890, 3, 1])
gHO_sBM_cAll_d20_mHO0_ch10_mHO0
cuda:0
./body_models/
Running SMPLify, it may take a few minutes.


 84%|████████▍ | 37/44 [18:02<03:25, 29.32s/it]

torch.Size([1, 25, 6, 1]) dict_keys(['pose', 'betas', 'cam'])
torch.Size([1, 6890, 3, 1])
gJB_sBM_cAll_d09_mJB2_ch04_mJB2
cuda:0
./body_models/
Running SMPLify, it may take a few minutes.


 86%|████████▋ | 38/44 [18:33<02:58, 29.75s/it]

torch.Size([1, 25, 6, 1]) dict_keys(['pose', 'betas', 'cam'])
torch.Size([1, 6890, 3, 1])
gKR_sBM_cAll_d29_mKR5_ch07_mKR5
cuda:0
./body_models/
Running SMPLify, it may take a few minutes.


 89%|████████▊ | 39/44 [19:00<02:24, 28.85s/it]

torch.Size([1, 25, 6, 1]) dict_keys(['pose', 'betas', 'cam'])
torch.Size([1, 6890, 3, 1])
gWA_sBM_cAll_d26_mWA4_ch10_mWA4
cuda:0
./body_models/
Running SMPLify, it may take a few minutes.


 91%|█████████ | 40/44 [19:31<01:58, 29.65s/it]

torch.Size([1, 25, 6, 1]) dict_keys(['pose', 'betas', 'cam'])
torch.Size([1, 6890, 3, 1])
gWA_sBM_cAll_d27_mWA3_ch05_mWA3
cuda:0
./body_models/
Running SMPLify, it may take a few minutes.


 93%|█████████▎| 41/44 [19:58<01:26, 28.73s/it]

torch.Size([1, 25, 6, 1]) dict_keys(['pose', 'betas', 'cam'])
torch.Size([1, 6890, 3, 1])
gLH_sBM_cAll_d18_mLH3_ch06_mLH3
cuda:0
./body_models/
Running SMPLify, it may take a few minutes.


 95%|█████████▌| 42/44 [20:33<01:01, 30.54s/it]

torch.Size([1, 25, 6, 1]) dict_keys(['pose', 'betas', 'cam'])
torch.Size([1, 6890, 3, 1])
gBR_sBM_cAll_d05_mBR1_ch05_mBR1
cuda:0
./body_models/
Running SMPLify, it may take a few minutes.


 98%|█████████▊| 43/44 [21:03<00:30, 30.48s/it]

torch.Size([1, 25, 6, 1]) dict_keys(['pose', 'betas', 'cam'])
torch.Size([1, 6890, 3, 1])
gBR_sBM_cAll_d06_mBR4_ch05_mBR4
cuda:0
./body_models/
Running SMPLify, it may take a few minutes.


100%|██████████| 44/44 [21:29<00:00, 29.31s/it]

torch.Size([1, 25, 6, 1]) dict_keys(['pose', 'betas', 'cam'])
torch.Size([1, 6890, 3, 1])



