In [1]:
# Select the compute device: CUDA when torch reports a usable GPU, otherwise the CPU.
import torch
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# One print emits the device line followed by the same blank line as before.
print('Using device:', device, end='\n\n')

# On CUDA, additionally report the name of device 0 and its memory usage,
# expressed in units of 2**30 bytes and rounded to one decimal place.
if device.type == 'cuda':
    print(torch.cuda.get_device_name(0))
    print('Memory Usage:')
    print('Allocated:', round(torch.cuda.memory_allocated(0) / 2**30, 1), 'GB')
    print('Cached:   ', round(torch.cuda.memory_reserved(0) / 2**30, 1), 'GB')


Using device: cpu



In [2]:
%load_ext autoreload
%autoreload 2
%matplotlib inline

In [3]:
import os
import numpy as np
from tqdm import tqdm
from glob import glob

In [4]:
# pip install tqdm
# !conda install -c conda-forge imageio -y
# !pip install yacs
#!pip install einops
# !pip install --upgrade --no-cache-dir gdown
# !pip install smplx
# !pip install trimesh
# !pip install h5py
# !pip install pyrender
# !pip install shapely
# !pip install chumpy
# !pip install mapbox_earcut
# !pip install git+https://github.com/openai/CLIP.git
# !pip install moviepy
#pip install scipy -U

In [5]:
# conda install -c conda-forge shapely pyrender trimesh mapbox_earcut

In [6]:
# conda install -c menpo osmesa

In [7]:
from utils.motion_process import recover_from_ric
import visualize.plot_3d_global as plot_3d
from glob import glob
def to_xyz(motion, mean, std, j=22):
    """Convert a normalized motion tensor into per-joint xyz coordinates.

    The input is moved to CPU, cast to float and de-normalized as
    ``motion * std + mean``; the result is handed to ``recover_from_ric``
    together with the joint count ``j``.

    Args:
        motion: Normalized motion tensor; only ``motion.shape[0]`` is used
            directly here (as the leading dimension of the output).
        mean: Value added after scaling (the notebook passes the AIST mean array).
        std: Scale applied to ``motion`` (the notebook passes the AIST std array).
        j: Number of joints forwarded to ``recover_from_ric`` and used for
            the reshape. Defaults to 22.

    Returns:
        The output of ``recover_from_ric`` reshaped to
        ``(motion.shape[0], -1, j, 3)``.
    """
    denormalized = motion.cpu().float() * std + mean
    joint_positions = recover_from_ric(denormalized, j)
    leading_dim = motion.shape[0]
    return joint_positions.reshape(leading_dim, -1, j, 3)

            
def sample_render(motion_xyz, name, save_path):
    """Render xyz joint positions via ``plot_3d.draw_to_batch``.

    Args:
        motion_xyz: Tensor of joint positions; converted with ``.numpy()``
            before being passed to the plotting helper.
        name: Base file name (without extension) of the output.
        save_path: Directory for the output. It is created if it does not
            exist yet; an empty string is left untouched.

    Returns:
        None. The target path ``<save_path>/<name>.gif`` is passed to
        ``plot_3d.draw_to_batch`` as a one-element list.
    """
    print("render start")

    # The plotting helper is only given a file path, so make sure the
    # directory exists first. os.makedirs("") would raise, hence the guard.
    if save_path:
        os.makedirs(save_path, exist_ok=True)

    gif_path = os.path.join(save_path, name + ".gif")
    plot_3d.draw_to_batch(motion_xyz.numpy(), None, [gif_path])



## Extract Encodec features

In [8]:
from encodec import EncodecModel
from encodec.utils import convert_audio
import torchaudio


In [9]:
# Instantiate the 24 kHz Encodec model and set its target bandwidth to 6.0.
# NOTE(review): upstream Encodec documents this value in kbps — confirm that
# the Encodec build used here (it also provides `decode2emb`) agrees.
encodec_model = EncodecModel.encodec_model_24khz()
encodec_model.set_target_bandwidth(6.0)

In [10]:
def get_music_encoding(music_path, encodec_model, target_sr=6400):
    """Encode an audio file into a sequence of Encodec embeddings.

    Args:
        music_path: Path of the audio file, loaded with ``torchaudio.load``.
        encodec_model: Encodec model providing ``channels``, ``encode`` and
            ``decode2emb``.
        target_sr: Sample rate the audio is resampled to before encoding.
            Defaults to 6400, the value this notebook has always used.
            NOTE(review): this is well below the 24 kHz in the model's name;
            presumably chosen so the encoder frame rate lines up with the
            motion frame rate — confirm before changing.

    Returns:
        The first batch element of ``encodec_model.decode2emb(...)``,
        transposed (for the clip used in this notebook the recorded shape is
        ``torch.Size([3286, 128])``).
    """
    wav, sr = torchaudio.load(music_path)
    wav = convert_audio(wav, sr, target_sr, encodec_model.channels)
    wav = wav.unsqueeze(0)  # add a leading batch dimension

    # Pure inference: keep both the encode and the embedding lookup out of
    # autograd so no graph is recorded for the returned tensor.
    with torch.no_grad():
        encoded_frames = encodec_model.encode(wav)
        music_encoding = encodec_model.decode2emb(encoded_frames)[0].T
    return music_encoding

## Download models

In [27]:
!bash prepare/download_model.sh

The pretrained vqvae model files will be stored in the './checkpoints/vqvae/mix' folder

Downloading...
From (uriginal): https://drive.google.com/uc?id=1VPqPMC-28_8zerSIDzIMC1nQPMJgu3e3
From (redirected): https://drive.google.com/uc?id=1VPqPMC-28_8zerSIDzIMC1nQPMJgu3e3&confirm=t&uuid=bae8b4f2-8240-4bd9-bb60-01b83e4b8ddb
To: /home/sohan/Sem1/8903/FinalMusicMotion/MAGMA/checkpoints/vqvae/mix/vqvae_motion_best_fid.pt
100%|██████████████████████████████████████| 1.83G/1.83G [01:06<00:00, 27.5MB/s]

The pretrained motionseq model files will be stored in the './checkpoints/motionseq/encodec/' folder

Downloading...
From (uriginal): https://drive.google.com/uc?id=1JDetnPPX1YFJ004edBduWdrlwe-ZH48f
From (redirected): https://drive.google.com/uc?id=1JDetnPPX1YFJ004edBduWdrlwe-ZH48f&confirm=t&uuid=e1628370-b9a3-425b-8876-ca58d6491bc7
To: /home/sohan/Sem1/8903/FinalMusicMotion/MAGMA/checkpoints/motionseq/encodec/motionseq_encodec_best_fid.pt
100%|██████████████████████████████████████| 1.15G/1.15G

## VQVAE

In [11]:
from configs.config import cfg, get_cfg_defaults
from core.models.vqvae import VQMotionModel

# Build the VQ-VAE configuration: project defaults overridden by the YAML
# that ships next to the pretrained VQ-VAE checkpoint.
# (A second config named `cfg_trans` used to be built here from this same
# VQ-VAE YAML; it was never read and is rebuilt from the MotionSeq YAML in the
# MotionSeq section, so the redundant copy has been removed.)
cfg_vq = get_cfg_defaults()
cfg_vq.merge_from_file("./checkpoints/vqvae/mix/vqvae_mix.yaml")



In [12]:
# Build the VQ-VAE from its config and switch it to eval mode.
vqvae_model = VQMotionModel(cfg_vq.vqvae)
vqvae_model.eval()

# Load the checkpoint onto the CPU first, report the stored step counter,
# restore the weights, then move the model to the selected device.
# NOTE: torch.load unpickles the file; only load checkpoints from a trusted source.
pkg = torch.load("./checkpoints/vqvae/mix/vqvae_motion_best_fid.pt", map_location="cpu")
print(pkg["steps"])
vqvae_model.load_state_dict(pkg["model"])
vqvae_model = vqvae_model.to(device)


tensor([295000.])


## MotionSeq

In [13]:
from core.models.motion_regressor import MotionRegressorModel

# MotionSeq configuration: project defaults overridden by the YAML shipped
# with the pretrained MotionSeq (Encodec-conditioned) checkpoint.
cfg_trans = get_cfg_defaults()
cfg_trans.merge_from_file("./checkpoints/motionseq/encodec/encodec.yaml")

# Build the regressor (pad_value=1025) and switch it to eval mode.
trans_model = MotionRegressorModel(args=cfg_trans.motion_trans, pad_value=1025)
trans_model.eval()

# Load the checkpoint onto the CPU, report the stored step counter, restore
# the weights, then move the model to the selected device.
# NOTE: torch.load unpickles the file; only load checkpoints from a trusted source.
pkg_trans = torch.load("./checkpoints/motionseq/encodec/motionseq_encodec_best_fid.pt", map_location="cpu")
print(pkg_trans["steps"])
trans_model.load_state_dict(pkg_trans["model"])
trans_model = trans_model.to(device)


tensor([210000.])


In [14]:
# Load the AIST mean/std arrays from disk. Later cells pass them to `to_xyz`
# as `mean` / `std` to undo the normalization of the generated motion.
aist_mean = np.load("./mean_std/aist/Mean.npy")
aist_std = np.load("./mean_std/aist/Std.npy")

## Generate motion

In [15]:
def generate_motion_one_shot(music_encoding_):
    """Generate motion for an entire music encoding with a single generate call.

    A random start token is sampled, ``trans_model.generate`` is asked for
    ``music_encoding_.shape[0]`` steps conditioned on the full encoding, and
    the resulting token sequence is decoded by ``vqvae_model`` in chunks.

    Args:
        music_encoding_: Music embedding whose first dimension is the
            sequence length (tensor or array-like exposing ``.shape``).

    Returns:
        A CPU tensor of shape ``(1, gen_motion.shape[-1], 263)`` holding the
        decoded motion.

    NOTE(review): ``generate_motion_parts`` drops the first generated token
    of every window (``[0, 1:]``), which suggests ``generate`` echoes the
    start token. If so, the sequence here is one token longer than
    ``max_seq_len`` and rows past the last decoded chunk stay zero — confirm
    against ``MotionRegressorModel.generate``.
    """
    codebook_size = 1024  # start token is drawn uniformly from [0, codebook_size)
    decode_chunk = 200    # number of tokens handed to the VQ-VAE decoder per call
    motion_dim = 263      # width of each decoded motion frame

    max_seq_len = music_encoding_.shape[0]
    print(max_seq_len)
    start_tokens = torch.randint(0, codebook_size, (1, 1))

    # torch.as_tensor replaces the legacy torch.Tensor(...) constructor: same
    # float32 result for CPU inputs, and it also accepts tensors already on a GPU.
    context = torch.as_tensor(music_encoding_, dtype=torch.float32)[None, ...].to(device)
    context_mask = torch.ones((1, max_seq_len), dtype=torch.bool).to(device)

    # Pure inference: do not record an autograd graph for generation/decoding.
    with torch.no_grad():
        gen_motion = trans_model.generate(
            start_tokens=start_tokens.to(device),
            seq_len=max_seq_len,
            context=context,
            context_mask=context_mask,
        )

        out_motion = torch.zeros((1, gen_motion.shape[-1], motion_dim))
        for i in range(0, max_seq_len, decode_chunk):
            # decode returns a pair; only the second element (the motion) is used.
            _, out_motion_ = vqvae_model.decode(gen_motion[:, i:i + decode_chunk].to(device))
            out_motion[:, i:i + decode_chunk] = out_motion_

    return out_motion



In [16]:
def generate_motion_parts(music_encoding, seq_len=300, temperature=0.8):
    """Generate motion window by window and decode it with the VQ-VAE.

    The encoding is walked in steps of ``seq_len``. Every window after the
    first starts one row earlier (``max(i - 1, 0)``), and generation for each
    window is seeded with the last token produced for the previous window
    (a random token for the first one). The first token returned by
    ``trans_model.generate`` is dropped for every window, the remaining
    tokens are concatenated and truncated to the encoding length, then
    decoded in chunks.

    Args:
        music_encoding: Music embedding whose first dimension is the sequence
            length (tensor or array-like exposing ``.shape`` and slicing).
        seq_len: Window length in rows of ``music_encoding``. Defaults to 300,
            the value previously hard-coded.
        temperature: Sampling temperature forwarded to
            ``trans_model.generate``. Defaults to 0.8, as before.

    Returns:
        A CPU tensor of shape ``(1, num_tokens, 263)`` with the decoded
        motion, where ``num_tokens`` is the number of generated tokens kept
        (at most ``music_encoding.shape[0]``).
    """
    codebook_size = 1024  # start token is drawn uniformly from [0, codebook_size)
    decode_chunk = 200    # number of tokens handed to the VQ-VAE decoder per call
    motion_dim = 263      # width of each decoded motion frame

    max_seq_len = music_encoding.shape[0]
    print(max_seq_len)
    gen_motion_indices = torch.randint(0, codebook_size, (1, 1))

    gen_motion = []

    # Pure inference: do not record an autograd graph for generation/decoding.
    with torch.no_grad():
        for i in range(0, max_seq_len, seq_len):
            window_start = max(i - 1, 0)
            music_encoding_ = music_encoding[window_start:window_start + seq_len]
            print("music_encoding_", music_encoding_.shape)

            # Seed this window with the most recent token generated so far.
            gen_motion_indices_ = gen_motion_indices[:, -1:]

            # torch.as_tensor replaces the legacy torch.Tensor(...) constructor:
            # same float32 result for CPU inputs, and it also accepts GPU tensors.
            gen_motion_indices = trans_model.generate(
                start_tokens=gen_motion_indices_.to(device),
                temperature=temperature,
                seq_len=music_encoding_.shape[0],
                context=torch.as_tensor(music_encoding_, dtype=torch.float32)[None, ...].to(device),
                context_mask=torch.ones((1, music_encoding_.shape[0]), dtype=torch.bool).to(device),
            )

            # Drop the first token of the returned sequence for this window.
            gen_motion.append(gen_motion_indices[0, 1:])

        gen_motion = torch.cat(gen_motion)[None, :max_seq_len]

        # Iterate over the tokens actually present so that no empty slice is
        # ever passed to the decoder if fewer than max_seq_len were produced.
        num_tokens = gen_motion.shape[-1]
        out_motion = torch.zeros((1, num_tokens, motion_dim))
        for i in range(0, num_tokens, decode_chunk):
            # decode returns a pair; only the second element (the motion) is used.
            _, out_motion_ = vqvae_model.decode(gen_motion[:, i:i + decode_chunk].to(device))
            out_motion[:, i:i + decode_chunk] = out_motion_

    return out_motion



In [17]:
# Path of the music file to generate motion for; later cells also derive the
# output file names from its basename.
src = "./music/whip.mp3"

In [18]:
# Encode the music file with the Encodec model; the result is the
# conditioning context for motion generation below.
music_encoding = get_music_encoding(src, encodec_model)

In [19]:
music_encoding.shape

torch.Size([3286, 128])

In [20]:
# Generate motion for only the first 50 rows of the encoding (quick demo);
# pass the full `music_encoding` to cover the whole track.
generated_motion = generate_motion_parts(music_encoding[:50])

50
music_encoding_ torch.Size([50, 128])


100%|███████████████████████████████████████████| 50/50 [00:04<00:00, 10.26it/s]


## Render stick figures

In [21]:
# Name the output after the music file. splitext removes only the final
# extension, so names containing extra dots (e.g. "my.song.mp3") are kept
# intact instead of being cut at the first dot.
music_name = os.path.splitext(os.path.basename(src))[0]
save_pth = "./results/"

# De-normalize the generated motion with the AIST statistics, convert it to
# xyz joints and render it as "<music_name>_k".
sample_render(
    to_xyz(generated_motion.detach().cpu(), mean=aist_mean, std=aist_std),
    name=music_name + "_k",
    save_path=save_pth,
)


render start


## Render SMPL 

In [57]:
!bash ./prepare/download_smpl.sh

The smpl files will be stored in the 'body_models/smpl/' folder

Downloading...
From (uriginal): https://drive.google.com/uc?id=1INYlGA76ak_cKGzvpOV2Pe6RkYTlXTW2
From (redirected): https://drive.google.com/uc?id=1INYlGA76ak_cKGzvpOV2Pe6RkYTlXTW2&confirm=t&uuid=b68bf8aa-96ba-4a14-8740-30ffd1f5c7fc
To: /home/sohan/Sem1/8903/FinalMusicMotion/MAGMA/body_models/smpl.zip
100%|██████████████████████████████████████| 35.3M/35.3M [00:01<00:00, 32.4MB/s]
Archive:  smpl.zip
   creating: smpl/
  inflating: smpl/J_regressor_extra.npy  
  inflating: smpl/smplfaces.npy      
  inflating: smpl/kintree_table.pkl  
  inflating: smpl/SMPL_NEUTRAL.pkl   
Cleaning

Downloading done!


In [22]:
### Install the following packages to render SMPL

# conda install -c menpo osmesa
# conda install h5py
# conda install -c conda-forge shapely pyrender trimesh mapbox_earcut

In [21]:
from render_final import render

In [22]:
# Name the output after the music file. splitext removes only the final
# extension, so names containing extra dots are not cut at the first dot.
music_name = os.path.splitext(os.path.basename(src))[0]

save_pth = "./results/"
motion_xyz = to_xyz(generated_motion.detach().cpu(), mean=aist_mean, std=aist_std)

# Render the first (only) sequence of the batch as an SMPL mesh named
# "<music_name>_smpl". The single-argument os.path.join(save_pth) was a no-op
# and has been replaced by save_pth itself.
# NOTE(review): the output recorded below shows this call ending in
# "ValueError: row index exceeds matrix dimensions", followed by a scipy
# upgrade — the cause is not visible from this cell; confirm the call works
# after restarting the kernel.
render(motion_xyz[0].numpy(), outdir=save_pth, name=music_name + "_smpl", pred=True, device=device)

cpu
./body_models/
torch.Size([1, 6890, 3, 50])
0


ValueError: row index exceeds matrix dimensions

Collecting scipy
  Downloading scipy-1.10.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (34.5 MB)
[K     |████████████████████████████████| 34.5 MB 17.2 MB/s eta 0:00:01
Installing collected packages: scipy
  Attempting uninstall: scipy
    Found existing installation: scipy 1.7.1
    Uninstalling scipy-1.7.1:
      Successfully uninstalled scipy-1.7.1
Successfully installed scipy-1.10.1
Note: you may need to restart the kernel to use updated packages.
