In [1]:
from pathlib import Path
import os
from IPython.display import HTML
import wandb
import torch
from mp_transformer.models.transformer import MovementPrimitiveTransformer
from mp_transformer.config import CONFIG
from mp_transformer.train import setup
from mp_transformer.utils import save_side_by_side_strip

In [2]:
current_dir = Path.cwd().parts[-1]
if current_dir == "demo":
    os.chdir("..")
!pwd

/data/daniel/git/mp-transformer


In [10]:
# Start a W&B run, register the model artifact as an input of that run,
# and download its files; `artifact_dir` is the local download location.
MODEL_ARTIFACT = "tcs-mr/mp-transformer/model:v2"

run = wandb.init(project="mp-transformer")
artifact = run.use_artifact(MODEL_ARTIFACT, type="model")
artifact_dir = artifact.download()


[34m[1mwandb[0m: Downloading large artifact model:v2, 86.25MB. 1 files... 
[34m[1mwandb[0m:   1 of 1 files downloaded.  
Done. 0:0:0.1


In [11]:
# Show where the artifact files were downloaded to.
print(artifact_dir)

./artifacts/model:v2


In [12]:
# Build the model and the train/validation datasets from CONFIG, then replace
# the freshly built model with one restored from the downloaded checkpoint.
model, train_dataset, val_dataset = setup(CONFIG)

# `load_from_checkpoint` is a classmethod that returns a new model; recent
# Lightning versions refuse to run it on an instance, so call it on the class.
checkpoint_path = Path(artifact_dir, "model.ckpt")
model = type(model).load_from_checkpoint(checkpoint_path, config=CONFIG)

# The W&B run was only needed to fetch the artifact; close it.
run.finish()

In [13]:
# Run the model on the first validation sample.
item = val_dataset[0]
poses = item["poses"]
timestamps = item["timestamps"]
# Alternative experiment (disabled): feed only the first and last frame by
# stacking poses[0]/poses[-1] and timestamps[0]/timestamps[-1] before inference.
y_hat = model.infer(poses, timestamps)

In [8]:
# Write the side-by-side comparison videos for this sample (the captured
# output reports tmp/comp_vid*.mp4 and tmp/comp_strip.mp4).
num_primitives = CONFIG["num_primitives"]
save_side_by_side_strip(item, model, num_primitives)

Video saved to tmp/comp_vid0.mp4
Video saved to tmp/comp_vid1.mp4
Video saved to tmp/comp_vid2.mp4
Video saved to tmp/comp_vid3.mp4
Video saved to tmp/comp_vid4.mp4
Video saved to tmp/comp_vid5.mp4
Moviepy - Building video tmp/comp_strip.mp4.
Moviepy - Writing video tmp/comp_strip.mp4



                                                               

Moviepy - Done !
Moviepy - video ready tmp/comp_strip.mp4


In [12]:
# Embed the comparison strip video inline in the notebook.
comp_strip_markup = """
<video width="320" height="240" controls>
  <source src="../tmp/comp_strip.mp4" type="video/mp4">
</video>
"""
HTML(comp_strip_markup)

In [14]:
# Draw random latents to decode into a generated sequence.
# NOTE(review): 6 presumably matches CONFIG["num_primitives"] and 128 the
# latent width expected by the decoder — confirm against the model config.
init_latents = torch.zeros(1, 6, 128)

# Add a leading batch dimension. Derive it from `item` rather than from
# `timestamps` itself so that re-running this cell does not keep stacking
# extra dimensions onto an already-batched tensor.
timestamps = item["timestamps"].unsqueeze(0)

# Standard-normal sample with the same shape/dtype as `init_latents`.
sampled_latents = torch.randn_like(init_latents)

In [18]:
# Decode the sampled latents at the given timestamps, then strip the leading
# batch dimension and convert the reconstruction to a NumPy array.
out = model.decoder(timestamps, sampled_latents)
recons_sequence = out["recons_sequence"].squeeze(0).detach().numpy()
recons_sequence.shape

(128, 3)

In [22]:
import numpy as np
import imageio
from mp_transformer.datasets.toy_dataset import unnormalize_pose
from mp_transformer.utils.generate_toy_data import BONE_LENGTHS, render_image

In [20]:
# One rendered image per decoded pose: undo the pose normalisation first,
# then draw it with the toy dataset's bone lengths.
imgs = [
    render_image(unnormalize_pose(pose), BONE_LENGTHS)
    for pose in recons_sequence
]

In [25]:

# Write the rendered frames to a video file at 10 frames per second.
output_file = "tmp/gen_vid.mp4"  # plain literal: there is nothing to interpolate

# Make sure the target directory exists so the writer does not fail when
# this cell runs before anything else has created tmp/.
Path(output_file).parent.mkdir(parents=True, exist_ok=True)

with imageio.get_writer(output_file, fps=10) as writer:
    for img in imgs:
        # Frames are image objects (PIL, per the original note); the writer
        # is given NumPy arrays.
        writer.append_data(np.array(img))

print(f"Video saved to {output_file}")

Video saved to tmp/gen_vid.mp4


In [26]:
# Embed the generated video inline in the notebook.
gen_video_markup = """
<video width="320" height="240" controls>
  <source src="../tmp/gen_vid.mp4" type="video/mp4">
</video>
"""
HTML(gen_video_markup)