In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import sys

if "../" not in sys.path:
    sys.path.append("../")
    print("[sys.path]:", sys.path)

[sys.path]: ['/usr/lib/python310.zip', '/usr/lib/python3.10', '/usr/lib/python3.10/lib-dynload', '', '/home/nadir/MoLiNER/.venv/lib/python3.10/site-packages', '../']


In [3]:
import torch

In [4]:
from src.model.moliner import MoLiNER

from src.model.modules.motion_frames_encoders import TMRMotionFramesEncoder
from src.model.modules.prompts_tokens_encoders import CLIPPromptsTokensEncoder
from src.model.modules.prompt_representation_layers import MLPPromptRepresentationLayer
from src.model.modules.span_representation_layers import EndpointsSpanRepresentationLayer
from src.model.modules.scorers import ProductPairScorer
from src.model.modules.optimizers import AdamOptimizer
from src.model.modules.decoders import GreedyDecoder
from src.model.modules.spans_generators.static import StaticSpansGenerator
from src.model.modules.losses import FocalLoss

  from .autonotebook import tqdm as notebook_tqdm


In [5]:
model = MoLiNER(
    motion_frames_encoder=TMRMotionFramesEncoder(frozen=False, pretrained=False),
    prompts_tokens_encoder=CLIPPromptsTokensEncoder(frozen=True),
    prompt_representation_layer=MLPPromptRepresentationLayer(input_dim=512, representation_dim=256, dropout=0.4),
    span_representation_layer=EndpointsSpanRepresentationLayer(motion_embed_dim=256, representation_dim=256, dropout=0.4),
    scorer=ProductPairScorer(),
    optimizer=AdamOptimizer({
        "scratch": 5e-5,
        "pretrained": 1e-5
    }),
    decoder=GreedyDecoder(
        strategy="FLAT"
    ),
    spans_generator=StaticSpansGenerator(
        min_width=1,
        max_width=10,
        step=1,
        padding_value=-1,
    ),
    loss=FocalLoss(
        alpha=0.25,
        gamma=2.0,
        # NOTE: 'none', 'mean', 'sum'
        reduction="sum",
        label_smoothing=0.0,
        # NOTE: 'labels', 'global', 'span'
        negatives_type="labels",
        negatives_probability=1.0,
        ignore_index=-100,
        threshold=0.75,
    ),
    postprocessors=[]
).to("cuda:1")

In [6]:
print(f"Allocated: {torch.cuda.memory_allocated() / 1024**2:.2f} MB")
print(f"Reserved: {torch.cuda.memory_reserved() / 1024**2:.2f} MB")

Allocated: 0.00 MB
Reserved: 0.00 MB


In [7]:
model = model.eval()

In [8]:
import os

os.environ["TORCHDYNAMO_VERBOSE"] = "1"

In [10]:
from src.types import RawBatch, ProcessedBatch

raw_batch = RawBatch.create_random().to("cuda:1")
processed_batch = ProcessedBatch.from_raw_batch(raw_batch, encoder=model.prompts_tokens_encoder).to("cuda:1")

In [13]:
normal_model_output = model.predict(
    raw_batch=raw_batch,
    threshold=0.65
)

print("[raw_batch.keys()]:", raw_batch.__dict__.keys())
print("[motion.shape]:", raw_batch.raw_motion.shape)
for i, motion_mask in enumerate(raw_batch.motion_mask):
    print(f"[motion#{i}]:", motion_mask.sum().item())
    
print("[normal_model_output]:", normal_model_output)

[raw_batch.keys()]: dict_keys(['sid', 'dataset_name', 'amass_relative_path', 'raw_motion', 'transformed_motion', 'motion_mask', 'prompts'])
[motion.shape]: torch.Size([2, 100, 22, 3])
[motion#0]: 76.0
[motion#1]: 98.0
[normal_model_output]: EvaluationResult(motion_length=[76, 98], predictions=[[], [('person dances right', [(20, 22, 0.6542391180992126)])]])


In [20]:
from src.visualizations.spans import plot_evaluation_results

In [21]:
figures = plot_evaluation_results(normal_model_output)

[motion_predictions]: [('person dances right', [(20, 22, 0.6542391180992126)])]


In [23]:
figures[0]

In [24]:
figures[1]