In [4]:
import glob
import pprint

import torch
from torch import nn
from libs.core.config import load_config
import os
from libs.datasets.datasets import make_data_loader, make_dataset
from libs.modeling.models import make_meta_arch

from libs.utils.train_utils import fix_random_seed
"""0. load config"""
    # sanity check
    
class Args:
    ckpt = "./ckpt/thumos_i3d_VGGish_AVF_reproduce"
    config = "./configs/thumos_i3d_VGGish_AVF.yaml"
    epoch = -1
    topk = -1
    saveonly = False
    print_freq = 10
args = Args()


if os.path.isfile(args.config):
        cfg = load_config(args.config)
else:
    raise ValueError("Config file does not exist.")
assert len(cfg['val_split']) > 0, "Test set must be specified!"
if ".pth.tar" in args.ckpt:
    assert os.path.isfile(args.ckpt), "CKPT file does not exist!"
    ckpt_file = args.ckpt
else:
    assert os.path.isdir(args.ckpt), "CKPT file folder does not exist!"
    if args.epoch > 0:
        ckpt_file = os.path.join(
            args.ckpt, 'epoch_{:03d}.pth.tar'.format(args.epoch)
        )
    else:
        ckpt_file_list = sorted(glob.glob(os.path.join(args.ckpt, '*.pth.tar')))
        ckpt_file = ckpt_file_list[-1]
    assert os.path.exists(ckpt_file)

if args.topk > 0:
    cfg['model']['test_cfg']['max_seg_num'] = args.topk
print(str(cfg))

"""1. fix all randomness"""
# fix the random seeds (this will fix everything)
_ = fix_random_seed(0, include_cuda=True)

"""2. create dataset / dataloader"""
val_dataset = make_dataset(
    cfg['dataset_name'], False, cfg['val_split'], **cfg['dataset']
)
# set bs = 1, and disable shuffle
val_loader = make_data_loader(
    val_dataset, False, None, 1, cfg['loader']['num_workers']
)

"""3. create model and evaluator"""
# model
model = make_meta_arch(cfg['model_name'], **cfg['model'])

"""4. load ckpt"""
print("=> loading checkpoint '{}'".format(ckpt_file))
# load ckpt, reset epoch / best rmse
checkpoint = torch.load(
    ckpt_file,
    map_location = lambda storage, loc: storage.cuda(cfg['devices'][0])
)


# out = model(val_dataset[0])
# make_dot(out).render("rnn_torchviz", format="png")

# load ema model instead
print("Loading from EMA model ...")
try:
    model.load_state_dict(checkpoint['state_dict_ema'])
except:
    model.load_state_dict(checkpoint['state_dict'], strict=False)
    
del checkpoint


{'dataset_name': 'thumos_AVF', 'train_split': ['validation'], 'val_split': ['test'], 'dataset': {'json_file': './data/thumos/annotations/thumos14.json', 'feat_folder': './data/thumos/i3d_features', 'file_prefix': None, 'file_ext': '.npy', 'num_classes': 20, 'input_dim': 2048, 'feat_stride': 4, 'num_frames': 16, 'trunc_thresh': 0.5, 'crop_ratio': [0.9, 1.0], 'max_seq_len': 2304, 'default_fps': None, 'downsample_rate': 1, 'force_upsampling': False}, 'model': {'fpn_type': 'identity', 'max_buffer_len_factor': 6.0, 'n_mha_win_size': 19, 'backbone_type': 'AVFusionConvTransformer', 'backbone_arch': (2, 2, 5, 2, 2), 'scale_factor': 2, 'regression_range': [(0, 4), (4, 8), (8, 16), (16, 32), (32, 64), (64, 10000)], 'n_head': 4, 'embd_kernel_size': 3, 'embd_dim': 512, 'embd_with_ln': True, 'fpn_dim': 512, 'fpn_with_ln': True, 'fpn_start_level': 0, 'head_dim': 512, 'head_kernel_size': 3, 'head_num_layers': 3, 'head_with_ln': True, 'use_abs_pe': False, 'use_rel_pe': False, 'input_dim': 2048, 'num_c

In [5]:
print(model)

PtTransformer(
  (sigmoid): Sigmoid()
  (backbone): AVFusionConvTransformerBackbone(
    (relu): ReLU(inplace=True)
    (video_stem): ConvTransformerBackbone_StemOnly(
      (model): PtTransformer(
        (sigmoid): Sigmoid()
        (backbone): ConvTransformerBackbone_Orig(
          (relu): ReLU(inplace=True)
          (embd): ModuleList(
            (0): MaskedConv1D(
              (conv): Conv1d(2048, 512, kernel_size=(3,), stride=(1,), padding=(1,), bias=False)
            )
            (1): MaskedConv1D(
              (conv): Conv1d(512, 512, kernel_size=(3,), stride=(1,), padding=(1,), bias=False)
            )
          )
          (embd_norm): ModuleList(
            (0): LayerNorm()
            (1): LayerNorm()
          )
          (stem): ModuleList(
            (0): TransformerBlock(
              (ln1): LayerNorm()
              (ln2): LayerNorm()
              (attn): LocalMaskedMHCA(
                (query_conv): MaskedConv1D(
                  (conv): Conv1d(512, 512,

In [6]:
import torchviz
from torchviz import make_dot


In [7]:
video_list =  [
        {
            'video_id': 'video_validation_0000155',
            'feats': torch.rand((2048, 10)),  # Random tensor of size C x T
            'segments': torch.tensor([[265.0, 337.0]]),
            'labels': torch.tensor([3]),
            'fps': 30.0,
            'prompt': 'CleanAndJerk',
            'audio_track': torch.rand((128, 10), dtype=torch.float64),  # Random tensor of size A x T (audio track)
            'duration': 75.54,
            'active_label': 3,
            'feat_stride': 4,
            'feat_num_frames': 16
        }]

In [16]:
model.eval()
y = model([val_dataset[0]])



In [28]:
make_dot(y, params=model.named_params()).render("rnn_torchviz", format="png")

AttributeError: 'PtTransformer' object has no attribute 'named_params'