In [1]:
from lib.utils.learning import load_backbone
from lib.utils.tools import get_config
import os
import subprocess
from lib.model.model_action import ActionNet

def getActionNet(yaml):
    """Build an ``ActionNet`` from the configuration referenced by ``yaml``.

    The configuration is read with ``get_config``, a backbone is created from
    it with ``load_backbone``, and the two are combined into an ``ActionNet``.

    Note: this helper does not itself restore a fine-tuned checkpoint or apply
    partial-training layer selection; it uses the backbone exactly as
    ``load_backbone`` returns it.

    Args:
        yaml: Config location handed to ``get_config`` (the benchmark cells
            in this notebook pass a path to a ``.yaml`` file).

    Returns:
        The constructed ``ActionNet`` instance.
    """
    cfg = get_config(yaml)
    backbone = load_backbone(cfg)

    # Classifier-head settings are taken straight from the config.
    head_kwargs = dict(
        dim_rep=cfg.dim_rep,
        num_classes=cfg.action_classes,
        dropout_ratio=cfg.dropout_ratio,
        version=cfg.model_version,
        hidden_dim=cfg.hidden_dim,
        num_joints=cfg.num_joints,
    )
    return ActionNet(backbone=backbone, **head_kwargs)


def get_gpu_memory_usage(device_index=0):
    """Return the memory currently in use on one GPU, as reported by ``nvidia-smi``.

    Runs ``nvidia-smi --query-gpu=memory.used --format=csv,nounits,noheader``,
    which prints one bare number per GPU, and converts the selected row by
    dividing by 1024 (``nvidia-smi`` reports this field in MiB, so the result
    is in GiB).

    Args:
        device_index: Zero-based row of the ``nvidia-smi`` listing to read.
            Defaults to 0 (the first device), matching the previous behaviour.
            NOTE(review): this is the ``nvidia-smi`` ordering, which may not
            match the PyTorch device index when ``CUDA_VISIBLE_DEVICES`` is
            set - confirm before comparing across tools.

    Returns:
        float: Used memory of the selected GPU, in units of 1024 MiB.

    Raises:
        ValueError: If ``device_index`` is outside the range of reported GPUs
            (including the case where ``nvidia-smi`` printed nothing), or if
            the selected row is not an integer.
        subprocess.CalledProcessError: If ``nvidia-smi`` exits with an error.
        FileNotFoundError: If ``nvidia-smi`` is not on the PATH.
    """
    raw_output = subprocess.check_output(
        ['nvidia-smi', '--query-gpu=memory.used', '--format=csv,nounits,noheader']
    )
    # One row per GPU; drop blank rows such as the one after the trailing newline.
    per_gpu_rows = [row.strip() for row in raw_output.decode().splitlines() if row.strip()]

    if not 0 <= device_index < len(per_gpu_rows):
        raise ValueError(
            f"device_index {device_index} is out of range: "
            f"nvidia-smi reported {len(per_gpu_rows)} GPU(s)"
        )

    memory_used_mib = int(per_gpu_rows[device_index])
    return memory_used_mib / 1024  # MiB -> GiB

In [4]:
import torch
import time

# Throughput benchmark for the "lite_temp_comp" configuration.
# Build the model from its config, move it to the GPU and put it in eval mode.
model = getActionNet('./configs/action/MB_ft_NTU60_xsub_lite_temp_comp.yaml').cuda()
model.eval()

# Random input of a fixed size, created on the CPU and then moved to the GPU.
# NOTE(review): axes are presumably (batch, persons, frames, joints, coords) - confirm against ActionNet.
input_tensor = torch.randn(1, 2, 243, 17, 3).cuda()

# Warm-up (to avoid initial overhead being counted in the timed loop)
warmup_iters = 10
with torch.no_grad():
    for _ in range(warmup_iters):
        model(input_tensor)

# Measure FPS (forward passes per second)
num_iters = 100
# CUDA work is queued asynchronously: drain it before starting the clock.
torch.cuda.synchronize()
# perf_counter() is a monotonic, high-resolution clock intended for measuring
# intervals; time.time() is wall-clock time and can be adjusted mid-run.
start_time = time.perf_counter()

with torch.no_grad():
    for _ in range(num_iters):
        model(input_tensor)

# Wait for every queued kernel to finish so the interval covers the GPU work.
torch.cuda.synchronize()
end_time = time.perf_counter()

elapsed_time = end_time - start_time
fps = num_iters / elapsed_time
print(f"FPS: {fps:.2f}")

True
FPS: 37.91


In [5]:
import torch
import time

# Throughput benchmark for the "lite" configuration.
# Build the model from its config, move it to the GPU and put it in eval mode.
model = getActionNet('./configs/action/MB_ft_NTU60_xsub_lite.yaml').cuda()
model.eval()

# Random input of a fixed size, created on the CPU and then moved to the GPU.
# NOTE(review): axes are presumably (batch, persons, frames, joints, coords) - confirm against ActionNet.
input_tensor = torch.randn(1, 2, 243, 17, 3).cuda()

# Warm-up (to avoid initial overhead being counted in the timed loop)
warmup_iters = 10
with torch.no_grad():
    for _ in range(warmup_iters):
        model(input_tensor)

# Measure FPS (forward passes per second)
num_iters = 100
# CUDA work is queued asynchronously: drain it before starting the clock.
torch.cuda.synchronize()
# perf_counter() is a monotonic, high-resolution clock intended for measuring
# intervals; time.time() is wall-clock time and can be adjusted mid-run.
start_time = time.perf_counter()

with torch.no_grad():
    for _ in range(num_iters):
        model(input_tensor)

# Wait for every queued kernel to finish so the interval covers the GPU work.
torch.cuda.synchronize()
end_time = time.perf_counter()

elapsed_time = end_time - start_time
fps = num_iters / elapsed_time
print(f"FPS: {fps:.2f}")

False
FPS: 18.73


In [5]:
import torch
import time

# Throughput benchmark for the base (non-lite) configuration.
# Build the model from its config, move it to the GPU and put it in eval mode.
model = getActionNet('./configs/action/MB_ft_NTU60_xsub.yaml').cuda()
model.eval()

# Random input of a fixed size, created on the CPU and then moved to the GPU.
# NOTE(review): axes are presumably (batch, persons, frames, joints, coords) - confirm against ActionNet.
input_tensor = torch.randn(1, 2, 243, 17, 3).cuda()

# Warm-up (to avoid initial overhead being counted in the timed loop)
warmup_iters = 10
with torch.no_grad():
    for _ in range(warmup_iters):
        model(input_tensor)

# Measure FPS (forward passes per second)
# NOTE(review): this cell times 1000 iterations while the other benchmark
# cells in this notebook time 100 - keep that in mind when comparing runs.
num_iters = 1000
# CUDA work is queued asynchronously: drain it before starting the clock.
torch.cuda.synchronize()
# perf_counter() is a monotonic, high-resolution clock intended for measuring
# intervals; time.time() is wall-clock time and can be adjusted mid-run.
start_time = time.perf_counter()

with torch.no_grad():
    for _ in range(num_iters):
        model(input_tensor)

# Wait for every queued kernel to finish so the interval covers the GPU work.
torch.cuda.synchronize()
end_time = time.perf_counter()

elapsed_time = end_time - start_time
fps = num_iters / elapsed_time
print(f"FPS: {fps:.2f}")

FPS: 10.58
