In [None]:
from models.detection.ofa_mbv3_w12_fasterrcnn import get_ofa_mbv3_w12_fasterrcnn_model, load_pretrained_fasterrcnn, set_training_params
# Alternative model (its factory is only imported in a later cell, so this
# line would raise NameError on a fresh kernel if uncommented here):
# model = get_ofa_mbv3_w12_fcos_model()
# Build the model through the project factory imported above.
model = get_ofa_mbv3_w12_fasterrcnn_model()
# Presumably an nn.Module being switched to inference mode — confirm against the factory.
model.eval()
# print(model)  # uncomment to dump the full module tree

# Print each direct child and its class name to see which submodules exist.
for name, module in model.named_children():
    print(f"{name}: {type(module).__name__}")

In [None]:
import time
import torch

def add_profiling_hooks(model, modules_to_profile=None):
    """Attach forward hooks that print how long each listed module takes.

    For every ``label -> module`` entry a forward pre-hook records a start
    timestamp and a forward hook prints the elapsed wall-clock time in
    milliseconds.

    Args:
        model: Not used by this function; kept so existing callers that pass
            the parent model keep working.
        modules_to_profile: Mapping of label to module. Each module must
            expose ``register_forward_pre_hook`` and ``register_forward_hook``.
            ``None`` (the default) or an empty mapping registers nothing.

    Returns:
        list: Hook handles, two per module (pre-hook first, then forward
        hook), in mapping order. Call ``.remove()`` on each to detach.
    """
    # Avoid a shared mutable default argument.
    if modules_to_profile is None:
        modules_to_profile = {}

    start_times = {}
    handles = []
    # Evaluated once: synchronizing is only meaningful (and only legal) when
    # CUDA is usable; on CPU-only hosts torch.cuda.synchronize() raises.
    use_cuda = torch.cuda.is_available()

    def _sync():
        # Wait for queued CUDA work so the timestamps bracket finished work.
        if use_cuda:
            torch.cuda.synchronize()

    def start_hook(name):
        def hook(module, input):
            # Announce before taking the timestamp so the print itself is not timed.
            print(f"Starting {name}")
            _sync()
            start_times[name] = time.perf_counter()
        return hook

    def end_hook(name):
        def hook(module, input, output):
            _sync()
            duration = (time.perf_counter() - start_times[name]) * 1000  # ms
            print(f"Finishing {name}, took {duration:.2f}ms")
        return hook

    for name, module in modules_to_profile.items():
        print(f"Adding hooks to {name}")
        handles.extend([
            module.register_forward_pre_hook(start_hook(name)),
            module.register_forward_hook(end_hook(name))
        ])

    return handles

def run_profiling(model, modules_to_profile, img_size=(1, 3, 800, 800), device='cuda', warmup=10, runs=100):
    """Run un-hooked warmup passes, then hooked passes that print per-module timings.

    Args:
        model: Callable model exposing ``.to(device)`` and ``.eval()``.
        modules_to_profile: Mapping of label to submodule, forwarded to
            ``add_profiling_hooks``.
        img_size: Shape handed to ``torch.randn`` for the random input.
        device: Device the input and the model are moved to.
        warmup: Number of forward passes executed before hooks are attached.
        runs: Number of forward passes executed with hooks attached.

    Returns:
        None. Timings are only printed by the hooks.
    """
    # Random input of the requested shape, created first and then moved to the device.
    sample_batch = torch.randn(img_size).to(device)
    model = model.to(device)
    model.eval()

    # Warmup passes run without hooks, so nothing is timed or printed per module.
    print(f"Warming up ({warmup} runs)...")
    with torch.no_grad():
        for _ in range(warmup):
            model(sample_batch)

    print(f"\nProfiling ({runs} runs)...")
    hook_handles = add_profiling_hooks(model, modules_to_profile)

    try:
        with torch.no_grad():
            for run_idx in range(runs):
                # Progress line on every tenth pass.
                if not run_idx % 10:
                    print(f"Run {run_idx}/{runs}")
                model(sample_batch)
    finally:
        # Detach the hooks even if a forward pass raised.
        for hook_handle in hook_handles:
            hook_handle.remove()

In [None]:
# Submodules to time individually; each keyword becomes the label printed by the hooks.
modules_to_profile = dict(
    backbone=model.backbone,
    rpn=model.rpn,
    head=model.roi_heads,
)

# 10 un-hooked warmup passes, then 50 hooked passes on a random 1x3x800x800 input.
run_profiling(
    model,
    modules_to_profile,
    img_size=(1, 3, 800, 800),
    device='cuda',
    warmup=10,
    runs=50,
)

In [None]:
from models.detection.ofa_mbv3_w12_fcos import get_ofa_mbv3_w12_fcos_model
# Rebinds `model` to the FCOS variant; the model built in the first cell is
# no longer reachable through this name afterwards.
model = get_ofa_mbv3_w12_fcos_model()
# Presumably an nn.Module being switched to inference mode — confirm against the factory.
model.eval()
# Print each direct child and its class name to see which submodules exist.
for name, module in model.named_children():
    print(f"{name}: {type(module).__name__}")

In [None]:
# Submodules to time individually; each keyword becomes the label printed by the hooks.
modules_to_profile = dict(
    backbone=model.backbone,
    anchor_generator=model.anchor_generator,
    head=model.head,
)

# 10 un-hooked warmup passes, then 50 hooked passes on a random 1x3x800x800 input.
run_profiling(
    model,
    modules_to_profile,
    img_size=(1, 3, 800, 800),
    device='cuda',
    warmup=10,
    runs=50,
)