**To calculate computational efficieny on differencnt backbones**

**Load Model**

In [1]:
import os
import torch
from models.DiGATe_Unet import DiGATe_Unet

def load_digat_model(ex_no: str, backbone: str = "tf_efficientnet_b4", base_dir: str = "weights", device: str | None = None) -> torch.nn.Module:

    if device is None:
        device = "cuda" if torch.cuda.is_available() else "cpu"

    model = DiGATe_Unet(
        n_classes=1,
        backbone=backbone,
        n_channels=3,
        pretrained=True,
        pretrained_path=None,
        use_input_adapter=False,
        freeze_backbone=True,
        share_backbone=False,
    ).to(device)

    checkpoint_path = os.path.join(base_dir, "weights", f"{ex_no}.pth")
    checkpoint = torch.load(checkpoint_path, weights_only=False)

    # Handle both full checkpoints and plain state-dicts
    state_dict = (
        checkpoint["model_state_dict"]
        if "model_state_dict" in checkpoint
        else checkpoint
    )
    model.load_state_dict(state_dict)
    model.eval()
    return model

  from .autonotebook import tqdm as notebook_tqdm


**Computational Cost Analysis**

In [2]:
import torch
import time
from thop import profile
import copy

def analyze_model_efficiency(model, inputs):
    device = next(model.parameters()).device
    inputs = tuple(i.to(device) for i in inputs)
    model.eval()

    results = {}

    # 1. GFLOPs and Total Parameters
    model_for_profile = copy.deepcopy(model)
    with torch.no_grad():
        flops, params = profile(model_for_profile, inputs=inputs, verbose=False)
        results['GFLOPs'] = flops / 1e9
        results['Total Params (M)'] = params / 1e6

    # 2. Trainable Parameters
    trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
    results['Trainable Params (M)'] = trainable_params / 1e6

    # 3. FPS, Latency, and Memory (GPU-specific)
    if device.type == 'cuda':
        # Memory Usage
        torch.cuda.reset_peak_memory_stats(device)
        with torch.no_grad():
            _ = model(*inputs)
        peak_memory_mb = torch.cuda.max_memory_allocated(device) / (1024 * 1024)
        results['Peak Memory (MB)'] = peak_memory_mb

        # FPS and Latency
        num_warmup = 20
        num_runs = 100
        
        # Create CUDA events for accurate timing
        start_event = torch.cuda.Event(enable_timing=True)
        end_event = torch.cuda.Event(enable_timing=True)
        
        # Warm-up runs
        with torch.no_grad():
            for _ in range(num_warmup):
                _ = model(*inputs)
        
        # Timing runs
        times = []
        with torch.no_grad():
            for _ in range(num_runs):
                start_event.record()
                _ = model(*inputs)
                end_event.record()
                
                # Wait for events to complete
                torch.cuda.synchronize()
                
                times.append(start_event.elapsed_time(end_event)) # Time in ms
        
        avg_latency_ms = sum(times) / len(times)
        fps = 1000.0 / avg_latency_ms
        
        results['Latency (ms)'] = avg_latency_ms
        results['FPS'] = fps

    else: # CPU measurements
        # Note: CPU timing is less precise and memory is harder to isolate.
        num_warmup = 10
        num_runs = 50
        
        # Warm-up
        with torch.no_grad():
            for _ in range(num_warmup):
                _ = model(*inputs)

        # Timing
        start_time = time.time()
        with torch.no_grad():
            for _ in range(num_runs):
                _ = model(*inputs)
        end_time = time.time()
        
        avg_latency_s = (end_time - start_time) / num_runs
        results['Latency (ms)'] = avg_latency_s * 1000
        results['FPS'] = 1.0 / avg_latency_s
        results['Peak Memory (MB)'] = 'N/A on CPU'

    return results

**Models**

In [None]:
BASE_DIR = "/home/user1/ms/DiGATe-UNet-LandSlide-Segmentation" 

**Computational Cost Analysis**

In [4]:
model = load_digat_model("E02", backbone="tf_efficientnet_b4", base_dir=BASE_DIR)

input1 = torch.randn(1, 3, 256, 256, device='cuda' if torch.cuda.is_available() else 'cpu')
input2 = torch.randn(1, 3, 256, 256, device='cuda' if torch.cuda.is_available() else 'cpu')
inputs = (input1, input2)

# Get the efficiency report
efficiency_report = analyze_model_efficiency(model, inputs)

print(f"--- Model Efficiency Report ---")
print("-----------------------------")
print(f"GFLOPs: {efficiency_report.get('GFLOPs', 0):.2f}")
print(f"Trainable Params (M): {efficiency_report.get('Trainable Params (M)', 0):.3f}")
print(f"FPS: {efficiency_report.get('FPS', 0):.2f}")
print(f"Peak Memory (MB): {efficiency_report.get('Peak Memory (MB)', 0):.2f}")

Unexpected keys (bn2.bias, bn2.num_batches_tracked, bn2.running_mean, bn2.running_var, bn2.weight, classifier.bias, classifier.weight, conv_head.weight) found while loading pretrained weights. This may be expected if model is being adapted.
Unexpected keys (bn2.bias, bn2.num_batches_tracked, bn2.running_mean, bn2.running_var, bn2.weight, classifier.bias, classifier.weight, conv_head.weight) found while loading pretrained weights. This may be expected if model is being adapted.


--- Model Efficiency Report ---
-----------------------------
GFLOPs: 4.19
Trainable Params (M): 1.238
FPS: 40.34
Peak Memory (MB): 303.62


**resnet50**

In [5]:
model = load_digat_model("d121", backbone="densenet121", base_dir=BASE_DIR)

input1 = torch.randn(1, 3, 256, 256, device='cuda' if torch.cuda.is_available() else 'cpu')
input2 = torch.randn(1, 3, 256, 256, device='cuda' if torch.cuda.is_available() else 'cpu')
inputs = (input1, input2)

# Get the efficiency report
efficiency_report = analyze_model_efficiency(model, inputs)

print(f"--- Model Efficiency Report ---")
print("-----------------------------")
print(f"GFLOPs: {efficiency_report.get('GFLOPs', 0):.2f}")
print(f"Trainable Params (M): {efficiency_report.get('Trainable Params (M)', 0):.3f}")
print(f"FPS: {efficiency_report.get('FPS', 0):.2f}")
print(f"Peak Memory (MB): {efficiency_report.get('Peak Memory (MB)', 0):.2f}")

--- Model Efficiency Report ---
-----------------------------
GFLOPs: 22.38
Trainable Params (M): 23.261
FPS: 35.56
Peak Memory (MB): 352.85


**ViT**

In [7]:
import torch

from models.DiGATe_Unet_Vit import DiGATe_Unet_Vit
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
print(f'Using device: {DEVICE}')

model = DiGATe_Unet_Vit(1, backbone="vit_base_patch16_224", model_path='/home/user1/.cache/torch/hub/checkpoints/vit_base_patch16_224.pth').to(DEVICE)
checkpoint = torch.load(os.path.join(BASE_DIR, "weights", "vit_224.pth"), weights_only=False)
model.load_state_dict(checkpoint)

input1 = torch.randn(1, 3, 224, 224, device='cuda' if torch.cuda.is_available() else 'cpu')
input2 = torch.randn(1, 3, 224, 224, device='cuda' if torch.cuda.is_available() else 'cpu')
inputs = (input1, input2)

# Get the efficiency report
efficiency_report = analyze_model_efficiency(model, inputs)

print(f"--- Model Efficiency Report ---")
print("-----------------------------")
print(f"GFLOPs: {efficiency_report.get('GFLOPs', 0):.2f}")
print(f"Trainable Params (M): {efficiency_report.get('Trainable Params (M)', 0):.3f}")
print(f"FPS: {efficiency_report.get('FPS', 0):.2f}")
print(f"Peak Memory (MB): {efficiency_report.get('Peak Memory (MB)', 0):.2f}")

Using device: cuda


  return fn(*args, **kwargs)


--- Model Efficiency Report ---
-----------------------------
GFLOPs: 195.48
Trainable Params (M): 39.101
FPS: 22.41
Peak Memory (MB): 2362.48
