# Model Analysis: Architecture Metrics & Profiling

## Purpose

This notebook provides comprehensive analysis of the Mask R-CNN model:

1. **Parameter Counting**: Total, trainable, and breakdown by component
2. **Model Size**: FP32, FP16, and INT8 estimates
3. **FLOPs Analysis**: Computational complexity for different input sizes
4. **Memory Profiling**: Measured GPU memory for inference at several batch sizes (training needs are only noted as a rough multiple)
5. **Component Analysis**: Detailed breakdown of model architecture

## Usage

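Set the W&B artifact (entity, project, and artifact name) in the configuration cell, then run all cells to generate metrics. The trained checkpoint is downloaded from that artifact automatically.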


In [None]:
import os
import sys
from pathlib import Path
from collections import OrderedDict

import torch
import torch.nn as nn
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Add project root to path.
# The parent of the current working directory is treated as the project root,
# so the notebook has to be launched from a directory one level below it.
project_root = Path(".").resolve().parent
# Guard the insert so that re-running this cell does not stack duplicate
# entries at the front of sys.path.
if str(project_root) not in sys.path:
    sys.path.insert(0, str(project_root))

from models.maskrcnn_model import get_custom_maskrcnn

# Set plotting style
plt.style.use("seaborn-v0_8-whitegrid")
sns.set_palette("husl")

# Environment summary, useful when comparing results across machines.
print(f"Project root: {project_root}")
print(f"PyTorch version: {torch.__version__}")
print(f"CUDA available: {torch.cuda.is_available()}")

## 1. Configuration & W&B Setup


In [None]:
import wandb

# Login to W&B.
# Two environments are supported:
#   * Kaggle - the API key is read from the notebook secret named "wandb_key".
#   * Local  - assumes 'wandb login' has already been run on this machine.
try:
    from kaggle_secrets import UserSecretsClient
except ImportError:
    # Not running on Kaggle (or the helper package is unavailable).
    UserSecretsClient = None

if UserSecretsClient is None:
    wandb.login()
else:
    try:
        user_secrets = UserSecretsClient()
        wandb_key = user_secrets.get_secret("wandb_key")
        wandb.login(key=wandb_key)
    except Exception as exc:
        # Keep the best-effort fallback, but say why it was taken. Only the
        # exception type is printed so no credential can end up in the output.
        # Unlike a bare `except:`, this lets KeyboardInterrupt/SystemExit through.
        print(
            f"Kaggle secret login failed ({type(exc).__name__}); "
            "falling back to local W&B credentials"
        )
        wandb.login()

print("✓ Logged into W&B")

In [None]:
# --- Model ---------------------------------------------------------------
# iSAID has 15 classes; one more index is reserved for the background.
NUM_CLASSES = 16

# --- W&B artifact that holds the trained checkpoint ------------------------
WANDB_ENTITY = "marek-olnk-put-pozna-"
WANDB_PROJECT = "isaid-custom-segmentation"
ARTIFACT_NAME = "isaid-maskrcnn-final:v0"

# --- Analysis settings -----------------------------------------------------
# Prefer the GPU whenever one is visible to PyTorch.
DEVICE = "cpu" if not torch.cuda.is_available() else "cuda"
# Input resolutions swept by the FLOPs analysis.
INPUT_SIZES = [(800, 800), (1024, 1024), (512, 512)]

# Echo the effective configuration in a single write.
print(
    "\n".join(
        [
            "\nModel Configuration:",
            f"  Number of classes: {NUM_CLASSES}",
            f"  Device: {DEVICE}",
            f"  W&B Artifact: {WANDB_ENTITY}/{WANDB_PROJECT}/{ARTIFACT_NAME}",
        ]
    )
)

## 2. Download Model from W&B


In [None]:
# Download model artifact from W&B.
# A short-lived run of type "analysis" is opened only so that the artifact
# usage is recorded; it is closed as soon as the files are on disk (or the
# download fails), so no run is left open for the rest of the notebook session.
print("Downloading model from W&B...")
run = wandb.init(project=WANDB_PROJECT, entity=WANDB_ENTITY, job_type="analysis")
try:
    artifact = run.use_artifact(
        f"{WANDB_ENTITY}/{WANDB_PROJECT}/{ARTIFACT_NAME}", type="model"
    )
    artifact_dir = artifact.download()
finally:
    run.finish()

print(f"✓ Model downloaded to: {artifact_dir}")

# Set checkpoint path (file name expected inside the downloaded artifact)
CHECKPOINT_PATH = Path(artifact_dir) / "best_map_model.pth"
print(f"  Checkpoint: {CHECKPOINT_PATH}")

## 3. Load Model


In [None]:
# Build the architecture first; weights are filled in from the checkpoint below.
print("Creating model architecture...")
model = get_custom_maskrcnn(num_classes=NUM_CLASSES, pretrained_backbone=False)

if not CHECKPOINT_PATH.exists():
    # No checkpoint: keep going so the architecture itself can still be analysed.
    print(f"Warning: Checkpoint not found at {CHECKPOINT_PATH}")
    print("Analyzing untrained model architecture")
else:
    print(f"Loading checkpoint from {CHECKPOINT_PATH}...")
    # NOTE(review): torch.load unpickles the file - only load artifacts that
    # come from a trusted source.
    checkpoint = torch.load(CHECKPOINT_PATH, map_location="cpu")

    # Two layouts are accepted: a dict with metadata that stores the weights
    # under "model_state_dict", or a bare state dict.
    if "model_state_dict" not in checkpoint:
        model_weights = checkpoint
    else:
        model_weights = checkpoint["model_state_dict"]
        epoch = checkpoint.get("epoch", "unknown")
        print(f"  Checkpoint from epoch: {epoch}")

    model.load_state_dict(model_weights)
    print("✓ Checkpoint loaded successfully")

# Move to the analysis device and switch to inference behaviour.
model.to(DEVICE)
model.eval()

print(
    "\n".join(
        [
            "\nModel architecture:",
            "  - Backbone: EfficientNet-B0 with CBAM attention",
            "  - FPN: Custom with attention modules",
            f"  - Number of classes: {NUM_CLASSES}",
            "\n✓ Model ready for analysis",
        ]
    )
)

## 4. Parameter Counting


In [None]:
def count_parameters(model):
    """
    Tally the parameter elements of ``model`` in a single pass.

    Returns:
        Dict with the keys ``"total"``, ``"trainable"`` (parameters whose
        ``requires_grad`` is set) and ``"non_trainable"`` (the remainder).
    """
    n_all = 0
    n_trainable = 0
    for tensor in model.parameters():
        n_elements = tensor.numel()
        n_all += n_elements
        if tensor.requires_grad:
            n_trainable += n_elements

    return dict(
        total=n_all,
        trainable=n_trainable,
        non_trainable=n_all - n_trainable,
    )


def format_number(num):
    """
    Format a number with a K/M/B suffix and two decimals.

    Values whose magnitude is below 1000 are returned unchanged via ``str``.
    A value that would round up to ``1000.00`` of a unit is promoted to the
    next unit (e.g. ``999_999`` -> ``"1.00M"`` rather than ``"1000.00K"``).
    Negative values keep their sign and get a suffix based on their magnitude.

    Args:
        num: Integer or float to format.

    Returns:
        The formatted string.
    """
    magnitude = abs(num)
    if magnitude < 1e3:
        return str(num)

    sign = "-" if num < 0 else ""
    units = ((1e3, "K"), (1e6, "M"), (1e9, "B"))
    for index, (scale, suffix) in enumerate(units):
        mantissa = f"{magnitude / scale:.2f}"
        # Decide on the *rounded* text so the printed mantissa never reaches
        # 1000; the largest unit absorbs everything that is left.
        if index == len(units) - 1 or float(mantissa) < 1000:
            return f"{sign}{mantissa}{suffix}"


# Tally the parameters of the loaded model and print the headline numbers.
param_counts = count_parameters(model)

rule = "=" * 60
print(rule)
print("PARAMETER COUNTS")
print(rule)

# Label text (including its trailing padding) paired with the matching dict key.
for label, key in (
    ("Total Parameters:        ", "total"),
    ("Trainable Parameters:    ", "trainable"),
    ("Non-trainable Parameters: ", "non_trainable"),
):
    print(f"{label}{param_counts[key]:,} ({format_number(param_counts[key])})")

print(f"\nTrainable Ratio: {param_counts['trainable']/param_counts['total']*100:.1f}%")

## 5. Parameter Breakdown by Component


In [None]:
def _module_param_count(module):
    """Return the number of parameter elements registered under ``module``."""
    return sum(p.numel() for p in module.parameters())


def analyze_component_parameters(model):
    """
    Break the model's parameter count down by architectural component.

    Components are looked up by attribute name (``backbone``, ``backbone.fpn``,
    ``rpn``, ``roi_heads.box_head`` / ``box_predictor`` / ``mask_head`` /
    ``mask_predictor``). An attribute that is missing or is not an
    ``nn.Module`` (for example ``None``) is skipped.

    Attention parameters are detected by module name: any backbone sub-module
    whose qualified name contains "cbam" or "attention". Each parameter tensor
    is counted once, even when both a module and its children match, and
    tensors owned by the FPN are excluded so that the sub-rows add up to the
    "Backbone (excl. FPN)" row.

    Args:
        model: Detection model to inspect.

    Returns:
        OrderedDict mapping a display label to a parameter count. Labels that
        start with two spaces are sub-rows of the preceding component.
    """
    component_params = OrderedDict()

    backbone = getattr(model, "backbone", None)
    if isinstance(backbone, nn.Module):
        fpn = getattr(backbone, "fpn", None)
        has_fpn = isinstance(fpn, nn.Module)
        fpn_params = _module_param_count(fpn) if has_fpn else 0

        # Backbone excluding FPN
        backbone_only_params = _module_param_count(backbone) - fpn_params
        component_params["Backbone (excl. FPN)"] = backbone_only_params

        # Collect attention parameters by identity: this de-duplicates tensors
        # reached through nested matching modules and lets FPN-owned tensors
        # be filtered out.
        fpn_param_ids = {id(p) for p in fpn.parameters()} if has_fpn else set()
        attention_sizes = {}
        for name, module in backbone.named_modules():
            lowered = name.lower()
            if "cbam" in lowered or "attention" in lowered:
                for param in module.parameters():
                    if id(param) not in fpn_param_ids:
                        attention_sizes[id(param)] = param.numel()

        cbam_params = sum(attention_sizes.values())
        if cbam_params > 0:
            component_params["  - CBAM Modules"] = cbam_params
            component_params["  - Backbone (base)"] = backbone_only_params - cbam_params

        # FPN
        if has_fpn:
            component_params["FPN"] = fpn_params

    # RPN
    rpn = getattr(model, "rpn", None)
    if isinstance(rpn, nn.Module):
        component_params["RPN"] = _module_param_count(rpn)

    # ROI heads: every head and predictor is a separate attribute and gets
    # its own row.
    roi_heads = getattr(model, "roi_heads", None)
    if roi_heads is not None:
        for label, attr in (
            ("Box Head", "box_head"),
            ("Box Predictor", "box_predictor"),
            ("Mask Head", "mask_head"),
            ("Mask Predictor", "mask_predictor"),
        ):
            head = getattr(roi_heads, attr, None)
            if isinstance(head, nn.Module):
                component_params[label] = _module_param_count(head)

    return component_params


# Per-component counts, plus the overall total used for the percentage column.
component_params = analyze_component_parameters(model)
total_params = param_counts["total"]

# One record per component; the DataFrame is built once from the full list.
records = [
    {
        "Component": component,
        "Parameters": params,
        "Formatted": format_number(params),
        "Percentage": f"{params/total_params*100:.2f}%",
    }
    for component, params in component_params.items()
]
component_df = pd.DataFrame(records)

rule = "=" * 60
print(f"\n{rule}")
print("PARAMETER BREAKDOWN BY COMPONENT")
print(rule)
print(component_df.to_string(index=False))
print(f"\nTotal: {format_number(total_params)} (100%)")

In [None]:
# Two panels side by side: absolute counts (bars) and relative shares (pie).
fig, (ax1, ax2) = plt.subplots(nrows=1, ncols=2, figsize=(14, 6))

# Labels that start with a space are indented sub-rows; plot top-level rows only.
is_sub_component = component_df["Component"].str.startswith(" ")
main_components = component_df[~is_sub_component].copy()

# --- Left panel: horizontal bar chart -------------------------------------
ax1.barh(
    main_components["Component"],
    main_components["Parameters"],
    color="steelblue",
    alpha=0.8,
)
ax1.set(xlabel="Number of Parameters", title="Parameters by Component")
ax1.grid(axis="x", alpha=0.3)

# Write the formatted count at the end of each bar.
for i, row in enumerate(main_components.itertuples(index=False)):
    ax1.text(row.Parameters, i, f" {row.Formatted}", va="center", fontsize=9)

# --- Right panel: pie chart -------------------------------------------------
colors = plt.cm.Set3(np.linspace(0, 1, len(main_components)))
wedges, texts, autotexts = ax2.pie(
    main_components["Parameters"],
    labels=None,
    autopct="%1.1f%%",
    startangle=90,
    colors=colors,
    pctdistance=0.85,
)
ax2.set_title("Parameter Distribution", pad=20)

# Make the percentage labels readable.
plt.setp(autotexts, color="white", fontsize=10, weight="bold")

# Component names go in a legend (the wedges themselves carry no labels).
ax2.legend(
    wedges,
    main_components["Component"],
    loc="center left",
    bbox_to_anchor=(1, 0, 0.5, 1),
    fontsize=10,
)

plt.tight_layout()
plt.show()

## 6. Model Size Estimation


In [None]:
def estimate_model_size(model, precision="fp32"):
    """
    Estimate the storage needed for the model's parameters at a given precision.

    Only tensors returned by ``model.parameters()`` are counted.

    Args:
        model: PyTorch model
        precision: One of 'fp32', 'fp16' or 'int8' (any other value raises
            ``KeyError``)

    Returns:
        Size in bytes
    """
    n_elements = 0
    for tensor in model.parameters():
        n_elements += tensor.numel()

    # Storage width of a single element, in bytes.
    element_width = {"fp32": 4, "fp16": 2, "int8": 1}[precision]
    return n_elements * element_width


def format_bytes(size_bytes):
    """
    Format a byte count with decimal (power-of-1000) units and two decimals.

    Sizes whose magnitude is below 1000 are printed as ``"<n> bytes"``. A size
    that would round up to ``1000.00`` of a unit is promoted to the next unit
    (e.g. ``999_999`` -> ``"1.00 MB"`` rather than ``"1000.00 KB"``). Negative
    sizes - such as a negative difference between two sizes - keep their sign
    and get a unit based on their magnitude.

    Args:
        size_bytes: Size in bytes.

    Returns:
        The formatted string.
    """
    magnitude = abs(size_bytes)
    if magnitude < 1e3:
        return f"{size_bytes} bytes"

    sign = "-" if size_bytes < 0 else ""
    units = ((1e3, "KB"), (1e6, "MB"), (1e9, "GB"))
    for index, (scale, unit) in enumerate(units):
        mantissa = f"{magnitude / scale:.2f}"
        # Decide on the *rounded* text so the printed mantissa never reaches
        # 1000; the largest unit absorbs everything that is left.
        if index == len(units) - 1 or float(mantissa) < 1000:
            return f"{sign}{mantissa} {unit}"


# Calculate sizes (parameters only) for each precision
size_fp32 = estimate_model_size(model, "fp32")
size_fp16 = estimate_model_size(model, "fp16")
size_int8 = estimate_model_size(model, "int8")

print("\n" + "=" * 60)
print("MODEL SIZE ESTIMATES (Parameters Only)")
print("=" * 60)
print(f"FP32 (32-bit floating point): {format_bytes(size_fp32)}")
print(f"FP16 (16-bit floating point): {format_bytes(size_fp16)}")
print(f"INT8 (8-bit integer):         {format_bytes(size_int8)}")
print("\nCompression Ratios:")
print(f"  FP16 vs FP32: {size_fp32/size_fp16:.1f}x smaller")
print(f"  INT8 vs FP32: {size_fp32/size_int8:.1f}x smaller")

# Compare with actual checkpoint file size
if CHECKPOINT_PATH.exists():
    actual_size = CHECKPOINT_PATH.stat().st_size
    overhead = actual_size - size_fp32
    # Format the magnitude and re-attach the sign, so a file that is smaller
    # than the FP32 estimate is still shown in a readable unit.
    overhead_text = ("-" if overhead < 0 else "") + format_bytes(abs(overhead))

    print("\n" + "=" * 60)
    print("CHECKPOINT FILE SIZE COMPARISON")
    print("=" * 60)
    print(f"Actual checkpoint file:      {format_bytes(actual_size)}")
    print(f"Calculated parameters only:  {format_bytes(size_fp32)}")
    print(f"Overhead:                    {overhead_text}")
    if overhead >= 0:
        # What the file holds depends on how it was saved, so the note is
        # worded as a possibility rather than a fact.
        print("\nNote: Besides the parameters, a checkpoint stores buffers such as")
        print("      BatchNorm statistics and may also include optimizer state")
        print("      (momentum, variance) and training metadata (epoch, losses).")
        print("      Optimizer state typically adds ~1-2x the parameter size.")
    else:
        print("\nNote: The file is smaller than the FP32 parameter estimate; the")
        print("      weights may have been saved in a lower precision.")

# Visualize
fig, ax = plt.subplots(figsize=(8, 5))
precisions = ["FP32", "FP16", "INT8"]
sizes_mb = [size_fp32 / 1e6, size_fp16 / 1e6, size_int8 / 1e6]
colors = ["#e74c3c", "#3498db", "#2ecc71"]

bars = ax.bar(precisions, sizes_mb, color=colors, alpha=0.8)
ax.set_ylabel("Model Size (MB)")
ax.set_title("Model Size by Precision")
ax.grid(axis="y", alpha=0.3)

# Add value labels centred above each bar
for bar, size in zip(bars, sizes_mb):
    height = bar.get_height()
    ax.text(
        bar.get_x() + bar.get_width() / 2.0,
        height,
        f"{size:.1f} MB",
        ha="center",
        va="bottom",
        fontsize=11,
        fontweight="bold",
    )

plt.tight_layout()
plt.show()

## 7. FLOPs Analysis

Computational complexity for different input sizes.


In [None]:
# Import thop (FLOPs profiler), installing it on demand.
# THOP_AVAILABLE records the outcome; when it is False the FLOPs cells skip
# their work instead of the notebook stopping here.
try:
    from thop import profile, clever_format

    THOP_AVAILABLE = True
except ImportError:
    print("Warning: 'thop' package not installed. Installing...")
    import importlib
    import subprocess

    try:
        # NOTE(review): the install is unpinned; pin a version if the report
        # has to be reproducible.
        subprocess.check_call([sys.executable, "-m", "pip", "install", "thop"])
        # Let the import system notice the package installed a moment ago.
        importlib.invalidate_caches()
        from thop import profile, clever_format

        THOP_AVAILABLE = True
    except (subprocess.CalledProcessError, ImportError) as exc:
        print(f"Warning: could not install 'thop' ({exc}); FLOPs analysis will be skipped")
        THOP_AVAILABLE = False


def estimate_flops(model, input_size):
    """
    Profile one forward pass of ``model`` on a random image with thop.

    The model is put in eval mode and fed a single random ``(1, 3, H, W)``
    tensor placed on the device of the model's first parameter.

    Args:
        model: PyTorch model
        input_size: Tuple of (height, width)

    Returns:
        Dictionary with the keys ``input_size``, ``flops``, ``flops_str``,
        ``params`` and ``params_str``, or ``None`` (after printing a warning)
        when profiling raises.
    """
    model.eval()
    h, w = input_size

    # Random stand-in image, moved to wherever the model's weights live.
    noise = torch.randn(1, 3, h, w)
    dummy_input = noise.to(next(model.parameters()).device)

    # Profiling may not work for every detection model, so any failure is
    # reported and turned into a None result instead of propagating.
    try:
        with torch.no_grad():
            raw_flops, raw_params = profile(model, inputs=(dummy_input,), verbose=False)
        pretty_flops, pretty_params = clever_format([raw_flops, raw_params], "%.3f")
    except Exception as e:
        print(f"  Warning: Could not profile for {h}×{w}: {str(e)}")
        return None

    return dict(
        input_size=f"{h}×{w}",
        flops=raw_flops,
        flops_str=pretty_flops,
        params=raw_params,
        params_str=pretty_params,
    )


# Profile every configured input size, then tabulate and plot the results.
if not THOP_AVAILABLE:
    print("\nSkipping FLOPs analysis (thop not available)")
else:
    rule = "=" * 60
    print(f"\n{rule}")
    print("FLOPs ANALYSIS")
    print(rule)
    print("Note: For detection models, FLOPs estimation may be approximate.\n")

    flops_results = []
    for input_size in INPUT_SIZES:
        print(f"Analyzing {input_size[0]}×{input_size[1]}...")
        result = estimate_flops(model, input_size)
        if not result:
            # Profiling failed for this size; a warning was already printed.
            continue
        flops_results.append(result)
        print(f"  FLOPs: {result['flops_str']}")

    if flops_results:
        # Create summary table
        flops_df = pd.DataFrame(flops_results)
        print(f"\n{rule}")
        print("FLOPS SUMMARY")
        print(rule)
        print(flops_df[["input_size", "flops_str"]].to_string(index=False))

        # A chart only makes sense when there is more than one size to compare.
        if len(flops_results) > 1:
            fig, ax = plt.subplots(figsize=(10, 6))
            input_sizes_str = flops_df["input_size"].tolist()
            flops_gflops = (flops_df["flops"] / 1e9).tolist()

            bars = ax.bar(input_sizes_str, flops_gflops, color="coral", alpha=0.8)
            ax.set(
                ylabel="GFLOPs",
                xlabel="Input Size",
                title="Computational Complexity by Input Size",
            )
            ax.grid(axis="y", alpha=0.3)

            # Add value labels centred above each bar
            for bar, gflops in zip(bars, flops_gflops):
                ax.text(
                    bar.get_x() + bar.get_width() / 2.0,
                    bar.get_height(),
                    f"{gflops:.1f}\nGFLOPs",
                    ha="center",
                    va="bottom",
                    fontsize=10,
                )

            plt.tight_layout()
            plt.show()

## 8. Memory Profiling

Estimate GPU memory requirements for different batch sizes.


In [None]:
def estimate_memory_usage(model, batch_size=1, input_size=(800, 800), amp=False):
    """
    Measure peak GPU memory allocated during one inference forward pass.

    The model is moved to the GPU, put in eval mode and called on a list of
    ``batch_size`` random ``(3, H, W)`` tensors. The peak-memory counter is
    reset right before the pass and read right after it.

    Note: This is an approximation. The input is random noise, and actual
    memory usage during training will be higher due to gradients, optimizer
    states, etc.

    Args:
        model: PyTorch model
        batch_size: Number of images in the dummy batch
        input_size: Tuple of (height, width) of each dummy image
        amp: Run the forward pass under CUDA autocast (mixed precision)

    Returns:
        Peak allocated memory as bytes divided by 1024**3, or ``None`` when
        CUDA is unavailable or the forward pass raises (a message is printed
        in both cases).
    """
    if not torch.cuda.is_available():
        print("CUDA not available - skipping GPU memory profiling")
        return None

    from contextlib import nullcontext

    model = model.cuda()
    model.eval()

    # Start from a clean slate so the peak reflects this call only.
    torch.cuda.empty_cache()
    torch.cuda.reset_peak_memory_stats()

    h, w = input_size
    dummy_input = [torch.randn(3, h, w).cuda() for _ in range(batch_size)]

    # torch.autocast replaces the deprecated torch.cuda.amp.autocast(); the
    # null context keeps a single forward call for both precisions.
    precision_ctx = torch.autocast(device_type="cuda") if amp else nullcontext()

    try:
        with torch.no_grad(), precision_ctx:
            _ = model(dummy_input)

        peak_memory = torch.cuda.max_memory_allocated() / 1024**3  # Convert to GB
        return peak_memory

    except Exception as e:
        print(f"Error during memory profiling: {e}")
        return None
    finally:
        # Return cached blocks to the driver whether or not the pass succeeded.
        torch.cuda.empty_cache()

# One record per batch size for which at least one measurement succeeded.
# The list is created unconditionally so later cells can test it safely even
# when CUDA is unavailable.
memory_results = []

if torch.cuda.is_available():
    print("\n" + "=" * 60)
    print("GPU MEMORY PROFILING")
    print("=" * 60)
    print("Estimating inference memory usage...\n")

    batch_sizes = [1, 2, 4, 8]

    for bs in batch_sizes:
        print(f"Testing batch_size={bs}...")

        # FP32
        mem_fp32 = estimate_memory_usage(model, batch_size=bs, amp=False)
        if mem_fp32 is not None:
            print(f"  FP32: {mem_fp32:.2f} GB")

        # AMP (FP16)
        mem_amp = estimate_memory_usage(model, batch_size=bs, amp=True)
        if mem_amp is not None:
            print(f"  AMP:  {mem_amp:.2f} GB")

        # A failed measurement is recorded as "N/A"; a batch size where both
        # measurements failed is left out entirely.
        if mem_fp32 is not None or mem_amp is not None:
            memory_results.append(
                {
                    "Batch Size": bs,
                    "FP32 (GB)": f"{mem_fp32:.2f}" if mem_fp32 is not None else "N/A",
                    "AMP (GB)": f"{mem_amp:.2f}" if mem_amp is not None else "N/A",
                }
            )

    if memory_results:
        memory_df = pd.DataFrame(memory_results)
        print("\n" + "=" * 60)
        print("MEMORY USAGE SUMMARY (Inference)")
        print("=" * 60)
        print(memory_df.to_string(index=False))
        print(
            "\nNote: Training memory will be ~2-3x higher due to gradients and optimizer states."
        )

        # Visualize
        fig, ax = plt.subplots(figsize=(10, 6))
        # Bar positions and tick labels come from the records themselves, so
        # the chart stays consistent when some batch sizes were dropped.
        measured_batch_sizes = [r["Batch Size"] for r in memory_results]
        x = np.arange(len(memory_results))
        width = 0.35

        # Missing ("N/A") measurements are drawn as zero-height bars.
        fp32_mem = [
            float(r["FP32 (GB)"]) if r["FP32 (GB)"] != "N/A" else 0
            for r in memory_results
        ]
        amp_mem = [
            float(r["AMP (GB)"]) if r["AMP (GB)"] != "N/A" else 0
            for r in memory_results
        ]

        ax.bar(x - width / 2, fp32_mem, width, label="FP32", color="#e74c3c", alpha=0.8)
        ax.bar(
            x + width / 2,
            amp_mem,
            width,
            label="AMP (FP16)",
            color="#3498db",
            alpha=0.8,
        )

        ax.set_ylabel("GPU Memory (GB)")
        ax.set_xlabel("Batch Size")
        ax.set_title("Inference Memory Usage by Batch Size")
        ax.set_xticks(x)
        ax.set_xticklabels(measured_batch_sizes)
        ax.legend()
        ax.grid(axis="y", alpha=0.3)

        plt.tight_layout()
        plt.show()
else:
    print("\nCUDA not available - skipping GPU memory profiling")

## 9. Summary Report

Generate a formatted summary suitable for copying to the report.


In [None]:
# Print the collected metrics as Markdown tables for the written report.
print("\n" + "=" * 70)
print("MODEL ANALYSIS SUMMARY REPORT")
print("=" * 70)

print("\n### 5.1 Model Size\n")
print("| Metric                   | Value       |")
print("| ------------------------ | ----------- |")
print(f"| **Total Parameters**     | {format_number(param_counts['total'])} |")
print(f"| **Trainable Parameters** | {format_number(param_counts['trainable'])} |")
print(f"| **Model Size (FP32)**    | {format_bytes(size_fp32)} |")
print(f"| **Model Size (FP16)**    | {format_bytes(size_fp16)} |")

print("\n### 5.2 Parameter Breakdown by Component\n")
print("| Component                  | Parameters | Percentage |")
print("| -------------------------- | ---------- | ---------- |")
for _, row in component_df.iterrows():
    # Clean up component name for table
    comp_name = row["Component"].replace("  - ", "  └─ ")
    print(f"| {comp_name:<26} | {row['Formatted']:<10} | {row['Percentage']:<10} |")
print(
    f"| **Total**                  | {format_number(total_params):<10} | 100%       |"
)

if torch.cuda.is_available() and memory_results:
    print("\n### 5.3 Memory Requirements\n")
    print("| Configuration                    | GPU Memory |")
    print("| -------------------------------- | ---------- |")
    for result in memory_results:
        bs = result["Batch Size"]
        amp = result["AMP (GB)"]
        # Every figure comes from an inference-only forward pass, so the rows
        # are labelled as inference. Rows without an AMP measurement are
        # skipped, and the batch_size=1 row is the single-image figure.
        if amp != "N/A":
            config_label = f"Inference (batch_size={bs}, AMP)"
            print(f"| {config_label:<32} | ~{amp} GB |")
    print(
        "\nNote: measured during inference; training memory will be higher"
        " (gradients and optimizer states)."
    )

if THOP_AVAILABLE and flops_results:
    print("\n### 5.4 FLOPs Analysis\n")
    print("| Input Size | FLOPs      |")
    print("| ---------- | ---------- |")
    for result in flops_results:
        print(f"| {result['input_size']:<10} | ~{result['flops_str']:<10} |")

print("\n" + "=" * 70)
print("Copy the tables above to your REPORT.md file")
print("=" * 70)

## 10. Export Results

Save analysis results for future reference.


In [None]:
import json

# Results are written next to the project sources, under analysis_results/.
output_dir = project_root / "analysis_results"
output_dir.mkdir(parents=True, exist_ok=True)

# Assemble the JSON payload section by section; the key order is the order
# in which the sections appear in the file.
export_data = dict(
    model_config=dict(
        num_classes=NUM_CLASSES,
        wandb_artifact=f"{WANDB_ENTITY}/{WANDB_PROJECT}/{ARTIFACT_NAME}",
    ),
    parameters=dict(
        total=int(param_counts["total"]),
        trainable=int(param_counts["trainable"]),
        non_trainable=int(param_counts["non_trainable"]),
        total_formatted=format_number(param_counts["total"]),
        trainable_formatted=format_number(param_counts["trainable"]),
    ),
    component_breakdown=[
        dict(
            component=rec["Component"],
            parameters=int(rec["Parameters"]),
            formatted=rec["Formatted"],
            percentage=rec["Percentage"],
        )
        for rec in component_df.to_dict("records")
    ],
    model_size=dict(
        fp32_bytes=int(size_fp32),
        fp16_bytes=int(size_fp16),
        int8_bytes=int(size_int8),
        fp32_formatted=format_bytes(size_fp32),
        fp16_formatted=format_bytes(size_fp16),
        int8_formatted=format_bytes(size_int8),
    ),
)

# Optional sections: present only when the corresponding analysis produced data.
if THOP_AVAILABLE and flops_results:
    export_data["flops"] = [
        dict(
            input_size=entry["input_size"],
            flops=int(entry["flops"]),
            flops_formatted=entry["flops_str"],
        )
        for entry in flops_results
    ]

if torch.cuda.is_available() and memory_results:
    export_data["memory_usage"] = memory_results

# Save to JSON
output_file = output_dir / "model_analysis.json"
output_file.write_text(json.dumps(export_data, indent=2))

print(f"\n✓ Analysis results saved to: {output_file}")

# Also save component breakdown as CSV
components_csv = output_dir / "model_components.csv"
component_df.to_csv(components_csv, index=False)
print(f"✓ Component breakdown saved to: {components_csv}")