In [1]:
# ============ DeepConvContext (A100 80GB high-throughput v2.1) structure & size ============

import json
from pathlib import Path

import torch
import torch.nn as nn

print("\n[DeepConvContext (A100 80GB high-throughput v2.1) – structure & size]")

# ---------------------------
# 1) Determine NUM_CLASSES
# ---------------------------
# NUM_CLASSES is read from <BASE>/configs/classes.json (key "num_classes")
# when that file exists; otherwise DEFAULT_NUM_CLASSES is used and a warning
# is printed so the user can correct it by hand.
BASE = Path("/content")
CFG_DIR = BASE / "configs"
CLASSES_JSON = CFG_DIR / "classes.json"

# Change this default if your experiment uses a different number of classes.
# The warning below is built from this constant, so the message cannot go
# stale when the value is edited.
DEFAULT_NUM_CLASSES = 8

try:
    # EAFP: open the file directly instead of exists()+open(), which is
    # race-prone. JSON is UTF-8 by specification, so decode it explicitly.
    with open(CLASSES_JSON, "r", encoding="utf-8") as f:
        classes_cfg = json.load(f)
except (FileNotFoundError, NotADirectoryError):
    # Missing config (or missing parent directory): fall back to the default.
    NUM_CLASSES = DEFAULT_NUM_CLASSES
    print(f"Warning: {CLASSES_JSON} not found. Using default NUM_CLASSES = {NUM_CLASSES}.")
    print("Please update NUM_CLASSES manually if this does not match your setup.")
else:
    # A malformed config (missing key, non-integer value) is deliberately
    # allowed to raise: silently using the default would hide the mistake.
    NUM_CLASSES = int(classes_cfg["num_classes"])
    print(f"Detected NUM_CLASSES from configs: {NUM_CLASSES}")

# ---------------------------
# 2) Hyperparameters (must match training script)
# ---------------------------
# Input / windowing sizes.
NUM_CHANNELS = 6
SAMPLES_PER_WIN = 150
STRIDE_SAMPLES = 75
CONTEXT_LEN_WINS = 100

# Optimisation settings; of these only the dropout probability and the
# bidirectional flag are handed to the model constructor in this script.
EPOCHS = 30  # does not affect structure
BASE_LR = 1e-4
WEIGHT_DECAY = 1e-6
DROPOUT_P = 0.5
BIDIRECTIONAL = True

# Layer widths of the network.
CONV_CHANNELS = 64
INTRA_LSTM_UNITS = 128
INTER_LSTM_UNITS = 128
PROJECTION_DIM = 128
KERNEL_SIZE = 9

# Echo the structure-relevant settings, one per line, labels left-aligned
# in a 16-character column.
_size_check_report = {
    "NUM_CLASSES": NUM_CLASSES,
    "NUM_CHANNELS": NUM_CHANNELS,
    "SAMPLES_PER_WIN": SAMPLES_PER_WIN,
    "CONTEXT_LEN_WINS": CONTEXT_LEN_WINS,
    "CONV_CHANNELS": CONV_CHANNELS,
    "INTRA_LSTM_UNITS": INTRA_LSTM_UNITS,
    "INTER_LSTM_UNITS": INTER_LSTM_UNITS,
    "PROJECTION_DIM": PROJECTION_DIM,
    "BIDIRECTIONAL": BIDIRECTIONAL,
}

print("\nConfig for size check:")
for _label, _value in _size_check_report.items():
    print(f"  {_label:<16} = {_value}")
# ---------------------------
# 3) Model definition (must match Step 10 exactly)
# ---------------------------
class DeepConvLSTM_Intra(nn.Module):
    """Window encoder: four Conv1d + ReLU stages followed by a one-layer LSTM.

    Each convolution uses ``padding = kernel_size // 2``. ``forward`` takes a
    batch of windows shaped (N, C, T) and returns the final hidden state of
    the LSTM, shaped (N, lstm_units).
    """

    def __init__(self, in_ch=6, conv_ch=64, kernel_size=9, lstm_units=128):
        super().__init__()
        conv_kwargs = dict(kernel_size=kernel_size, padding=kernel_size // 2)
        # Attribute names and creation order are kept as-is so that the
        # state_dict keys and the parameter ordering stay the same.
        self.conv1 = nn.Conv1d(in_ch, conv_ch, **conv_kwargs)
        self.conv2 = nn.Conv1d(conv_ch, conv_ch, **conv_kwargs)
        self.conv3 = nn.Conv1d(conv_ch, conv_ch, **conv_kwargs)
        self.conv4 = nn.Conv1d(conv_ch, conv_ch, **conv_kwargs)
        self.relu = nn.ReLU(inplace=True)
        self.lstm = nn.LSTM(conv_ch, lstm_units, num_layers=1, batch_first=True)

    def forward(self, x_win):
        """Encode windows ``x_win`` of shape (N, C, T) into (N, lstm_units)."""
        feats = x_win
        for conv in (self.conv1, self.conv2, self.conv3, self.conv4):
            feats = self.relu(conv(feats))
        # The LSTM is batch_first, so move the time axis ahead of channels.
        sequence = feats.permute(0, 2, 1)       # (N, T, C)
        _, (last_hidden, _) = self.lstm(sequence)
        # Hidden state of the last (here: only) LSTM layer.
        return last_hidden[-1]


class DeepConvContext(nn.Module):
    """Sequence-of-windows classifier built on a per-window encoder.

    Pipeline: every window is encoded by ``DeepConvLSTM_Intra``, linearly
    projected to ``projection_dim``, passed through an (optionally
    bidirectional) LSTM that runs across the windows of a sequence, then
    through dropout and a linear layer producing per-window class logits.

    Args:
        num_channels: Input channels per window (``in_ch`` of the encoder).
        num_classes: Size of the logit vector produced for every window.
        conv_channels: Channel width of the encoder's convolutions.
        intra_lstm_units: Hidden size of the encoder's LSTM.
        inter_lstm_units: Hidden size (per direction) of the cross-window LSTM.
        projection_dim: Output width of the projection layer.
        dropout: Dropout probability applied before the classifier.
        bidirectional: Whether the cross-window LSTM is bidirectional.
        kernel_size: Convolution kernel size for the encoder. ``None`` (the
            default) keeps the previous behaviour of reading the module-level
            ``KERNEL_SIZE`` constant, falling back to 9 (the encoder's own
            default) when that constant is not defined.
    """

    def __init__(self, num_channels=6, num_classes=8,
                 conv_channels=64, intra_lstm_units=128,
                 inter_lstm_units=128, projection_dim=128,
                 dropout=0.5, bidirectional=True, kernel_size=None):
        super().__init__()
        if kernel_size is None:
            # Backward compatibility: earlier versions read the global
            # directly, which made the class unusable outside this script.
            kernel_size = globals().get("KERNEL_SIZE", 9)
        self.intra = DeepConvLSTM_Intra(
            in_ch=num_channels,
            conv_ch=conv_channels,
            kernel_size=kernel_size,
            lstm_units=intra_lstm_units
        )
        self.proj  = nn.Linear(intra_lstm_units, projection_dim)
        self.inter = nn.LSTM(
            input_size=projection_dim,
            hidden_size=inter_lstm_units,
            num_layers=1,
            batch_first=True,
            bidirectional=bidirectional
        )
        # A bidirectional LSTM concatenates both directions on the feature axis.
        inter_out = inter_lstm_units * (2 if bidirectional else 1)
        self.dropout = nn.Dropout(dropout)
        self.fc = nn.Linear(inter_out, num_classes)

    def forward(self, x):               # x: (B, S, C, T) → (B, S, K)
        """Return per-window logits of shape (B, S, num_classes).

        Args:
            x: Tensor of shape (B, S, C, T).

        Raises:
            ValueError: If ``x`` is not 4-dimensional.
        """
        if x.dim() != 4:
            raise ValueError(
                f"expected input of shape (B, S, C, T), got {tuple(x.shape)}"
            )
        B, S, C, T = x.shape
        # Fold the sequence axis into the batch so every window is encoded
        # independently by the intra-window model.
        x2d = x.reshape(B * S, C, T)
        feats = self.intra(x2d).reshape(B, S, -1)  # (B, S, intra_lstm_units)
        proj  = self.proj(feats)                   # (B, S, projection_dim)
        inter_out, _ = self.inter(proj)            # (B, S, inter_out)
        inter_out = self.dropout(inter_out)
        logits = self.fc(inter_out)                # (B, S, num_classes)
        return logits

# ---------------------------
# 4) Instantiate model and compute size
# ---------------------------
# Build the network from the hyperparameter constants of this script.
model_kwargs = dict(
    num_channels=NUM_CHANNELS,
    num_classes=NUM_CLASSES,
    conv_channels=CONV_CHANNELS,
    intra_lstm_units=INTRA_LSTM_UNITS,
    inter_lstm_units=INTER_LSTM_UNITS,
    projection_dim=PROJECTION_DIM,
    dropout=DROPOUT_P,
    bidirectional=BIDIRECTIONAL,
)
model = DeepConvContext(**model_kwargs)

print("\n====== nn.Module structure ======")
print(model)

# Parameter counts: one pass over the parameters, then two sums over the
# collected (element count, requires_grad) pairs.
numel_and_grad = [(p.numel(), p.requires_grad) for p in model.parameters()]
total_params = sum(count for count, _ in numel_and_grad)
trainable_params = sum(count for count, needs_grad in numel_and_grad if needs_grad)

print("\n====== Parameter statistics ======")
print("Total params:      {:,}".format(total_params))
print("Trainable params:  {:,}".format(trainable_params))

# One line per parameter tensor: qualified name, shape, element count.
print("\n====== Per-layer parameter counts ======")
for name, p in model.named_parameters():
    print("{:40s} shape={}  params={:,}".format(name, tuple(p.shape), p.numel()))

# Size estimation (weights only)
def fmt_mb(n_bytes: int) -> str:
    """Format a byte count as mebibytes with two decimals, e.g. ``"1.50 MB"``."""
    mebibytes = n_bytes / 1024 / 1024
    return "{:.2f} MB".format(mebibytes)

# Analytic estimate: parameter count times bytes per element.
bytes_fp32 = total_params * 4   # float32: 4 bytes per parameter
bytes_fp16 = total_params * 2   # float16: 2 bytes per parameter

print("\n====== Model size estimate (parameters only) ======")
print(f"FP32 (float32, 4B/param): {fmt_mb(bytes_fp32)}")
print(f"FP16 (float16, 2B/param): {fmt_mb(bytes_fp16)}")

# Save a randomly initialised state_dict to check actual .pth size
models_dir = BASE / "models"
models_dir.mkdir(parents=True, exist_ok=True)
tmp_path = models_dir / "deepconvcontext_a100_v21_dummy.pth"
try:
    torch.save(model.state_dict(), tmp_path)
    file_bytes = tmp_path.stat().st_size
    print(f"\nRandom-initialised state_dict saved to {tmp_path.name}")
    print(f"Actual .pth file size: {fmt_mb(file_bytes)}")
finally:
    # Always remove the dummy checkpoint, even if saving or measuring failed,
    # so a random-weight file is never left in the models directory where it
    # could be mistaken for a trained checkpoint.
    tmp_path.unlink(missing_ok=True)

print("\n[DeepConvContext (A100 80GB high-throughput v2.1) structure & size – done]\n")


[DeepConvContext (A100 80GB high-throughput v2.1) – structure & size]
Please update NUM_CLASSES manually if this does not match your setup.

Config for size check:
  NUM_CLASSES      = 8
  NUM_CHANNELS     = 6
  SAMPLES_PER_WIN  = 150
  CONTEXT_LEN_WINS = 100
  CONV_CHANNELS    = 64
  INTRA_LSTM_UNITS = 128
  INTER_LSTM_UNITS = 128
  PROJECTION_DIM   = 128
  BIDIRECTIONAL    = True

DeepConvContext(
  (intra): DeepConvLSTM_Intra(
    (conv1): Conv1d(6, 64, kernel_size=(9,), stride=(1,), padding=(4,))
    (conv2): Conv1d(64, 64, kernel_size=(9,), stride=(1,), padding=(4,))
    (conv3): Conv1d(64, 64, kernel_size=(9,), stride=(1,), padding=(4,))
    (conv4): Conv1d(64, 64, kernel_size=(9,), stride=(1,), padding=(4,))
    (relu): ReLU(inplace=True)
    (lstm): LSTM(64, 128, batch_first=True)
  )
  (proj): Linear(in_features=128, out_features=128, bias=True)
  (inter): LSTM(128, 128, batch_first=True, bidirectional=True)
  (dropout): Dropout(p=0.5, inplace=False)
  (fc): Linear(in_feature