# 1. Load Model

In [3]:
import torch
import yaml
import sys
import os
import re

In [7]:
# Run on the GPU when CUDA is usable; otherwise fall back to the CPU.
if torch.cuda.is_available():
    DEVICE = torch.device("cuda")
else:
    DEVICE = torch.device("cpu")

In [9]:
# Make the 'lib' directory next to this notebook importable.
# The directory is resolved against the current working directory, so the
# kernel has to be started from the folder that contains 'lib'.
notebook_dir = os.getcwd()
lib_path = os.path.abspath(os.path.join(notebook_dir, 'lib'))

# Put 'lib' at the front of sys.path so its modules take priority in lookups.
# Any earlier copy of the entry is dropped first, so re-running this cell
# does not pile up duplicate sys.path entries.
sys.path[:] = [entry for entry in sys.path if entry != lib_path]
sys.path.insert(0, lib_path)

from models import build_model
from config import config

# 2. Load the configuration
cfg_file = "./cvt-13-224x224.yaml"

# Read the YAML directly to pick up its optional 'BASE' entry (presumably a
# path, or list of paths, to parent config files). The `or {}` / `or []`
# fallbacks keep an empty file or a null BASE from raising below.
with open(cfg_file, 'r') as f:
    yaml_cfg = yaml.safe_load(f) or {}
base_cfg = yaml_cfg.get('BASE') or []
if isinstance(base_cfg, str):
    base_cfg = [base_cfg]

config.defrost()
try:
    # Merge parent configs first so that values in cfg_file override them.
    # Previously base_cfg was computed but never used, so BASE files were
    # silently ignored.
    # NOTE(review): base paths are resolved relative to cfg_file's directory,
    # and BASE entries inside the parent files are not followed - confirm
    # this matches the project's config convention.
    for _base_file in base_cfg:
        if _base_file:  # skip empty placeholder entries such as ''
            config.merge_from_file(
                os.path.join(os.path.dirname(cfg_file), _base_file)
            )
    config.merge_from_file(cfg_file)
    # Override the number of output classes for the model built from this config.
    config.MODEL.NUM_CLASSES = 10
finally:
    # Re-freeze even when a merge fails, so the shared config object is not
    # left mutable by accident.
    config.freeze()
print("Configuration loaded successfully.")

# 3. Build the model
# Instantiate the network from the loaded config, then place it on the
# selected device (Module.to returns the module itself).
model: torch.nn.Module = build_model(config)
print("Model built successfully.")
model = model.to(DEVICE)

# Load the pretrained checkpoint, mapping its tensors onto DEVICE.
# weights_only=True restricts unpickling to tensors and plain containers.
state_dict = torch.load("./CvT-13-224x224-IN-1k.pth", weights_only=True, map_location=DEVICE)

# 4. Extract Only Backbone Weights for Transfer Learning
# Some checkpoints nest the weights one level down, e.g. {"state_dict": sd}
# or {"model": sd}. Unwrap one level, but only when the nested value is itself
# a dict, so a flat state dict passes through untouched.
if isinstance(state_dict, dict):
    for _wrapper_key in ("state_dict", "model"):
        if isinstance(state_dict.get(_wrapper_key), dict):
            state_dict = state_dict[_wrapper_key]
            break
# Strip the "module." prefix that DistributedDataParallel adds to every key.
state_dict = {k.removeprefix("module."): v for k, v in state_dict.items()}

# Patterns for head keys — adapt if your head is named differently
# (common names: "head.*", "mlp_head.*", "classifier.*", "fc.*")
head_pat = re.compile(r"^(head\.|mlp_head\.|classifier\.|fc\.)")
# Keep every tensor except the classification head, which stays at its fresh
# initialisation (presumably because the checkpoint's head was trained for a
# different number of classes than the model built above).
backbone_only = {k: v for k, v in state_dict.items() if not head_pat.match(k)}

# strict=False tolerates the head keys that were filtered out; the returned
# key lists are printed so that any other mismatch is visible.
missing, unexpected = model.load_state_dict(backbone_only, strict=False)
print("Missing keys:", missing)
print("Unexpected keys:", unexpected)
# Switch to evaluation mode before running the model.
model.eval()

# 5. Perform a forward pass
# Smoke test: push one random batch of shape (6, 3, 224, 224) through the
# network with gradient tracking disabled and report the resulting shape.
input_tensor = torch.randn(6, 3, 224, 224)
input_tensor = input_tensor.to(DEVICE)
with torch.no_grad():
    output = model(input_tensor)
print(f"Output tensor shape: {output.shape}")

Configuration loaded successfully.
Model built successfully.
Missing keys: ['head.weight', 'head.bias']
Unexpected keys: []
Output tensor shape: torch.Size([6, 10])


# 2. Load Data

In [11]:
# Path to the CSV file (train.csv), relative to the working directory.
CSV_PATH = "./data/train.csv"

In [10]:
import pandas as pd