### Git Setup Code

In [10]:
import sys, importlib


def purge_modules(package="src"):
    """
    Evict a package and all of its submodules from the import cache.

    Removing only the top-level package is not enough: an already-imported
    submodule (e.g. ``src.train``) stays in ``sys.modules`` and would be
    returned as-is by the next ``import``, even after the files on disk were
    replaced by a fresh clone.

    Parameters
    ----------
    package : str
        Top-level package name whose cached modules should be dropped.

    Returns
    -------
    list of str
        Sorted names of the modules that were removed (empty if none were cached).
    """
    prefix = package + "."
    # Snapshot the keys first so the dict is not mutated while being iterated.
    stale = sorted(
        name for name in list(sys.modules)
        if name == package or name.startswith(prefix)
    )
    for name in stale:
        sys.modules.pop(name, None)
    # Make the import system re-scan directories for newly created files.
    importlib.invalidate_caches()
    return stale


# Trailing semicolon keeps the returned list out of the cell output.
purge_modules("src");

In [11]:
# Start from a clean checkout: delete any previous clone, then clone the repo again.
# NOTE(review): no branch/tag/commit is given, so this fetches whatever the remote's
# default branch points to at run time.
!rm -rf /kaggle/working/biomed-lite-model
!git clone https://github.com/nasifsafwan/biomed-lite-model.git /kaggle/working/biomed-lite-model
# Recursively list the src/ directory to confirm the expected files were cloned.
!ls -R /kaggle/working/biomed-lite-model/src

Cloning into '/kaggle/working/biomed-lite-model'...
remote: Enumerating objects: 66, done.[K
remote: Counting objects: 100% (66/66), done.[K
remote: Compressing objects: 100% (62/62), done.[K
remote: Total 66 (delta 31), reused 0 (delta 0), pack-reused 0 (from 0)[K
Receiving objects: 100% (66/66), 22.17 KiB | 3.70 MiB/s, done.
Resolving deltas: 100% (31/31), done.
/kaggle/working/biomed-lite-model/src:
backbone.py  blocks.py	dataloader.py  head.py	hello.py  __init__.py  train.py


In [18]:
import sys, pathlib
import importlib

# Location of the cloned repository; fail fast if the clone step did not run.
repo_root = pathlib.Path("/kaggle/working/biomed-lite-model")
assert repo_root.exists(), "repo not found—did the clone succeed?"

# Put the checkout at the front of the import search path, but only once.
if not any(entry == str(repo_root) for entry in sys.path):
    sys.path[:0] = [str(repo_root)]

# sanity check: refresh the finder caches and show the first search-path entry
importlib.invalidate_caches()
print("sys.path[0]:", sys.path[0])

sys.path[0]: /kaggle/working/biomed-lite-model


### RSNA Pulmonary Embolism: Subset Extraction and Training

In [5]:
import os, math
from pathlib import Path
import pandas as pd
import numpy as np
import pydicom
from PIL import Image

# >>>> tweak these if you want <<<<
# Inputs: label table and DICOM tree of the Kaggle competition dataset.
CSV_PATH = "/kaggle/input/rsna-str-pulmonary-embolism-detection/train.csv"
DICOM_ROOT = "/kaggle/input/rsna-str-pulmonary-embolism-detection/train"

# Output: directory that receives the converted PNG subset.
OUT_DIR = Path("/kaggle/working/data/PE_small")

# Number of slices to draw per class, keyed on pe_present_on_image.
N_POS = 800  # positives (pe_present_on_image==1)
N_NEG = 800  # negatives (pe_present_on_image==0)

# Random seed passed to the sampling calls.
SEED = 42

# Create the output directory (and any missing parents); no error if it already exists.
OUT_DIR.mkdir(exist_ok=True, parents=True)

def hu_window(dicom, center=100, width=700):
    """
    Window a CT slice into an 8-bit displayable range.

    The stored pixel values are rescaled with ``RescaleSlope`` /
    ``RescaleIntercept`` (defaulting to 1 and 0 when the tags are absent),
    clipped to ``[center - width/2, center + width/2]`` and mapped linearly
    onto ``[0, 255]``.

    With the defaults the retained range is [-250, 450].
    NOTE(review): this was previously described as a "lung-ish" window, but a
    window centred at +100 looks more like a soft-tissue / contrast-vessel
    setting -- confirm which window is actually intended.

    Parameters
    ----------
    dicom : object
        Anything exposing ``pixel_array`` (array-like with ``astype``) and,
        optionally, ``RescaleSlope`` / ``RescaleIntercept``.
    center : float
        Window centre, in rescaled units.
    width : float
        Window width, in rescaled units; must be positive.

    Returns
    -------
    numpy.ndarray
        ``uint8`` array with the same shape as ``dicom.pixel_array``.

    Raises
    ------
    ValueError
        If ``width`` is not positive (the window would be empty or inverted
        and every pixel would collapse to a single value).
    """
    if width <= 0:
        raise ValueError(f"width must be positive, got {width!r}")

    # Convert raw stored values using the rescale tags when present
    img = dicom.pixel_array.astype(np.float32)
    slope = float(getattr(dicom, "RescaleSlope", 1))
    inter = float(getattr(dicom, "RescaleIntercept", 0))
    img = img * slope + inter

    low  = center - width / 2.0
    high = center + width / 2.0
    img = np.clip(img, low, high)

    # normalize to [0,255]; width > 0 is guaranteed above, so no epsilon is
    # needed in the denominator (an epsilon makes the top of narrow windows
    # fall just short of 255 and truncate to 254).
    img = (img - low) / (high - low)
    img = (img * 255.0).clip(0, 255).astype(np.uint8)
    return img

def save_png(dcm_path: Path, png_path: Path):
    """Read one DICOM file, window it with ``hu_window`` (default settings)
    and write the result to ``png_path`` as an 8-bit grayscale image."""
    dataset = pydicom.dcmread(str(dcm_path))
    # uint8 array with the same shape as the slice's pixel data
    pixels_u8 = hu_window(dataset)
    grayscale = Image.fromarray(pixels_u8, mode="L")
    grayscale.save(png_path)

# Build subset file list: draw N_POS positive and N_NEG negative slices,
# seeded so the same rows are selected on every run.
df = pd.read_csv(CSV_PATH)
pos = df[df["pe_present_on_image"] == 1].sample(N_POS, random_state=SEED)
neg = df[df["pe_present_on_image"] == 0].sample(N_NEG, random_state=SEED)
sub = pd.concat([pos, neg]).reset_index(drop=True)

# Convert & copy to class folders, keeping a per-class tally of what was
# written and what was skipped so a shrunken or imbalanced subset is visible
# instead of silently flowing into training.
saved   = {"pe": 0, "normal": 0}
skipped = {"pe": 0, "normal": 0}
for _, row in sub.iterrows():
    study  = row["StudyInstanceUID"]
    series = row["SeriesInstanceUID"]
    sop    = row["SOPInstanceUID"]
    dcm    = Path(DICOM_ROOT) / study / series / f"{sop}.dcm"
    label  = "pe" if row["pe_present_on_image"] == 1 else "normal"
    out    = OUT_DIR / label / f"{sop}.png"
    out.parent.mkdir(parents=True, exist_ok=True)
    try:
        save_png(dcm, out)
    except Exception as e:
        # Best-effort: keep going, but record the failure. Skips are not
        # necessarily rare -- in the recorded run they came from missing
        # JPEG-Lossless decoder plugins (gdcm / pylibjpeg), not corrupt files.
        print(f"[skip] {dcm} -> {e}")
        skipped[label] += 1
        continue
    saved[label] += 1

print("Subset ready at:", OUT_DIR)
print(f"Converted: {saved} | skipped: {skipped}")
if sum(skipped.values()):
    print(
        f"[warn] {sum(skipped.values())} of {len(sub)} sampled slices were not converted; "
        "class counts may differ from N_POS/N_NEG."
    )

[skip] /kaggle/input/rsna-str-pulmonary-embolism-detection/train/e0c019c80f74/38fabc5d9a3b/8c1d00805936.dcm -> Unable to decompress 'JPEG Lossless, Non-Hierarchical, First-Order Prediction (Process 14 [Selection Value 1])' pixel data because all plugins are missing dependencies:
	gdcm - requires gdcm>=3.0.10
	pylibjpeg - requires pylibjpeg>=2.0 and pylibjpeg-libjpeg>=2.1
[skip] /kaggle/input/rsna-str-pulmonary-embolism-detection/train/00c38669b4fd/efc2770a05cb/8504c69ee6ad.dcm -> Unable to decompress 'JPEG Lossless, Non-Hierarchical, First-Order Prediction (Process 14 [Selection Value 1])' pixel data because all plugins are missing dependencies:
	gdcm - requires gdcm>=3.0.10
	pylibjpeg - requires pylibjpeg>=2.0 and pylibjpeg-libjpeg>=2.1
[skip] /kaggle/input/rsna-str-pulmonary-embolism-detection/train/50f620ca2359/6c84a309d901/63c189973273.dcm -> Unable to decompress 'JPEG Lossless, Non-Hierarchical, First-Order Prediction (Process 14 [Selection Value 1])' pixel data because all plugin

In [6]:
import torchvision.transforms as T
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader, random_split
import torch

data_dir = "/kaggle/working/data/PE_small"

# Preprocessing applied to every image: fixed 224x224 size, three identical
# grayscale channels, tensor conversion, then per-channel (x - 0.5) / 0.5.
transform = T.Compose(
    [
        T.Resize((224, 224)),
        T.Grayscale(num_output_channels=3),
        T.ToTensor(),
        T.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
    ]
)

# One sub-folder per class under data_dir.
full_ds = ImageFolder(root=data_dir, transform=transform)

# 80 / 10 / 10 split; the test part absorbs the rounding remainder.
n = len(full_ds)
n_train, n_val = int(0.8 * n), int(0.1 * n)
n_test = n - (n_train + n_val)

# Seeded generator so the split is the same on every run.
train_ds, val_ds, test_ds = random_split(
    full_ds,
    lengths=[n_train, n_val, n_test],
    generator=torch.Generator().manual_seed(42),
)

# Only the training loader shuffles.
train_loader = DataLoader(train_ds, batch_size=32, num_workers=2, shuffle=True)
val_loader = DataLoader(val_ds, batch_size=32, num_workers=2, shuffle=False)
test_loader = DataLoader(test_ds, batch_size=32, num_workers=2, shuffle=False)

print("Classes:", full_ds.classes)
print("Train/Val/Test:", *map(len, (train_ds, val_ds, test_ds)))

Classes: ['normal', 'pe']
Train/Val/Test: 1227 153 154


In [19]:
from src.train import build_model, train_one_epoch, evaluate
import torch

# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Two-class model from the cloned repo, moved onto the selected device.
model = build_model(num_classes=2)
model = model.to(device)

criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

# 40 passes over the training loader, each followed by a validation pass.
# NOTE(review): in the recorded output train_loss falls towards zero while
# val_loss stays around 1-2 -- consider regularisation / early stopping.
for epoch in range(40):
    tr_loss = train_one_epoch(model, train_loader, criterion, optimizer, device)
    va_loss, va_acc = evaluate(model, val_loader, criterion, device)
    print(
        f"Epoch {epoch+1}: train_loss={tr_loss:.4f}, val_loss={va_loss:.4f}, val_acc={va_acc:.4f}"
    )

Epoch 1: train_loss=0.5719, val_loss=1.4687, val_acc=0.4444
Epoch 2: train_loss=0.4721, val_loss=0.5613, val_acc=0.6732
Epoch 3: train_loss=0.3384, val_loss=1.0251, val_acc=0.7190
Epoch 4: train_loss=0.1476, val_loss=0.9990, val_acc=0.6797
Epoch 5: train_loss=0.1210, val_loss=1.0335, val_acc=0.7778
Epoch 6: train_loss=0.0663, val_loss=1.8710, val_acc=0.5686
Epoch 7: train_loss=0.0334, val_loss=1.3723, val_acc=0.7712
Epoch 8: train_loss=0.0681, val_loss=1.2183, val_acc=0.7712
Epoch 9: train_loss=0.0824, val_loss=1.1223, val_acc=0.6993
Epoch 10: train_loss=0.0321, val_loss=1.2641, val_acc=0.7451
Epoch 11: train_loss=0.0143, val_loss=1.3723, val_acc=0.7516
Epoch 12: train_loss=0.0047, val_loss=1.4602, val_acc=0.7582
Epoch 13: train_loss=0.0157, val_loss=1.9888, val_acc=0.6667
Epoch 14: train_loss=0.0576, val_loss=1.1242, val_acc=0.7059
Epoch 15: train_loss=0.0662, val_loss=1.1093, val_acc=0.7255
Epoch 16: train_loss=0.0245, val_loss=1.1352, val_acc=0.6667
Epoch 17: train_loss=0.0101, val_

In [16]:
import sys, pathlib
import importlib

# NOTE(review): this cell repeats the sys.path setup that already appears
# earlier in the notebook; one of the two copies can likely be removed.

# Location of the cloned repository; fail fast if the clone step did not run.
repo_root = pathlib.Path("/kaggle/working/biomed-lite-model")
assert repo_root.exists(), "repo not found—did the clone succeed?"

# Put the checkout at the front of the import search path, but only once.
if not any(entry == str(repo_root) for entry in sys.path):
    sys.path[:0] = [str(repo_root)]

# sanity check: refresh the finder caches and show the first search-path entry
importlib.invalidate_caches()
print("sys.path[0]:", sys.path[0])

sys.path[0]: /kaggle/working/biomed-lite-model
