# ResNet34-3D Inference → CSV (train/val)
Run the cells below in order to produce `cnn_preds/resnet_train.csv` and `cnn_preds/resnet_val.csv`.


In [1]:
import os
from pathlib import Path
# Root folder that holds the split folders, the case-list CSVs and the outputs.
# A raw string already keeps backslashes literal, so they must not be doubled.
DATA_ROOT = Path(r"C:\Users\rubia\Downloads").resolve()
print("DATA_ROOT =", DATA_ROOT)

# One folder per data split, keyed by split name.
SPLIT_DIRS = {
    "train": DATA_ROOT / "train",
    "val": DATA_ROOT / "val",
    "test": DATA_ROOT / "test",
}
print(SPLIT_DIRS)

TRAIN_CASES_CSV = DATA_ROOT / "train_cases (2).csv"
# Prefer the plain file name and fall back to the "(2)" copy when it is absent.
VAL_CASES_CSV = (
    DATA_ROOT / "val_cases.csv"
    if (DATA_ROOT / "val_cases.csv").exists()
    else DATA_ROOT / "val_cases (2).csv"
)

# Folder that receives the prediction CSVs; created up front so writes succeed.
CNN_PREDS_DIR = DATA_ROOT / "cnn_preds"
CNN_PREDS_DIR.mkdir(parents=True, exist_ok=True)

print("TRAIN_CASES_CSV:", TRAIN_CASES_CSV)
print("VAL_CASES_CSV:", VAL_CASES_CSV)
print("CNN_PREDS_DIR:", CNN_PREDS_DIR)


DATA_ROOT = C:\Users\rubia\Downloads
{'train': WindowsPath('C:/Users/rubia/Downloads/train'), 'val': WindowsPath('C:/Users/rubia/Downloads/val'), 'test': WindowsPath('C:/Users/rubia/Downloads/test')}
TRAIN_CASES_CSV: C:\Users\rubia\Downloads\train_cases (2).csv
VAL_CASES_CSV: C:\Users\rubia\Downloads\val_cases.csv
CNN_PREDS_DIR: C:\Users\rubia\Downloads\cnn_preds


In [3]:
import pandas as pd
from pathlib import Path

# --- Rewrite the raw train case list as a clean two-column CSV (id,label) ---
DATA_ROOT = Path(r"C:\Users\rubia\Downloads")
src = DATA_ROOT.joinpath("train_cases (2).csv")  # raw input file
dst = DATA_ROOT.joinpath("train_cases.csv")      # cleaned output file

# The input is read as header-less; sep=None makes the python engine sniff the
# delimiter. Only the first two columns are kept and given explicit names.
df = pd.read_csv(src, sep=None, engine="python", header=None).iloc[:, :2]
df.columns = ["id", "label"]
df.to_csv(dst, encoding="utf-8", index=False)
print("Wrote:", dst)

# --- Point the case-list variables at the cleaned file ---
DATA_ROOT = Path(r"C:\Users\rubia\Downloads").resolve()
TRAIN_CASES_CSV = DATA_ROOT / "train_cases.csv"  # written above with id,label headers
VAL_CASES_CSV = DATA_ROOT / "val_cases.csv"
if not VAL_CASES_CSV.exists():
    # Fall back to the "(2)" copy when the plain file name is absent.
    VAL_CASES_CSV = DATA_ROOT / "val_cases (2).csv"


Wrote: C:\Users\rubia\Downloads\train_cases.csv


In [5]:
import pandas as pd, numpy as np, torch, torch.nn as nn, torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
try:
    import nibabel as nib
except ImportError as e:
    # Only a genuine import failure means "not installed"; chaining keeps the
    # underlying cause visible in the traceback.
    raise ImportError("nibabel is required. Install with: pip install nibabel") from e

# Run on the GPU when one is available, otherwise on the CPU.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("DEVICE:", DEVICE)

# Fail early if an expected input is absent. Explicit raises are used instead
# of `assert`, which is removed when Python runs with -O.
if not (DATA_ROOT / "train").exists():
    raise FileNotFoundError("Missing train folder")
if not (DATA_ROOT / "val").exists():
    raise FileNotFoundError("Missing val folder")
if not TRAIN_CASES_CSV.exists():
    raise FileNotFoundError("Missing train_cases.csv")
if not VAL_CASES_CSV.exists():
    raise FileNotFoundError(f"Missing val_cases.csv at {VAL_CASES_CSV}")


DEVICE: cpu


In [7]:
import pandas as pd

# Compact variant of the train case-list cleanup: keep the first two columns of
# the raw file and write them out with id,label headers. The delimiter is
# sniffed (engine="python", sep=None), the same parsing options the earlier
# cleanup cell uses, so both cells accept the same input files.
pd.read_csv(
    r"C:\Users\rubia\Downloads\train_cases (2).csv",
    header=None,
    engine="python",
    sep=None,
).iloc[:, :2].to_csv(
    r"C:\Users\rubia\Downloads\train_cases.csv",
    index=False,
    header=["id", "label"],
)


In [9]:
def conv3x3x3(in_planes, out_planes, stride=1):
    """Build a bias-free 3x3x3 ``nn.Conv3d`` with padding 1.

    Args:
        in_planes: Number of input channels.
        out_planes: Number of output channels.
        stride: Convolution stride (default 1).

    Returns:
        The configured ``nn.Conv3d`` module.
    """
    conv_kwargs = dict(kernel_size=3, stride=stride, padding=1, bias=False)
    return nn.Conv3d(in_planes, out_planes, **conv_kwargs)
class BasicBlock3D(nn.Module):
    """Residual block made of two 3x3x3 convolutions with batch norm.

    Args:
        inplanes: Channels of the incoming feature map.
        planes: Channels produced by both convolutions.
        stride: Stride of the first convolution (default 1).
        downsample: Optional module applied to the input before it is added
            back to the convolution output; ``None`` adds the input unchanged.
    """

    # Channel multiplier read by the network builder (planes * expansion).
    expansion = 1

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super().__init__()
        # Attribute names and creation order are kept stable because they
        # determine the keys (and ordering) of the module's state dict.
        self.conv1 = conv3x3x3(inplanes, planes, stride)
        self.bn1 = nn.BatchNorm3d(planes)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3x3(planes, planes)
        self.bn2 = nn.BatchNorm3d(planes)
        self.downsample = downsample

    def forward(self, x):
        """Return ``relu(bn2(conv2(relu(bn1(conv1(x))))) + shortcut(x))``."""
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)
        out = self.conv2(out)
        out = self.bn2(out)
        # The shortcut is the raw input unless a downsample module was given.
        shortcut = x if self.downsample is None else self.downsample(x)
        out += shortcut
        return self.relu(out)
class ResNet3D(nn.Module):
    """3D ResNet: conv/bn/relu/maxpool stem, four stages, global pool, linear head.

    Args:
        block: Block factory called as ``block(inplanes, planes, stride,
            downsample)``; must expose an ``expansion`` attribute.
        layers: Sequence of four ints, the number of blocks in each stage.
        num_classes: Output size of the final linear layer (default 2).
        in_channels: Channels of the input volume (default 1).
    """

    def __init__(self, block, layers, num_classes=2, in_channels=1):
        super().__init__()
        # Running channel count, updated by _make_layer as stages are built.
        self.inplanes = 64

        # Stem: 7x7x7 stride-2 convolution followed by a stride-2 max pool.
        self.conv1 = nn.Conv3d(
            in_channels, 64, kernel_size=7, stride=(2, 2, 2), padding=3, bias=False
        )
        self.bn1 = nn.BatchNorm3d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool3d(kernel_size=3, stride=2, padding=1)

        # Stages layer1..layer4; every stage after the first uses stride 2.
        stage_specs = ((64, 1), (128, 2), (256, 2), (512, 2))
        for number, (planes, stride) in enumerate(stage_specs, start=1):
            stage = self._make_layer(block, planes, layers[number - 1], stride=stride)
            setattr(self, f"layer{number}", stage)

        # Head: pool each channel to a single value, then classify.
        self.avgpool = nn.AdaptiveAvgPool3d((1, 1, 1))
        self.fc = nn.Linear(512 * block.expansion, num_classes)

    def _make_layer(self, block, planes, blocks, stride=1):
        """Build one stage of ``blocks`` blocks and advance ``self.inplanes``."""
        out_channels = planes * block.expansion
        shortcut = None
        # A 1x1x1 conv + batch norm projects the input whenever the first
        # block changes the resolution or the channel count.
        if stride != 1 or self.inplanes != out_channels:
            shortcut = nn.Sequential(
                nn.Conv3d(self.inplanes, out_channels, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm3d(out_channels),
            )
        stage = [block(self.inplanes, planes, stride, shortcut)]
        self.inplanes = out_channels
        stage.extend(block(self.inplanes, planes) for _ in range(1, blocks))
        return nn.Sequential(*stage)

    def forward(self, x):
        """Run the stem, the four stages and the head; return the logits."""
        pipeline = (
            self.conv1, self.bn1, self.relu, self.maxpool,
            self.layer1, self.layer2, self.layer3, self.layer4,
            self.avgpool,
        )
        for module in pipeline:
            x = module(x)
        x = torch.flatten(x, 1)
        return self.fc(x)
def resnet34_3d(num_classes=2, in_channels=1):
    """Create a ``ResNet3D`` of ``BasicBlock3D`` blocks with stage depths 3/4/6/3.

    Args:
        num_classes: Output size of the classification head (default 2).
        in_channels: Channels of the input volume (default 1).
    """
    stage_depths = [3, 4, 6, 3]
    return ResNet3D(
        BasicBlock3D,
        stage_depths,
        num_classes=num_classes,
        in_channels=in_channels,
    )


def _unwrap_state_dict(ckpt):
    if isinstance(ckpt, dict) and 'state_dict' in ckpt: sd=ckpt['state_dict']
    elif isinstance(ckpt, dict): sd=ckpt
    else: raise ValueError('Unexpected checkpoint format')
    if any(k.startswith('module.') for k in sd.keys()): sd={k.replace('module.','',1):v for k,v in sd.items()}
    return sd
def _infer_shapes_from_sd(sd):
    in_ch=1; ncls=2
    w=sd.get('conv1.weight');
    if isinstance(w, torch.Tensor) and w.ndim==5: in_ch=int(w.shape[1])
    fcw=sd.get('fc.weight');
    if isinstance(fcw, torch.Tensor) and fcw.ndim==2: ncls=int(fcw.shape[0])
    return in_ch, ncls
def load_resnet34_3d(weights_path: Path, *, strict: bool = False):
    """Load a 3D ResNet-34 from a checkpoint and return it in eval mode on DEVICE.

    The input-channel and class counts are inferred from the checkpoint
    tensors, so the network is built to match the stored ``conv1``/``fc``
    shapes where those entries exist.

    Args:
        weights_path: Path of the checkpoint file.
        strict: Passed to ``load_state_dict``. The default ``False`` tolerates
            key mismatches and reports them; ``True`` makes any mismatch an
            error.

    Returns:
        The model, moved to ``DEVICE`` and switched to ``eval()``.
    """
    try:
        ckpt = torch.load(weights_path, map_location='cpu', weights_only=True)
    except TypeError:
        # NOTE(review): presumably hit on torch versions whose torch.load has
        # no weights_only argument; this fallback unpickles the file
        # unrestricted, so only use checkpoints from a trusted source.
        ckpt = torch.load(weights_path, map_location='cpu')

    sd = _unwrap_state_dict(ckpt)
    in_ch, ncls = _infer_shapes_from_sd(sd)
    m = resnet34_3d(num_classes=ncls, in_channels=in_ch)

    missing, unexpected = m.load_state_dict(sd, strict=strict)
    if missing:
        print('[3D loader] Missing keys (first 10):', missing[:10])
        # Entries absent from the checkpoint keep the values they were
        # constructed with, so outputs depend on untrained parameters. Say so
        # explicitly instead of leaving a truncated key list as the only hint.
        print(
            f'[3D loader] WARNING: {len(missing)} model entries were not found in the '
            'checkpoint and keep their initial values; predictions may be unreliable.'
        )
    if unexpected:
        print('[3D loader] Unexpected keys (first 10):', unexpected[:10])
        print(f'[3D loader] WARNING: {len(unexpected)} checkpoint entries were ignored.')
    return m.to(DEVICE).eval()


# File names probed first inside a subject folder, in priority order: for each
# stem the compressed ".nii.gz" name comes before the plain ".nii" name.
CANDIDATE_FILENAMES = [
    f"{stem}{suffix}"
    for stem in ("NM", "nm", "T1", "t1", "QSM", "qsm")
    for suffix in (".nii.gz", ".nii")
]
def _list_dir(p: Path, max_items=10):
    try: items=sorted([f.name for f in p.iterdir()]); return items[:max_items]+(['...'] if len(items)>max_items else [])
    except Exception: return []
def _find_nifti_for_subject(base: Path, subject_id: str) -> Path:
    """Locate the NIfTI file of one subject.

    Looks in ``base / subject_id``: first for the names in
    ``CANDIDATE_FILENAMES`` (in that order), then for any ``*.nii`` file and
    finally any ``*.nii.gz`` file.

    Args:
        base: Folder of the split that contains one folder per subject.
        subject_id: Name of the subject's folder.

    Returns:
        Path of the selected NIfTI file.

    Raises:
        FileNotFoundError: If the subject folder is missing or holds no
            NIfTI file.
    """
    d = base / subject_id
    if not d.exists():
        raise FileNotFoundError(f'Subject folder not found: {d}\nExisting in {base} (first 10): {_list_dir(base)}')

    for n in CANDIDATE_FILENAMES:
        p = d / n
        if p.exists():
            return p

    # Directory listings come back in no guaranteed order; sorting makes the
    # fallback pick the same file on every run and every machine.
    nii = sorted(d.glob('*.nii')) + sorted(d.glob('*.nii.gz'))
    if nii:
        return nii[0]
    raise FileNotFoundError(f'No NIfTI found in {d}. Tried known names and *.nii/*.nii.gz. Contents: {_list_dir(d)}')
class VolumeDataset(Dataset):
    """Dataset of per-subject NIfTI volumes listed in a case CSV.

    The CSV must contain a ``subject_id`` or ``id`` column (matched
    case-insensitively, ignoring surrounding whitespace and a BOM). Rows whose
    subject folder does not exist under the split folder are dropped with a
    warning. Each item is ``(subject_id, tensor)`` where the tensor is the
    z-score-normalised volume resized to ``target_size`` with a leading
    channel axis of size 1.

    Args:
        csv_path: Case-list CSV; the delimiter is auto-detected.
        split_name: Key into ``SPLIT_DIRS`` selecting the split folder.
        target_size: Output volume size passed to ``F.interpolate``.

    Raises:
        ValueError: If the CSV has neither a ``subject_id`` nor an ``id`` column.
    """

    def __init__(self, csv_path: Path, split_name: str, target_size=(128,128,128)):
        # Read the header alone first to learn which raw column holds the IDs.
        raw_cols = list(pd.read_csv(csv_path, engine="python", sep=None, nrows=0).columns)
        # Drop the BOM before stripping: "\ufeff" is not whitespace, so
        # stripping first would leave any blanks that follow it in place.
        norm_cols = [str(c).replace("\ufeff", "").strip().lower() for c in raw_cols]
        if 'subject_id' in norm_cols:
            id_pos = norm_cols.index('subject_id')
        elif 'id' in norm_cols:
            id_pos = norm_cols.index('id')
        else:
            raise ValueError("CSV must have 'subject_id' or 'id' column.")

        # Parse the ID column as text. Numeric parsing would turn "007" into 7
        # (or "7.0"), which no longer matches the subject folder name.
        self.df = pd.read_csv(csv_path, engine="python", sep=None, dtype={raw_cols[id_pos]: str})
        norm_cols[id_pos] = 'subject_id'
        self.df.columns = norm_cols

        self.base = SPLIT_DIRS[split_name]
        self.target_size = target_size

        # Keep only subjects whose folder exists; astype(bool) keeps the mask
        # boolean even when the CSV has no rows.
        subject_ids = self.df['subject_id'].astype(str)
        mask = subject_ids.map(lambda sid: (self.base / sid).exists()).astype(bool)
        missing = subject_ids[~mask].tolist()
        if missing: print(f'[WARN] {len(missing)} subjects missing under {self.base}. Example(s): {missing[:5]}')
        self.df = self.df.loc[mask].reset_index(drop=True)

    def __len__(self):
        """Number of subjects whose folder was found."""
        return len(self.df)

    def __getitem__(self, idx):
        """Load, normalise and resize the volume of the ``idx``-th subject.

        Returns:
            ``(subject_id, tensor)`` with tensor shape ``(1, *target_size)``.

        Raises:
            ValueError: If the stored volume is not 3-D.
            FileNotFoundError: If no NIfTI file exists for the subject.
        """
        # Index the column first so the value is not coerced through a
        # mixed-dtype row.
        sid = str(self.df['subject_id'].iloc[idx])
        p = _find_nifti_for_subject(self.base, sid)
        vol = nib.load(str(p)).get_fdata().astype(np.float32)
        # Reject unexpected ranks before spending time on normalisation.
        if vol.ndim != 3:
            raise ValueError(f'Expected 3D volume, got {tuple(vol.shape)} for {p}')

        # Z-score normalisation; a constant volume (std == 0) is divided by 1.
        m = float(vol.mean())
        s = float(vol.std()) or 1.0
        vol = (vol - m) / s

        # F.interpolate needs batch and channel axes: [1, 1, D, H, W].
        t = torch.from_numpy(vol).float().unsqueeze(0).unsqueeze(0)
        t = F.interpolate(t, size=self.target_size, mode='trilinear', align_corners=False)
        t = t.squeeze(0)  # back to [C=1, D, H, W]
        return sid, t


# Default checkpoint location. If that file is absent, the RESNET34_WEIGHTS
# environment variable may name an alternative path.
weights_path = DATA_ROOT/'resnet_34.pth'
if not weights_path.exists():
    env_p=os.getenv('RESNET34_WEIGHTS')
    if env_p: weights_path=Path(env_p)

# Stop here with an actionable message instead of failing later inside the
# checkpoint loader.
if not weights_path.exists():
    raise FileNotFoundError(
        f"ResNet34 weights not found at {weights_path}. Place 'resnet_34.pth' in "
        f"{DATA_ROOT} or set the RESNET34_WEIGHTS environment variable."
    )

print('Using weights:', weights_path)
model=load_resnet34_3d(weights_path)
# Channel count the first convolution was built with; inference uses it to
# adapt the single-channel volumes.
expected_in=model.conv1.in_channels
print('Model expects in_channels:', expected_in)
@torch.no_grad()
def infer_split(csv_path: Path, split_name: str, out_csv: Path, batch=1):
    """Run the module-level ``model`` over one split and save the scores as CSV.

    The output has one row per subject with columns ``subject_id`` and
    ``resnet_prob``. ``resnet_prob`` is the sigmoid of the single logit when
    the model has one output, otherwise the softmax probability at class
    index 1.

    Args:
        csv_path: Case-list CSV of the split.
        split_name: Split key handed to ``VolumeDataset``.
        out_csv: Destination CSV path.
        batch: Batch size used by the data loader (default 1).

    Relies on the module-level ``model`` and ``expected_in``.
    """
    ds=VolumeDataset(csv_path, split_name)
    dl=DataLoader(ds, batch_size=batch, shuffle=False, num_workers=0)
    # Send inputs to the device the model actually lives on, instead of
    # re-deriving a device that might differ from it.
    device=next(model.parameters()).device
    rows=[]
    for sids, vols in dl:
        # The dataset yields one channel; tile it when the model expects more.
        if vols.shape[1]!=expected_in: vols=vols.repeat(1, expected_in, 1,1,1)
        logits=model(vols.to(device))
        probs=torch.sigmoid(logits[:,0]) if logits.shape[1]==1 else torch.softmax(logits,dim=1)[:,1]
        rows.extend({"subject_id": sid, "resnet_prob": float(p)} for sid,p in zip(sids, probs.cpu().tolist()))
    # Make sure the destination folder exists before writing.
    Path(out_csv).parent.mkdir(parents=True, exist_ok=True)
    # Explicit columns keep the header row even when no subject was processed,
    # so the file stays readable as a CSV.
    pd.DataFrame(rows, columns=["subject_id", "resnet_prob"]).to_csv(out_csv, index=False)
    print(f'[OK] Saved {split_name} predictions → {out_csv}')
# Score the train split, unless its case list is unavailable.
if not TRAIN_CASES_CSV.exists():
    print('[Skip] TRAIN_CASES_CSV not found:', TRAIN_CASES_CSV)
else:
    infer_split(TRAIN_CASES_CSV, 'train', CNN_PREDS_DIR/'resnet_train.csv', batch=1)

# Score the val split, unless its case list is unavailable.
if not VAL_CASES_CSV.exists():
    print('[Skip] VAL_CASES_CSV not found:', VAL_CASES_CSV)
else:
    infer_split(VAL_CASES_CSV, 'val', CNN_PREDS_DIR/'resnet_val.csv', batch=1)

Using weights: C:\Users\rubia\Downloads\resnet_34.pth
[3D loader] Missing keys (first 10): ['layer2.0.downsample.0.weight', 'layer2.0.downsample.1.weight', 'layer2.0.downsample.1.bias', 'layer2.0.downsample.1.running_mean', 'layer2.0.downsample.1.running_var', 'layer3.0.downsample.0.weight', 'layer3.0.downsample.1.weight', 'layer3.0.downsample.1.bias', 'layer3.0.downsample.1.running_mean', 'layer3.0.downsample.1.running_var']
Model expects in_channels: 1
[OK] Saved train predictions → C:\Users\rubia\Downloads\cnn_preds\resnet_train.csv
[OK] Saved val predictions → C:\Users\rubia\Downloads\cnn_preds\resnet_val.csv
