<a href="https://colab.research.google.com/github/karaxstone7/PS-10/blob/main/pretraining.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from google.colab import drive
drive.mount('/content/drive')


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
from google.colab import drive
drive.mount('/content/drive')


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
!pip -q install gdown
FILE_ID = "1SQkVnzePJssdMiRzEI1xPpaeNxo0guOy"  # e.g. "1abcDEFghIJ..."
!mkdir -p "/content/drive/My Drive/datasets/mock"
!gdown --id $FILE_ID -O "/content/drive/My Drive/datasets/mock/L1C1.zip"


Downloading...
From (original): https://drive.google.com/uc?id=1SQkVnzePJssdMiRzEI1xPpaeNxo0guOy
From (redirected): https://drive.google.com/uc?id=1SQkVnzePJssdMiRzEI1xPpaeNxo0guOy&confirm=t&uuid=2dd0a91f-b586-488f-8035-cfa0959af22b
To: /content/drive/My Drive/datasets/mock/L1C1.zip
100% 810M/810M [00:12<00:00, 66.3MB/s]


In [None]:
!pip -q install gdown
FILE_ID = "1TxLxTmjPS11vMJq9hRs3wx4QZiUW60Fa"  # e.g. "1abcDEFghIJ..."
!mkdir -p "/content/drive/My Drive/datasets/mock"
!gdown --id $FILE_ID -O "/content/drive/My Drive/datasets/mock/L2A2.zip"


Downloading...
From (original): https://drive.google.com/uc?id=1TxLxTmjPS11vMJq9hRs3wx4QZiUW60Fa
From (redirected): https://drive.google.com/uc?id=1TxLxTmjPS11vMJq9hRs3wx4QZiUW60Fa&confirm=t&uuid=f0e416d2-644d-43db-a9a0-93c93dc58171
To: /content/drive/My Drive/datasets/mock/L2A2.zip
100% 1.20G/1.20G [00:18<00:00, 64.8MB/s]


In [None]:
import os, shutil, pathlib, zipfile, glob

ROOT_DRIVE = pathlib.Path("/content/drive/My Drive/datasets")
RAW_DIR    = ROOT_DRIVE / "S2Looking_raw"   # archives (*.zip) are looked up here
OUT_DIR    = ROOT_DRIVE / "S2Looking"

RAW_DIR.mkdir(parents=True, exist_ok=True)
OUT_DIR.mkdir(parents=True, exist_ok=True)

# NOTE(review): the download cells above save their zips under datasets/mock/,
# and nothing visible in this notebook moves them into RAW_DIR - confirm the
# archives are actually placed here before relying on this cell.

# 1) Unzip anything in RAW_DIR, skipping archives that were already extracted.
#    The marker file is created only after extractall() has returned, so an
#    interrupted extraction is retried on the next run instead of being skipped.
for z in sorted(RAW_DIR.glob("*.zip")):
    done_marker = RAW_DIR / f".{z.name}.extracted"
    if done_marker.exists():
        print(f"[skip] {z.name} already extracted")
        continue
    with zipfile.ZipFile(z, 'r') as f:
        f.extractall(RAW_DIR)
    done_marker.touch()
    print(f"[ok] extracted {z.name}")


Splitting the dataset into train, val, and test: the raw S2Looking splits are copied into the A/B/label/list layout used by Mobile-CDNet.


In [None]:
import os, re, shutil
from pathlib import Path
from PIL import Image

# Source: raw S2Looking tree. Destination: flat layout with A/, B/, label/ and list/.
# NOTE(review): this cell spells the Drive folder "MyDrive" while the unzip cell
# uses "My Drive" - confirm both resolve to the same location in your runtime.
SRC_ROOT = Path("/content/drive/MyDrive/datasets/S2Looking_raw/S2Looking")
DST_ROOT = Path("/content/drive/MyDrive/datasets/S2Looking_MobileCDNet")

# A / B hold the two image sets, L the labels, LIST the per-split name lists.
A = DST_ROOT/"A"
B = DST_ROOT/"B"
L = DST_ROOT/"label"
LIST = DST_ROOT/"list"
for _out_subdir in (A, B, L, LIST):
    _out_subdir.mkdir(parents=True, exist_ok=True)

splits = ["train","val","test"]
ACCEPT_EXTS = {".png",".jpg",".jpeg",".tif",".tiff",".bmp"}

def rcollect(folder, exts=ACCEPT_EXTS):
    """Recursively collect paths below ``folder`` whose suffix is in ``exts``.

    Args:
        folder: pathlib.Path of the directory to search (recursively).
        exts: iterable of suffixes including the leading dot, e.g. {".png"}.

    Returns:
        Sorted list of matching paths.

    The suffix comparison is case-insensitive (".PNG" matches ".png"), which is
    consistent with prefer_png() and to_png() in this cell. The tree is walked
    once, instead of once per extension.
    """
    wanted = {ext.lower() for ext in exts}
    return sorted(p for p in folder.rglob("*") if p.suffix.lower() in wanted)

def looks_like_images_dir(d: Path, min_files=50):
    """Heuristic check that ``d`` is a directory holding an image set.

    Returns True only when ``d`` is an existing directory in which rcollect()
    finds at least ``min_files`` files (searched recursively). A falsy ``d``
    (e.g. None) or a missing / non-directory path gives False.
    """
    is_usable_dir = bool(d) and d.exists() and d.is_dir()
    if is_usable_dir:
        return len(rcollect(d)) >= min_files
    return False

def find_modality_dir_for_split(root: Path, split: str, modality_candidates=("img1","image1","A","t1","img2","image2","B","t2")):
    """
    Locate the two image directories (first / second side) of one split.

    Lookup order:
      Case A ("split-first"):       root/<split>/<modality>
      Case B ("modality-first"):    root/<modality>/<split>
      Fallback ("fallback-search"): any directory below root whose path
        *relative to root* contains ``split`` and that holds at least 10 images.

    Directory names are compared case-insensitively in Case A/B, so a layout
    such as train/Image1 + train/Image2 is recognised directly instead of
    falling through to the (much slower) recursive fallback.

    Args:
        root: dataset root directory.
        split: split name, e.g. "train".
        modality_candidates: directory names to try in Case A/B. Only names that
            (lower-cased) are one of img1/image1/a/t1 or img2/image2/b/t2 can be
            assigned to a side.

    Returns:
        dict with keys 'img1' (Path), 'img2' (Path) and 'layout' (str, one of
        "split-first", "modality-first", "fallback-search").

    Raises:
        RuntimeError: if no pair of directories could be located.
    """
    first_names = ("img1", "image1", "a", "t1")
    second_names = ("img2", "image2", "b", "t2")
    wanted = [str(m).lower() for m in modality_candidates]

    def subdirs_by_lower_name(parent: Path):
        """Map lower-cased child-directory name -> path (first in sorted order wins)."""
        by_name = {}
        try:
            children = sorted(parent.iterdir())
        except OSError:
            return by_name  # parent is missing, not a directory, or unreadable
        for child in children:
            if child.is_dir():
                by_name.setdefault(child.name.lower(), child)
        return by_name

    def choose(dmap):
        """Pick the first available directory for each side, in priority order."""
        side1 = next((dmap[k] for k in first_names if k in dmap), None)
        side2 = next((dmap[k] for k in second_names if k in dmap), None)
        return side1, side2

    # Case A: root/<split>/<modality>
    in_split = subdirs_by_lower_name(root/split)
    caseA = {m: in_split[m] for m in wanted
             if m in in_split and looks_like_images_dir(in_split[m])}
    d1, d2 = choose(caseA)
    if d1 and d2:
        return {"img1": d1, "img2": d2, "layout":"split-first"}

    # Case B: root/<modality>/<split>
    top_level = subdirs_by_lower_name(root)
    caseB = {}
    for m in wanted:
        if m in top_level and looks_like_images_dir(top_level[m]/split):
            caseB[m] = top_level[m]/split
    d1, d2 = choose(caseB)
    if d1 and d2:
        return {"img1": d1, "img2": d2, "layout":"modality-first"}

    # Fallback: recursive search. Both the split name and the side keywords are
    # matched against the path relative to root, so a root such as
    # ".../eval_data/a/..." cannot make every directory look like a match.
    side1_pat = re.compile(r"img1|image1|(^|/|_)(a|t1)(/|_|$)")
    side2_pat = re.compile(r"img2|image2|(^|/|_)(b|t2)(/|_|$)")

    candidates = []  # (path, lower-cased relative posix path, number of images)
    for p in root.rglob("*"):
        try:
            if not p.is_dir():
                continue
            rel = p.relative_to(root).as_posix()
            if split not in rel:
                continue
            n_images = len(rcollect(p))
        except OSError:
            continue  # unreadable entry: skip it, keep searching
        if n_images >= 10:
            candidates.append((p, rel.lower(), n_images))

    def pick(own_pat, other_pat):
        """Best candidate for one side: unambiguous names first, then more files."""
        pool = [c for c in candidates if own_pat.search(c[1])]
        if not pool:
            return None
        best = max(pool, key=lambda c: (other_pat.search(c[1]) is None, min(c[2]//100, 3)))
        return best[0]

    d1 = pick(side1_pat, side2_pat)
    d2 = pick(side2_pat, side1_pat)
    # Never return the same directory for both sides.
    if d1 and d2 and d1 != d2:
        return {"img1": d1, "img2": d2, "layout":"fallback-search"}

    raise RuntimeError(f"[{split}] Could not locate img1/img2 dirs anywhere under {root}.")

def norm_key(p: Path):
    """Reduce a file name to the key used to pair img1 / img2 / label files.

    The stem is lower-cased, then any run of leading modality tags followed by
    '-' or '_' (e.g. "a_", "img1-") is removed, and finally any run of trailing
    '-'/'_'-separated modality or label tags (e.g. "_2", "_label", "-gt").
    """
    leading_tags = r"^((a|b|t1|t2|img1|img2|image1|image2)[-_]+)+"
    trailing_tags = r"([-_](a|b|t1|t2|1|2|img1|img2|image1|image2|label|gt|change))+$"
    stem = p.stem.lower()
    without_prefix = re.sub(leading_tags, "", stem)
    return re.sub(trailing_tags, "", without_prefix)

def prefer_png(paths):
    """Return the first path with a ".png" suffix (any letter case).

    Falls back to the first path when none is a PNG, and returns None for an
    empty or None ``paths``.
    """
    if not paths:
        return None
    for candidate in paths:
        if candidate.suffix.lower() == ".png":
            return candidate
    return paths[0]

def to_png(src: Path, dst: Path, is_label=False):
    """Write ``src`` to ``dst`` as a PNG file.

    Args:
        src: source image path.
        dst: destination path (callers in this cell always pass a *.png name).
        is_label: True for label masks, False for imagery.

    Behaviour:
        - A source with a ".png" suffix (any case) is copied verbatim with
          shutil.copy2, i.e. it is not re-encoded and keeps its pixel mode.
        - Any other format is decoded with PIL and saved as PNG: labels are
          converted to mode "L" unless already in that mode, imagery to "RGB".
    """
    if src.suffix.lower()==".png":
        shutil.copy2(src, dst)
        return
    # The context manager closes the source file handle deterministically;
    # this function is called once per file over thousands of files.
    with Image.open(src) as img:
        if is_label:
            out = img if img.mode == "L" else img.convert("L")
        else:
            out = img.convert("RGB")
        # Explicit format so the result is a PNG regardless of dst's suffix.
        out.save(dst, format="PNG")

summary = {}
# Output file name -> split that wrote it. A/, B/ and label/ are shared by all
# splits, so the same key appearing in two splits would silently overwrite files.
written_by_split = {}

for sp in splits:
    base = SRC_ROOT/sp
    # Labels are required for train/val; the test split is exported without labels here.
    label_dir = base/"label"
    if sp != "test" and not label_dir.exists():
        raise RuntimeError(f"[{sp}] Expected {label_dir} to exist for labels.")

    # find img1/img2 for this split
    mods = find_modality_dir_for_split(SRC_ROOT, sp)
    d1, d2 = mods["img1"], mods["img2"]
    print(f"[{sp}] using {mods['layout']}  img1={d1}  img2={d2}")

    # index files by normalized key (several files may share one key)
    idx1, idx2 = {}, {}
    for p in rcollect(d1): idx1.setdefault(norm_key(p), []).append(p)
    for p in rcollect(d2): idx2.setdefault(norm_key(p), []).append(p)

    idxL = {}
    if sp != "test":
        for p in rcollect(label_dir, exts={".png"}):  # labels are read as .png only
            idxL.setdefault(norm_key(p), []).append(p)

    # keep only keys present on every required side
    if sp == "test":
        common = sorted(set(idx1) & set(idx2))
    else:
        common = sorted(set(idx1) & set(idx2) & set(idxL))

    if not common:
        raise RuntimeError(f"[{sp}] No matching basenames after normalization.")

    # Refuse to overwrite files produced by an earlier split in this run.
    clashes = [k for k in common if f"{k}.png" in written_by_split]
    if clashes:
        raise RuntimeError(
            f"[{sp}] {len(clashes)} output name(s) already written by another split "
            f"(e.g. {clashes[0]}.png from '{written_by_split[clashes[0] + '.png']}'); "
            "the shared A/B/label folders would be overwritten.")

    written = 0
    with open(DST_ROOT/"list"/f"{sp}.txt","w") as lf:
        for k in common:
            p1 = prefer_png(idx1[k]); p2 = prefer_png(idx2[k])
            outname = f"{k}.png"
            to_png(p1, (A/outname))
            to_png(p2, (B/outname))
            if sp != "test":
                pL = prefer_png(idxL[k])
                to_png(pL, (L/outname), is_label=True)
            lf.write(outname+"\n")
            written_by_split[outname] = sp
            written += 1

    summary[sp] = {"pairs": written, "candidates": len(common)}

print("\nDone. Target:", DST_ROOT)
print("A:", len(list((DST_ROOT/'A').glob('*.png'))),
      "B:", len(list((DST_ROOT/'B').glob('*.png'))),
      "L:", len(list((DST_ROOT/'label').glob('*.png'))))
for sp in splits:
    p = DST_ROOT/'list'/f"{sp}.txt"
    if p.exists():
        with open(p) as fh:  # close the handle instead of leaking it
            n_lines = sum(1 for _ in fh)
    else:
        n_lines = 0
    print(sp, "list lines:", n_lines)
print("Summary:", summary)


[train] using fallback-search  img1=/content/drive/MyDrive/datasets/S2Looking_raw/S2Looking/train/Image1  img2=/content/drive/MyDrive/datasets/S2Looking_raw/S2Looking/train/Image2
[val] using fallback-search  img1=/content/drive/MyDrive/datasets/S2Looking_raw/S2Looking/val/Image1  img2=/content/drive/MyDrive/datasets/S2Looking_raw/S2Looking/val/Image2
[test] using fallback-search  img1=/content/drive/MyDrive/datasets/S2Looking_raw/S2Looking/test/Image1  img2=/content/drive/MyDrive/datasets/S2Looking_raw/S2Looking/test/Image2

Done. Target: /content/drive/MyDrive/datasets/S2Looking_MobileCDNet
A: 5000 B: 5000 L: 4000
train list lines: 3500
val list lines: 500
test list lines: 1000
Summary: {'train': {'pairs': 3500, 'candidates': 3500}, 'val': {'pairs': 500, 'candidates': 500}, 'test': {'pairs': 1000, 'candidates': 1000}}


In [4]:
# (Optional) Clean + pin versions known to work with this repo
!pip uninstall -y opencv-contrib-python thinc || true
!pip install -q numpy==1.26.4 tqdm==4.67.1 albumentations==1.4.10 opencv-python==4.10.0.84

# The repo suggests torch 1.8.x; Colab ships 2.x which may also work,
# but if you hit a Torch/TV mismatch, install the pair below:
# !pip install -q torch==1.8.1+cu111 torchvision==0.9.1+cu111 -f https://download.pytorch.org/whl/torch_stable.html


Found existing installation: opencv-contrib-python 4.12.0.88
Uninstalling opencv-contrib-python-4.12.0.88:
  Successfully uninstalled opencv-contrib-python-4.12.0.88
Found existing installation: thinc 8.3.6
Uninstalling thinc-8.3.6:
  Successfully uninstalled thinc-8.3.6
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.0/61.0 kB[0m [31m5.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m18.0/18.0 MB[0m [31m115.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m161.9/161.9 kB[0m [31m16.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.5/62.5 MB[0m [31m12.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m50.0/50.0 MB[0m [31m15.0 MB/s[0m eta [36m0:00:00[0m
[?25h[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behavi

In [5]:
!git clone https://github.com/tawneydaylily/Mobile-CDNet.git
%cd Mobile-CDNet


Cloning into 'Mobile-CDNet'...
remote: Enumerating objects: 100, done.[K
remote: Counting objects: 100% (100/100), done.[K
remote: Compressing objects: 100% (97/97), done.[K
remote: Total 100 (delta 12), reused 0 (delta 0), pack-reused 0 (from 0)[K
Receiving objects: 100% (100/100), 10.51 MiB | 14.62 MiB/s, done.
Resolving deltas: 100% (12/12), done.
/content/Mobile-CDNet


In [6]:
# 1) Ensure we are at the repo root
%cd /content/Mobile-CDNet
!pwd
!ls -la


/content/Mobile-CDNet
/content/Mobile-CDNet
total 48
drwxr-xr-x 6 root root 4096 Oct  9 15:15 .
drwxr-xr-x 1 root root 4096 Oct  9 15:15 ..
drwxr-xr-x 2 root root 4096 Oct  9 15:15 chap4
-rw-r--r-- 1 root root 1598 Oct  9 15:15 dataset.py
drwxr-xr-x 8 root root 4096 Oct  9 15:15 .git
-rw-r--r-- 1 root root 3990 Oct  9 15:15 metric_tool.py
drwxr-xr-x 5 root root 4096 Oct  9 15:15 models
-rw-r--r-- 1 root root 2257 Oct  9 15:15 README.md
drwxr-xr-x 2 root root 4096 Oct  9 15:15 tools
-rw-r--r-- 1 root root 6705 Oct  9 15:15 Transforms.py
-rw-r--r-- 1 root root  431 Oct  9 15:15 utils.py


In [None]:
# 2) See if a "models" folder exists and whether it has __init__.py
!find . -maxdepth 2 -type d -iname "models" -print
!ls -la ./models || true


./models
./Mobile-CDNet/models
total 116
drwxr-xr-x 5 root root  4096 Oct  9 10:11 .
drwxr-xr-x 7 root root  4096 Oct  9 10:17 ..
drwxr-xr-x 3 root root  4096 Oct  9 10:11 backbones
-rw-r--r-- 1 root root  5073 Oct  9 10:11 bam.py
-rw-r--r-- 1 root root 12618 Oct  9 10:11 bit.py
-rw-r--r-- 1 root root  3235 Oct  9 10:11 _blocks.py
-rw-r--r-- 1 root root    21 Oct  9 10:11 __init__.py
-rw-r--r-- 1 root root  4188 Oct  9 10:11 MobileNetV2.py
-rw-r--r-- 1 root root 18256 Oct  9 10:11 model91.79.py
-rw-r--r-- 1 root root  3841 Oct  9 10:11 model.py
-rw-r--r-- 1 root root  5270 Oct  9 10:11 nlfpn.py
-rw-r--r-- 1 root root  6646 Oct  9 10:11 pam.py
drwxr-xr-x 2 root root  4096 Oct  9 10:11 __pycache__
-rw-r--r-- 1 root root  9441 Oct  9 10:11 resnet.py
drwxr-xr-x 3 root root  4096 Oct  9 10:11 sync_batchnorm
-rw-r--r-- 1 root root   512 Oct  9 10:11 _utils.py


In [None]:
# 3) Make sure Python treats folders as packages
# (many research repos forget these files)
!touch ./models/__init__.py
!touch ./tools/__init__.py


In [None]:
# 4) Double-check Python sees the repo root first on sys.path

import os, sys
print("CWD:", os.getcwd())
print("Has ./models?", os.path.isdir("models"))
print("models/__init__.py exists?", os.path.isfile("models/__init__.py"))
print("tools/__init__.py exists?", os.path.isfile("tools/__init__.py"))
print("First 3 sys.path entries:", sys.path[:3])



CWD: /content/Mobile-CDNet
Has ./models? True
models/__init__.py exists? True
tools/__init__.py exists? True
First 3 sys.path entries: ['/content', '/env/python', '/usr/lib/python312.zip']


In [None]:
# Check that the dataset is laid out correctly
!ls -la /content/drive/MyDrive/datasets/S2Looking_MobileCDNet
!ls -la /content/drive/MyDrive/datasets/S2Looking_MobileCDNet/list

# Peek at the first few lines in each split list
!head -n 5 /content/drive/MyDrive/datasets/S2Looking_MobileCDNet/list/train.txt
!head -n 5 /content/drive/MyDrive/datasets/S2Looking_MobileCDNet/list/val.txt
!head -n 5 /content/drive/MyDrive/datasets/S2Looking_MobileCDNet/list/test.txt


total 16
drwx------ 2 root root 4096 Oct  9 09:06 A
drwx------ 2 root root 4096 Oct  9 09:06 B
drwx------ 2 root root 4096 Oct  9 09:06 label
drwx------ 2 root root 4096 Oct  9 09:06 list
total 44
-rw------- 1 root root  8785 Oct  9 10:03 test.txt
-rw------- 1 root root 30704 Oct  9 09:52 train.txt
-rw------- 1 root root  4404 Oct  9 09:52 val.txt
1.png
10.png
100.png
1000.png
1002.png
1001.png
1016.png
1020.png
1045.png
1054.png
1008.png
1011.png
1021.png
1023.png
1029.png


In [None]:
%cd /content/Mobile-CDNet

# Make a backup
!cp tools/train.py tools/train.py.bak

# Patch: allow absolute/relative paths if they exist; keep legacy shortcuts too

from pathlib import Path
p = Path("tools/train.py")
src = p.read_text()

# Exact text expected in the upstream tools/train.py (dataset alias dispatch).
old = """if args.file_root == 'LEVIR':
        args.file_root = 'H:\\\\penghaifeng\\\\LEVIR-CD'
    elif args.file_root == 'BCDD':
        args.file_root = 'H:\\\\penghaifeng\\\\BCDD'
    elif args.file_root == 'SYSU':
        args.file_root = 'H:\\\\penghaifeng\\\\SYSU-CD'
    elif args.file_root == 'CDD':
        args.file_root = '/home/guan/Documents/Datasets/ChangeDetection/CDD'
    elif args.file_root == 'quick_start':
        args.file_root = './samples'
    else:
        raise TypeError('%s has not defined' % args.file_root)"""
# Same dispatch, but the final branch accepts any existing directory.
new = """# Accept known aliases, otherwise accept any existing path
    if args.file_root == 'LEVIR':
        args.file_root = 'H:\\\\penghaifeng\\\\LEVIR-CD'
    elif args.file_root == 'BCDD':
        args.file_root = 'H:\\\\penghaifeng\\\\BCDD'
    elif args.file_root == 'SYSU':
        args.file_root = 'H:\\\\penghaifeng\\\\SYSU-CD'
    elif args.file_root == 'CDD':
        args.file_root = '/home/guan/Documents/Datasets/ChangeDetection/CDD'
    elif args.file_root == 'quick_start':
        args.file_root = './samples'
    else:
        # If it's an existing directory (e.g., your custom dataset), use it directly
        import os
        if not os.path.isdir(args.file_root):
            raise TypeError('%s has not defined' % args.file_root)"""

# str.replace() silently does nothing when the anchor text is absent, so check
# explicitly and only report success when the file really changed.
if old in src:
    p.write_text(src.replace(old, new))
    print("Patched tools/train.py")
elif new in src:
    print("tools/train.py is already patched - nothing to do")
else:
    raise RuntimeError(
        "tools/train.py does not contain the expected file_root block; "
        "the upstream file may have changed, so no patch was applied.")



/content/Mobile-CDNet
Patched tools/train.py


In [None]:
%%bash
cat > dataset.py << 'PY'
import os
import cv2
import numpy as np
import torch.utils.data as data

class Dataset(data.Dataset):
    """
    Mobile-CD style loader:
    file_root/
      A/      B/      label/
      list/train.txt  list/val.txt  list/test.txt
    Each list contains bare filenames like:  0001.png

    Each item is (img6, lab): the A and B images concatenated along the channel
    axis and a binary label, both passed through ``transform`` when one is given.
    """

    def __init__(self, dataset, file_root='data/', transform=None):
        """
        Args:
            dataset: split name ("train" | "val" | "test"); selects the list file.
            file_root: dataset root containing A/, B/, label/ and the list file.
            transform: optional callable taking and returning (image, label).

        Raises:
            FileNotFoundError: no list file exists in any of the tried locations.
            RuntimeError: the list yields no sample with A, B and label files.
        """
        self.split = dataset  # "train" | "val" | "test"
        self.file_root = file_root
        self.transform = transform

        # Prefer file_root/list/<split>.txt; fallback to file_root/<split>.txt
        cand_lists = [
            os.path.join(file_root, "list", f"{dataset}.txt"),
            os.path.join(file_root, f"{dataset}.txt"),
            # last resort: file_root/<split>/list/<split>.txt
            os.path.join(file_root, dataset, "list", f"{dataset}.txt"),
        ]
        list_path = next((p for p in cand_lists if os.path.isfile(p)), None)
        if list_path is None:
            raise FileNotFoundError(f"List file not found. Tried: {cand_lists}")

        with open(list_path, "r") as f:
            names = [ln.strip() for ln in f if ln.strip()]

        A_dir = os.path.join(file_root, "A")
        B_dir = os.path.join(file_root, "B")
        L_dir = os.path.join(file_root, "label")

        pre_images, post_images, gts = [], [], []
        for n in names:
            pa = os.path.join(A_dir, n)
            pb = os.path.join(B_dir, n)
            pl = os.path.join(L_dir, n)
            # A sample is kept only when all three files exist (label included,
            # for every split); incomplete entries are dropped.
            if os.path.isfile(pa) and os.path.isfile(pb) and os.path.isfile(pl):
                pre_images.append(pa)
                post_images.append(pb)
                gts.append(pl)

        if len(pre_images) == 0:
            raise RuntimeError(f"No valid samples for split='{dataset}' under {file_root}")

        # Make dropped entries visible instead of shrinking the split silently.
        skipped = len(names) - len(pre_images)
        if skipped:
            print(f"[Dataset:{dataset}] skipped {skipped} of {len(names)} listed "
                  f"samples with a missing A/B/label file")

        self.pre_images = pre_images
        self.post_images = post_images
        self.gts = gts

    def __len__(self):
        """Number of complete (A, B, label) samples."""
        return len(self.pre_images)

    def __getitem__(self, idx):
        """Load sample ``idx`` and return (img6, lab).

        Raises:
            RuntimeError: if one of the three files cannot be decoded.
        """
        pre_path = self.pre_images[idx]
        post_path = self.post_images[idx]
        gt_path = self.gts[idx]

        pre = cv2.imread(pre_path, cv2.IMREAD_COLOR)   # BGR
        post = cv2.imread(post_path, cv2.IMREAD_COLOR)
        if pre is None or post is None:
            raise RuntimeError(f"Failed to read: {pre_path} or {post_path}")

        pre = cv2.cvtColor(pre, cv2.COLOR_BGR2RGB)
        post = cv2.cvtColor(post, cv2.COLOR_BGR2RGB)

        lab = cv2.imread(gt_path, cv2.IMREAD_GRAYSCALE)
        if lab is None:
            raise RuntimeError(f"Failed to read label: {gt_path}")
        # binarize: >0 -> 1.0, keep shape (H, W, 1)
        lab = (lab > 0).astype(np.float32)[..., None]

        # concat 6 channels (A then B), scale to [0,1]
        # NOTE(review): Transforms.Normalize is not visible from this cell. Confirm
        # that it does not rescale by 255 (or re-binarize the label) again, and that
        # the mean/std passed to it are ordered R,G,B to match the conversion above.
        img6 = np.concatenate([pre, post], axis=2).astype(np.float32) / 255.0

        if self.transform is not None:
            img6, lab = self.transform(img6, lab)

        return img6, lab

    def get_img_info(self, idx):
        """Return {"height", "width"} of the A-side image of sample ``idx``.

        Raises:
            RuntimeError: if the image cannot be decoded (same error style as
                __getitem__, instead of an AttributeError on None).
        """
        path = self.pre_images[idx]
        img = cv2.imread(path)
        if img is None:
            raise RuntimeError(f"Failed to read: {path}")
        return {"height": img.shape[0], "width": img.shape[1]}
PY


In [None]:
from pathlib import Path

# Step 1 - paths
repo = Path("/content/Mobile-CDNet")
train_py = repo / "tools/train.py"
backup = repo / "tools/train.py.bak"

# Step 2 - back up safely: keep an existing backup, otherwise a second run (or a
# run after the earlier patch cell) would replace it with an already-patched file.
if not backup.exists():
    backup.write_text(train_py.read_text())

# Step 3 - read and patch
text = train_py.read_text()

# a) Lower num_workers default (optional)
text = text.replace(
    "parser.add_argument('--num_workers', type=int, default=4",
    "parser.add_argument('--num_workers', type=int, default=2"
)

# b) Wrap test loader in try/except to skip missing test split
import re
pattern_testblock = re.compile(
    r'    test_data = myDataLoader\.Dataset\("test".*?pin_memory=False\)', re.S
)
replacement_block = """    # Optional test set (skip if unavailable)
    has_test = True
    try:
        test_data = myDataLoader.Dataset("test", file_root=args.file_root, transform=valDataset)
        testLoader = torch.utils.data.DataLoader(
            test_data, shuffle=False,
            batch_size=args.batch_size, num_workers=args.num_workers, pin_memory=False)
    except Exception as e:
        print("[Info] Test split unavailable or unlabeled — skipping test phase.\\nReason:", e)
        has_test = False"""

# c) Guard the final test evaluation
pattern_final = re.compile(
    r'    loss_test, score_test = val\(args, testLoader, model, 0\).*?logger\.flush\(\)', re.S
)
replacement_final = """    if has_test:
        loss_test, score_test = val(args, testLoader, model, 0)
        print("\\nTest :\\t Kappa (te) = %.4f\\t IoU (te) = %.4f\\t F1 (te) = %.4f\\t R (te) = %.4f\\t P (te) = %.4f" %
              (score_test['Kappa'], score_test['IoU'], score_test['F1'], score_test['recall'], score_test['precision']))
        logger.write("\\n%s\\t\\t%.4f\\t\\t%.4f\\t\\t%.4f\\t\\t%.4f\\t\\t%.4f" % (
            'Test', score_test['Kappa'], score_test['IoU'], score_test['F1'],
            score_test['recall'], score_test['precision']))
        logger.flush()"""

if "has_test = True" in text:
    # The patterns above also match inside the already-indented patched code,
    # so re-applying them would wrap the block a second time.
    outcome = "already-patched"
else:
    # The replacements are passed as functions: a replacement *string* has its
    # backslash escapes processed by re, which would turn the "\n" / "\t" escape
    # sequences meant for train.py's string literals into real newlines / tabs
    # and leave train.py with broken string literals.
    candidate, n_loader = pattern_testblock.subn(lambda _m: replacement_block, text)
    candidate, n_eval = pattern_final.subn(lambda _m: replacement_final, candidate)
    if n_loader and n_eval:
        text = candidate
        outcome = "patched"
    else:
        # Apply (b) and (c) together or not at all: (c) alone would reference an
        # undefined has_test.
        outcome = "not-matched"

# Step 4 - save
train_py.write_text(text)
if outcome == "patched":
    print("✅ Patched tools/train.py successfully. Backup saved at:", backup)
elif outcome == "already-patched":
    print("tools/train.py already contains the optional-test patch; left as is. Backup at:", backup)
else:
    print("[Warning] Test-phase patch NOT applied: loader block matches =", n_loader,
          ", final evaluation matches =", n_eval, "- check tools/train.py. Backup at:", backup)


✅ Patched tools/train.py successfully. Backup saved at: /content/Mobile-CDNet/tools/train.py.bak


In [None]:
from pathlib import Path
tf = Path("/content/Mobile-CDNet/Transforms.py")
src = tf.read_text()

# Literal text substitutions, applied in order:
#   1) the np.int dtype uses (with and without spaces around '=')
#   2) (optional, future-proof) the other removed NumPy aliases
ALIAS_FIXES = (
    ("dtype=np.int)", "dtype=np.int64)"),
    ("dtype = np.int)", "dtype=np.int64)"),
    ("np.float)", "np.float64)"),
    ("np.bool)", "np.bool_)"),  # or use .astype(bool) if it's a cast
)
for deprecated, replacement in ALIAS_FIXES:
    src = src.replace(deprecated, replacement)

tf.write_text(src)
print("Transforms.py patched.")


Transforms.py patched.


In [1]:
from pathlib import Path
import shutil, json

# --- paths ---
DST_ROOT = Path("/content/drive/MyDrive/datasets/S2Looking_MobileCDNet")  # has A, B, label, list
SRC_ROOT = Path("/content/drive/MyDrive/datasets/S2Looking_raw/S2Looking")# has train/val/test with img1,img2,label...

assert DST_ROOT.exists(), "DST_ROOT not found"
assert SRC_ROOT.exists(), "SRC_ROOT not found"

# --- gather existing files: sets of *.png file names per output folder ---
A, B, L = (
    {png.name for png in (DST_ROOT/sub).glob("*.png")}
    for sub in ("A", "B", "label")
)

# load current split lists; a missing list file yields an empty name list
split_files = {sp: DST_ROOT/"list"/f"{sp}.txt" for sp in ["train","val","test"]}
split_names = {
    sp: ([ln.strip() for ln in list_file.read_text().splitlines() if ln.strip()]
         if list_file.exists() else [])
    for sp, list_file in split_files.items()
}

# --- find orphans (names present in both A and B but missing in label) ---
orphans_ab = sorted(A.intersection(B).difference(L))

# helper: try to find a label in raw for a given filename and (optionally) split
def find_label_in_raw(name, prefer_split=None):
    """Look for ``name`` in the raw per-split label folders.

    Checks SRC_ROOT/<split>/label/<name>, trying ``prefer_split`` first (when
    given) and then train, val, test in that order.

    Returns:
        The first existing path, or None when no split has the file.
    """
    search_order = ["train","val","test"]
    if prefer_split:
        search_order.insert(0, prefer_split)
    for split_name in search_order:
        candidate = SRC_ROOT/split_name/"label"/name
        if candidate.exists():
            return candidate
    return None

# --- try to recover missing labels from raw ---
# name -> split, built once so each orphan is looked up in O(1). setdefault keeps
# the first listing, i.e. the same train -> val -> test priority as a linear scan.
split_of = {}
for sp in ["train","val","test"]:
    for n in split_names[sp]:
        split_of.setdefault(n, sp)

recovered = []
not_found = []
for n in orphans_ab:
    # the split whose list contains this name is searched first
    src = find_label_in_raw(n, prefer_split=split_of.get(n))
    if src:
        shutil.copy2(src, DST_ROOT/"label"/n)
        recovered.append({"name": n, "from": str(src)})
    else:
        not_found.append(n)

# recompute label set after recovery
L2 = {p.name for p in (DST_ROOT/"label").glob("*.png")}
triplets = A & B & L2

# --- rewrite split lists to keep only valid triplets ---
rewritten_counts = {}
for sp in ["train","val","test"]:
    # a split without an existing list stays empty (the split is not guessed)
    rewritten = [n for n in split_names[sp] if n in triplets]
    split_files[sp].parent.mkdir(parents=True, exist_ok=True)
    # one name per line, each line newline-terminated (same format the
    # conversion cell writes, so line-counting tools report the true size)
    split_files[sp].write_text("".join(f"{n}\n" for n in rewritten))
    rewritten_counts[sp] = len(rewritten)

# --- save a small report ---
report = {
    "A": len(A), "B": len(B), "label_before": len(L), "label_after": len(L2),
    "orphans_checked": len(orphans_ab),
    "recovered": len(recovered),
    "not_found": len(not_found),
    "split_counts": rewritten_counts,
}
(DST_ROOT/"mismatch_report.json").write_text(json.dumps(
    {"report": report, "recovered": recovered, "not_found": not_found}, indent=2))

print("=== Sync summary ===")
print(report)
print("Report saved to:", DST_ROOT/"mismatch_report.json")


=== Sync summary ===
{'A': 5000, 'B': 5000, 'label_before': 4000, 'label_after': 5000, 'orphans_checked': 1000, 'recovered': 1000, 'not_found': 0, 'split_counts': {'train': 3500, 'val': 500, 'test': 1000}}
Report saved to: /content/drive/MyDrive/datasets/S2Looking_MobileCDNet/mismatch_report.json


In [10]:
# `re` is a standard-library module of its own. It is not part of pathlib's public
# API, so importing it *from* pathlib only works when pathlib's implementation
# happens to import re at module level.
import re
from pathlib import Path

tf = Path("/content/Mobile-CDNet/Transforms.py")
src = tf.read_text()

# Fix deprecated NumPy aliases everywhere (literal "<alias>)" occurrences)
src = src.replace("np.int)",   "np.int64)")
src = src.replace("np.float)", "np.float32)")
src = src.replace("np.bool)",  "np.bool_)")

# Also handle cases with spaces like 'dtype = np.int' if present; the trailing \b
# keeps already-fixed names such as np.int64 / np.float32 untouched.
src = re.sub(r"dtype\s*=\s*np\.int\b",   "dtype=np.int64", src)
src = re.sub(r"dtype\s*=\s*np\.float\b", "dtype=np.float32", src)
src = re.sub(r"dtype\s*=\s*np\.bool\b",  "dtype=np.bool_", src)

tf.write_text(src)
print("✅ Patched deprecated NumPy dtypes in Transforms.py")


✅ Patched deprecated NumPy dtypes in Transforms.py


In [13]:
%cd /content/Mobile-CDNet
!PYTHONPATH=/content/Mobile-CDNet python -m tools.train \
  --file_root "/content/drive/MyDrive/datasets/S2Looking_MobileCDNet" \
  --inWidth 256 --inHeight 256 \
  --batch_size 8 --num_workers 14 \
  --lr 0.001 --max_steps 5000 --step_loss 100 \
  --savedir "/content/drive/MyDrive/experiments/mobilecdnet_s2looking" \
  --resume False --onGPU True


/content/Mobile-CDNet
Called with args:
Namespace(file_root='/content/drive/MyDrive/datasets/S2Looking_MobileCDNet', inWidth=256, inHeight=256, max_steps=5000, num_workers=14, batch_size=8, step_loss=100, lr=0.001, lr_mode='poly', savedir='/content/drive/MyDrive/experiments/mobilecdnet_s2looking', resume='False', logFile='trainValLog.txt', onGPU=True, weight='', ms=0)
loading imagenet pretrained mobilenetv2
loaded imagenet pretrained mobilenetv2
Total network parameters (excluding idr): 2946089
For each epoch, we have 437 batches
=> no checkpoint found at '/content/drive/MyDrive/experiments/mobilecdnet_s2looking/S2Looking_MobileCDNet_iter_5000_lr_0.001/checkpoint.pth.tar'
iteration: [872/5244] f1: 0.428 lr: 0.0008490 loss: 0.680 time:0.102 h63
[60/63] F1: 0.189775 loss: 0.956 time: 0.023Epoch 1: Details

Epoch No. 1:	Train Loss = 0.7248	Val Loss = 0.9510	 F1(tr) = 0.3766	 F1(val) = 0.1538
iteration: [1309/5244] f1: 0.520 lr: 0.0007722 loss: 0.551 time:0.089 h63
[60/63] F1: 0.129070 los

In [None]:
import os, cv2, torch, numpy as np
from pathlib import Path

# Purpose of this cell: load trained weights, run the model on a few validation
# samples and save the thresholded predictions as PNG masks.

# ===== Checkpoint paths =====
# The run name and the file name are joined without a "/" in between, which
# matches the files listed in the experiments folder further down this notebook.
BEST = "/content/drive/MyDrive/experiments/mobilecdnet_s2looking/S2Looking_MobileCDNet_iter_5000_lr_0.001best_model.pth"
CKPT = "/content/drive/MyDrive/experiments/mobilecdnet_s2looking/S2Looking_MobileCDNet_iter_5000_lr_0.001checkpoint.pth.tar"

DATA_ROOT = "/content/drive/MyDrive/datasets/S2Looking_MobileCDNet"
INW, INH = 256, 256   # size passed to myTransforms.Scale
N_SAMPLES = 10        # target number of validation samples to export

# Repo imports (the repo root is put first on sys.path so these resolve)
import sys
sys.path.insert(0, "/content/Mobile-CDNet")
from models.model import BaseNet
import dataset as myDataLoader
import Transforms as myTransforms

# Pick which file exists: BEST is loaded as a bare state dict, CKPT as a
# checkpoint dict whose weights sit under the "state_dict" key.
ckpt_path = None
ckpt_type = None
if Path(BEST).exists():
    ckpt_path, ckpt_type = BEST, "state_dict"
elif Path(CKPT).exists():
    ckpt_path, ckpt_type = CKPT, "checkpoint"
else:
    raise FileNotFoundError("Neither best_model nor checkpoint found at the given paths.")

print("Loading:", ckpt_path, f"({ckpt_type})")

# Dataset/Transforms
# NOTE(review): these values look like the usual ImageNet statistics listed in
# B,G,R order (repeated for both images), while the dataset.py written earlier in
# this notebook converts images to RGB - confirm the channel order matches.
mean = [0.406, 0.456, 0.485, 0.406, 0.456, 0.485]
std  = [0.225, 0.224, 0.229, 0.225, 0.224, 0.229]
valT = myTransforms.Compose([
    myTransforms.Normalize(mean=mean, std=std),
    myTransforms.Scale(INW, INH),
    myTransforms.ToTensor()
])

val_ds = myDataLoader.Dataset("val", file_root=DATA_ROOT, transform=valT)
print("Val size:", len(val_ds))

# Model: weights are loaded on CPU first, then the model is moved to the device
device = "cuda" if torch.cuda.is_available() else "cpu"
model = BaseNet(3,1)
state = torch.load(ckpt_path, map_location="cpu")
if ckpt_type == "checkpoint":
    state = state["state_dict"]
model.load_state_dict(state)
model.eval().to(device)

# Output folder (next to your checkpoint file), named "<checkpoint file name>_val_preds"
out_dir = Path(ckpt_path).parent / (Path(ckpt_path).name + "_val_preds")
out_dir.mkdir(parents=True, exist_ok=True)

# Save a handful of predictions: indices are spread evenly over the validation
# set when it has more than N_SAMPLES items, otherwise every item is used.
step = max(1, len(val_ds)//N_SAMPLES) if len(val_ds)>N_SAMPLES else 1
indices = list(range(0, min(len(val_ds), N_SAMPLES*step), step))

for i in indices:
    img, gt = val_ds[i]          # presumably img is 6xHxW and gt 1xHxW after ToTensor - TODO confirm
    # first three channels = A image, last three = B image; a batch axis is added
    pre  = img[:3].unsqueeze(0).to(device).float()
    post = img[3:6].unsqueeze(0).to(device).float()
    with torch.no_grad():
        # NOTE(review): assumes the model returns a single Bx1xHxW tensor of
        # probabilities (not logits or a tuple) - confirm against models/model.py
        prob = model(pre, post)
        pred = (prob > 0.5).float()
    # binary mask of the first batch item, written as 0/255 uint8
    m = (pred[0,0].cpu().numpy()*255).astype(np.uint8)
    cv2.imwrite(str(out_dir / f"val_{i:05d}.png"), m)

print("Saved predictions to:", out_dir)


In [None]:
# Second training run, asking tools.train to resume (--resume True) with a larger step budget.
# NOTE(review): the first run logged its resume lookup as
#   .../mobilecdnet_s2looking/S2Looking_MobileCDNet_iter_5000_lr_0.001/checkpoint.pth.tar
# so the lookup path appears to embed max_steps; with --max_steps 10000 it presumably
# points somewhere else. The folder listing further down also shows the checkpoint saved as
#   .../mobilecdnet_s2looking/S2Looking_MobileCDNet_iter_5000_lr_0.001checkpoint.pth.tar
# (no "/" before the file name). Confirm in tools/train.py that this run really
# resumes instead of silently starting from scratch.
%cd /content/Mobile-CDNet
!PYTHONPATH=/content/Mobile-CDNet python -m tools.train \
  --file_root "/content/drive/MyDrive/datasets/S2Looking_MobileCDNet" \
  --inWidth 256 --inHeight 256 \
  --batch_size 8 --num_workers 14 \
  --lr 0.001 --max_steps 10000 --step_loss 100 \
  --savedir "/content/drive/MyDrive/experiments/mobilecdnet_s2looking" \
  --resume True --onGPU True


In [16]:
# make a persistent folder inside your Google Drive
!mkdir -p "/content/drive/MyDrive/dev"

# copy your working repo from Colab's temp storage to Drive
!rsync -a --delete "/content/Mobile-CDNet/" "/content/drive/MyDrive/dev/Mobile-CDNet/"



In [32]:
!mkdir -p "/content/drive/MyDrive/dev"
!rsync -a --delete "/content/Mobile-CDNet/" "/content/drive/MyDrive/dev/Mobile-CDNet/"
%cd /content/drive/MyDrive/dev/Mobile-CDNet


/content/drive/MyDrive/dev/Mobile-CDNet


In [33]:
!mkdir -p "/content/drive/MyDrive/dev"
!rsync -a --delete "/content/Mobile-CDNet/" "/content/drive/MyDrive/dev/Mobile-CDNet/"
%cd /content/drive/MyDrive/dev/Mobile-CDNet


/content/drive/MyDrive/dev/Mobile-CDNet


In [34]:
!mkdir -p "/content/drive/MyDrive/experiments/mobilecdnet_s2looking"
# If you already have a best model path, also save a 'last.pt' alias:
!cp -n "/content/drive/MyDrive/experiments/mobilecdnet_s2looking/S2Looking_MobileCDNet_iter_5000_lr_0.001best_model.pth" \
       "/content/drive/MyDrive/experiments/mobilecdnet_s2looking/last.pt"
!ls -lh "/content/drive/MyDrive/experiments/mobilecdnet_s2looking"


total 55M
-rw------- 1 root root  12M Oct  9 18:17 last.pt
drwx------ 4 root root 4.0K Oct  9 18:16 S2Looking_MobileCDNet_iter_5000_lr_0.001
-rw------- 1 root root  12M Oct  9 17:54 S2Looking_MobileCDNet_iter_5000_lr_0.001best_model.pth
-rw------- 1 root root  32M Oct  9 17:54 S2Looking_MobileCDNet_iter_5000_lr_0.001checkpoint.pth.tar
-rw------- 1 root root  581 Oct  9 17:58 S2Looking_MobileCDNet_iter_5000_lr_0.001trainValLog.txt


In [35]:
%cd /content/drive/MyDrive/dev/Mobile-CDNet
!pip freeze > requirements.txt


/content/drive/MyDrive/dev/Mobile-CDNet


In [36]:
%cd /content/drive/MyDrive/dev/Mobile-CDNet
!git init
!git config user.name "karaxstone7"
!git config user.email "karaxstone7@gmail.com"
!git add -A
!git commit -m "Colab snapshot: Mobile-CDNet + S2Looking pretraining"
!git branch -M main


/content/drive/MyDrive/dev/Mobile-CDNet
Reinitialized existing Git repository in /content/drive/MyDrive/dev/Mobile-CDNet/.git/
[main b879e27] Colab snapshot: Mobile-CDNet + S2Looking pretraining
 12 files changed, 839 insertions(+), 135 deletions(-)
 create mode 100644 __pycache__/Transforms.cpython-312.pyc
 create mode 100644 __pycache__/dataset.cpython-312.pyc
 create mode 100644 __pycache__/metric_tool.cpython-312.pyc
 create mode 100644 __pycache__/utils.cpython-312.pyc
 rewrite dataset.py (77%)
 create mode 100644 models/__pycache__/MobileNetV2.cpython-312.pyc
 create mode 100644 models/__pycache__/__init__.cpython-312.pyc
 create mode 100644 models/__pycache__/model.cpython-312.pyc
 create mode 100644 requirements.txt
 create mode 100644 tools/__pycache__/train.cpython-312.pyc


In [37]:
!git remote remove origin >/dev/null 2>&1 || true
!git remote add origin https://github.com/karaxstone7/mobilecdnet-ps10.git


In [None]:
# Keep credentials in memory for one hour only. The "store" helper would write the
# token unencrypted to a file on disk, where it outlives this step.
!git config credential.helper 'cache --timeout=3600'
