# Train Mask R-CNN on DS2 Dense TMN (Google Colab)
This notebook sets up a Colab GPU runtime, installs dependencies, mounts Google Drive, and provides scaffolding for training Mask R-CNN on the DS2 Dense TMN dataset. Adjust paths to match your Drive and dataset locations.
# Colab Runtime and GPU Check
Detects Colab and verifies GPU availability.
# Install Dependencies
Checks that torch/torchvision and the supporting utilities are importable, and installs them with pip only if they are not.
# Connect to Google Drive
Mounts Drive and sets base paths.
# Environment Variables and Paths
Configures directories and ensures they exist.
# Download or Upload Data
Options to upload a dataset zip or place it on Drive, extract it, and check that the expected dataset folders exist.
# Quick Benchmark: CPU vs GPU
Simple timing to confirm GPU acceleration.
# Save Outputs to Drive
Example of writing artifacts to Drive.

In [None]:
# Colab Runtime and GPU Check
import os, sys, platform, time, json

# The google.colab package is only present in sys.modules inside a Colab runtime.
IN_COLAB = 'google.colab' in sys.modules
print(f"In Colab: {IN_COLAB}")

# Bind `device` before the try block so the name always exists after this
# cell runs, even when torch is not importable yet (it stays None then).
device = None
try:
    import torch
    print(f"Torch version: {torch.__version__}")
    cuda_ok = torch.cuda.is_available()
    print(f"CUDA available: {cuda_ok}")
    if cuda_ok:
        device = torch.device('cuda')
        current_idx = torch.cuda.current_device()
        print(f"CUDA device count: {torch.cuda.device_count()}")
        print(f"Current device: {current_idx}")
        # Report the name of the device that is actually current, not always index 0.
        print(f"Device name: {torch.cuda.get_device_name(current_idx)}")
    else:
        device = torch.device('cpu')
except Exception as e:
    # Best-effort probe: the install cell below is expected to repair a missing torch.
    print('Torch import failed, will install below.', e)

In [None]:
# Install Dependencies
import sys
def pip_install(pkgs):
    """Install or upgrade packages with pip, using the kernel's interpreter.

    Args:
        pkgs: A single requirement string, or any iterable of requirement
            strings (list, tuple, generator, ...).

    Raises:
        subprocess.CalledProcessError: if the pip process exits non-zero.
    """
    import subprocess
    # Normalise the input: a bare string is one requirement, not a sequence
    # of characters, and any iterable is materialised into a list.
    requirements = [pkgs] if isinstance(pkgs, str) else list(pkgs)
    if not requirements:
        # `pip install -U` with no requirement is an error; nothing to do.
        print('No packages requested; skipping pip.')
        return
    # sys.executable targets the same environment the kernel is running in.
    cmd = [sys.executable, '-m', 'pip', 'install', '-U'] + requirements
    print('Running:', ' '.join(cmd))
    subprocess.check_call(cmd)
# Probe each dependency individually and install only what is missing, so a
# single absent package (e.g. pycocotools) does not trigger `pip install -U`
# on an already-working torch/torchvision installation.
import importlib

# (importable module name, pip distribution name)
_REQUIRED = [
    ('torch', 'torch'),
    ('torchvision', 'torchvision'),
    ('numpy', 'numpy'),
    ('pandas', 'pandas'),
    ('matplotlib', 'matplotlib'),
    ('PIL', 'pillow'),
    ('pycocotools', 'pycocotools'),
    ('tqdm', 'tqdm'),
]
# These are installed together whenever any one of them is missing, so that
# pip resolves them in a single invocation.
_TORCH_FAMILY = ['torch', 'torchvision', 'torchaudio']

missing = []
for module_name, dist_name in _REQUIRED:
    try:
        importlib.import_module(module_name)
    except Exception as exc:
        # Broad on purpose: a broken install can fail with errors other than ImportError.
        print(f'{module_name} is not importable ({exc!r}); will install {dist_name}.')
        missing.append(dist_name)

if any(name in _TORCH_FAMILY for name in missing):
    missing = _TORCH_FAMILY + [name for name in missing if name not in _TORCH_FAMILY]

if missing:
    pip_install(missing)
    # Let the import system see packages that were installed after startup.
    importlib.invalidate_caches()
else:
    print('Dependencies already installed.')

import torch, torchvision
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import PIL
import pycocotools
print('torch:', torch.__version__, 'torchvision:', torchvision.__version__)

In [None]:
# Connect to Google Drive
# Pick the project base directory: a Drive folder on Colab, a folder under
# the user's home directory everywhere else.
IN_COLAB = 'google.colab' in sys.modules
if not IN_COLAB:
    import os
    BASE_DIR = os.path.expanduser('~/omr_copilot_colab')
else:
    from google.colab import drive
    drive.mount('/content/drive')
    BASE_DIR = '/content/drive/MyDrive/omr_copilot'
print('BASE_DIR:', BASE_DIR)

In [None]:
# Environment Variables and Paths
from pathlib import Path
import os

# Dataset and output locations live under the shared base directory.
DATA_DIR = Path(BASE_DIR).joinpath('datasets', 'ds2_dense_tmn')
OUTPUT_DIR = Path(BASE_DIR).joinpath('outputs', 'mask_rcnn')

# Create both directories (and any missing parents); no-op if they exist.
for directory in (DATA_DIR, OUTPUT_DIR):
    directory.mkdir(parents=True, exist_ok=True)

# Export the locations as environment variables for child processes.
os.environ.update({
    'OMR_BASE_DIR': str(BASE_DIR),
    'OMR_DATA_DIR': str(DATA_DIR),
    'OMR_OUTPUT_DIR': str(OUTPUT_DIR),
})

print('DATA_DIR:', DATA_DIR)
print('OUTPUT_DIR:', OUTPUT_DIR)

In [None]:
# Download or Upload Data
IN_COLAB = 'google.colab' in sys.modules
if not IN_COLAB:
    print('Running outside Colab; ensure dataset exists at', DATA_DIR)
else:
    print('Option A: Upload via files.upload()')
    from google.colab import files
    # Uncomment to use manual upload
    # uploaded = files.upload()
    print('Option B: Use Drive: Place zip at', DATA_DIR)

from pathlib import Path
ZIP_PATH = DATA_DIR.joinpath('ds2_dense_tmn.zip')
EXTRACT_DIR = DATA_DIR

# Unpack the archive in place when it is present; otherwise assume the
# dataset folders were put there by other means.
if ZIP_PATH.exists():
    import zipfile
    with zipfile.ZipFile(ZIP_PATH, 'r') as archive:
        archive.extractall(EXTRACT_DIR)
        print('Extracted to', EXTRACT_DIR)

# Verify structure: report whether each expected dataset folder exists.
expected_dirs = ['images', 'segmentation', 'instance', 'jsonlar']
for folder_name in expected_dirs:
    folder_path = EXTRACT_DIR / folder_name
    print(folder_name, 'exists:', folder_path.exists(), 'path:', folder_path)

In [None]:
# Quick Benchmark: CPU vs GPU
import torch, time
x = torch.randn((2048, 2048))
def bench(device, repeats=5):
    """Time an element-wise multiply of the module-level tensor `x` on `device`.

    The tensor is moved to the device and one untimed warm-up multiply is run
    before the clock starts, so the measurement excludes the host-to-device
    copy and one-time CUDA initialisation. CUDA kernels launch asynchronously,
    so the device is synchronised before starting and before stopping the timer.

    Args:
        device: torch.device to run on.
        repeats: number of timed multiplies to average over (must be >= 1).

    Returns:
        Average wall-clock seconds per multiply, as a float.

    Raises:
        ValueError: if `repeats` is smaller than 1.
    """
    if repeats < 1:
        raise ValueError('repeats must be >= 1')
    on_cuda = device.type == 'cuda'
    operand = x.to(device)  # single transfer, outside the timed region
    _ = operand * operand   # warm-up
    if on_cuda:
        torch.cuda.synchronize()
    t0 = time.perf_counter()
    for _ in range(repeats):
        y = operand * operand
    if on_cuda:
        torch.cuda.synchronize()
    return (time.perf_counter() - t0) / repeats
cpu_t = bench(torch.device('cpu'))
gpu_t = None  # stays None when no CUDA device is available
if torch.cuda.is_available():
    gpu_t = bench(torch.device('cuda'))
print({'cpu_sec': cpu_t, 'gpu_sec': gpu_t})
if torch.cuda.is_available():
    print('GPU mem (allocated):', torch.cuda.memory_allocated())

In [None]:
# Save Outputs to Drive
import pandas as pd
from pathlib import Path
OUT = Path(OUTPUT_DIR)
OUT.mkdir(parents=True, exist_ok=True)
# Persist the timings measured by the benchmark cell (cpu_t / gpu_t) instead
# of placeholder zeros. gpu_t is None when no GPU was available, which is
# written as an empty value in the CSV.
df = pd.DataFrame({'metric': ['cpu_sec','gpu_sec'], 'value': [cpu_t, gpu_t]})
csv_path = OUT / 'benchmark.csv'
df.to_csv(csv_path, index=False)
print('Wrote', csv_path, 'exists:', csv_path.exists())

In [None]:
# Clone repo from GitHub (optional if already in Drive)
import os, sys, subprocess, pathlib
REPO_URL = 'https://github.com/mtalhabalci/omr_tmn.git'
WORKDIR = '/content/omr_tmn'
if 'google.colab' in sys.modules:
    if not os.path.isdir(WORKDIR):
        print('Cloning repo...')
        # check_call: a failed clone must stop the notebook, nothing below can work without it.
        subprocess.check_call(['git', 'clone', REPO_URL, WORKDIR])
    else:
        print('Repo exists; pulling latest...')
        # `git -C <dir>` runs git inside WORKDIR without a shell, so the path
        # is passed as a plain argument instead of being spliced into a command string.
        pull_rc = subprocess.call(['git', '-C', WORKDIR, 'pull', '--rebase'])
        if pull_rc != 0:
            # Best-effort update: keep going with the existing checkout, but say so.
            print(f'WARNING: git pull failed (exit code {pull_rc}); using existing checkout.')
    # Later cells invoke scripts by paths relative to the repo root.
    os.chdir(WORKDIR)
    print(os.getcwd())
else:
    print('Not in Colab; skipping clone and cd.')

In [None]:
# Configure Drive paths for DS2 Dense (source) and DS2 Dense TMN (output)
SRC_ROOT = '/content/drive/MyDrive/omr_dataset/dataset/ds2/ds2_dense'
OUT_ROOT = '/content/drive/MyDrive/omr_dataset/dataset/ds2/ds2_dense_tmn'
# Glob pattern for the annotation JSON files located directly under the source root.
JSON_GLOB = '/'.join((SRC_ROOT, 'deepscores_*.json'))  # matches train+test
# Images folder under the source root.
SRC_IMAGES = '/'.join((SRC_ROOT, 'images'))
print(dict(SRC_ROOT=SRC_ROOT, OUT_ROOT=OUT_ROOT, JSON_GLOB=JSON_GLOB))

In [None]:
# Run TMN placement: from-fs-missing, per-shard JSON, checkpoint flush
import sys, subprocess, os

# Start with the interpreter and the script path (relative to the current
# working directory), then append the options in a fixed order.
cmd = [sys.executable, 'src/place_tmn_batch.py']
for flag, value in (
    ('--images-dir', SRC_IMAGES),
    ('--out-root', OUT_ROOT),
    ('--json-glob', JSON_GLOB),
):
    cmd.extend((flag, value))
cmd.append('--from-fs-missing')
cmd.extend(('--checkpoint', '100'))
cmd.extend(('--json-out-mode', 'per-shard'))
cmd.extend(('--limit', '0'))

print('Running:', ' '.join(cmd))
# The exit code is reported, not raised, so a failure does not abort the cell.
ret = subprocess.call(cmd)
print('Exit code:', ret)

In [None]:
# Verify outputs
import os, glob, json

# Count the files written to each output folder.
img_count, seg_count, inst_count = (
    len(glob.glob(pattern))
    for pattern in (
        f"{OUT_ROOT}/images/*.png",
        f"{OUT_ROOT}/segmentation/*_seg.png",
        f"{OUT_ROOT}/instance/*_inst.png",
    )
)

# Sorted list of JSON files; only the first five are shown in the summary.
json_files = glob.glob(f"{OUT_ROOT}/jsonlar/*.json")
json_files.sort()

summary = {
    'images': img_count,
    'segmentation': seg_count,
    'instance': inst_count,
    'jsons': json_files[:5],
}
print(summary)