## Hücre 1

# Generate DS2 Complete TMN (Google Colab)

This notebook generates only the DS2 Complete TMN outputs and writes them directly to Google Drive. The code is cloned from GitHub into `/content/omr_tmn`, and the dataset lives on Google Drive at `SRC_ROOT`. TMN placement runs shard by shard, and the output file counts are verified at the end. There is no training, no CUDA usage, and no Mask R-CNN code.


In [None]:
# Hücre 2

# Connect to Google Drive
# `sys` is imported here so this cell runs on a fresh kernel; it must not rely on a
# later cell having imported it already.
import sys

# True only when the google.colab package is already loaded in this interpreter.
IN_COLAB = 'google.colab' in sys.modules
if IN_COLAB:
    # Imported lazily: the module is only available inside Colab.
    from google.colab import drive
    drive.mount('/content/drive')
    print('Drive mounted at /content/drive')
else:
    print('Not in Colab; skipping Drive mount.')

In [None]:
# Hücre 3

# Clone repo from GitHub (optional if already in Drive)
# In Colab: clone REPO_URL into WORKDIR on first run, otherwise pull the latest
# commits, then make WORKDIR the current working directory.
import os, sys, subprocess, pathlib
REPO_URL = 'https://github.com/mtalhabalci/omr_tmn.git'
WORKDIR = '/content/omr_tmn'
if 'google.colab' in sys.modules:
    if not os.path.isdir(WORKDIR):
        print('Cloning repo...')
        # check_call raises CalledProcessError if the clone fails, stopping the cell.
        subprocess.check_call(['git', 'clone', REPO_URL, WORKDIR])
    else:
        print('Repo exists; pulling latest...')
        # `git -C` runs the pull inside WORKDIR without building a shell command string.
        pull_rc = subprocess.call(['git', '-C', WORKDIR, 'pull', '--rebase'])
        if pull_rc != 0:
            # Best effort: continue with the existing checkout, but make the failure visible.
            print(f'WARNING: git pull failed (exit {pull_rc}); using existing checkout.')
    # Plain-Python equivalent of the %cd magic, so the cell is also valid outside IPython.
    os.chdir(WORKDIR)
    print(os.getcwd())
else:
    print('Not in Colab; skipping clone and cd.')

In [None]:
# Hücre 4

# Drive locations: DS2 Complete is read from SRC_ROOT, TMN results are written to OUT_ROOT.
SRC_ROOT = '/content/drive/MyDrive/omr_dataset/dataset/ds2/ds2_complete'
OUT_ROOT = '/content/drive/MyDrive/omr_dataset/dataset/ds2/ds2_complete_tmn'
# Source images sit in the "images" folder directly under SRC_ROOT.
SRC_IMAGES = SRC_ROOT + '/images'
# Echo the resolved paths so they can be checked before the long-running cell.
print(dict(SRC_ROOT=SRC_ROOT, OUT_ROOT=OUT_ROOT, SRC_IMAGES=SRC_IMAGES))

In [None]:
# Hücre 5

# Run TMN placement on DS2 Complete: shard-wise, resume-safe, JSONs at OUT_ROOT root.
# Uses SRC_ROOT, OUT_ROOT and SRC_IMAGES, which must already be defined in the session.
# The placer script path is relative, so the current working directory must contain
# src/place_tmn_batch.py.
import sys, subprocess, os, glob, time

# Ensure output structure exists (mirror source)
for sub in ('images', 'segmentation', 'instance'):
    os.makedirs(os.path.join(OUT_ROOT, sub), exist_ok=True)
LOG_DIR = os.path.join(OUT_ROOT, 'logs')
os.makedirs(LOG_DIR, exist_ok=True)

# Collect JSON shards at SRC_ROOT root (not under jsonlar); train shards run before test shards.
train_shards = sorted(glob.glob(os.path.join(SRC_ROOT, 'deepscores-complete-*_train.json')))
test_shards = sorted(glob.glob(os.path.join(SRC_ROOT, 'deepscores-complete-*_test.json')))
shards = train_shards + test_shards
print({'shard_count': len(shards), 'sample_shards': [os.path.basename(x) for x in shards[:5]]})
if not shards:
    # Without this the loop below silently does nothing.
    print('WARNING: no shard JSONs found under', SRC_ROOT, '- check that Drive is mounted and SRC_ROOT is correct.')

# Shards whose placer run exited non-zero; reported once after the loop.
failed_shards = []

# Process each shard; skip if .done exists; within shard, skip existing files via --from-fs-missing
for jp in shards:
    base = os.path.basename(jp)
    done_marker = os.path.join(LOG_DIR, base + '.done')
    if os.path.exists(done_marker):
        print('Skipping shard, already done:', base)
        continue
    t0 = time.time()
    cmd = [
        sys.executable, 'src/place_tmn_batch.py',
        '--images-dir', SRC_IMAGES,
        '--out-root', OUT_ROOT,
        '--json-file', jp,
        '--from-fs-missing',
        '--checkpoint', '200',
        '--json-out-mode', 'per-shard'
    ]
    print('Running shard:', base)
    print('Command:', ' '.join(cmd))
    # subprocess.call returns the exit code instead of raising, so one failing
    # shard does not abort the remaining shards.
    ret = subprocess.call(cmd)

    # If the placer wrote JSONs under OUT_ROOT/jsonlar, move them to OUT_ROOT root
    moved = []
    jsonlar_dir = os.path.join(OUT_ROOT, 'jsonlar')
    if os.path.isdir(jsonlar_dir):
        for f in glob.glob(os.path.join(jsonlar_dir, '*.json')):
            dest = os.path.join(OUT_ROOT, os.path.basename(f))
            try:
                # os.replace overwrites an existing destination file.
                os.replace(f, dest)
                moved.append(os.path.basename(f))
            except OSError as e:
                print('Move failed', f, '->', dest, e)
        # Remove empty jsonlar folder if now empty
        try:
            if not os.listdir(jsonlar_dir):
                os.rmdir(jsonlar_dir)
        except OSError as e:
            # Cleanup is best effort, but report the failure instead of hiding it.
            print('Could not remove jsonlar folder:', jsonlar_dir, e)

    dur = round(time.time() - t0, 1)
    # Append per-shard log line: shard name, exit code, seconds, number of moved JSONs
    try:
        with open(os.path.join(LOG_DIR, 'run.csv'), 'a', encoding='utf-8') as lf:
            lf.write(f"{base},{ret},{dur},{len(moved)}\n")
    except (OSError, UnicodeError) as e:
        print('Log write failed:', e)

    if ret == 0:
        # The marker is what makes a later run skip this shard.
        try:
            with open(done_marker, 'w', encoding='utf-8') as dm:
                dm.write('ok')
        except OSError as e:
            print('Done marker write failed:', e)
    else:
        failed_shards.append(base)

    print({'shard': base, 'exit': ret, 'sec': dur, 'moved_jsons': moved[:3]})

if failed_shards:
    # No .done marker was written for these, so re-running this cell retries them.
    print('Shards with non-zero exit (re-run this cell to retry):', failed_shards)

In [None]:
# Hücre 6

# Verify outputs for DS2 Complete TMN
# Counts the generated files under OUT_ROOT (which must already be defined in the
# session) and checks that every image has matching segmentation and instance masks.
import os, glob, json

# List each output folder once; the same listings feed both the counts and the
# basename consistency check below.
image_paths = glob.glob(f"{OUT_ROOT}/images/*.png")
seg_paths = glob.glob(f"{OUT_ROOT}/segmentation/*_seg.png")
inst_paths = glob.glob(f"{OUT_ROOT}/instance/*_inst.png")
img_count = len(image_paths)
seg_count = len(seg_paths)
inst_count = len(inst_paths)
# JSONs are expected at OUT_ROOT root (no jsonlar folder)
json_files = sorted(glob.glob(f"{OUT_ROOT}/*_train.json") + glob.glob(f"{OUT_ROOT}/*_test.json"))
print({'images': img_count, 'segmentation': seg_count, 'instance': inst_count, 'jsons_sample': [os.path.basename(x) for x in json_files[:5]], 'json_total': len(json_files)})


def basename_noext(p):
    """Return the file name of *p* without its directory and extension."""
    return os.path.splitext(os.path.basename(p))[0]


def _basename_without_suffix(p, suffix):
    """Return the file name of *p* with a trailing *suffix* removed, if present."""
    name = os.path.basename(p)
    # Strip only the trailing suffix; str.replace would also remove earlier occurrences.
    return name[:-len(suffix)] if name.endswith(suffix) else name


# Optional: consistency by basename
base_images = {basename_noext(p) for p in image_paths}
base_segs = {_basename_without_suffix(p, '_seg.png') for p in seg_paths}
base_insts = {_basename_without_suffix(p, '_inst.png') for p in inst_paths}

if base_images or base_segs or base_insts:
    missing_seg = sorted(base_images - base_segs)
    missing_inst = sorted(base_images - base_insts)
    # Masks with no corresponding image also count as a mismatch.
    extra_seg = sorted(base_segs - base_images)
    extra_inst = sorted(base_insts - base_images)
    # Compare the sets in both directions so "equal" means what the key says.
    print({'images_vs_seg_equal': base_images == base_segs, 'images_vs_inst_equal': base_images == base_insts})
    if missing_seg: print('Missing segmentation (first 5):', missing_seg[:5])
    if missing_inst: print('Missing instance (first 5):', missing_inst[:5])
    if extra_seg: print('Segmentation without image (first 5):', extra_seg[:5])
    if extra_inst: print('Instance without image (first 5):', extra_inst[:5])