In [None]:
# Hücre 2

# Detect Google Colab and mount Google Drive when running there
import sys

IN_COLAB = 'google.colab' in sys.modules
if not IN_COLAB:
    # Outside Colab there is no Drive to mount; just report it.
    print('Not in Colab; skipping Drive mount.')
else:
    from google.colab import drive
    # force_remount=True is passed so the mount is redone even if one already exists.
    drive.mount('/content/drive', force_remount=True)
    print('Drive mounted at /content/drive')

In [None]:
# Hücre 3

# Clone (or update) the repo, switch the working directory to it, and
# resolve the path of the batch script.
import os, sys, subprocess

REPO_URL = 'https://github.com/mtalhabalci/omr_tmn.git'
WORKDIR = '/content/omr_tmn'
SCRIPT_PATH = None  # stays None when not running in Colab

if 'google.colab' in sys.modules:
    if not os.path.isdir(WORKDIR):
        print('Cloning repo...')
        subprocess.check_call(['git', 'clone', REPO_URL, WORKDIR])
    else:
        print('Repo exists; pulling latest...')
        # Run git directly with cwd= instead of building a shell command
        # string, and surface a failed pull instead of silently ignoring it.
        pull_rc = subprocess.call(['git', 'pull', '--rebase'], cwd=WORKDIR)
        if pull_rc != 0:
            print(f'WARNING: git pull failed (exit {pull_rc}); continuing with the existing checkout.')
    os.chdir(WORKDIR)
    print('Changed dir to', os.getcwd())
    # Prefer src/place_tmn_batch.py, then the repo root. If neither exists,
    # keep the src/ path so the report below shows exists=False.
    candidate = os.path.join(WORKDIR, 'src', 'place_tmn_batch.py')
    alt = os.path.join(WORKDIR, 'place_tmn_batch.py')
    SCRIPT_PATH = next((path for path in (candidate, alt) if os.path.isfile(path)), candidate)
    print({'SCRIPT_PATH': SCRIPT_PATH, 'exists': os.path.isfile(SCRIPT_PATH)})
else:
    print('Not in Colab; skipping clone and cd.')

In [None]:
# Hücre 4

# Configure the DS2 Complete source location and the TMN output location
import os

SRC_ROOT = '/content/drive/MyDrive/omr_dataset/dataset/ds2/ds2_complete'
OUT_ROOT = '/content/drive/MyDrive/omr_dataset/dataset/ds2/ds2_complete_tmn'
SRC_IMAGES = SRC_ROOT + '/images'
print({'SRC_ROOT': SRC_ROOT, 'OUT_ROOT': OUT_ROOT, 'SRC_IMAGES': SRC_IMAGES})

# Prepare the output folders (no error if they already exist)
for _subdir in ('images', 'segmentation', 'instance', 'logs'):
    os.makedirs(os.path.join(OUT_ROOT, _subdir), exist_ok=True)
INDEX_DIR = os.path.join(OUT_ROOT, 'index')
os.makedirs(INDEX_DIR, exist_ok=True)
print({'INDEX_DIR': INDEX_DIR})

In [None]:
# Hücre 5

# Build the image -> JSON shard mapping index (written out as CSV further below)
import os, glob, json
from collections import OrderedDict

# Look for the DeepScores "complete" shard names first.
train_shards, test_shards = (
    sorted(glob.glob(os.path.join(SRC_ROOT, pattern)))
    for pattern in ('deepscores-complete-*_train.json', 'deepscores-complete-*_test.json')
)
if not (train_shards or test_shards):
    # Nothing matched the strict names: fall back to any *train*/*test* JSON.
    train_shards, test_shards = (
        sorted(glob.glob(os.path.join(SRC_ROOT, pattern)))
        for pattern in ('*train*.json', '*test*.json')
    )
shards = train_shards + test_shards
print({'shard_count': len(shards)})

# Priority order: train shards first, then test shards
ordered_shards = list(shards)
image_to_json = OrderedDict()
duplicates, missing = [], []

def image_name_from_entry(im):
    """Return the image file name stored in a shard `images` entry.

    Reads the 'filename' key, falling back to 'file_name' when the first is
    missing or falsy. Returns None when `im` is not a dict or neither key
    holds a truthy value.
    """
    if not isinstance(im, dict):
        return None
    primary = im.get('filename')
    return primary if primary else im.get('file_name')

# Walk the shards in priority order; the first shard that lists an image wins.
for shard_path in ordered_shards:
    shard_name = os.path.basename(shard_path)
    try:
        with open(shard_path, 'r', encoding='utf-8') as shard_file:
            payload = json.load(shard_file)
        entries = payload.get('images') or []
        if isinstance(entries, dict):
            # 'images' may be a mapping; only its values are used.
            entries = list(entries.values())
        for entry in entries:
            name = image_name_from_entry(entry)
            if not name:
                continue
            if name in image_to_json:
                # Already indexed: record where it showed up again.
                duplicates.append((name, shard_name))
                continue
            if not os.path.isfile(os.path.join(SRC_IMAGES, name)):
                # Report the missing file, but still add it to the index.
                missing.append(name)
            image_to_json[name] = shard_name
    except Exception as exc:
        # Best effort: an unreadable or malformed shard is reported and the
        # loop moves on; entries indexed before the failure are kept.
        print('Read failed for', shard_path, exc)

# Write the image -> shard index and the duplicate/missing reports as CSV
import csv

MAX_DUPLICATE_ROWS = 1000  # cap on rows written to duplicates.csv
MAX_MISSING_ROWS = 5000    # cap on rows written to missing_images.csv


def _write_csv(path, header, rows):
    """Write `header` followed by `rows` to `path` as UTF-8 CSV.

    Uses csv.writer so that values containing commas, quotes or newlines are
    quoted instead of corrupting the file. Lines end with a plain newline, so
    ordinary names produce the same bytes as a hand-written
    f"{a},{b}" line. Returns the number of data rows written.
    """
    written = 0
    with open(path, 'w', encoding='utf-8', newline='') as fh:
        writer = csv.writer(fh, lineterminator='\n')
        writer.writerow(header)
        for row in rows:
            writer.writerow(row)
            written += 1
    return written


CSV_PATH = os.path.join(INDEX_DIR, 'image_to_json.csv')
_write_csv(CSV_PATH, ('image_name', 'json_shard'), image_to_json.items())
print({'index_csv': CSV_PATH, 'mapped_images': len(image_to_json)})

# Duplicate and missing-image reports (both capped)
DUP_PATH = os.path.join(INDEX_DIR, 'duplicates.csv')
dup_written = _write_csv(
    DUP_PATH, ('image_name', 'also_in_shard'), duplicates[:MAX_DUPLICATE_ROWS]
)
MIS_PATH = os.path.join(INDEX_DIR, 'missing_images.csv')
unique_missing = sorted(set(missing))
mis_written = _write_csv(
    MIS_PATH, ('image_name',), ([nm] for nm in unique_missing[:MAX_MISSING_ROWS])
)
# Report what was actually written next to the totals, since the files are capped.
print({
    'duplicates_written': dup_written,
    'duplicates_total': len(duplicates),
    'missing_written': mis_written,
    'missing_total': len(unique_missing),
})

In [None]:
# Hücre 6

# Shard bazlı küçük batch koşusu (test)
import sys, subprocess, os, glob, time

# Locate the batch script: a SCRIPT_PATH global (if one exists and is a
# string) is tried first, then the known fallback locations.
sp_candidates = []
_known_script = globals().get('SCRIPT_PATH')
if isinstance(_known_script, str):
    sp_candidates.append(_known_script)
sp_candidates.extend([
    os.path.join('/content/omr_tmn', 'src', 'place_tmn_batch.py'),
    os.path.join('/content/omr_tmn', 'place_tmn_batch.py'),
    'src/place_tmn_batch.py',
])

# First candidate that exists on disk wins; None when nothing is found.
SCRIPT_TO_RUN = None
for _candidate in sp_candidates:
    if os.path.isfile(_candidate):
        SCRIPT_TO_RUN = _candidate
        break
print({'SCRIPT_TO_RUN': SCRIPT_TO_RUN, 'exists': bool(SCRIPT_TO_RUN and os.path.isfile(SCRIPT_TO_RUN))})
if not SCRIPT_TO_RUN:
    print('ERROR: place_tmn_batch.py not found.')

# Collect the shards: strict DeepScores "complete" names first
train_shards, test_shards = (
    sorted(glob.glob(os.path.join(SRC_ROOT, pattern)))
    for pattern in ('deepscores-complete-*_train.json', 'deepscores-complete-*_test.json')
)
if not (train_shards or test_shards):
    # Nothing matched the strict names: fall back to any *train*/*test* JSON.
    train_shards, test_shards = (
        sorted(glob.glob(os.path.join(SRC_ROOT, pattern)))
        for pattern in ('*train*.json', '*test*.json')
    )
shards = train_shards + test_shards
sample_shards = [os.path.basename(x) for x in shards[:5]]
print({'shard_count': len(shards), 'sample_shards': sample_shards})

# Log directory (created if it does not exist yet)
LOG_DIR = os.path.join(OUT_ROOT, 'logs')
os.makedirs(LOG_DIR, exist_ok=True)

# Smoke test: run only the first shard with a small image limit (50).
import queue
import shutil
import threading

# Both limits can be overridden through environment variables.
TIMEOUT_PER_SHARD = int(os.environ.get('TMN_TIMEOUT_SEC', '3600'))
HEARTBEAT_SEC = int(os.environ.get('TMN_HEARTBEAT_SEC', '30'))


def _pump_lines(stream, sink):
    """Forward every line read from `stream` into the queue `sink`.

    A final None is always enqueued so the consumer can tell that the stream
    reached EOF (or that reading failed).
    """
    try:
        for raw in stream:
            sink.put(raw)
    finally:
        sink.put(None)


def _run_with_heartbeat(cmd, label, started, timeout_sec, heartbeat_sec, tail, tail_keep=200):
    """Run `cmd`, echoing its merged stdout/stderr, with heartbeat and timeout.

    The child's output is read on a helper thread and handed over through a
    queue. A blocking readline() in the main loop would only let the
    heartbeat and timeout checks run when the child prints a line, so a
    silent, hung child would never be terminated.

    Args:
        cmd: argument list passed to subprocess.Popen.
        label: name used in heartbeat/timeout messages.
        started: time.time() value the elapsed time is measured from.
        timeout_sec: terminate the child once this many seconds have elapsed.
        heartbeat_sec: minimum spacing between heartbeat messages.
        tail: list that receives the non-empty output lines; it is trimmed in
            place to the last `tail_keep` entries.
        tail_keep: number of output lines to retain in `tail` (must be > 0).

    Returns:
        The child's exit code, or 124 when it was stopped by the timeout.
    """
    proc = subprocess.Popen(
        cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True, bufsize=1
    )
    pending = queue.Queue()
    reader = threading.Thread(target=_pump_lines, args=(proc.stdout, pending), daemon=True)
    reader.start()
    last_beat = time.time()
    timed_out = False
    try:
        while True:
            try:
                item = pending.get(timeout=0.5)
            except queue.Empty:
                item = ''  # no output yet; still run the time checks below
            if item is None:
                break  # EOF on the child's output
            text = item.rstrip('\n')
            if text:
                print(text)
                tail.append(text)
                if len(tail) > tail_keep:
                    del tail[:-tail_keep]
            now = time.time()
            if now - last_beat >= heartbeat_sec:
                elapsed = int(now - started)
                print(f"[heartbeat] {label} running {elapsed}s...")
                last_beat = now
            if now - started > timeout_sec:
                print(f"Timeout after {timeout_sec}s; terminating shard {label}")
                timed_out = True
                break
        if timed_out:
            # Ask politely first; escalate to kill if the child does not exit.
            proc.terminate()
            try:
                proc.wait(timeout=10)
            except subprocess.TimeoutExpired:
                proc.kill()
                proc.wait()
            return 124
        return proc.wait()
    except BaseException:
        # Includes KeyboardInterrupt: do not leave the child running.
        if proc.poll() is None:
            proc.kill()
        raise
    finally:
        reader.join(timeout=5)
        # Only close the pipe once the reader thread is done with it.
        if not reader.is_alive():
            proc.stdout.close()


def count_new(dir_path, suffix, since=None):
    """Count files in `dir_path` modified at or after `since`.

    Args:
        dir_path: directory to scan (non-recursive).
        suffix: lowercase file-name suffix to require; a falsy value matches
            every entry.
        since: timestamp to compare st_mtime against. When omitted, the
            module-level `t0` is used if it exists.

    Returns:
        The number of matching entries; 0 when the directory cannot be listed
        or no reference timestamp is available.
    """
    if since is None:
        since = globals().get('t0')
    if since is None:
        return 0
    try:
        names = os.listdir(dir_path)
    except OSError:
        return 0
    cnt = 0
    for nm in names:
        if suffix and not nm.lower().endswith(suffix):
            continue
        try:
            if os.stat(os.path.join(dir_path, nm)).st_mtime >= since:
                cnt += 1
        except OSError:
            pass  # entry vanished or is unreadable; skip it
    return cnt


shards_to_run = shards[:1]
if not shards_to_run:
    print('No shards found; nothing to run.')

for jp in shards_to_run:
    base = os.path.basename(jp)
    done_marker = os.path.join(LOG_DIR, base + '.done')
    t0 = time.time()
    if SCRIPT_TO_RUN:
        cmd = [
            sys.executable, '-u', SCRIPT_TO_RUN,
            '--images-dir', SRC_IMAGES,
            '--out-root', OUT_ROOT,
            '--json-path', jp,
            '--checkpoint', '200',
            '--json-out-mode', 'per-shard',
            '--symbols-dir', '/content/omr_tmn/tmn_symbols_png',
            '--force',
            '--slot-w', '12',
            '--slot-h', '36',
            '--limit', '50'
        ]
    else:
        # Script not found: run a stub that fails with exit code 2 so the
        # shard is still logged as failed.
        cmd = [sys.executable, '-u', '-c', "import sys; print('script missing'); sys.exit(2)"]

    print('Running shard:', base)
    print('Command:', ' '.join(cmd))

    combined_tail = []
    try:
        ret = _run_with_heartbeat(
            cmd, base, t0, TIMEOUT_PER_SHARD, HEARTBEAT_SEC, combined_tail
        )
    except Exception as e:
        print('Runner failed:', e)
        ret = 2

    # Move any JSON files found under OUT_ROOT/jsonlar up to OUT_ROOT
    moved = []
    jsonlar_dir = os.path.join(OUT_ROOT, 'jsonlar')
    if os.path.isdir(jsonlar_dir):
        for src_json in glob.glob(os.path.join(jsonlar_dir, '*.json')):
            dest = os.path.join(OUT_ROOT, os.path.basename(src_json))
            try:
                os.replace(src_json, dest)
                moved.append(os.path.basename(src_json))
            except OSError as e:
                print('Move failed', src_json, '->', dest, e)
        try:
            if not os.listdir(jsonlar_dir):
                os.rmdir(jsonlar_dir)
        except OSError:
            pass  # best-effort cleanup of the emptied folder

    # Count the files produced (modified) since this shard started
    new_imgs = count_new(os.path.join(OUT_ROOT, 'images'), '.png', since=t0)
    new_segs = count_new(os.path.join(OUT_ROOT, 'segmentation'), '_seg.png', since=t0)
    new_insts = count_new(os.path.join(OUT_ROOT, 'instance'), '_inst.png', since=t0)

    dur = round(time.time() - t0, 1)
    try:
        with open(os.path.join(LOG_DIR, 'run.csv'), 'a', encoding='utf-8') as lf:
            lf.write(f"{base},{ret},{dur},{len(moved)}\n")
    except Exception as e:
        print('Log write failed:', e)

    if ret == 0:
        # Mark the shard as finished only on a clean exit.
        try:
            with open(done_marker, 'w', encoding='utf-8') as dm:
                dm.write('ok')
        except Exception as e:
            print('Done marker write failed:', e)

    tail_snip = combined_tail[-20:]
    if tail_snip:
        print('--- tail (last 20 lines) ---')
        for ln in tail_snip:
            print(ln)
        print('--- end tail ---')

    print({'shard': base, 'exit': ret, 'sec': dur, 'moved_jsons': moved[:3], 'new_outputs': {'images': new_imgs, 'seg': new_segs, 'inst': new_insts}})

In [None]:
# Hücre 7

# Count the generated outputs and list the per-shard JSON files in OUT_ROOT.
# The first assignment was truncated in this file; it is reconstructed to
# mirror the two parallel counts below, since img_count is printed at the end.
import glob, os
img_count = len(glob.glob(f"{OUT_ROOT}/images/*.png"))
seg_count = len(glob.glob(f"{OUT_ROOT}/segmentation/*_seg.png"))
inst_count = len(glob.glob(f"{OUT_ROOT}/instance/*_inst.png"))
json_files = sorted(glob.glob(f"{OUT_ROOT}/*_train.json") + glob.glob(f"{OUT_ROOT}/*_test.json"))
print({'images': img_count, 'segmentation': seg_count, 'instance': inst_count, 'json_total': len(json_files), 'json_sample': [os.path.basename(x) for x in json_files[:5]]})

In [None]:
# Hücre 8

# Summarize the logs and overall progress
import os, glob

LOG_DIR = os.path.join(OUT_ROOT, 'logs')
run_csv = os.path.join(LOG_DIR, 'run.csv')
done_markers = sorted(glob.glob(os.path.join(LOG_DIR, '*.done')))
done_samples = [os.path.basename(marker) for marker in done_markers[:5]]
print({'log_dir_exists': os.path.isdir(LOG_DIR), 'done_markers': len(done_markers), 'done_samples': done_samples})

if not os.path.isfile(run_csv):
    print('run.csv yok')
else:
    with open(run_csv, 'r', encoding='utf-8') as f:
        # Keep only non-blank lines, stripped of surrounding whitespace.
        lines = [stripped for stripped in (raw.strip() for raw in f) if stripped]
    print('run.csv last 10:')
    for ln in lines[-10:]:
        print('  ', ln)

# Echo the runner settings as read from the environment (with their defaults)
runner_env = {
    'TMN_TIMEOUT_SEC': os.environ.get('TMN_TIMEOUT_SEC', '3600'),
    'TMN_HEARTBEAT_SEC': os.environ.get('TMN_HEARTBEAT_SEC', '30'),
}
print(runner_env)