In [22]:
from pathlib import Path
import re
import shutil
import gzip
import sys, os
repo_root = os.path.abspath("..")
sys.path.append(repo_root)
from Helpers.extensions import I3Compressor


In [23]:
# Input directory: searched for uncompressed "*.i3" files.
SRC_DIR = Path("/project/def-nahee/kbas/POM_Response")
# Output directory: receives one "<name>.i3.gz" per source file.
DST_DIR = Path("/project/def-nahee/kbas/POM_Response_GZ")

# Create the output directory (and any missing parents); no-op if it exists.
DST_DIR.mkdir(exist_ok=True, parents=True)


In [24]:
dry_run = False  # set to True first; after checking the planned actions, switch to False


In [25]:
# Matches the first "batch_<digits>" occurrence; group 1 is the digit run.
BATCH_RE = re.compile(r"batch_(\d+)")


def batch_id_from_name(name: str):
    """Extract the batch id embedded in a file name.

    Args:
        name: File name (or any string) to search.

    Returns:
        The digits following the first ``batch_`` occurrence, as a string
        (leading zeros are kept), or ``None`` when the pattern is absent.
    """
    match = BATCH_RE.search(name)
    if match is None:
        return None
    return match.group(1)


In [26]:
# Index the source .i3 files by batch id. Files whose name carries no
# "batch_<digits>" token are left out of the map; if two files share an id,
# the one later in sorted order overwrites the earlier entry.
src_i3 = sorted(SRC_DIR.glob("*.i3"))
src_map = {}
for i3_path in src_i3:
    batch_id = batch_id_from_name(i3_path.name)
    if batch_id is None:
        continue
    src_map[batch_id] = i3_path


In [27]:
# Collect the batch ids that already have a "*.i3.gz" file in DST_DIR.
dst_gz = sorted(DST_DIR.glob("*.i3.gz"))
dst_ids = {batch_id_from_name(gz_path.name) for gz_path in dst_gz}
# Names without a parsable batch id yield None; those are not real ids.
dst_ids.discard(None)


In [28]:
# Batch ids present in the source directory but not yet compressed.
# Ids are strings, so the ordering here is lexicographic.
missing_ids = sorted(src_map.keys() - dst_ids)

# Summary of what was found and how much work remains.
print(f"SRC .i3 count: {len(src_i3)} (with parsed ids: {len(src_map)})")
print(f"DST .i3.gz count: {len(dst_gz)} (with parsed ids: {len(dst_ids)})")
print(f"Missing gz for ids: {len(missing_ids)}")
print("First 20 missing ids:", missing_ids[:20])



SRC .i3 count: 4996 (with parsed ids: 4996)
DST .i3.gz count: 4996 (with parsed ids: 4996)
Missing gz for ids: 0
First 20 missing ids: []


In [29]:
# Gzip every source file whose batch id has no .i3.gz counterpart yet.
#
# Output is first written to a "<name>.i3.gz.part" sibling and only renamed to
# the final "<name>.i3.gz" once the whole file has been compressed. Writing
# straight to the final name would leave a truncated archive behind if the
# cell is interrupted or fails mid-file, and the next run would treat that
# batch id as already done (the "*.i3.gz" glob would match it).
if not missing_ids:
    print("Nothing to do.")
else:
    for bid in missing_ids:
        f = src_map[bid]
        gz_target = DST_DIR / (f.name + ".gz")  # file.i3 -> file.i3.gz

        if gz_target.exists():
            # Normally we shouldn't get here (we already filtered by IDs),
            # but keep this as a safety check.
            print("SKIP (already exists):", gz_target)
            continue

        if dry_run:
            print("DRY RUN:", f, "->", gz_target)
            continue

        print("COMPRESSING:", f, "->", gz_target)
        # The ".part" suffix keeps the in-progress file out of "*.i3.gz" globs.
        tmp_target = gz_target.with_name(gz_target.name + ".part")
        try:
            with f.open("rb") as fin, gzip.open(tmp_target, "wb") as fout:
                shutil.copyfileobj(fin, fout)
            # Same-directory rename: the final name appears only when complete.
            tmp_target.replace(gz_target)
        finally:
            # After a successful rename the temp path no longer exists, so this
            # only fires on failure/interrupt and removes the partial output.
            if tmp_target.exists():
                tmp_target.unlink()

    if dry_run:
        print("\nDry run finished. Set dry_run=False to actually compress.")
    else:
        print("\nDone.")


Nothing to do.
