In [1]:
import os, glob, time
import pandas as pd

from icecube import dataio, icetray



In [2]:
def count_physics_frames(path: str, max_frames: int | None = None) -> tuple[int, int, str]:
    """
    Returns: (n_physics, n_total_frames_read, error_msg)
    max_frames: None -> full scan, or set e.g. 20000 to cap for safety.
    """
    n_phys = 0
    n_total = 0
    err = ""

    try:
        f = dataio.I3File(path)
        # if f.more() is False here => file empty/unreadable (as you saw)
        while f.more():
            fr = f.pop_frame()
            n_total += 1
            if fr.Stop == icetray.I3Frame.Physics:
                n_phys += 1

            if max_frames is not None and n_total >= max_frames:
                break
    except Exception as e:
        err = repr(e)

    return n_phys, n_total, err

def _find_first_physics_frame(path: str):
    """Read frames from `path` only until the first Physics frame is seen.

    Returns ``(n_physics, n_frames_read, error_msg)`` where ``n_physics`` is
    1 if a Physics frame was found and 0 otherwise, and ``error_msg`` is ``""``
    or the ``repr()`` of the exception raised while reading.
    """
    n_phys, n_total, err = 0, 0, ""
    f = None
    try:
        f = dataio.I3File(path)
        while f.more():
            fr = f.pop_frame()
            n_total += 1
            if fr.Stop == icetray.I3Frame.Physics:
                n_phys = 1
                break
    except Exception as e:
        err = repr(e)
    finally:
        # Always release the handle; the scan may visit thousands of files.
        if f is not None:
            try:
                f.close()
            except Exception as e:
                err = err or repr(e)
    return n_phys, n_total, err


def scan_folder_for_empty_i3(input_dir: str, pattern: str = "**/*.i3.gz", full_scan: bool = False):
    """Scan a folder of I3 files and report which ones contain Physics frames.

    Args:
        input_dir: Directory the glob pattern is joined onto.
        pattern: Glob pattern (evaluated with ``recursive=True``).
        full_scan: If True, count every Physics frame in each file via
            ``count_physics_frames``. If False (quick mode), stop reading a
            file as soon as its first Physics frame is found, so
            ``n_physics`` is only ever 0 or 1.

    Returns:
        A ``pandas.DataFrame`` with one row per matched file and the columns
        ``file, basename, size_bytes, n_physics, n_frames_read, has_physics,
        error, sec``. The columns are present even when no file matched.
        A short summary is also printed.
    """
    columns = [
        "file",
        "basename",
        "size_bytes",
        "n_physics",
        "n_frames_read",
        "has_physics",
        "error",
        "sec",
    ]
    files = sorted(glob.glob(os.path.join(input_dir, pattern), recursive=True))
    rows = []

    for p in files:
        t0 = time.time()
        # The file may vanish (or be a broken link) between glob and stat.
        try:
            size = os.path.getsize(p)
        except OSError:
            size = None

        if full_scan:
            n_phys, n_total, err = count_physics_frames(p, max_frames=None)
        else:
            # Quick mode: read frames but stop at the first Physics frame.
            n_phys, n_total, err = _find_first_physics_frame(p)

        rows.append({
            "file": p,
            "basename": os.path.basename(p),
            "size_bytes": size,
            "n_physics": n_phys,
            "n_frames_read": n_total,
            "has_physics": (n_phys > 0),
            "error": err,
            "sec": round(time.time() - t0, 3),
        })

    # Explicit columns keep the summary below (and callers) working when
    # no file matched; a bare DataFrame([]) would have no columns at all.
    df = pd.DataFrame(rows, columns=columns)

    n_with_physics = int(df["has_physics"].astype(bool).sum())
    sizes = pd.to_numeric(df["size_bytes"], errors="coerce").fillna(0)

    print("Total files:", len(df))
    print("Files with NO physics:", len(df) - n_with_physics)
    print("Files with physics:", n_with_physics)
    print("Zero/very small files (<1KB):", int((sizes < 1024).sum()))

    return df



In [3]:
# Folder holding the gzipped I3 files to inspect.
INPUT_DIR = "/project/def-nahee/kbas/POM_Response_GZ"

# Quick mode: each file is read only until its first Physics frame.
df_quick = scan_folder_for_empty_i3(
    INPUT_DIR,
    pattern="*.i3.gz",
    full_scan=False,
)


Total files: 4420
Files with NO physics: 44
Files with physics: 4376
Zero/very small files (<1KB): 22


In [4]:
# Files without Physics frames sort first (False < True), smallest files on top.
df_quick.sort_values(by=["has_physics", "size_bytes"]).head(30)


Unnamed: 0,file,basename,size_bytes,n_physics,n_frames_read,has_physics,error,sec
1794,/project/def-nahee/kbas/POM_Response_GZ/pom_re...,pom_response_batch_2643.i3.gz,0,0,0,False,,0.0
3248,/project/def-nahee/kbas/POM_Response_GZ/pom_re...,pom_response_batch_3971.i3.gz,0,0,0,False,,0.0
1757,/project/def-nahee/kbas/POM_Response_GZ/pom_re...,pom_response_batch_261.i3.gz,46,0,0,False,,0.0
2633,/project/def-nahee/kbas/POM_Response_GZ/pom_re...,pom_response_batch_341.i3.gz,46,0,0,False,,0.001
67,/project/def-nahee/kbas/POM_Response_GZ/pom_re...,pom_response_batch_1060.i3.gz,47,0,0,False,,0.0
568,/project/def-nahee/kbas/POM_Response_GZ/pom_re...,pom_response_batch_1524.i3.gz,47,0,0,False,,0.003
664,/project/def-nahee/kbas/POM_Response_GZ/pom_re...,pom_response_batch_1611.i3.gz,47,0,0,False,,0.0
1309,/project/def-nahee/kbas/POM_Response_GZ/pom_re...,pom_response_batch_2201.i3.gz,47,0,0,False,,0.0
1319,/project/def-nahee/kbas/POM_Response_GZ/pom_re...,pom_response_batch_2210.i3.gz,47,0,0,False,,0.0
1458,/project/def-nahee/kbas/POM_Response_GZ/pom_re...,pom_response_batch_2337.i3.gz,47,0,0,False,,0.0


### Delete All GZ Files That Have a Matching I3 File

In [5]:
import os
from pathlib import Path

# Root directory that is searched recursively for .i3 files and their .i3.gz twins.
DIR = Path("/project/def-nahee/kbas/POM_Response")

# Safety switch for the delete step: False means files are really unlinked,
# True only reports that nothing was deleted.
dry_run = False


In [6]:
# Every uncompressed .i3 file below DIR (searched recursively).
i3_files = [*DIR.rglob("*.i3")]

# Keep only the compressed twins that actually exist: file.i3 -> file.i3.gz
to_delete = [
    gz_twin
    for gz_twin in (src.with_suffix(src.suffix + ".gz") for src in i3_files)
    if gz_twin.exists()
]


In [7]:
# Report how many candidates were found and preview at most the first 20.
n_candidates = len(to_delete)
print(f"Found {n_candidates} .i3.gz files that also have a .i3 twin.")
for gz_path in to_delete[:20]:
    print("DELETE:", gz_path)
if n_candidates > 20:
    # More candidates exist than were listed.
    print("...")


Found 0 .i3.gz files that also have a .i3 twin.


In [8]:
# Delete the collected .i3.gz twins unless the dry-run switch is on.
if dry_run:
    print("Dry-run mode ON (nothing deleted). Set dry_run=False to delete.")
else:
    for gz_path in to_delete:
        try:
            gz_path.unlink()
        except Exception as exc:
            # Best effort: report the failure and continue with the next file.
            print("Could not delete:", gz_path, "->", exc)
    print("Done.")

Done.


### Delete All GZ Files Inside a Folder

In [9]:
from pathlib import Path

DIR = Path("/project/def-nahee/kbas/POM_Response")

dry_run = False  # start with True; switch to False only once you are sure

# Collect every .i3.gz file below DIR (recursive), in sorted order.
gz_files = sorted(DIR.rglob("*.i3.gz"))
print(f"Found {len(gz_files)} files matching *.i3.gz")

# Show the first 20 candidates together with their size.
for gz_path in gz_files[:20]:
    try:
        size_bytes = gz_path.stat().st_size
        print("WILL DELETE:", gz_path, "size:", size_bytes)
    except Exception as exc:
        print("CANNOT STAT:", gz_path, "->", exc)

if len(gz_files) > 20:
    print("...")

if dry_run:
    print("\nDry-run ON: nothing deleted. Set dry_run=False to delete.")
else:
    # Each failure is recorded as (path, error message); the loop keeps going.
    failed = []
    for gz_path in gz_files:
        try:
            gz_path.unlink()
        except Exception as exc:
            failed.append((str(gz_path), str(exc)))

    print(f"Deleted {len(gz_files) - len(failed)} / {len(gz_files)}")
    if failed:
        print("\nFailed deletes (first 20):")
        for failure in failed[:20]:
            print(failure)


Found 0 files matching *.i3.gz
Deleted 0 / 0
