In [None]:
import os
import shutil
import pandas as pd
import pickle
import tarfile

# Batch-convert pickled detection results stored inside tar archives into one
# CSV file. Each archive that is processed without error is moved to
# `processed_path` and removed from the work list in `tar_list_file`.
tar_path = "source_tars"
csv_path = "csv/detections.csv"
tar_list_file = "source_tars.txt"
processed_path = "processed_tars"

# Create the output locations up front so neither the CSV append nor the
# archive move fails on a fresh checkout.
os.makedirs(os.path.dirname(csv_path) or ".", exist_ok=True)
os.makedirs(processed_path, exist_ok=True)

# Snapshot the archives that are waiting to be processed. The list file is
# rewritten as archives complete, so it always shows what is still pending.
tars = [name for name in os.listdir(tar_path) if name.endswith((".tar", ".tar.gz"))]
with open(tar_list_file, "w") as list_fh:
    list_fh.write("\n".join(tars) + ("\n" if tars else ""))

with open(tar_list_file, "r") as list_fh:
    tar_list = [line.strip() for line in list_fh if line.strip()]

for tar_name in tar_list:
    tar_file = os.path.join(tar_path, tar_name)

    try:
        rows = []
        skipped = 0

        with tarfile.open(tar_file, "r:*") as tar:
            pickle_members = [m for m in tar.getmembers() if m.name.endswith(".pickle")]
            print(f"  Found {len(pickle_members)} pickle files")

            for member in pickle_members:
                member_fh = tar.extractfile(member)
                if member_fh is None:
                    # Not a regular file (e.g. a directory or link entry).
                    continue

                # SECURITY: pickle.load can execute arbitrary code. Only run
                # this on archives produced by a trusted pipeline.
                try:
                    data = pickle.load(member_fh)
                except Exception:
                    # Best effort: one unreadable pickle must not abort the
                    # whole archive, but it is counted and reported below.
                    skipped += 1
                    continue
                if isinstance(data, list) and not data:
                    continue

                # The filename stem depends only on the member, so compute it
                # once rather than once per prediction.
                stem = os.path.splitext(os.path.basename(member.name))[0]

                # NOTE(review): each item is presumably a detection object
                # exposing `.bbox.to_xywh()`, `.score.value` and
                # `.category.id`; confirm against the producer of the pickles.
                for obj_pred in data:
                    rows.append(
                        (
                            stem,
                            obj_pred.bbox.to_xywh(),
                            obj_pred.score.value,
                            obj_pred.category.id,
                        )
                    )

        if skipped:
            print(f"  Skipped {skipped} unreadable pickle files")

        df = pd.DataFrame(rows, columns=["filename", "bbox", "score", "label"])
        # Append to the same file that the header check inspects, so the header
        # is written exactly once (when the CSV is missing or still empty).
        write_header = not os.path.exists(csv_path) or os.path.getsize(csv_path) == 0
        df.to_csv(csv_path, mode="a", header=write_header, index=False)

        shutil.move(tar_file, os.path.join(processed_path, tar_name))
        print(f"Moved {tar_name} to {processed_path}")

        # Drop the finished archive from the pending list on disk.
        with open(tar_list_file, "r") as list_fh:
            lines = [line.strip() for line in list_fh if line.strip()]
        lines = [line for line in lines if line != tar_name]
        with open(tar_list_file, "w") as list_fh:
            list_fh.write("\n".join(lines) + ("\n" if lines else ""))
        print(f"Removed {tar_name} from {tar_list_file}")

    except Exception as e:
        # Top-level boundary: report the failure and carry on with the next
        # archive. A failed archive stays in `tar_path` and in the list file.
        print(f"[ERROR] Could not process {tar_file}: {e}")


  Found 192696 pickle files
[INFO] Removed 071724_preds.tar from source_tars.txt
  Found 1043749 pickle files
[INFO] Removed 072224_results.tar from source_tars.txt
  Found 1026906 pickle files
[INFO] Removed 072624_preds.tar from source_tars.txt
  Found 749408 pickle files
[INFO] Removed 072824_results.tar from source_tars.txt
  Found 1258603 pickle files
[INFO] Removed 073024_results.tar from source_tars.txt
  Found 934845 pickle files
[INFO] Removed 08_01_24_results.tar from source_tars.txt
  Found 771770 pickle files
[INFO] Removed 08_02_24_results.tar from source_tars.txt
  Found 1089482 pickle files
[INFO] Removed 08_06_24_results.tar from source_tars.txt
