In [2]:
import os
import shutil
import pandas as pd
import pickle

pickle_path = "results/sahi_detections_07_17-30_24/pickles"
csv_path = "csv/sahi_detections_07_17-30_24.csv"
pickle_list_file = "source_pickles.txt"

pickles = [f for f in os.listdir(pickle_path) if f.endswith(".pickle")]
with open(pickle_list_file, "w") as f:
    f.write("\n".join(pickles) + ("\n" if pickles else ""))

with open(pickle_list_file, "r") as f:
    pickle_list = [line.strip() for line in f if line.strip()]

for pickle_name in pickle_list:
    pickle_file = os.path.join(pickle_path, pickle_name)

    try:
        rows = []
        with open(pickle_file, "rb") as f:
            try:
                data = pickle.load(f)
            except Exception as e:
                print(f"  [ERROR] Could not load {pickle_name}: {e}")
                continue

            if isinstance(data, list) and len(data) == 0:
                continue

            for obj_pred in data:
                try:
                    bbox = obj_pred.bbox.to_xywh()
                    score = obj_pred.score.value
                    label = obj_pred.category.id
                except Exception as e:
                    print(f"  [WARN] Skipping object in {pickle_name}: {e}")
                    continue

                stem = os.path.splitext(pickle_name)[0]
                rows.append((stem, bbox, score, label))

        if rows:
            df = pd.DataFrame(rows, columns=["filename", "bbox", "score", "label"])
            write_header = not os.path.exists(csv_path)
            df.to_csv(csv_path, mode="a", header=write_header, index=False)

        with open(pickle_list_file, "r") as f:
            lines = [line.strip() for line in f if line.strip()]
        lines = [line for line in lines if line != pickle_name]
        with open(pickle_list_file, "w") as f:
            f.write("\n".join(lines) + ("\n" if lines else ""))
        print(f"Removed {pickle_name} from {pickle_list_file}")

    except Exception as e:
        print(f"[ERROR] Could not process {pickle_file}: {e}")


Removed pi9_20240722_093407.pickle from source_pickles.txt
Removed pi6_20240723_102624.pickle from source_pickles.txt
Removed pi7_20240721_140051.pickle from source_pickles.txt
Removed pi6_20240723_100753.pickle from source_pickles.txt
Removed pi9_20240723_095953.pickle from source_pickles.txt
Removed pi7_20240727_113926.pickle from source_pickles.txt
Removed pi1_20240711_063442.pickle from source_pickles.txt
Removed pi1_20240727_153719.pickle from source_pickles.txt
Removed pi1_20240711_053446.pickle from source_pickles.txt
Removed pi1_20240721_171314.pickle from source_pickles.txt
Removed pi7_20240727_083331.pickle from source_pickles.txt
Removed pi7_20240721_091907.pickle from source_pickles.txt
Removed pi1_20240727_153548.pickle from source_pickles.txt
Removed pi7_20240727_084638.pickle from source_pickles.txt
Removed pi1_20240727_143054.pickle from source_pickles.txt
Removed pi1_20240727_153706.pickle from source_pickles.txt
Removed pi1_20240727_172153.pickle from source_pickles.t

In [9]:
import pandas as pd

data = pd.read_csv('csv/sahi_detections_07_17-30_24.csv')
filenames = data['filename'].unique().tolist()
filenames = [x + '.jpg' for x in filenames]

In [11]:
import shutil
from multiprocessing import Pool, cpu_count

source_dir = "datasets/sahi_detections_07_17-30_24"
output_dir = "datasets/sahi_detections_nostandard_output"

os.makedirs(output_dir, exist_ok=True)

def process_file(filename):
    src = os.path.join(source_dir, filename)
    dst = os.path.join(output_dir, filename)
    if os.path.exists(src):
        shutil.copy2(src, dst)
    return filename

if __name__ == "__main__":
    with Pool(cpu_count()) as pool:
        for result in pool.imap_unordered(process_file, filenames):
            print("Processed:", result)

Processed: pi9_20240722_093407.jpg
Processed: pi1_20240711_053446.jpg
Processed: pi6_20240723_100753.jpg
Processed: pi1_20240721_171314.jpg
Processed: pi6_20240723_102624.jpg
Processed: pi1_20240711_062607.jpg
Processed: pi7_20240719_095803.jpg
Processed: pi9_20240723_102320.jpg
Processed: pi6_20240720_085646.jpg
Processed: pi1_20240711_061311.jpg
Processed: pi7_20240721_140051.jpg
Processed: pi1_20240727_153719.jpg
Processed: pi1_20240714_134002.jpg
Processed: pi1_20240722_100931.jpg
Processed: pi1_20240728_144948.jpg
Processed: pi1_20240727_153706.jpg
Processed: pi5_20240722_125612.jpg
Processed: pi1_20240711_062004.jpg
Processed: pi1_20240711_063442.jpg
Processed: pi9_20240723_095212.jpg
Processed: pi9_20240723_095953.jpg
Processed: pi6_20240719_062759.jpg
Processed: pi1_20240727_172153.jpg
Processed: pi3_20240727_172142.jpg
Processed: pi7_20240726_072705.jpg
Processed: pi5_20240722_131942.jpg
Processed: pi8_20240720_103805.jpg
Processed: pi1_20240727_171045.jpg
Processed: pi7_20240