In [1]:
import pandas as pd
from pathlib import Path
from datetime import date
import os

In [None]:
# Root of the processed-image tree that is searched for detection_counts.txt.
# Set the BEE_CAM_ROOT environment variable to point the notebook at another
# location; when it is unset (or empty) the original hard-coded path is used.
root_dir = Path(
    os.environ.get("BEE_CAM_ROOT")
    or "/home/misha/data/bee_cam/2024_image_processing/full_run_done"
)


def normalize_download_date(folder_name: str):
    """Convert a download-folder name into an ISO ``YYYY-MM-DD`` date string.

    Every digit character in ``folder_name`` is kept, in order, and the
    result is read as ``MMDDYY`` (for example ``"07_17_24"`` or
    ``"out_081624"``). The two-digit year is offset by 2000.

    Returns ``None`` when the name does not contain exactly six digits or
    when those digits do not form a valid calendar date.
    """
    numeric = "".join(filter(str.isdigit, folder_name))
    if len(numeric) != 6:
        return None

    month = int(numeric[0:2])
    day = int(numeric[2:4])
    year = 2000 + int(numeric[4:6])

    try:
        parsed = date(year, month, day)
    except ValueError:
        # e.g. month 13 or day 32: not a real date
        return None
    return parsed.isoformat()

# Collect one record per "<filename>: <count>" line found in every
# detection_counts.txt below root_dir.
rows = []

# rglob yields paths in filesystem order, which is not guaranteed to be the
# same between runs or machines; sorting keeps the row order (and anything
# derived from it, such as saved CSVs) reproducible.
for txt_file in sorted(root_dir.rglob("detection_counts.txt")):
    try:
        # Layout implied by the indices:
        #   <download_date>/<pi>/<dir>/<dir>/detection_counts.txt
        download_date_folder = txt_file.parents[3].name
        pi_folder = txt_file.parents[2].name
    except IndexError:
        # Path is too shallow to have both ancestor folders; skip it.
        continue

    # May be None when the folder name is not a recognizable MMDDYY date.
    norm_date = normalize_download_date(download_date_folder)

    with open(txt_file, "r") as f:
        for line in f:
            line = line.strip()
            # Ignore blank lines and anything that is not "name: value".
            if not line or ":" not in line:
                continue
            fname, num_str = line.split(":", 1)
            fname = fname.strip()
            try:
                # Counts may be written as floats ("3.0"); store them as ints.
                num = int(float(num_str.strip()))
            except ValueError as exc:
                # Still fail loudly, but say which file/line is malformed so
                # it can be found among millions of rows.
                raise ValueError(
                    f"{txt_file}: cannot parse detection count from line {line!r}"
                ) from exc
            rows.append({
                "filename": fname,
                "num": num,
                "download_date": download_date_folder,   # raw folder name
                "download_date_iso": norm_date,          # YYYY-MM-DD or None
                "pi": pi_folder
            })

df = pd.DataFrame(rows)
df

Unnamed: 0,filename,num,download_date,download_date_iso,pi
0,pi2_20240712_050157.jpg,0,07_17_24,2024-07-17,pi2
1,pi2_20240712_050200.jpg,0,07_17_24,2024-07-17,pi2
2,pi2_20240712_050202.jpg,0,07_17_24,2024-07-17,pi2
3,pi2_20240712_050204.jpg,0,07_17_24,2024-07-17,pi2
4,pi2_20240712_050206.jpg,0,07_17_24,2024-07-17,pi2
...,...,...,...,...,...
18288025,pi6_20240815_124624.jpg,0,out_081624,2024-08-16,pi6
18288026,pi6_20240815_124625.jpg,0,out_081624,2024-08-16,pi6
18288027,pi6_20240815_124627.jpg,0,out_081624,2024-08-16,pi6
18288028,pi6_20240815_124628.jpg,0,out_081624,2024-08-16,pi6


In [3]:
# List the distinct normalized download dates in chronological order.
# Folder names that could not be normalized are stored as missing values;
# they are dropped here because sorted() cannot compare None with str.
# Use df['download_date_iso'].isna().sum() to see how many rows are affected.
sorted(df['download_date_iso'].dropna().unique().tolist())

['2024-07-17',
 '2024-07-22',
 '2024-07-24',
 '2024-07-26',
 '2024-07-28',
 '2024-07-30',
 '2024-08-01',
 '2024-08-02',
 '2024-08-06',
 '2024-08-08',
 '2024-08-10',
 '2024-08-12',
 '2024-08-16',
 '2024-08-20',
 '2024-08-23',
 '2024-08-26',
 '2024-08-28',
 '2024-09-04',
 '2024-09-09',
 '2024-09-11']

In [6]:
# Keep only the images with at least one detection, ordered by download date
# and then by pi folder, with a fresh 0..n-1 index.
all_detections = (
    df[df['num'] > 0]
    .sort_values(['download_date_iso', 'pi'])
    .reset_index(drop=True)
)

# to_csv does not create missing directories, so make sure the target folder
# exists before writing (a no-op when it is already there).
detections_csv = Path('csv/2024_detections.csv')
detections_csv.parent.mkdir(parents=True, exist_ok=True)
all_detections.to_csv(detections_csv)