In [1]:
# import libraries
import os
import json
import pandas as pd
from datetime import datetime, timedelta

In [3]:
# Root folder that is walked recursively for per-recording JSON files
base_directory = r"Y:\LP_birds\LP_birds_2023\ARUs\ARU_audiofiles"

# Destination file for the combined detections table (.csv)
out_csv = r"Y:\LP_birds\LP_birds_2023\ARUs\ARU_audiofiles\aru_all_detections_ex.csv"

In [4]:
def _detection_rows(recording):
    """Flatten one parsed recording JSON object into per-detection row dicts.

    Parameters
    ----------
    recording : dict
        Parsed content of one JSON file. The keys read here are "date"
        (a "%Y-%m-%d %H:%M:%S" string), "aru_id" and "detections" (a list of
        dicts with "common_name", "scientific_name", "confidence" and
        "start_time"). Any of them may be missing.

    Returns
    -------
    list of dict
        One dict per detection with the keys "ARU_id", "common_name",
        "scientific_name", "date", "time" and "confidence".

    Raises
    ------
    ValueError
        If "date" is present but does not match "%Y-%m-%d %H:%M:%S".
    """
    # Recording-level fields are identical for every detection in the file,
    # so they are read and parsed once rather than once per detection.
    date_time_str = recording.get("date")
    date_time = datetime.strptime(date_time_str, "%Y-%m-%d %H:%M:%S") if date_time_str else None
    ARU_id = recording.get("aru_id")

    rows = []
    for detection in recording.get("detections", []):
        start_time_seconds = detection.get("start_time")

        if date_time and start_time_seconds is not None:
            # start_time is an offset in seconds from the recording's start
            detection_time = date_time + timedelta(seconds=start_time_seconds)
        else:
            detection_time = None

        # Take the calendar date from the detection timestamp when there is
        # one, so that an offset which rolls past midnight yields a date that
        # matches the reported clock time. Fall back to the recording date
        # when no offset is available.
        date_source = detection_time or date_time

        rows.append({
            "ARU_id": ARU_id,
            "common_name": detection.get("common_name"),
            "scientific_name": detection.get("scientific_name"),
            "date": date_source.date() if date_source else None,
            "time": detection_time.time() if detection_time else None,
            "confidence": detection.get("confidence")
        })
    return rows


# Initialize a list to store the data
data_list = []

# Recursively traverse subfolders and extract relevant information from JSON files
for root, dirs, files in os.walk(base_directory):
    for filename in files:
        if filename.endswith(".json"):
            json_file_path = os.path.join(root, filename)
            # Decode explicitly as UTF-8 (the JSON interchange encoding) instead
            # of relying on the platform's locale-dependent default.
            with open(json_file_path, "r", encoding="utf-8") as json_file:
                data = json.load(json_file)
            data_list.extend(_detection_rows(data))

# Create a pandas DataFrame from the extracted data
df = pd.DataFrame(data_list)

# Print the resulting DataFrame
print(df)


           ARU_id           common_name      scientific_name        date  \
0      LPC1_13122               Mallard   Anas platyrhynchos  2023-08-07   
1      LPC1_13122          Caspian Tern   Hydroprogne caspia  2023-08-07   
2      LPC1_13122               Mallard   Anas platyrhynchos  2023-08-07   
3      LPC1_13122               Mallard   Anas platyrhynchos  2023-08-07   
4      LPC1_13122               Mallard   Anas platyrhynchos  2023-08-07   
...           ...                   ...                  ...         ...   
37443  LPT2_01315        Common Grackle   Quiscalus quiscula  2023-08-14   
37444  LPT2_01315        Common Grackle   Quiscalus quiscula  2023-08-14   
37445  LPT2_01315        Common Grackle   Quiscalus quiscula  2023-08-14   
37446  LPT2_01315  Red-winged Blackbird  Agelaius phoeniceus  2023-08-14   
37447  LPT2_01315          Caspian Tern   Hydroprogne caspia  2023-08-14   

           time  confidence  
0      04:32:18    0.126827  
1      04:32:18    0.126410

In [47]:
# Write the combined detections table (including its index column) to the configured .csv path
df.to_csv(path_or_buf=out_csv)