In [1]:
import os
from typing import Dict, List
from collections import defaultdict
import json

import psycopg2
from psycopg2.extras import DictCursor

# Connection string is read from the environment so that credentials never live
# in the notebook (or in its version history). Export DATABASE_URI before
# starting the kernel; os.getenv returns None when it is unset.
# NOTE(review): a full connection string used to be hardcoded on this line --
# treat that credential as leaked and rotate it.
DATABASE_URI = os.getenv('DATABASE_URI')

# All tables of one experiment share a prefix; the table names are derived from it.
TABLE_PREFIX = "test1_"
ANNOTATION_TABLE_NAME = TABLE_PREFIX + "annotations"
SUBTASK_TABLE_NAME = TABLE_PREFIX + "subtasks"

# Output location for the exported annotations.
ANNOTATIONS_FOLDER = 'output'
ANNOTATIONS_FILE_PATH = 'annotations_output.json'

In [2]:
# Open the database connection; sslmode="require" asks for an SSL-encrypted link.
conn = psycopg2.connect(DATABASE_URI, sslmode="require")
# DictCursor yields rows that can be indexed by column name (used by fetch_annotations).
cursor = conn.cursor(cursor_factory=DictCursor)

In [3]:
def defaultdict_to_regular(d):
    """Return a plain-dict copy of a (possibly nested) defaultdict.

    Values that are themselves defaultdicts are converted recursively. Any
    object that is not a defaultdict -- including a regular dict -- is returned
    unchanged and is not traversed.
    """
    # Guard clause: leave everything that is not a defaultdict alone.
    if not isinstance(d, defaultdict):
        return d

    plain = {}
    for key, value in d.items():
        plain[key] = defaultdict_to_regular(value)
    return plain

In [4]:
def fetch_annotations(cursor: psycopg2.extensions.cursor) -> Dict[str, Dict]:
    """Fetch all annotations with their subtasks, grouped by video filename.

    Joins the annotation and subtask tables and folds the rows into
    ``{video_filename: {annotation_id: record}}``. Each record holds
    ``username``, ``created_at``, ``subtask_decomposition`` (a list of
    ``(start_step, end_step, subtask)`` tuples, one per subtask row) and
    ``time_spent`` (the sum of ``time_spent`` over the annotation's subtask rows).

    Args:
        cursor: An open cursor whose rows support lookup by column name
            (for example one created with ``DictCursor``).

    Returns:
        A plain nested ``dict``. If the query or the grouping fails, the error
        is printed, the transaction is rolled back, and whatever was grouped
        before the failure (possibly an empty dict) is returned.

    Note:
        The cursor and the connection it belongs to are closed before this
        function returns, so a new connection is needed to call it again.
    """
    # Use the connection that owns this cursor rather than a notebook-level
    # global, so the rollback/close always target the right connection.
    connection = cursor.connection

    # Built before the try block so the return statement always has a value,
    # even when the query itself fails.
    annotations = defaultdict(lambda: defaultdict(lambda: {
        "username": None,
        "created_at": None,
        "subtask_decomposition": [],
        "time_spent": 0,
    }))

    try:
        # One row per subtask; annotation columns repeat on every row.
        query = f"""
            SELECT a.video_filename, a.id, a.username, a.created_at, s.start_step, s.end_step, s.subtask, s.time_spent
            FROM {ANNOTATION_TABLE_NAME} a
            JOIN {SUBTASK_TABLE_NAME} s ON a.id = s.annotation_id"""

        cursor.execute(query)

        # Group the rows by video filename, then by annotation id.
        for row in cursor.fetchall():
            record = annotations[row['video_filename']][row['id']]
            record["username"] = row['username']
            record["created_at"] = row['created_at']
            record["subtask_decomposition"].append(
                (row['start_step'], row['end_step'], row['subtask'])
            )
            record["time_spent"] += row['time_spent']

    except Exception as e:
        # Best-effort: report the problem and fall through to return what we have.
        print(f"An error occurred: {e}")
        connection.rollback()  # Roll back the transaction in case of error
    finally:
        cursor.close()
        connection.close()

    return defaultdict_to_regular(annotations)


# fetch_annotations closes the cursor and the connection when it finishes, so
# re-running this cell requires re-running the connection cell first.
test1_annotations = fetch_annotations(cursor)

In [5]:
# Inspect the result: video filename -> annotation id -> annotation record.
test1_annotations

{'Door_20240213-183948_0.mp4': {1: {'username': 'luna',
   'created_at': datetime.datetime(2024, 10, 2, 15, 46, 9, 445322),
   'subtask_decomposition': [(0, 20, 'move to above door handle'),
    (20, 40, 'move down to turn door handle'),
    (41, 56, 'open door'),
    (57, 79, 'return  to home')],
   'time_spent': 657},
  2: {'username': 'kevin_torres',
   'created_at': datetime.datetime(2024, 10, 2, 15, 47, 53, 526493),
   'subtask_decomposition': [(0, 20, 'arm moves over door handle'),
    (21, 39, 'arm pushes down on door handle'),
    (40, 56, 'arm pulls on door handle'),
    (57, 79, 'arm releases door handle and stows away')],
   'time_spent': 413}},
 'Lift_20240220-124804_17.mp4': {3: {'username': 'kevin_torres',
   'created_at': datetime.datetime(2024, 10, 2, 15, 49, 50, 442366),
   'subtask_decomposition': [(0, 12, 'arm moves over block'),
    (12, 15, 'arm grasps block'),
    (16, 28, 'arm lifts the block ')],
   'time_spent': 95},
  4: {'username': 'luna',
   'created_at': d

In [6]:
def write_annotations_to_file(annotations: Dict[str, Dict], file_path: str) -> None:
    '''
    Write the annotations data to a JSON file inside a dedicated directory.

    The file is placed under ``task_decomposition/<ANNOTATIONS_FOLDER>``; that
    directory is created if it does not exist yet.

    Args:
        annotations: Nested annotation mapping to serialize.
        file_path: Name of the output file, resolved relative to the output
            directory.
    '''
    # Define the output folder path (e.g., 'task_decomposition/output')
    output_dir = os.path.join("task_decomposition", ANNOTATIONS_FOLDER)

    # Create the output directory if it doesn't exist
    os.makedirs(output_dir, exist_ok=True)

    # Full path to the output file. This previously joined an undefined name
    # (`filename`), which raised NameError on every call.
    output_path = os.path.join(output_dir, file_path)

    with open(output_path, 'w', encoding='utf-8') as file:
        # default=str handles values json cannot serialize natively, such as datetime
        json.dump(annotations, file, indent=4, default=str)