In [21]:
# load json file
import json
import os

# load json file
def load_json_file(file_path):
    """
    Load a JSON file and return its parsed contents.

    Args:
        file_path (str | os.PathLike): The path to the JSON file.

    Returns:
        dict: The decoded JSON document. A file whose root is not a JSON
        object is returned as the corresponding Python type (list, str, ...).

    Raises:
        FileNotFoundError: If ``file_path`` is not an existing regular file.
        json.JSONDecodeError: If the file content is not valid JSON.
    """
    if not os.path.isfile(file_path):
        raise FileNotFoundError(f"The file {file_path} does not exist.")

    # JSON text is UTF-8 by specification; decode it explicitly instead of
    # relying on the platform's locale-dependent default encoding.
    with open(file_path, 'r', encoding='utf-8') as f:
        return json.load(f)

In [31]:
# Load a single annotation file for interactive inspection.
# NOTE: the path is absolute and machine-specific; adjust it when running elsewhere.
data = load_json_file('/home/nele_pauline_suffo/ProcessedData/childlens_annotations/205296.MP4.json')

In [None]:
# Despite its name, `first` holds the entire 'instances' list
# (an empty list when the key is absent), not a single instance.
first = data.get('instances', [])
# Display the first timestamp entry of the first parameter of the first instance;
# this raises IndexError when the instances list is empty.
first[0]["parameters"][0]["timestamps"][0]

{'attributes': [{'id': 6169950,
   'groupId': 975661,
   'name': 'Playroom',
   'groupName': 'Type of Location'}],
 'timestamp': 0}

In [45]:
# Display the 'meta' dict of the instance at index 11 of the `first` list.
first[11]['meta']

{'id': 'MTM4Mjk0Ni4zNTI5NjQ4OTg2',
 'type': 'event',
 'classId': 1884771,
 'className': 'Action',
 'createdBy': {'email': 'sa.partner.ana+1@gmail.com', 'role': 'Annotator'},
 'createdAt': '2024-09-09T02:12:53.635Z',
 'updatedBy': {'email': 'sa.partner.ana+1@gmail.com', 'role': 'Annotator'},
 'updatedAt': '2024-09-09T02:13:06.425Z',
 'start': 219650323,
 'end': 222797328}

In [None]:
import json
from pathlib import Path
from glob import glob
from collections import Counter

# Define the folder where your JSON files are located
# NOTE: absolute, machine-specific path; adjust it when running elsewhere.
input_folder = "/home/nele_pauline_suffo/ProcessedData/childlens_annotations"
output_rttm_path = "combined_output.rttm"

# Define relevant labels to include
valid_action_names = {"Child Talking", "Other Person Talking", "Overheard Speech"}

# Placeholder ID for segments whose speaker cannot be resolved to a concrete
# class; such segments are counted but never written to the RTTM file.
UNRESOLVED_SPEAKER_ID = "speech"


def first_attribute_name(attributes, group_name):
    """
    Return the ``name`` of the first attribute in ``group_name``.

    Args:
        attributes (list[dict]): Attribute dicts with ``groupName`` and ``name`` keys.
        group_name (str): The ``groupName`` value to look for.

    Returns:
        The ``name`` of the first matching attribute, or None if none matches.

    Raises:
        KeyError: If an inspected attribute lacks ``groupName`` or ``name``.
    """
    return next(
        (attr["name"] for attr in attributes if attr["groupName"] == group_name),
        None,
    )


def resolve_speaker_id(action_type, attributes):
    """
    Map an action type and its attributes to a speaker ID.

    Args:
        action_type (str): One of the labels in ``valid_action_names``.
        attributes (list[dict]): Attributes of the timestamp entry the action
            type was read from.

    Returns:
        str: "kchi", "ovh", "och", "fem" or "mal"; ``UNRESOLVED_SPEAKER_ID``
        when the age group / gender combination does not identify a class.
    """
    if action_type == "Child Talking":
        return "kchi"
    if action_type == "Overheard Speech":
        return "ovh"
    if action_type == "Other Person Talking":
        age_group = first_attribute_name(attributes, "1st Person Age Group")
        gender = first_attribute_name(attributes, "1st Person Gender")
        if age_group in ("Child", "Infant"):
            return "och"
        if age_group in ("Adult", "Adolescent"):
            if gender == "Female":
                return "fem"
            if gender == "Male":
                return "mal"
    return UNRESOLVED_SPEAKER_ID


def extract_speech_segment(instance):
    """
    Extract the speech segment described by one annotation instance.

    Only instances whose ``meta.type`` is "event" are considered, and only the
    first parameter block of the instance is read. The first timestamp entry
    carrying a valid "Type of Action" decides the speaker.

    Args:
        instance (dict): One element of the ``instances`` list of an annotation file.

    Returns:
        tuple[str, float, float] | None: ``(speaker_id, start, duration)`` with
        times divided by 1_000_000 (presumably microseconds to seconds; confirm
        against the annotation tool's export format), or None when the instance
        is not an event or has no valid action.

    Raises:
        KeyError, IndexError, TypeError: If the instance does not have the
            expected structure.
    """
    if instance["meta"]["type"] != "event":
        return None

    segment = instance["parameters"][0]
    for detail in segment["timestamps"]:
        action_type = first_attribute_name(detail["attributes"], "Type of Action")
        if action_type not in valid_action_names:
            continue
        speaker_id = resolve_speaker_id(action_type, detail["attributes"])
        start = segment["start"] / 1_000_000
        end = segment["end"] / 1_000_000
        # Only the first relevant timestamp entry per instance is used.
        return speaker_id, start, end - start
    return None


rttm_lines = []
speaker_counts = Counter()
files_processed = 0
unresolved_segments = 0

# Get all JSON files in the folder. glob returns them in arbitrary order, so
# sort to make the order of lines in the RTTM output reproducible.
json_files = sorted(glob(f"{input_folder}/*.json"))
if not json_files:
    print(f"No JSON files found in {input_folder}")

for json_file in json_files:
    try:
        print(f"Processing {json_file}...")
        with open(json_file, "r", encoding="utf-8") as f:
            annotations = json.load(f)

        uri = annotations['metadata']['name']
        files_processed += 1

        for instance in annotations.get('instances', []):
            # Best-effort per instance: a malformed instance is reported and
            # skipped without discarding the remaining instances of the file.
            try:
                segment = extract_speech_segment(instance)
            except Exception as e:
                print(f"Skipping instance in {json_file} due to error: {e}")
                continue

            if segment is None:
                continue

            speaker_id, start, duration = segment
            if speaker_id == UNRESOLVED_SPEAKER_ID:
                unresolved_segments += 1
                continue

            # RTTM line: type, file ID, channel, onset, duration, then
            # placeholder fields around the speaker name.
            rttm_lines.append(
                f"SPEAKER {uri} 1 {start:.3f} {duration:.3f} <NA> <NA> {speaker_id} <NA> <NA>"
            )
            speaker_counts[speaker_id] += 1

    except Exception as e:
        print(f"Skipping file {json_file} due to error: {e}")

# Write all RTTM lines to a single file
with open(output_rttm_path, "w", encoding="utf-8") as f:
    f.writelines(line + "\n" for line in rttm_lines)

# Final logs
print(f"\n✅ Processed {files_processed} JSON files.")
print("\n🎙️ Speaker instance counts in RTTM output:")
for speaker_id in ['kchi', 'och', 'fem', 'mal', 'ovh']:
    print(f"  {speaker_id}: {speaker_counts[speaker_id]}")
print(f"\nSkipped {unresolved_segments} speech segments with unresolved speaker (not written).")

Processing /home/nele_pauline_suffo/ProcessedData/childlens_annotations/204839.MP4.json...
Processing /home/nele_pauline_suffo/ProcessedData/childlens_annotations/205296.MP4.json...
Processing /home/nele_pauline_suffo/ProcessedData/childlens_annotations/207115.MP4.json...
Processing /home/nele_pauline_suffo/ProcessedData/childlens_annotations/208080.MP4.json...
Processing /home/nele_pauline_suffo/ProcessedData/childlens_annotations/208409.MP4.json...
Processing /home/nele_pauline_suffo/ProcessedData/childlens_annotations/210803.MP4.json...
Processing /home/nele_pauline_suffo/ProcessedData/childlens_annotations/212983.MP4.json...
Processing /home/nele_pauline_suffo/ProcessedData/childlens_annotations/217622.MP4.json...
Processing /home/nele_pauline_suffo/ProcessedData/childlens_annotations/225742.MP4.json...
Processing /home/nele_pauline_suffo/ProcessedData/childlens_annotations/227279.MP4.json...
Processing /home/nele_pauline_suffo/ProcessedData/childlens_annotations/244555.MP4.json...