In [11]:
# load json file
import json
import os

# load json file
def load_json_file(file_path):
    """
    Load a JSON file and return its parsed contents.

    Args:
        file_path (str | os.PathLike): The path to the JSON file.

    Returns:
        The decoded JSON document. This is a dict when the file's top-level
        value is a JSON object; other top-level values (list, number, ...)
        are returned as the corresponding Python type.

    Raises:
        FileNotFoundError: If ``file_path`` does not point to a regular file
            (this also covers directories).
        json.JSONDecodeError: If the file content is not valid JSON.
    """
    if not os.path.isfile(file_path):
        raise FileNotFoundError(f"The file {file_path} does not exist.")

    # JSON is UTF-8 by specification; do not rely on the platform's default
    # text encoding, which differs between systems.
    with open(file_path, 'r', encoding='utf-8') as f:
        return json.load(f)

In [12]:
# Load a single annotation export for ad-hoc inspection in the cells below.
annotation_path = '/home/nele_pauline_suffo/ProcessedData/childlens_annotations/205296.MP4.json'
data = load_json_file(annotation_path)

In [14]:
# Raw duration value stored in the annotation metadata.
# Presumably microseconds: the split/RTTM cell further down divides this
# field by 1_000_000 to obtain seconds — confirm against the export format.
data['metadata']['duration']


1800198400

In [45]:
# Inspect the `meta` record of one entry (index 11) of `first`.
# NOTE(review): `first` is not defined in any cell visible here and this cell's
# execution count is out of sequence, so it will fail on Restart & Run All.
# Presumably `first` holds the annotation instances (e.g. data['instances']) — confirm
# and define it explicitly in a preceding cell.
# NOTE(review): the rendered output contains annotator e-mail addresses; clear or
# redact it before sharing the notebook.
first[11]['meta']

{'id': 'MTM4Mjk0Ni4zNTI5NjQ4OTg2',
 'type': 'event',
 'classId': 1884771,
 'className': 'Action',
 'createdBy': {'email': 'sa.partner.ana+1@gmail.com', 'role': 'Annotator'},
 'createdAt': '2024-09-09T02:12:53.635Z',
 'updatedBy': {'email': 'sa.partner.ana+1@gmail.com', 'role': 'Annotator'},
 'updatedAt': '2024-09-09T02:13:06.425Z',
 'start': 219650323,
 'end': 222797328}

In [16]:
import json
from pathlib import Path
from glob import glob
from collections import Counter

# Build train/val/test RTTM files from the ChildLens annotation JSON exports.
#
# Steps:
#   1. Read every JSON file in `input_folder` and record its name and duration.
#   2. Assign files to train/val/test by accumulated duration.
#   3. For each split, convert speech events into RTTM lines and write
#      `<split>.rttm` into `output_dir`.
#   4. Print a summary of processed files, speaker counts and split sizes.

# Folder and output paths
input_folder = "/home/nele_pauline_suffo/ProcessedData/childlens_annotations"
output_dir = Path("/home/nele_pauline_suffo/ProcessedData/vtc_childlens")
# parents=True so the cell also works when the parent folder does not exist yet.
output_dir.mkdir(parents=True, exist_ok=True)

valid_action_names = {"Child Talking", "Other Person Talking", "Overheard Speech"}

# Timestamps and durations are divided by this to obtain seconds.
MICROSECONDS_PER_SECOND = 1_000_000
# Share of the total duration targeted for the train and validation splits;
# whatever remains goes to test.
TRAIN_FRACTION = 0.8
VAL_FRACTION = 0.1


def _attribute_value(attributes, group_name):
    """Return the ``name`` of the first attribute in ``group_name``, or None."""
    return next(
        (attr["name"] for attr in attributes if attr.get("groupName") == group_name),
        None,
    )


def _resolve_speaker_id(attributes):
    """Map the attributes of one timestamp block to an RTTM speaker label.

    Returns "kchi", "och", "fem", "mal" or "ovh", or None when the block is
    not one of `valid_action_names` or the speaker cannot be determined
    (e.g. an "Other Person Talking" block without a usable age group/gender).
    """
    action_type = _attribute_value(attributes, "Type of Action")
    if action_type not in valid_action_names:
        return None
    if action_type == "Child Talking":
        return "kchi"
    if action_type == "Overheard Speech":
        return "ovh"
    if action_type == "Other Person Talking":
        age_group = _attribute_value(attributes, "1st Person Age Group")
        gender = _attribute_value(attributes, "1st Person Gender")
        if age_group in ("Child", "Infant"):
            return "och"
        if age_group in ("Adult", "Adolescent"):
            return {"Female": "fem", "Male": "mal"}.get(gender)
    return None


all_files = []
speaker_counts = Counter()
files_processed = 0

# Step 1: Load all JSON files and collect metadata
# glob() returns files in arbitrary order; sorting makes the split below
# reproducible when several files share the same duration.
json_files = sorted(glob(f"{input_folder}/*.json"))
for json_file in json_files:
    try:
        with open(json_file, "r", encoding="utf-8") as json_handle:
            annotations = json.load(json_handle)
        uri = annotations['metadata']['name']
        duration = annotations['metadata']['duration'] / MICROSECONDS_PER_SECOND  # microseconds to seconds
        all_files.append({
            "path": json_file,
            "uri": uri,
            "duration": duration
        })
    except Exception as e:
        print(f"Skipping file {json_file} due to error: {e}")

# Step 2: Sort and split files by total duration
# NOTE(review): files are sorted longest-first and train is filled first, so
# train receives the longest recordings and test the shortest ones. The
# assignment is kept as-is to preserve existing splits — confirm this bias is
# acceptable or switch to a seeded shuffle.
all_files.sort(key=lambda x: x["duration"], reverse=True)
total_duration = sum(file_info["duration"] for file_info in all_files)

train_duration, val_duration, test_duration = 0, 0, 0
train_files, val_files, test_files = [], [], []

for file_info in all_files:
    if train_duration < TRAIN_FRACTION * total_duration:
        train_files.append(file_info)
        train_duration += file_info["duration"]
    elif val_duration < VAL_FRACTION * total_duration:
        val_files.append(file_info)
        val_duration += file_info["duration"]
    else:
        test_files.append(file_info)
        test_duration += file_info["duration"]

splits = {
    "train": train_files,
    "val": val_files,
    "test": test_files
}
split_durations = {
    "train": train_duration,
    "val": val_duration,
    "test": test_duration
}

# Step 3: Process each split and write RTTM
for split_name, files in splits.items():
    rttm_lines = []
    for file_info in files:
        try:
            with open(file_info["path"], "r", encoding="utf-8") as json_handle:
                annotations = json.load(json_handle)
            uri = annotations['metadata']['name']
            files_processed += 1

            for instance in annotations.get('instances', []):
                # Everything that touches one instance lives inside this try so
                # that a malformed instance is skipped without discarding the
                # remaining instances of the same file.
                try:
                    if instance["meta"]["type"] != "event":
                        continue
                    event = instance["parameters"][0]

                    for block in event["timestamps"]:
                        if "attributes" not in block:
                            continue
                        speaker_id = _resolve_speaker_id(block["attributes"])
                        if speaker_id is None:
                            continue

                        # Timing comes from the enclosing event, not from the
                        # individual timestamp block.
                        start = event["start"] / MICROSECONDS_PER_SECOND
                        end = event["end"] / MICROSECONDS_PER_SECOND
                        duration = end - start

                        rttm_lines.append(
                            f"SPEAKER {uri} 1 {start:.3f} {duration:.3f} <NA> <NA> {speaker_id} <NA> <NA>"
                        )
                        speaker_counts[speaker_id] += 1
                        break  # Only use the first relevant block per instance
                except Exception as e:
                    print(f"Skipping instance in {file_info['path']} due to error: {e}")
        except Exception as e:
            print(f"Error reading file {file_info['path']}: {e}")

    # Save to RTTM file
    rttm_path = output_dir / f"{split_name}.rttm"
    with open(rttm_path, "w", encoding="utf-8") as out_f:
        for line in rttm_lines:
            out_f.write(line + "\n")

# Step 4: Summary logs
print(f"\n✅ Total processed files: {files_processed}")
print("\n🎙️ Speaker instance counts in all splits:")
for speaker_id in ['kchi', 'och', 'fem', 'mal', 'ovh']:
    print(f"  {speaker_id}: {speaker_counts[speaker_id]}")

print("\n📊 RTTM split durations and video counts:")
for split_name in ["train", "val", "test"]:
    dur = split_durations[split_name]
    # Guard against an empty input folder (total duration of zero).
    perc = (dur / total_duration) * 100 if total_duration else 0.0
    count = len(splits[split_name])
    print(f"  {split_name}: {dur:.2f} sec ({perc:.1f}%), {count} videos")


✅ Total processed files: 161

🎙️ Speaker instance counts in all splits:
  kchi: 11295
  och: 701
  fem: 6193
  mal: 2447
  ovh: 3172

📊 RTTM split durations and video counts:
  train: 146244.93 sec (80.1%), 98 videos
  val: 18337.89 sec (10.0%), 23 videos
  test: 17952.87 sec (9.8%), 40 videos
