## Generate RTTM File

In [2]:
import json
from pathlib import Path
from glob import glob
from collections import Counter

# Folder and output paths
input_folder = "/home/nele_pauline_suffo/ProcessedData/childlens_annotations"
output_dir = Path("/home/nele_pauline_suffo/ProcessedData/vtc_childlens")
# parents=True so the cell also works when the parent directory is missing.
output_dir.mkdir(parents=True, exist_ok=True)

# Annotation action types that are turned into RTTM speech segments.
valid_action_names = {"Child Talking", "Other Person Talking", "Overheard Speech", "Singing/Humming"}

all_files = []  # one dict per readable annotation file: path, uri, duration
speaker_counts = Counter()  # number of emitted segments per speaker label
files_processed = 0

# Step 1: Load all JSON files and collect metadata
# glob() returns paths in arbitrary order. Sorting them makes the later
# duration sort break ties the same way on every run, so the train/dev/test
# assignment is reproducible.
json_files = sorted(glob(f"{input_folder}/*.json"))
for json_file in json_files:
    try:
        with open(json_file, "r", encoding="utf-8") as f:
            annotations = json.load(f)
        uri = annotations['metadata']['name']
        duration = annotations['metadata']['duration'] / 1_000_000  # microseconds to seconds
    except (OSError, ValueError, KeyError, TypeError) as e:
        # Unreadable file, invalid JSON/encoding, or missing/ill-typed metadata:
        # report and leave the file out of every split.
        print(f"Skipping file {json_file} due to error: {e}")
    else:
        all_files.append({
            "path": json_file,
            "uri": uri,
            "duration": duration
        })

# Step 2: Sort and split files by total duration
# Files are visited longest-first and assigned greedily: train is filled until
# it holds at least 80% of the total duration, then dev until it holds at
# least 10%, and everything that is left goes to test.
# NOTE(review): because of the longest-first order, train receives the longest
# recordings and test the shortest ones - confirm this is intended.
all_files.sort(key=lambda entry: entry["duration"], reverse=True)
total_duration = sum(entry["duration"] for entry in all_files)

splits = {"train": [], "dev": [], "test": []}
split_durations = {"train": 0, "dev": 0, "test": 0}

train_target = 0.8 * total_duration
dev_target = 0.1 * total_duration

for entry in all_files:
    if split_durations["train"] < train_target:
        bucket = "train"
    elif split_durations["dev"] < dev_target:
        bucket = "dev"
    else:
        bucket = "test"
    splits[bucket].append(entry)
    split_durations[bucket] += entry["duration"]

# Flat aliases for the per-split results (same list objects as in `splits`).
train_files, dev_files, test_files = splits["train"], splits["dev"], splits["test"]
train_duration = split_durations["train"]
dev_duration = split_durations["dev"]
test_duration = split_durations["test"]

# Step 3: Process each split and write RTTM


def _attribute_name(detail, group_name):
    """Return the ``name`` of the first attribute of *detail* in *group_name*.

    Returns None when *detail* has no attribute belonging to that group.
    """
    return next(
        (attr["name"] for attr in detail["attributes"]
         if attr["groupName"] == group_name),
        None,
    )


def _speaker_label(detail, action_type):
    """Map an annotated action to its RTTM speaker label.

    Returns None when no label can be assigned, e.g. an "Other Person
    Talking" block whose age group / gender combination is not covered.
    """
    # Both lookups are only needed for "Other Person Talking".
    if action_type in ("Child Talking", "Singing/Humming"):
        return "KCHI"
    if action_type == "Overheard Speech":
        return "OVH"
    if action_type == "Other Person Talking":
        age_group = _attribute_name(detail, "1st Person Age Group")
        gender = _attribute_name(detail, "1st Person Gender")
        if age_group in ("Child", "Infant"):
            return "CHI"
        if age_group in ("Adult", "Adolescent"):
            if gender == "Female":
                return "FEM"
            if gender == "Male":
                return "MAL"
    return None


for split_name, files in splits.items():
    rttm_lines = []
    for f in files:
        try:
            with open(f["path"], "r", encoding="utf-8") as file:
                annotations = json.load(file)
            uri = annotations['metadata']['name']
            files_processed += 1

            for instance in annotations.get('instances', []):
                try:
                    # Checked inside the per-instance guard so that one
                    # malformed instance is skipped instead of aborting all
                    # remaining instances of the file.
                    if instance["meta"]["type"] != "event":
                        continue

                    # The first parameter holds the segment bounds
                    # ("start"/"end") and, under "timestamps", the list of
                    # annotated blocks with their attributes.
                    segment = instance["parameters"][0]

                    for detail in segment["timestamps"]:
                        if "attributes" not in detail:
                            continue
                        action_type = _attribute_name(detail, "Type of Action")
                        if action_type not in valid_action_names:
                            continue
                        speaker_id = _speaker_label(detail, action_type)
                        if speaker_id is None:
                            continue

                        # Timing (same 1_000_000 divisor as the metadata
                        # duration, i.e. presumably microseconds -> seconds)
                        start = segment["start"] / 1_000_000
                        end = segment["end"] / 1_000_000
                        duration = end - start

                        # Each segment is written twice: once under its
                        # speaker label and once under the generic SPEECH label.
                        rttm_lines.append(
                            f"SPEAKER {uri} 1 {start:.3f} {duration:.3f} <NA> <NA> {speaker_id} <NA> <NA>"
                        )
                        rttm_lines.append(
                            f"SPEAKER {uri} 1 {start:.3f} {duration:.3f} <NA> <NA> SPEECH <NA> <NA>"
                        )
                        speaker_counts[speaker_id] += 1
                        speaker_counts["SPEECH"] += 1
                        break  # Only use the first relevant block per instance
                except Exception as e:
                    print(f"Skipping instance in {f['path']} due to error: {e}")
        except Exception as e:
            print(f"Error reading file {f['path']}: {e}")

    # Save to RTTM file
    rttm_path = output_dir / f"{split_name}.rttm"
    with open(rttm_path, "w", encoding="utf-8") as out_f:
        out_f.writelines(f"{line}\n" for line in rttm_lines)

# Step 4: Summary logs
print(f"\n✅ Total processed files: {files_processed}")
print("\n🎙️ Speaker instance counts in all splits:")
for speaker_id in ['KCHI', 'CHI', 'FEM', 'MAL', 'OVH', 'SPEECH']:
    print(f"  {speaker_id}: {speaker_counts[speaker_id]}")

print("\n📊 RTTM split durations and video counts:")
for split_name in ["train", "dev", "test"]:
    dur = split_durations[split_name]
    # total_duration is 0 when no annotation file could be loaded; report a
    # 0% share instead of failing with ZeroDivisionError.
    perc = (dur / total_duration) * 100 if total_duration else 0.0
    count = len(splits[split_name])
    print(f"  {split_name}: {dur:.2f} sec ({perc:.1f}%), {count} videos")

# Generate .lst files for train, development, and test splits
# (one URI per line, in the same order as the split's file list)
for split_name, files in splits.items():
    lst_path = output_dir / f"{split_name}.lst"
    with open(lst_path, "w", encoding="utf-8") as lst_file:
        lst_file.writelines(f"{entry['uri']}\n" for entry in files)

print("✅ .lst files created for train, development, and test splits.")


✅ Total processed files: 161

🎙️ Speaker instance counts in all splits:
  KCHI: 11595
  CHI: 701
  FEM: 6193
  MAL: 2447
  OVH: 3172
  SPEECH: 24108

📊 RTTM split durations and video counts:
  train: 146244.93 sec (80.1%), 98 videos
  dev: 18337.89 sec (10.0%), 23 videos
  test: 17952.87 sec (9.8%), 40 videos
✅ .lst files created for train, development, and test splits.


## Generate .uem file

In [3]:
# Write one .uem file per split (train, dev, test).
# Every video contributes a single line "<uri> 1 <start> <end>" that spans
# from 0 to the video's duration, both printed with three decimals.
for split_name, split_entries in splits.items():
    uem_path = output_dir / f"{split_name}.uem"
    with open(uem_path, "w") as uem_file:
        for entry in split_entries:
            try:
                uem_file.write(
                    "{} 1 {:.3f} {:.3f}\n".format(entry["uri"], 0, entry["duration"])
                )
            except Exception as e:
                print(f"Error processing file {entry['path']}: {e}")

print("✅ .uem files created for train, development, and test splits.")

✅ .uem files created for train, development, and test splits.


## Create RTTM files per video file

In [4]:
from pathlib import Path
# Location of the per-split RTTM files and of the per-video output folder
output_dir = Path("/home/nele_pauline_suffo/ProcessedData/vtc_childlens")
individual_rttm_dir = Path("/home/nele_pauline_suffo/ProcessedData/vtc_childlens/rttm_per_video")
individual_rttm_dir.mkdir(exist_ok=True)

# Break each split's RTTM file up into one RTTM file per video
for split_name in ("train", "dev", "test"):
    rttm_path = output_dir / f"{split_name}.rttm"
    try:
        # Bucket the lines by the URI in the second column; lines with fewer
        # than three whitespace-separated fields are ignored.
        video_rttm_data = {}
        with open(rttm_path, "r") as rttm_file:
            for line in rttm_file:
                fields = line.split()
                if len(fields) >= 3:
                    video_rttm_data.setdefault(fields[1], []).append(line)

        # One output file per URI, lines kept in their original order
        for uri, rttm_lines in video_rttm_data.items():
            video_rttm_path = individual_rttm_dir / f"{uri}.rttm"
            with open(video_rttm_path, "w") as video_rttm_file:
                video_rttm_file.write("".join(rttm_lines))

    except Exception as e:
        print(f"Error processing RTTM file {rttm_path}: {e}")

print(f"✅ Individual RTTM files created in {individual_rttm_dir}.")

✅ Individual RTTM files created in /home/nele_pauline_suffo/ProcessedData/vtc_childlens/rttm_per_video.
