## Generate GT RTTM File

In [2]:
import json
from pathlib import Path
from glob import glob
from collections import Counter
import pandas as pd

# Folder and output paths
input_folder = "/home/nele_pauline_suffo/ProcessedData/childlens_annotations"
output_dir = Path("/home/nele_pauline_suffo/ProcessedData/vtc_childlens")
output_dir.mkdir(exist_ok=True)

valid_action_names = {"Child Talking", "Other Person Talking", "Overheard Speech", "Singing/Humming"}

all_files = []
speaker_counts = Counter()
files_processed = 0

# Step 1: Load all JSON files and collect metadata
json_files = glob(f"{input_folder}/*.json")
for json_file in json_files:
    try:
        with open(json_file, "r") as f:
            annotations = json.load(f)
        uri = annotations['metadata']['name']
        duration = annotations['metadata']['duration'] / 1_000_000  # microseconds to seconds
        all_files.append({
            "path": json_file,
            "uri": uri,
            "duration": duration
        })
    except Exception as e:
        print(f"Skipping file {json_file} due to error: {e}")

# Step 2: Sort and split files by total duration
all_files.sort(key=lambda x: x["duration"], reverse=True)
total_duration = sum(f["duration"] for f in all_files)

train_duration, dev_duration, test_duration = 0, 0, 0
train_files, dev_files, test_files = [], [], []

for f in all_files:
    if train_duration < 0.8 * total_duration:
        train_files.append(f)
        train_duration += f["duration"]
    elif dev_duration < 0.1 * total_duration:
        dev_files.append(f)
        dev_duration += f["duration"]
    else:
        test_files.append(f)
        test_duration += f["duration"]

splits = {
    "train": train_files,
    "dev": dev_files,
    "test": test_files
}
split_durations = {
    "train": train_duration,
    "dev": dev_duration,
    "test": test_duration
}

# Step 3: Process each split and write RTTM
all_df_rows = [] # Initialize list for DataFrame rows
all_rttm_lines_combined = [] # Initialize list for all RTTM lines for the complete.rttm

for split_name, files_in_split in splits.items():
    rttm_lines_split = [] # RTTM lines for the current split

    for f_info in files_in_split:
        try:
            with open(f_info["path"], "r") as file_handle:
                annotations = json.load(file_handle)
            uri = annotations['metadata']['name']
            files_processed += 1
            
            for instance in annotations.get('instances', []):
                if instance["meta"]["type"] != "event":
                    continue
                try:
                    # Assuming the first parameter block contains the relevant timestamps and attributes
                    if not instance["parameters"] or not instance["parameters"][0].get("timestamps"):
                        continue
                    
    
                    parameter_block = instance["parameters"][0]
                    # Ensure 'timestamps' key exists and is a list
                    if not isinstance(parameter_block.get("timestamps"), list):
                        continue
                    
                    # The 'start' and 'end' for the whole instance parameter block
                    instance_start_time_us = parameter_block.get("start")
                    instance_end_time_us = parameter_block.get("end")
                    
                    if instance_start_time_us is None or instance_end_time_us is None:
                        pass
                    
                    for detail_idx, detail in enumerate(parameter_block["timestamps"]):
                        if "attributes" not in detail:
                            continue
                        
                        action_type = next(
                            (attr["name"] for attr in detail["attributes"]
                             if attr["groupName"] == "Type of Action"),
                            None
                        )  

                        if action_type in valid_action_names:
                            speaker_id = "NA"

                            if action_type in ["Child Talking", "Singing/Humming"]:
                                speaker_id = "KCHI"
                            elif action_type == "Other Person Talking":
                                age_group = next(
                                    (attr["name"] for attr in detail["attributes"]
                                     if attr["groupName"] == "1st Person Age Group"),
                                    None
                                )
                                gender = next(
                                    (attr["name"] for attr in detail["attributes"]
                                     if attr["groupName"] == "1st Person Gender"),
                                    None
                                )

                                if age_group in ["Child", "Infant"]:
                                    speaker_id = "CHI"
                                elif age_group in ["Adult", "Adolescent"]:
                                    if gender == "Female":
                                        speaker_id = "FEM"
                                    elif gender == "Male":
                                        speaker_id = "MAL"
                            elif action_type == "Overheard Speech":
                                speaker_id = "SPEECH"

                       
                            # Correctly use segment start/end from the parameter_block
                            # (which was referred to as 'timestamps' variable in original code)
                            segment_start_us = parameter_block.get("start")
                            segment_end_us = parameter_block.get("end")
                            
                            
                            if segment_start_us is None or segment_end_us is None:
                                print(f"Warning: Missing start/end in parameter block for instance in {f_info['path']}. Skipping.")
                                continue
                            
                            
                            start_sec = segment_start_us / 1_000_000
                            end_sec = segment_end_us / 1_000_000
                            duration_sec = end_sec - start_sec

                            if duration_sec <= 0: # Ensure duration is positive
                                print(f"Warning: Non-positive duration {duration_sec:.3f}s for segment in {f_info['path']}. Skipping.")
                                continue
                            
                            rttm_line = f"SPEAKER {uri} 1 {start_sec:.3f} {duration_sec:.3f} <NA> <NA> {speaker_id} <NA> <NA>"
                            rttm_line_speech = f"SPEAKER {uri} 1 {start_sec:.3f} {duration_sec:.3f} <NA> <NA> SPEECH <NA> <NA>"

                            if speaker_id != "NA":
                                rttm_lines_split.append(rttm_line)
                                all_rttm_lines_combined.append(rttm_line) # Add to combined list
                                rttm_lines_split.append(rttm_line_speech) # RTTM gets both lines
                                all_rttm_lines_combined.append(rttm_line_speech) # Add to combined list

                                speaker_counts[speaker_id] += 1    # Counts original speaker_id (e.g. OCH)
                                speaker_counts["SPEECH"] += 1

                                # Prepare data for DataFrame (only specific voice type, OCH mapped to CHI)
                                row_data_specific = {
                                    "audio_file_name": uri,
                                    "Utterance_Start": round(start_sec, 3),
                                    "Utterance_Duration": round(duration_sec, 3),
                                    "Voice_type": speaker_id, # This will be KCHI, CHI, FEM, MAL
                                    "Utterance_End": round(end_sec, 3)
                                }
                                all_df_rows.append(row_data_specific)
                                
                                # Add corresponding SPEECH entry for the DataFrame
                                row_data_speech = {
                                    "audio_file_name": uri,
                                    "Utterance_Start": round(start_sec, 3),
                                    "Utterance_Duration": round(duration_sec, 3),
                                    "Voice_type": "SPEECH", # Add the SPEECH category
                                    "Utterance_End": round(end_sec, 3)
                                }
                                all_df_rows.append(row_data_speech)
                                
                                # only use the first instance of each segment
                                break
                                
                except Exception as e:
                    print(f"Skipping instance in {f_info['path']} due to error: {e}")
        except Exception as e:
            print(f"Error reading file {f_info['path']}: {e}")

    # Save to RTTM file for the current split
    rttm_path_split = output_dir / f"{split_name}.rttm"
    with open(rttm_path_split, "w") as out_f:
        for line in rttm_lines_split:
            out_f.write(line + "\n")
    print(f"📝 RTTM file for {split_name} split saved to {rttm_path_split}")

# Save the complete RTTM file after processing all splits
complete_rttm_path = output_dir / "complete.rttm"
with open(complete_rttm_path, "w") as out_f:
    for line in all_rttm_lines_combined:
        out_f.write(line + "\n")
print(f"📝 Complete RTTM file for all splits saved to {complete_rttm_path}")

# Step 3: Process each split and write RTTM
all_df_rows = [] # Initialize list for DataFrame rows
all_rttm_lines_combined = [] # Initialize list for all RTTM lines for the complete.rttm

for split_name, files_in_split in splits.items():
    rttm_lines_split = [] # RTTM lines for the current split

    for f_info in files_in_split:
        try:
            with open(f_info["path"], "r") as file_handle:
                annotations = json.load(file_handle)
            uri = annotations['metadata']['name']
            files_processed += 1
            
            for instance in annotations.get('instances', []):
                if instance["meta"]["type"] != "event":
                    continue
                try:
                    # Assuming the first parameter block contains the relevant timestamps and attributes
                    if not instance["parameters"] or not instance["parameters"][0].get("timestamps"):
                        continue
                    
    
                    parameter_block = instance["parameters"][0]
                    # Ensure 'timestamps' key exists and is a list
                    if not isinstance(parameter_block.get("timestamps"), list):
                        continue
                    
                    # The 'start' and 'end' for the whole instance parameter block
                    instance_start_time_us = parameter_block.get("start")
                    instance_end_time_us = parameter_block.get("end")
                    
                    if instance_start_time_us is None or instance_end_time_us is None:
                        pass
                    
                    for detail_idx, detail in enumerate(parameter_block["timestamps"]):
                        if "attributes" not in detail:
                            continue
                        
                        action_type = next(
                            (attr["name"] for attr in detail["attributes"]
                             if attr["groupName"] == "Type of Action"),
                            None
                        )  

                        if action_type in valid_action_names:
                            speaker_id = "NA"

                            if action_type in ["Child Talking", "Singing/Humming"]:
                                speaker_id = "KCHI"
                            elif action_type == "Other Person Talking":
                                age_group = next(
                                    (attr["name"] for attr in detail["attributes"]
                                     if attr["groupName"] == "1st Person Age Group"),
                                    None
                                )
                                gender = next(
                                    (attr["name"] for attr in detail["attributes"]
                                     if attr["groupName"] == "1st Person Gender"),
                                    None
                                )

                                if age_group in ["Child", "Infant"]:
                                    speaker_id = "CHI"
                                elif age_group in ["Adult", "Adolescent"]:
                                    if gender == "Female":
                                        speaker_id = "FEM"
                                    elif gender == "Male":
                                        speaker_id = "MAL"
                            elif action_type == "Overheard Speech":
                                speaker_id = "SPEECH"

                       
                            # Correctly use segment start/end from the parameter_block
                            # (which was referred to as 'timestamps' variable in original code)
                            segment_start_us = parameter_block.get("start")
                            segment_end_us = parameter_block.get("end")
                            
                            
                            if segment_start_us is None or segment_end_us is None:
                                print(f"Warning: Missing start/end in parameter block for instance in {f_info['path']}. Skipping.")
                                continue
                            
                            
                            start_sec = segment_start_us / 1_000_000
                            end_sec = segment_end_us / 1_000_000
                            duration_sec = end_sec - start_sec

                            if duration_sec <= 0: # Ensure duration is positive
                                print(f"Warning: Non-positive duration {duration_sec:.3f}s for segment in {f_info['path']}. Skipping.")
                                continue
                            
                            rttm_line = f"SPEAKER {uri} 1 {start_sec:.3f} {duration_sec:.3f} <NA> <NA> {speaker_id} <NA> <NA>"
                            rttm_line_speech = f"SPEAKER {uri} 1 {start_sec:.3f} {duration_sec:.3f} <NA> <NA> SPEECH <NA> <NA>"

                            if speaker_id != "NA":
                                rttm_lines_split.append(rttm_line)
                                all_rttm_lines_combined.append(rttm_line) # Add to combined list
                                rttm_lines_split.append(rttm_line_speech) # RTTM gets both lines
                                all_rttm_lines_combined.append(rttm_line_speech) # Add to combined list

                                speaker_counts[speaker_id] += 1    # Counts original speaker_id (e.g. OCH)
                                speaker_counts["SPEECH"] += 1

                                # Prepare data for DataFrame (only specific voice type, OCH mapped to CHI)
                                row_data_specific = {
                                    "audio_file_name": uri,
                                    "Utterance_Start": round(start_sec, 3),
                                    "Utterance_Duration": round(duration_sec, 3),
                                    "Voice_type": speaker_id, # This will be KCHI, CHI, FEM, MAL
                                    "Utterance_End": round(end_sec, 3)
                                }
                                all_df_rows.append(row_data_specific)
                                
                                # Add corresponding SPEECH entry for the DataFrame
                                row_data_speech = {
                                    "audio_file_name": uri,
                                    "Utterance_Start": round(start_sec, 3),
                                    "Utterance_Duration": round(duration_sec, 3),
                                    "Voice_type": "SPEECH", # Add the SPEECH category
                                    "Utterance_End": round(end_sec, 3)
                                }
                                all_df_rows.append(row_data_speech)
                                
                                # only use the first instance of each segment
                                break
                                
                except Exception as e:
                    print(f"Skipping instance in {f_info['path']} due to error: {e}")
        except Exception as e:
            print(f"Error reading file {f_info['path']}: {e}")

    # Save to RTTM file for the current split
    rttm_path_split = output_dir / f"{split_name}.rttm"
    with open(rttm_path_split, "w") as out_f:
        for line in rttm_lines_split:
            out_f.write(line + "\n")
    print(f"📝 RTTM file for {split_name} split saved to {rttm_path_split}")

# Save the complete RTTM file after processing all splits
complete_rttm_path = output_dir / "complete.rttm"
with open(complete_rttm_path, "w") as out_f:
    for line in all_rttm_lines_combined:
        out_f.write(line + "\n")
print(f"📝 Complete RTTM file for all splits saved to {complete_rttm_path}")

# Create and save ONE COMBINED DataFrame after processing all splits
if all_df_rows:
    combined_df = pd.DataFrame(all_df_rows)
    # Ensure desired column order
    combined_df = combined_df[["audio_file_name", "Utterance_Start", "Utterance_Duration", "Voice_type", "Utterance_End"]]
    # Define the single output path for the combined pickle file
    df_pkl_path = Path("/home/nele_pauline_suffo/ProcessedData/childlens_annotations/processed/childlens_annotations_gt.pkl")
    # Ensure the directory exists
    df_pkl_path.parent.mkdir(parents=True, exist_ok=True)
    combined_df.to_pickle(df_pkl_path)
    print(f"✅ Combined DataFrame for all splits saved to {df_pkl_path} ({len(combined_df)} rows)")
else:
    print(f"ℹ️ No data to create combined DataFrame.")

# Step 3.5: Save a complete UEM file with all video information
uem_lines = []
for f in all_files:
    uri = f["uri"]
    start_time = 0.000
    end_time = f["duration"]
    uem_lines.append(f"{uri} 1 {start_time:.3f} {end_time:.3f}")

uem_path = output_dir / "complete.uem"
with open(uem_path, "w") as uem_file:
    for line in uem_lines:
        uem_file.write(line + "\n")
print(f"✅ Combined UEM file for all videos saved to {uem_path} ({len(uem_lines)} segments)")

# Step 4: Summary logs
print(f"\n✅ Total processed files: {files_processed}")
print("\n🎙️ Speaker instance counts in all splits:")
for speaker_id in ['KCHI', 'CHI', 'FEM', 'MAL', 'SPEECH']:
    print(f"  {speaker_id}: {speaker_counts[speaker_id]}")

print("\n📊 RTTM split durations and video counts:")
for split_name in ["train", "dev", "test"]:
    dur = split_durations[split_name]
    perc = (dur / total_duration) * 100
    count = len(splits[split_name])
    print(f"  {split_name}: {dur:.2f} sec ({perc:.1f}%), {count} videos")

# Generate .lst files for train, development, and test splits
for split_name, files in splits.items():
    lst_path = output_dir / f"{split_name}.lst"
    with open(lst_path, "w") as lst_file:
        for f in files:
            lst_file.write(f"{f['uri']}\n")

print("✅ .lst files created for train, development, and test splits.")

# Generate .uem files for train, development, and test splits
for split_name, files in splits.items():
    uem_path = output_dir / f"{split_name}.uem"
    with open(uem_path, "w") as uem_file:
        for f in files:
            try:
                # Extract the URI and duration for each video
                uri = f["uri"]
                start = 0  # Start time is always 0
                end = f["duration"]  # End time is the video's duration

                # Write a single line for each video
                uem_line = f"{uri} 1 {start:.3f} {end:.3f}"
                uem_file.write(uem_line + "\n")
            except Exception as e:
                print(f"Error processing file {f['path']}: {e}")

print("✅ .uem files created for train, development, and test splits.")

📝 RTTM file for train split saved to /home/nele_pauline_suffo/ProcessedData/vtc_childlens/train.rttm
📝 RTTM file for dev split saved to /home/nele_pauline_suffo/ProcessedData/vtc_childlens/dev.rttm
📝 RTTM file for test split saved to /home/nele_pauline_suffo/ProcessedData/vtc_childlens/test.rttm
📝 Complete RTTM file for all splits saved to /home/nele_pauline_suffo/ProcessedData/vtc_childlens/complete.rttm
📝 RTTM file for train split saved to /home/nele_pauline_suffo/ProcessedData/vtc_childlens/train.rttm
📝 RTTM file for dev split saved to /home/nele_pauline_suffo/ProcessedData/vtc_childlens/dev.rttm
📝 RTTM file for test split saved to /home/nele_pauline_suffo/ProcessedData/vtc_childlens/test.rttm
📝 Complete RTTM file for all splits saved to /home/nele_pauline_suffo/ProcessedData/vtc_childlens/complete.rttm
✅ Combined DataFrame for all splits saved to /home/nele_pauline_suffo/ProcessedData/childlens_annotations/processed/childlens_annotations_gt.pkl (48216 rows)
✅ Combined UEM file for 

## Generate GT RTTM File for ChildLens_v2 VTC

In [4]:
import json
from pathlib import Path
from glob import glob
from collections import Counter
import pandas as pd

# Folder and output paths
input_folder = "/home/nele_pauline_suffo/ProcessedData/childlens_annotations"
output_dir = Path("/home/nele_pauline_suffo/ProcessedData/vtc_childlens_v2")
output_dir.mkdir(exist_ok=True)

valid_action_names = {"Child Talking", "Other Person Talking", "Overheard Speech", "Singing/Humming"}

all_files = []
speaker_counts = Counter()
files_processed = 0

# Step 1: Load all JSON files and collect metadata
json_files = glob(f"{input_folder}/*.json")
for json_file in json_files:
    try:
        with open(json_file, "r") as f:
            annotations = json.load(f)
        uri = annotations['metadata']['name']
        duration = annotations['metadata']['duration'] / 1_000_000  # microseconds to seconds
        all_files.append({
            "path": json_file,
            "uri": uri,
            "duration": duration
        })
    except Exception as e:
        print(f"Skipping file {json_file} due to error: {e}")

# Step 2: Sort and split files by total duration
all_files.sort(key=lambda x: x["duration"], reverse=True)
total_duration = sum(f["duration"] for f in all_files)

train_duration, dev_duration, test_duration = 0, 0, 0
train_files, dev_files, test_files = [], [], []

for f in all_files:
    if train_duration < 0.8 * total_duration:
        train_files.append(f)
        train_duration += f["duration"]
    elif dev_duration < 0.1 * total_duration:
        dev_files.append(f)
        dev_duration += f["duration"]
    else:
        test_files.append(f)
        test_duration += f["duration"]

splits = {
    "train": train_files,
    "dev": dev_files,
    "test": test_files
}
split_durations = {
    "train": train_duration,
    "dev": dev_duration,
    "test": test_duration
}

# Step 3: Process each split and write RTTM
all_df_rows = [] # Initialize list for DataFrame rows
all_rttm_lines_combined_v2 = [] # Initialize list for all RTTM lines for the complete_v2.rttm

for split_name, files_in_split in splits.items():
    rttm_lines = [] # RTTM lines for the current split (v2 logic)

    for f_info in files_in_split:
        try:
            with open(f_info["path"], "r") as file_handle:
                annotations = json.load(file_handle)
            uri = annotations['metadata']['name']
            files_processed += 1
            
            for instance in annotations.get('instances', []):
                if instance["meta"]["type"] != "event":
                    continue
                try:
                    # Assuming the first parameter block contains the relevant timestamps and attributes
                    if not instance["parameters"] or not instance["parameters"][0].get("timestamps"):
                        continue
                    
    
                    parameter_block = instance["parameters"][0]
                    # Ensure 'timestamps' key exists and is a list
                    if not isinstance(parameter_block.get("timestamps"), list):
                        continue
                    
                    # The 'start' and 'end' for the whole instance parameter block
                    instance_start_time_us = parameter_block.get("start")
                    instance_end_time_us = parameter_block.get("end")
                    
                    if instance_start_time_us is None or instance_end_time_us is None:
                        pass
                    
                    for detail_idx, detail in enumerate(parameter_block["timestamps"]):
                        if "attributes" not in detail:
                            continue
                        
                        action_type = next(
                            (attr["name"] for attr in detail["attributes"]
                             if attr["groupName"] == "Type of Action"),
                            None
                        )  

                        if action_type in valid_action_names:
                            speaker_id = "NA"

                            if action_type in ["Child Talking", "Singing/Humming"]:
                                speaker_id = "KCHI"
                            elif action_type == "Other Person Talking":
                                speaker_id = "CDS" # Child Directed Speech
                            elif action_type == "Overheard Speech":
                                speaker_id = "OHS"

                            # Correctly use segment start/end from the parameter_block
                            # (which was referred to as 'timestamps' variable in original code)
                            segment_start_us = parameter_block.get("start")
                            segment_end_us = parameter_block.get("end")
                            
                            
                            if segment_start_us is None or segment_end_us is None:
                                print(f"Warning: Missing start/end in parameter block for instance in {f_info['path']}. Skipping.")
                                continue
                            
                            
                            start_sec = segment_start_us / 1_000_000
                            end_sec = segment_end_us / 1_000_000
                            duration_sec = end_sec - start_sec

                            if duration_sec <= 0: # Ensure duration is positive
                                print(f"Warning: Non-positive duration {duration_sec:.3f}s for segment in {f_info['path']}. Skipping.")
                                continue
                            
                            rttm_line = f"SPEAKER {uri} 1 {start_sec:.3f} {duration_sec:.3f} <NA> <NA> {speaker_id} <NA> <NA>"
                            rttm_line_speech = f"SPEAKER {uri} 1 {start_sec:.3f} {duration_sec:.3f} <NA> <NA> SPEECH <NA> <NA>"

                            if speaker_id != "NA":
                                rttm_lines.append(rttm_line)
                                all_rttm_lines_combined_v2.append(rttm_line) # Add to combined list for v2
                                if speaker_id == "KCHI":
                                    rttm_lines.append(rttm_line_speech)
                                    all_rttm_lines_combined_v2.append(rttm_line_speech) # Add to combined list for v2
                                    speaker_counts["SPEECH"] += 1
                                speaker_counts[speaker_id] += 1
                                
                                # Prepare data for DataFrame (only specific voice type
                                row_data_specific = {
                                    "audio_file_name": uri,
                                    "Utterance_Start": round(start_sec, 3),
                                    "Utterance_Duration": round(duration_sec, 3),
                                    "Voice_type": speaker_id, 
                                    "Utterance_End": round(end_sec, 3)
                                }
                                all_df_rows.append(row_data_specific)
                                
                                # only use the first instance of each segment
                                break
                                
                except Exception as e:
                    print(f"Skipping instance in {f_info['path']} due to error: {e}")
        except Exception as e:
            print(f"Error reading file {f_info['path']}: {e}")

    # Save to RTTM file for the current split
    rttm_path = output_dir / f"{split_name}.rttm" # This is vtc_childlens_v2
    with open(rttm_path, "w") as out_f:
        for line in rttm_lines:
            out_f.write(line + "\n")
    print(f"📝 RTTM file for {split_name} split saved to {rttm_path}")

# Save the complete RTTM file for v2 after processing all splits
complete_rttm_path_v2 = output_dir / "complete.rttm" # This is vtc_childlens_v2
with open(complete_rttm_path_v2, "w") as out_f:
    for line in all_rttm_lines_combined_v2:
        out_f.write(line + "\n")
print(f"📝 Complete RTTM file for v2 (all splits) saved to {complete_rttm_path_v2}")

# Create and save ONE COMBINED DataFrame after processing all splits
if all_df_rows:
    combined_df = pd.DataFrame(all_df_rows)
    # Ensure desired column order
    combined_df = combined_df[["audio_file_name", "Utterance_Start", "Utterance_Duration", "Voice_type", "Utterance_End"]]
    # Define the single output path for the combined pickle file
    df_pkl_path = Path("/home/nele_pauline_suffo/ProcessedData/childlens_annotations/processed/childlens_annotations_gt_v2.pkl")
    # Ensure the directory exists
    df_pkl_path.parent.mkdir(parents=True, exist_ok=True)
    combined_df.to_pickle(df_pkl_path)
    print(f"✅ Combined DataFrame for all splits saved to {df_pkl_path} ({len(combined_df)} rows)")
else:
    print(f"ℹ️ No data to create combined DataFrame.")

# Step 3.5: Save a complete UEM file with all video information
uem_lines = []
for f in all_files:
    uri = f["uri"]
    start_time = 0.000
    end_time = f["duration"]
    uem_lines.append(f"{uri} 1 {start_time:.3f} {end_time:.3f}")

uem_path = output_dir / "complete.uem"
with open(uem_path, "w") as uem_file:
    for line in uem_lines:
        uem_file.write(line + "\n")
print(f"✅ Combined UEM file for all videos saved to {uem_path} ({len(uem_lines)} segments)")

# Step 4: Summary logs
print(f"\n✅ Total processed files: {files_processed}")
print("\n🎙️ Speaker instance counts in all splits:")
for speaker_id in ['KCHI', 'SPEECH']:
    print(f"  {speaker_id}: {speaker_counts[speaker_id]}")

print("\n📊 RTTM split durations and video counts:")
for split_name in ["train", "dev", "test"]:
    dur = split_durations[split_name]
    perc = (dur / total_duration) * 100
    count = len(splits[split_name])
    print(f"  {split_name}: {dur:.2f} sec ({perc:.1f}%), {count} videos")

# Generate .lst files for train, development, and test splits
for split_name, files in splits.items():
    lst_path = output_dir / f"{split_name}.lst"
    with open(lst_path, "w") as lst_file:
        for f in files:
            lst_file.write(f"{f['uri']}\n")

print("✅ .lst files created for train, development, and test splits.")

# Generate .uem files for train, development, and test splits
for split_name, files in splits.items():
    uem_path = output_dir / f"{split_name}.uem"
    with open(uem_path, "w") as uem_file:
        for f in files:
            try:
                # Extract the URI and duration for each video
                uri = f["uri"]
                start = 0  # Start time is always 0
                end = f["duration"]  # End time is the video's duration

                # Write a single line for each video
                uem_line = f"{uri} 1 {start:.3f} {end:.3f}"
                uem_file.write(uem_line + "\n")
            except Exception as e:
                print(f"Error processing file {f['path']}: {e}")

print("✅ .uem files created for train, development, and test splits.")

📝 RTTM file for train split saved to /home/nele_pauline_suffo/ProcessedData/vtc_childlens_v2/train.rttm
📝 RTTM file for dev split saved to /home/nele_pauline_suffo/ProcessedData/vtc_childlens_v2/dev.rttm
📝 RTTM file for test split saved to /home/nele_pauline_suffo/ProcessedData/vtc_childlens_v2/test.rttm
📝 Complete RTTM file for v2 (all splits) saved to /home/nele_pauline_suffo/ProcessedData/vtc_childlens_v2/complete.rttm
✅ Combined DataFrame for all splits saved to /home/nele_pauline_suffo/ProcessedData/childlens_annotations/processed/childlens_annotations_gt_v2.pkl (24150 rows)
✅ Combined UEM file for all videos saved to /home/nele_pauline_suffo/ProcessedData/vtc_childlens_v2/complete.uem (161 segments)

✅ Total processed files: 161

🎙️ Speaker instance counts in all splits:
  KCHI: 11595
  SPEECH: 11595

📊 RTTM split durations and video counts:
  train: 146244.93 sec (80.1%), 98 videos
  dev: 18337.89 sec (10.0%), 23 videos
  test: 17952.87 sec (9.8%), 40 videos
✅ .lst files created

## Create rttm files per video file

In [4]:
from pathlib import Path
# Create a new folder to save individual RTTM files
individual_rttm_dir = Path("/home/nele_pauline_suffo/ProcessedData/vtc_childlens/rttm_per_video")
individual_rttm_dir.mkdir(exist_ok=True)
output_dir = Path("/home/nele_pauline_suffo/ProcessedData/vtc_childlens")

# Process each split's RTTM file
for split_name in ["train", "dev", "test"]:
    rttm_path = output_dir / f"{split_name}.rttm"
    try:
        with open(rttm_path, "r") as rttm_file:
            lines = rttm_file.readlines()

        # Group lines by video ID (URI)
        video_rttm_data = {}
        for line in lines:
            parts = line.split()
            if len(parts) < 3:
                continue
            uri = parts[1]  # Video ID
            if uri not in video_rttm_data:
                video_rttm_data[uri] = []
            video_rttm_data[uri].append(line)

        # Write each video's RTTM data to a separate file
        for uri, rttm_lines in video_rttm_data.items():
            video_rttm_path = individual_rttm_dir / f"{uri}.rttm"
            with open(video_rttm_path, "w") as video_rttm_file:
                video_rttm_file.writelines(rttm_lines)

    except Exception as e:
        print(f"Error processing RTTM file {rttm_path}: {e}")

print(f"✅ Individual RTTM files created in {individual_rttm_dir}.")

✅ Individual RTTM files created in /home/nele_pauline_suffo/ProcessedData/vtc_childlens/rttm_per_video.
