# In [None]:
import os
import parselmouth
import numpy as np

# Dataset layout: two input folders (audio segments, transcripts) and one
# output folder for the prosody-annotated transcripts.
_DATASET_ROOT = "dataset"
audio_dir = f"{_DATASET_ROOT}/audio_segments"
transcripts_dir = f"{_DATASET_ROOT}/transcripts"
fully_annotated_transcripts_dir = f"{_DATASET_ROOT}/fully_annotated_transcripts"

# Create the output folder up front; an already existing folder is fine.
os.makedirs(fully_annotated_transcripts_dir, exist_ok=True)

def extract_prosody_features(audio_path):
    """Load an audio file and return its pitch and intensity tracks.

    Args:
        audio_path: Path of the audio file, handed to ``parselmouth.Sound``.

    Returns:
        A ``(pitch_values, intensity_values)`` tuple: the ``'frequency'``
        field of the pitch object's selected array, and the first column of
        the transposed intensity value matrix.
    """
    recording = parselmouth.Sound(audio_path)

    # Both analyses are run with the library's default settings.
    pitch_track = recording.to_pitch().selected_array['frequency']
    intensity_track = recording.to_intensity().values.T[0]
    return pitch_track, intensity_track

def annotate_prosody(audio_file, transcript):
    """Prefix a transcript with the audio's average pitch and intensity.

    Args:
        audio_file: Path of the audio file to analyse.
        transcript: Transcript text to annotate.

    Returns:
        The transcript preceded by
        ``[Average Pitch: X Hz] [Average Intensity: Y dB]`` and one space,
        with both numbers formatted to two decimals.
    """
    pitch_values, intensity_values = extract_prosody_features(audio_file)
    pitch_values = np.asarray(pitch_values, dtype=float)
    intensity_values = np.asarray(intensity_values, dtype=float)

    # Unvoiced frames are reported as 0 Hz by the pitch tracker. Averaging
    # them in would drag the mean towards zero, so only voiced frames count.
    voiced_pitch = pitch_values[pitch_values > 0]

    # With no voiced frame at all, report 0.00 Hz (what an all-unvoiced
    # recording produced before) instead of taking the mean of an empty array.
    avg_pitch = float(voiced_pitch.mean()) if voiced_pitch.size else 0.0

    # An empty intensity track still yields "nan", but without numpy's
    # "mean of empty slice" RuntimeWarning.
    if intensity_values.size:
        avg_intensity = float(intensity_values.mean())
    else:
        avg_intensity = float("nan")

    prosody_annotation = f"[Average Pitch: {avg_pitch:.2f} Hz] [Average Intensity: {avg_intensity:.2f} dB]"

    annotated_transcript = f"{prosody_annotation} {transcript}"
    return annotated_transcript

# Annotate each transcript with prosody features.
# For every "<name>.wav" in audio_dir, read "transcript_<name>.txt" from
# transcripts_dir, prepend the prosody annotation, and write the result under
# the same file name in fully_annotated_transcripts_dir.
annotated_count = 0
audio_without_transcript = []

# Sorted so that runs process files in a reproducible order.
for audio_file in sorted(os.listdir(audio_dir)):
    if not audio_file.endswith(".wav"):
        continue

    audio_path = os.path.join(audio_dir, audio_file)

    # Swap only the trailing extension; str.replace would also rewrite any
    # ".wav" that happens to occur earlier in the file name.
    audio_stem = audio_file[:-len(".wav")]
    transcript_file = f"transcript_{audio_stem}.txt"
    transcript_path = os.path.join(transcripts_dir, transcript_file)

    # NOTE(review): files are opened with the platform default encoding, as
    # before, so existing transcripts keep round-tripping; consider pinning an
    # explicit encoding once the transcript writer's encoding is confirmed.
    try:
        with open(transcript_path, "r") as f:
            transcript = f.read()
    except FileNotFoundError:
        # Audio without a transcript is skipped, but recorded for the report.
        audio_without_transcript.append(audio_file)
        continue

    annotated_transcript = annotate_prosody(audio_path, transcript)

    annotated_transcript_path = os.path.join(fully_annotated_transcripts_dir, transcript_file)
    with open(annotated_transcript_path, "w") as f:
        f.write(annotated_transcript)
    annotated_count += 1

if audio_without_transcript:
    print(
        f"Skipped {len(audio_without_transcript)} audio file(s) without a transcript: "
        f"{', '.join(audio_without_transcript)}"
    )
print(f"Annotated {annotated_count} transcript(s)")
print(f"Fully annotated transcripts saved in {fully_annotated_transcripts_dir}")
