In [1]:
import os
import re
import pandas as pd
from pathlib import Path
import sys # Import sys for exit

# --- Configuration ---
# Set the directory containing your SRT files
SRT_DIRECTORY = Path("./srt_files")  # <--- CHANGE THIS to your folder path

# Set the directory where the output Excel files will be saved
OUTPUT_DIRECTORY = Path("./output_ratings") # <--- CHANGE THIS if desired

# --- Script Logic ---

def parse_srt_file(file_path):
    """
    Parses an SRT file to extract turn ID, start time, end time, and rating
    for each subtitle block where the text is a rating in single quotes.

    Args:
        file_path (Path): The path to the SRT file.

    Returns:
        list: A list of dictionaries, each containing 'Turn ID', 'Start Time',
              'End Time', and 'Rating' for a subtitle block. Returns an empty
              list if errors occur or no matching blocks are found.
    """
    extracted_data = []
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            content = f.read()

            # Regex to find SRT blocks with a rating ('number') as the text
            # Group 1: Turn ID (\d+)
            # Group 2: Start Time (\d{2}:\d{2}:\d{2},\d{3})
            # Group 3: End Time (\d{2}:\d{2}:\d{2},\d{3})
            # Group 4: Rating (\d+) inside single quotes
            # Handles potential whitespace variations
            pattern = re.compile(
                r"(\d+)\s*\n"                                      # Turn ID line
                r"(\d{2}:\d{2}:\d{2},\d{3})\s*-->\s*(\d{2}:\d{2}:\d{2},\d{3})\s*\n" # Timestamp line
                r"'(\d+)'"                                         # Rating line ('number')
                # r"(.+?)\n\n", re.DOTALL # Alternative if text isn't *just* the rating
            )

            matches = pattern.findall(content)

            for match in matches:
                turn_id, start_time, end_time, rating = match
                extracted_data.append({
                    'Turn ID': int(turn_id), # Store as integer
                    'Start Time': start_time,
                    'End Time': end_time,
                    'Rating': int(rating) # Store rating as integer
                })

    except FileNotFoundError:
        print(f"Error: File not found - {file_path}")
    except Exception as e:
        print(f"Error processing file {file_path}: {e}")
    
    return extracted_data

# --- Main Execution ---

# Ensure the input directory exists
if not SRT_DIRECTORY.is_dir():
    print(f"Error: Input directory not found - {SRT_DIRECTORY}")
    print("Please create the directory and place your SRT files inside, or update the SRT_DIRECTORY path.")
    sys.exit(1) # Use sys.exit for a cleaner exit

# Create the output directory if it doesn't exist
try:
    OUTPUT_DIRECTORY.mkdir(parents=True, exist_ok=True)
except Exception as e:
    print(f"Error: Could not create output directory - {OUTPUT_DIRECTORY}: {e}")
    sys.exit(1)


# Get SRT files and sort them for consistent order
srt_files = sorted(list(SRT_DIRECTORY.glob("*.srt")))

if not srt_files:
    print(f"No .srt files found in directory: {SRT_DIRECTORY}")
    sys.exit(0) # No files is not an error, just nothing to do

print(f"Found {len(srt_files)} SRT files in {SRT_DIRECTORY}. Processing...")

processed_count = 0
error_count = 0

for srt_file in srt_files:
    print(f"\nProcessing: {srt_file.name}")
    
    # Parse the current SRT file
    turn_data = parse_srt_file(srt_file)

    if not turn_data:
        print(f"  No valid rating entries found or error processing file: {srt_file.name}")
        error_count += 1
        continue # Skip to the next file

    # Create a DataFrame for this file's data
    df = pd.DataFrame(turn_data)
    
    # Define the output Excel filename based on the input SRT filename
    # Example: input 'personA.srt' -> output 'personA_ratings.xlsx'
    output_filename = OUTPUT_DIRECTORY / f"{srt_file.stem}_ratings.xlsx" 

    # --- Save to Excel ---
    try:
        # index=False prevents writing the DataFrame index as a column
        # engine='openpyxl' is required for .xlsx format
        df.to_excel(output_filename, index=False, engine='openpyxl') 
        print(f"  Successfully saved data to: {output_filename}")
        processed_count += 1
    except Exception as e:
        print(f"  Error saving Excel file {output_filename}: {e}")
        print("  Make sure you have 'openpyxl' installed (`pip install openpyxl`)")
        error_count += 1

print(f"\n--- Processing Complete ---")
print(f"Successfully processed and saved data for {processed_count} files.")
if error_count > 0:
    print(f"Encountered errors or found no data in {error_count} files.")

Found 39 SRT files in srt_files. Processing...

Processing: P10_1_annotation_EL.srt
  Successfully saved data to: output_ratings/P10_1_annotation_EL_ratings.xlsx

Processing: P11_1_annotation_EL.srt
  Successfully saved data to: output_ratings/P11_1_annotation_EL_ratings.xlsx

Processing: P12_1_annotation_EL.srt
  Successfully saved data to: output_ratings/P12_1_annotation_EL_ratings.xlsx

Processing: P13_1_annotation_EL.srt
  Successfully saved data to: output_ratings/P13_1_annotation_EL_ratings.xlsx

Processing: P14_1_annotation_EL.srt
  Successfully saved data to: output_ratings/P14_1_annotation_EL_ratings.xlsx

Processing: P15_1_annotation_EL.srt
  Successfully saved data to: output_ratings/P15_1_annotation_EL_ratings.xlsx

Processing: P16_1_annotation_EL.srt
  Successfully saved data to: output_ratings/P16_1_annotation_EL_ratings.xlsx

Processing: P17_1_annotation_EL.srt
  Successfully saved data to: output_ratings/P17_1_annotation_EL_ratings.xlsx

Processing: P18_1_annotation_EL.

In [3]:
import pandas as pd
import os

# Path to the Excel file
excel_file_path = 'user-self-reports/target-enjoyment.xlsx'

# Import the target-enjoyment tab
target_enjoyment_df = pd.read_excel(excel_file_path, sheet_name='target-enjoyment', header=1)

# Display the first few rows to verify
print(target_enjoyment_df.head())

os.makedirs("exchange-data", exist_ok=True)

for person in target_enjoyment_df["PID"]:
    first_interaction_robot = target_enjoyment_df.loc[target_enjoyment_df["PID"] == person, "Q1-Robot"].values[0]
    
    transcript_path = f"11l-corrected-transcripts/audio-{first_interaction_robot}-P{person}.txt"
    with open(transcript_path, "r") as transcript:
        transcript_lines = transcript.readlines()

        exchanges = {}
        utterance_index = 0
        for line in transcript_lines:
            if (not "[Robot]" in line) and (not "[User]" in line):
                if line.strip() == "":
                    continue
                else:
                    exchanges[utterance_index][2] += line.strip() + " "
            else:
                utterance_index += 1

                time_part = line.split("[")[0].strip()  # Get "00:00:21,459 --> 00:00:25,439"
                start_time, end_time = time_part.split("-->")
                start_time = start_time.strip()  # "00:00:21,459"
                end_time = end_time.strip()  # "00:00:25,439"
                if "[Robot]" in line:
                    exchanges[utterance_index] = [start_time, end_time, "Robot: "]
                else:
                    exchanges[utterance_index] = [start_time, end_time, "User: "]
    # Create a DataFrame from the exchange data
    exchange_df = pd.DataFrame(exchanges).T
    exchange_df.columns = ["Start Time", "End Time", "Utterance"]
    # Save the DataFrame to an Excel file
    # exchange_df.to_excel(f"exchange-data/exchange_data-P{person}-{first_interaction_robot}.xlsx", index=False)

   PID  Condition Q1-Robot Q2-Robot  Q1-1  Q1-2  Q2-1  Q2-2  Q3-1  Q3-2  Q4-1  \
0    4          3    Alice    Clara     3     5     5     6     2     5     2   
1    5          2    Clara    Alice     6     6     6     5     6     5     6   
2    6          4    Clara    Alice     6     5     7     6     7     3     6   
3    7          2    Clara    Alice     7     7     7     7     7     7     7   
4    8          3    Alice    Clara     7     7     6     7     6     7     6   

   Q4-2  Q5-1  Q5-2  Q6-1  Q6-2  Q7-1  Q7-2  
0     5     5     6     5     6     2     1  
1     5     7     6     6     4     1     2  
2     5     7     4     6     6     1     1  
3     7     7     7     7     7     1     1  
4     7     6     6     7     7     1     1  


In [48]:
from datetime import datetime, time
import os
import pandas as pd
import re
import subprocess
from pathlib import Path

def parse_time(time_str):
    """Convert time string to datetime object."""
    time_str = time_str.replace(",", ".")
    _, minutes, seconds = time_str.split(":")
    return float(minutes) * 60 + float(seconds)

buffer = 5.0  # Buffer time in seconds

for file in os.listdir("output_ratings"):
    person = file.split("_")[0].strip("P")
    print(f"Processing file: {file} for person {person}")
    if file.endswith(".xlsx"):
        file_path = os.path.join("output_ratings", file)
        df = pd.read_excel(file_path)
        df["Exchange text"] = ""
        # Find the corresponding exchange text
        first_interaction_robot = target_enjoyment_df.loc[target_enjoyment_df["PID"] == int(person), "Q1-Robot"].values[0]
        exchange_text_df = pd.read_excel(f"exchange-data/exchange_data-P{person}-{first_interaction_robot}.xlsx")
        for i, row in df.iterrows():
            start_time = parse_time(row["Start Time"])
            end_time = parse_time(row["End Time"])
            all_user_utterances = []
            turn = row["Turn ID"]
            last = "r"

            for j, exchange_row in exchange_text_df.iterrows():
                
                if (start_time < parse_time(exchange_row["Start Time"])) and (end_time > parse_time(exchange_row["End Time"])):
                            df.at[i, "Exchange text"] += exchange_row["Utterance"] + " "
                            if exchange_row["Utterance"].startswith("User: "):
                                last = "u"
                                all_user_utterances.append({"start": exchange_row["Start Time"], "end": exchange_row["End Time"], "text": exchange_row["Utterance"], "length": len(exchange_row["Utterance"])})
                            else:
                                last = "r"
                elif (start_time < parse_time(exchange_row["Start Time"]) + buffer) and (end_time > parse_time(exchange_row["End Time"])) and (parse_time(exchange_row["Start Time"]) < end_time):
                        if exchange_row["Utterance"].startswith("Robot: "):
                            df.at[i, "Exchange text"] += exchange_row["Utterance"] + " "
                            last = "r"
                elif (start_time < parse_time(exchange_row["Start Time"])) and (end_time > parse_time(exchange_row["End Time"]) - buffer):
                        if exchange_row["Utterance"].startswith("User: ") and last == "r":
                            df.at[i, "Exchange text"] += exchange_row["Utterance"] + " "
                            all_user_utterances.append({"start": exchange_row["Start Time"], "end": exchange_row["End Time"], "text": exchange_row["Utterance"], "length": len(exchange_row["Utterance"])})
                            last = "u"

            if len(all_user_utterances) > 0:
                # Sort by length of utterance
                all_user_utterances = sorted(all_user_utterances, key=lambda x: x["length"], reverse=True)
                # Get the longest user utterance
                longest_user_utterance = all_user_utterances[0]
                # Trim the longest user utterance audio clip
                first_interaction_robot = target_enjoyment_df.loc[target_enjoyment_df["PID"] == int(person), "Q1-Robot"].values[0]
                audio_file = f"data/raw/P{person}/audio-{first_interaction_robot}-P{person}.wav"
                start_time = longest_user_utterance["start"].replace(",", ".")
                end_time = longest_user_utterance["end"].replace(",", ".")
                # Trim the audio file using ffmpeg
                output_dir = f"data/audio/P{person}"
                os.makedirs(output_dir, exist_ok=True)
                print(f"Trimming audio file: {audio_file} from {start_time} to {end_time}")
                trimmed_audio_file = f"data/audio/P{person}/trimmed_audio-P{person}-{first_interaction_robot}-{turn}.wav"
                print(start_time, end_time)
                cmd = [
                    "ffmpeg",
                    "-ss", start_time,
                    "-i", audio_file,
                    "-t", str(parse_time(end_time) - parse_time(start_time)),
                    "-c", "copy",
                    trimmed_audio_file
                ]
                # run the command; raise CalledProcessError on failure
                try:
                    result = subprocess.run(
                        cmd,
                        check=True,
                        stdout=subprocess.PIPE,
                        stderr=subprocess.PIPE,
                        text=True
                    )
                    print("Trimmed successfully:", trimmed_audio_file)
                except subprocess.CalledProcessError as e:
                    # print ffmpeg’s full error output
                    print("ffmpeg failed with exit code", e.returncode)
                    print("ffmpeg stderr:\n", e.stderr)
                
        os.makedirs(f"data/text/P{person}", exist_ok=True)
        # Save the updated DataFrame back to Excel
        df.to_excel(f"data/text/P{person}/text-aligned-P{person}.xlsx", index=False)
        # Print the number of entries in the processed file
        print(f"Processed {file} with {len(df)} entries.")

Processing file: P41_1_annotation_EL_ratings.xlsx for person 41
Trimming audio file: data/raw/P41/audio-Clara-P41.wav from 00:00:09.579 to 00:00:15.399
00:00:09.579 00:00:15.399
Trimmed successfully: data/audio/P41/trimmed_audio-P41-Clara-1.wav
Trimming audio file: data/raw/P41/audio-Clara-P41.wav from 00:00:22.219 to 00:00:24.399
00:00:22.219 00:00:24.399
Trimmed successfully: data/audio/P41/trimmed_audio-P41-Clara-2.wav
Trimming audio file: data/raw/P41/audio-Clara-P41.wav from 00:00:36.680 to 00:00:42.759
00:00:36.680 00:00:42.759
Trimmed successfully: data/audio/P41/trimmed_audio-P41-Clara-3.wav
Trimming audio file: data/raw/P41/audio-Clara-P41.wav from 00:00:50.959 to 00:00:53.299
00:00:50.959 00:00:53.299
Trimmed successfully: data/audio/P41/trimmed_audio-P41-Clara-4.wav
Trimming audio file: data/raw/P41/audio-Clara-P41.wav from 00:01:00.879 to 00:01:10.219
00:01:00.879 00:01:10.219
Trimmed successfully: data/audio/P41/trimmed_audio-P41-Clara-5.wav
Trimming audio file: data/raw/P

In [54]:
import webrtcvad
import collections
import contextlib
import wave
import pydub
import argparse
import os

def trim_silence(audio_path, output_path, aggressiveness=2, frame_duration_ms=30, padding_duration_ms=500):
    """
    Trims silence from the beginning and end of an audio file using VAD.

    Args:
        audio_path (str): Path to the input audio file.
        output_path (str): Path to save the trimmed audio file.
        aggressiveness (int): VAD aggressiveness mode (0-3). Higher values are less
                             likely to classify non-speech as speech. Default: 1.
        frame_duration_ms (int): Duration of each audio frame for VAD (10, 20, or 30). Default: 30.
        padding_duration_ms (int): Duration of silence to keep before the first speech
                                   and after the last speech segment (in ms). Default: 300.

    Returns:
        bool: True if trimming was successful and file saved, False otherwise.
    """
    print(f"Loading audio file: {audio_path}")
    try:
        # Load audio using pydub - handles format conversion
        audio = pydub.AudioSegment.from_file(audio_path)
    except FileNotFoundError:
        print(f"Error: Input file not found at {audio_path}")
        return False
    except Exception as e:
        print(f"Error loading audio file: {e}")
        return False

    # Ensure audio is mono and 16-bit PCM for webrtcvad compatibility
    # webrtcvad supports 8000, 16000, 32000, 48000 Hz sample rates
    sample_rate = audio.frame_rate
    supported_rates = [8000, 16000, 32000, 48000]
    if sample_rate not in supported_rates:
        # Choose the closest supported rate (preferring higher rates for quality)
        target_rate = 32000
        print(f"Warning: Sample rate {sample_rate}Hz not directly supported by webrtcvad. Resampling to {target_rate}Hz.")
        try:
            audio = audio.set_frame_rate(target_rate)
            sample_rate = target_rate
        except Exception as e:
            print(f"Error resampling audio: {e}")
            return False

    if audio.channels > 1:
        print(f"Converting audio to mono.")
        audio = audio.set_channels(1)

    if audio.sample_width != 2: # 2 bytes = 16 bits
        print(f"Converting audio to 16-bit PCM.")
        audio = audio.set_sample_width(2)

    print(f"Audio Properties for VAD: Rate={sample_rate}Hz, Channels={audio.channels}, SampleWidth={audio.sample_width} bytes")

    vad = webrtcvad.Vad(aggressiveness)

    # Calculate frame size in bytes
    bytes_per_sample = audio.sample_width
    samples_per_frame = int(sample_rate * frame_duration_ms / 1000)
    frame_size_bytes = samples_per_frame * bytes_per_sample

    # Get raw audio data
    raw_audio_data = audio.raw_data

    num_frames = len(raw_audio_data) // frame_size_bytes
    print(f"Processing {num_frames} frames of {frame_duration_ms}ms each...")

    speech_start_ms = -1
    speech_end_ms = -1
    found_speech = False

    for i in range(num_frames):
        start_byte = i * frame_size_bytes
        end_byte = start_byte + frame_size_bytes
        frame = raw_audio_data[start_byte:end_byte]

        # Ensure the frame has the correct number of bytes (important for the last frame)
        if len(frame) < frame_size_bytes:
            # Pad the last frame with silence if necessary
            # frame += b'\x00' * (frame_size_bytes - len(frame)) # Alternatively, skip last frame
            continue # Skip incomplete frame at the end

        try:
            is_speech = vad.is_speech(frame, sample_rate)
            current_time_ms = i * frame_duration_ms

            if is_speech:
                if not found_speech:
                    speech_start_ms = current_time_ms
                    found_speech = True
                # Always update the end time when speech is detected
                speech_end_ms = current_time_ms + frame_duration_ms # End time is start + duration

        except Exception as e:
            # webrtcvad can sometimes throw errors on invalid frame lengths etc.
            print(f"Error processing frame {i}: {e}")
            continue

    if not found_speech:
        print("No speech detected in the audio file.")
        # Optionally save the original or an empty file, here we just return False
        return False

    # Apply padding
    start_trim_ms = max(0, speech_start_ms - padding_duration_ms)
    end_trim_ms = min(len(audio), speech_end_ms + padding_duration_ms) # len(audio) is in ms

    print(f"Detected speech from {speech_start_ms}ms to {speech_end_ms}ms")
    print(f"Trimming audio from {start_trim_ms}ms to {end_trim_ms}ms (including padding)")

    # Trim the audio using pydub slicing
    trimmed_audio = audio[start_trim_ms:end_trim_ms]

    # Export the trimmed audio
    try:
        # Determine output format from the output file extension
        output_format = os.path.splitext(output_path)[1][1:]
        if not output_format: # Default to wav if no extension
            output_format = "wav"
            output_path += ".wav"
            print(f"No output format specified, defaulting to WAV. Saving to {output_path}")

        print(f"Saving trimmed audio to: {output_path} (Format: {output_format})")
        trimmed_audio.export(output_path, format=output_format)
        return True
    except Exception as e:
        print(f"Error saving trimmed audio file: {e}")
        return False

# --- Main Execution ---
if __name__ == "__main__":

    for person in range(4, 43):
        input_dir = f"data/audio/P{person}"
        output_dir = f"data/audio_vad_500/P{person}"
        # Create output directory if it doesn't exist

        if output_dir and not os.path.exists(output_dir):
            print(f"Creating output directory: {output_dir}")
            os.makedirs(output_dir)

        for file in os.listdir(input_dir):
            if file.endswith(".wav"):
                input_path = os.path.join(input_dir, file)
                output_path = os.path.join(output_dir, file)
                print(f"Trimming silence from {input_path} to {output_path}")
                success = trim_silence(
                    audio_path=input_path,
                    output_path=output_path,
                )
                if success:
                    print("Trimming complete.")
                else:
                    print("Trimming failed.")

Creating output directory: data/audio_vad_500/P4
Trimming silence from data/audio/P4/trimmed_audio-P4-Alice-20.wav to data/audio_vad_500/P4/trimmed_audio-P4-Alice-20.wav
Loading audio file: data/audio/P4/trimmed_audio-P4-Alice-20.wav
Converting audio to mono.
Audio Properties for VAD: Rate=32000Hz, Channels=1, SampleWidth=2 bytes
Processing 202 frames of 30ms each...
Detected speech from 1230ms to 6060ms
Trimming audio from 730ms to 6060ms (including padding)
Saving trimmed audio to: data/audio_vad_500/P4/trimmed_audio-P4-Alice-20.wav (Format: wav)
Trimming complete.
Trimming silence from data/audio/P4/trimmed_audio-P4-Alice-21.wav to data/audio_vad_500/P4/trimmed_audio-P4-Alice-21.wav
Loading audio file: data/audio/P4/trimmed_audio-P4-Alice-21.wav
Converting audio to mono.
Audio Properties for VAD: Rate=32000Hz, Channels=1, SampleWidth=2 bytes
Processing 122 frames of 30ms each...
Detected speech from 0ms to 1920ms
Trimming audio from 0ms to 2420ms (including padding)
Saving trimmed a