In [122]:
import os
import subprocess
import sys
import warnings

warnings.filterwarnings("ignore")

# charsiu directory adding to path
charsiu_dir = '/Users/oishanibandopadhyay/Documents/charsiu-main'
charsiu_src = f'{charsiu_dir}/src/'
if charsiu_src not in sys.path:
    sys.path.append(charsiu_src)

# import model
from Charsiu import charsiu_forced_aligner

# add audio folder
audio_folder = '/Users/oishanibandopadhyay/Documents/UCSD/LIGN 214/final_project/nineteenth_participant_audio'

# intialize forced aligner
os.chdir(charsiu_dir)
charsiu = charsiu_forced_aligner(aligner='charsiu/en_w2v2_fc_10ms')

# list all wav files that are not 16k
audio_files = [f for f in os.listdir(audio_folder) if f.endswith('.wav') and not f.endswith('_16k.wav')]


In [123]:
# file processing for every file in folder
for audio_filename in audio_files:
    
    # Full path of the input audio
    input_file = os.path.join(audio_folder, audio_filename)
    
    # Generate output filenames
    base_name = audio_filename.rsplit('.', 1)[0]  # remove extension
    resampled_filename = f"{base_name}_16k.wav"
    resampled_file = os.path.join(audio_folder, resampled_filename)
    
    txt_filename = f"{base_name}.txt"
    txt_file = os.path.join(audio_folder, txt_filename)
    
    textgrid_filename = f"{base_name}.TextGrid"
    textgrid_file = os.path.join(audio_folder, textgrid_filename)

    # check if 16k audio exists
    if os.path.exists(resampled_file):
        print(f"16k file already exists for {base_name}, skipping resampling...")
    else:
        print(f"Resampling {audio_filename} to 16kHz...")
        cmd = [
            "ffmpeg",
            "-i", input_file,
            "-ar", "16000",
            "-ac", "1",
            "-sample_fmt", "s16",
            resampled_file
        ]

        try:
            subprocess.run(cmd, check=True)
            print(f"Resampled audio saved to {resampled_file}")
        except subprocess.CalledProcessError as e:
            print(f"Error resampling {audio_filename}: {e}")
            continue

    # load corresponding text file
    if not os.path.exists(txt_file):
        print(f"Missing transcription file for {base_name}! Skipping...")
        continue

    with open(txt_file, 'r') as f:
        text = f.read().strip()

    # check if TextGrid exists
    if os.path.exists(textgrid_file):
        print(f"TextGrid already exists for {base_name}, skipping alignment...")
        continue

    # perform forced alignment
    print(f"Aligning audio and transcription for {base_name}...")

    try:
        # Run alignment
        alignment = charsiu.align(audio=resampled_file, text=text)
        
        # Save the alignment to TextGrid
        charsiu.serve(audio=resampled_file, text=text, save_to=textgrid_file)
        print(f"TextGrid saved to {textgrid_file}")
        
    except Exception as e:
        print(f"Error aligning {base_name}: {e}")
        continue

print("\nBatch processing complete!")

Resampling fa55ab21ba76f0e1d8bf041b-48-ExpPromptResp6.wav to 16kHz...
Resampled audio saved to /Users/oishanibandopadhyay/Documents/UCSD/LIGN 214/final_project/nineteenth_participant_audio/fa55ab21ba76f0e1d8bf041b-48-ExpPromptResp6_16k.wav
Aligning audio and transcription for fa55ab21ba76f0e1d8bf041b-48-ExpPromptResp6...


ffmpeg version 7.1.1 Copyright (c) 2000-2025 the FFmpeg developers
  built with Apple clang version 15.0.0 (clang-1500.1.0.2.5)
  configuration: --prefix=/opt/homebrew/Cellar/ffmpeg/7.1.1_1 --enable-shared --enable-pthreads --enable-version3 --cc=clang --host-cflags= --host-ldflags='-Wl,-ld_classic' --enable-ffplay --enable-gnutls --enable-gpl --enable-libaom --enable-libaribb24 --enable-libbluray --enable-libdav1d --enable-libharfbuzz --enable-libjxl --enable-libmp3lame --enable-libopus --enable-librav1e --enable-librist --enable-librubberband --enable-libsnappy --enable-libsrt --enable-libssh --enable-libsvtav1 --enable-libtesseract --enable-libtheora --enable-libvidstab --enable-libvmaf --enable-libvorbis --enable-libvpx --enable-libwebp --enable-libx264 --enable-libx265 --enable-libxml2 --enable-libxvid --enable-lzma --enable-libfontconfig --enable-libfreetype --enable-frei0r --enable-libass --enable-libopencore-amrnb --enable-libopencore-amrwb --enable-libopenjpeg --enable-libspee

Alignment output has been saved to /Users/oishanibandopadhyay/Documents/UCSD/LIGN 214/final_project/nineteenth_participant_audio/fa55ab21ba76f0e1d8bf041b-48-ExpPromptResp6.TextGrid
TextGrid saved to /Users/oishanibandopadhyay/Documents/UCSD/LIGN 214/final_project/nineteenth_participant_audio/fa55ab21ba76f0e1d8bf041b-48-ExpPromptResp6.TextGrid
Resampling fa55ab21ba76f0e1d8bf041b-53-ExpPromptResp1.wav to 16kHz...
Resampled audio saved to /Users/oishanibandopadhyay/Documents/UCSD/LIGN 214/final_project/nineteenth_participant_audio/fa55ab21ba76f0e1d8bf041b-53-ExpPromptResp1_16k.wav
Aligning audio and transcription for fa55ab21ba76f0e1d8bf041b-53-ExpPromptResp1...


ffmpeg version 7.1.1 Copyright (c) 2000-2025 the FFmpeg developers
  built with Apple clang version 15.0.0 (clang-1500.1.0.2.5)
  configuration: --prefix=/opt/homebrew/Cellar/ffmpeg/7.1.1_1 --enable-shared --enable-pthreads --enable-version3 --cc=clang --host-cflags= --host-ldflags='-Wl,-ld_classic' --enable-ffplay --enable-gnutls --enable-gpl --enable-libaom --enable-libaribb24 --enable-libbluray --enable-libdav1d --enable-libharfbuzz --enable-libjxl --enable-libmp3lame --enable-libopus --enable-librav1e --enable-librist --enable-librubberband --enable-libsnappy --enable-libsrt --enable-libssh --enable-libsvtav1 --enable-libtesseract --enable-libtheora --enable-libvidstab --enable-libvmaf --enable-libvorbis --enable-libvpx --enable-libwebp --enable-libx264 --enable-libx265 --enable-libxml2 --enable-libxvid --enable-lzma --enable-libfontconfig --enable-libfreetype --enable-frei0r --enable-libass --enable-libopencore-amrnb --enable-libopencore-amrwb --enable-libopenjpeg --enable-libspee

Alignment output has been saved to /Users/oishanibandopadhyay/Documents/UCSD/LIGN 214/final_project/nineteenth_participant_audio/fa55ab21ba76f0e1d8bf041b-53-ExpPromptResp1.TextGrid
TextGrid saved to /Users/oishanibandopadhyay/Documents/UCSD/LIGN 214/final_project/nineteenth_participant_audio/fa55ab21ba76f0e1d8bf041b-53-ExpPromptResp1.TextGrid
Resampling fa55ab21ba76f0e1d8bf041b-50-ExpPromptResp2.wav to 16kHz...
Resampled audio saved to /Users/oishanibandopadhyay/Documents/UCSD/LIGN 214/final_project/nineteenth_participant_audio/fa55ab21ba76f0e1d8bf041b-50-ExpPromptResp2_16k.wav
Aligning audio and transcription for fa55ab21ba76f0e1d8bf041b-50-ExpPromptResp2...


ffmpeg version 7.1.1 Copyright (c) 2000-2025 the FFmpeg developers
  built with Apple clang version 15.0.0 (clang-1500.1.0.2.5)
  configuration: --prefix=/opt/homebrew/Cellar/ffmpeg/7.1.1_1 --enable-shared --enable-pthreads --enable-version3 --cc=clang --host-cflags= --host-ldflags='-Wl,-ld_classic' --enable-ffplay --enable-gnutls --enable-gpl --enable-libaom --enable-libaribb24 --enable-libbluray --enable-libdav1d --enable-libharfbuzz --enable-libjxl --enable-libmp3lame --enable-libopus --enable-librav1e --enable-librist --enable-librubberband --enable-libsnappy --enable-libsrt --enable-libssh --enable-libsvtav1 --enable-libtesseract --enable-libtheora --enable-libvidstab --enable-libvmaf --enable-libvorbis --enable-libvpx --enable-libwebp --enable-libx264 --enable-libx265 --enable-libxml2 --enable-libxvid --enable-lzma --enable-libfontconfig --enable-libfreetype --enable-frei0r --enable-libass --enable-libopencore-amrnb --enable-libopencore-amrwb --enable-libopenjpeg --enable-libspee

Alignment output has been saved to /Users/oishanibandopadhyay/Documents/UCSD/LIGN 214/final_project/nineteenth_participant_audio/fa55ab21ba76f0e1d8bf041b-50-ExpPromptResp2.TextGrid
TextGrid saved to /Users/oishanibandopadhyay/Documents/UCSD/LIGN 214/final_project/nineteenth_participant_audio/fa55ab21ba76f0e1d8bf041b-50-ExpPromptResp2.TextGrid
Resampling fa55ab21ba76f0e1d8bf041b-59-ExpPromptResp7.wav to 16kHz...
Resampled audio saved to /Users/oishanibandopadhyay/Documents/UCSD/LIGN 214/final_project/nineteenth_participant_audio/fa55ab21ba76f0e1d8bf041b-59-ExpPromptResp7_16k.wav
Aligning audio and transcription for fa55ab21ba76f0e1d8bf041b-59-ExpPromptResp7...


ffmpeg version 7.1.1 Copyright (c) 2000-2025 the FFmpeg developers
  built with Apple clang version 15.0.0 (clang-1500.1.0.2.5)
  configuration: --prefix=/opt/homebrew/Cellar/ffmpeg/7.1.1_1 --enable-shared --enable-pthreads --enable-version3 --cc=clang --host-cflags= --host-ldflags='-Wl,-ld_classic' --enable-ffplay --enable-gnutls --enable-gpl --enable-libaom --enable-libaribb24 --enable-libbluray --enable-libdav1d --enable-libharfbuzz --enable-libjxl --enable-libmp3lame --enable-libopus --enable-librav1e --enable-librist --enable-librubberband --enable-libsnappy --enable-libsrt --enable-libssh --enable-libsvtav1 --enable-libtesseract --enable-libtheora --enable-libvidstab --enable-libvmaf --enable-libvorbis --enable-libvpx --enable-libwebp --enable-libx264 --enable-libx265 --enable-libxml2 --enable-libxvid --enable-lzma --enable-libfontconfig --enable-libfreetype --enable-frei0r --enable-libass --enable-libopencore-amrnb --enable-libopencore-amrwb --enable-libopenjpeg --enable-libspee

Alignment output has been saved to /Users/oishanibandopadhyay/Documents/UCSD/LIGN 214/final_project/nineteenth_participant_audio/fa55ab21ba76f0e1d8bf041b-59-ExpPromptResp7.TextGrid
TextGrid saved to /Users/oishanibandopadhyay/Documents/UCSD/LIGN 214/final_project/nineteenth_participant_audio/fa55ab21ba76f0e1d8bf041b-59-ExpPromptResp7.TextGrid
Resampling fa55ab21ba76f0e1d8bf041b-56-LocPromptResp34.wav to 16kHz...
Resampled audio saved to /Users/oishanibandopadhyay/Documents/UCSD/LIGN 214/final_project/nineteenth_participant_audio/fa55ab21ba76f0e1d8bf041b-56-LocPromptResp34_16k.wav
Aligning audio and transcription for fa55ab21ba76f0e1d8bf041b-56-LocPromptResp34...


ffmpeg version 7.1.1 Copyright (c) 2000-2025 the FFmpeg developers
  built with Apple clang version 15.0.0 (clang-1500.1.0.2.5)
  configuration: --prefix=/opt/homebrew/Cellar/ffmpeg/7.1.1_1 --enable-shared --enable-pthreads --enable-version3 --cc=clang --host-cflags= --host-ldflags='-Wl,-ld_classic' --enable-ffplay --enable-gnutls --enable-gpl --enable-libaom --enable-libaribb24 --enable-libbluray --enable-libdav1d --enable-libharfbuzz --enable-libjxl --enable-libmp3lame --enable-libopus --enable-librav1e --enable-librist --enable-librubberband --enable-libsnappy --enable-libsrt --enable-libssh --enable-libsvtav1 --enable-libtesseract --enable-libtheora --enable-libvidstab --enable-libvmaf --enable-libvorbis --enable-libvpx --enable-libwebp --enable-libx264 --enable-libx265 --enable-libxml2 --enable-libxvid --enable-lzma --enable-libfontconfig --enable-libfreetype --enable-frei0r --enable-libass --enable-libopencore-amrnb --enable-libopencore-amrwb --enable-libopenjpeg --enable-libspee

Alignment output has been saved to /Users/oishanibandopadhyay/Documents/UCSD/LIGN 214/final_project/nineteenth_participant_audio/fa55ab21ba76f0e1d8bf041b-56-LocPromptResp34.TextGrid
TextGrid saved to /Users/oishanibandopadhyay/Documents/UCSD/LIGN 214/final_project/nineteenth_participant_audio/fa55ab21ba76f0e1d8bf041b-56-LocPromptResp34.TextGrid

Batch processing complete!
