In [3]:
import os
from scipy.io import wavfile
import textgrids

In [4]:
file_name = "F_08_1"
frequency, signal = wavfile.read("F_08_1.wav")
text_grid = textgrids.TextGrid(f"{file_name}.TextGrid")

In [21]:
AUDIOS_FOLDER = "transformed_audios"
DEST_AUDIOS_FOLDER = "cropped_audios"
TRANSCRIPTION_FOLDER = "annotations"
DEST_TRANSCRIPTION_FOLDER = "cropped_annotations"

In [26]:
def crop_audio(file_name):
    print(f"Cropping file {file_name}")
    frequency, signal = wavfile.read(
        os.path.join(
            AUDIOS_FOLDER,
            f"{file_name}.wav"
        )
    )
    text_grid = textgrids.TextGrid(
        os.path.join(
            TRANSCRIPTION_FOLDER,
            f"{file_name}.TextGrid"
        )
    )
    intervals = text_grid[file_name]
    initial_second = -1
    end_second = signal.shape[-1] / frequency
    for interval in intervals:
        if interval.text == "1":
            initial_second = interval.xmin
        if interval.text == "" and initial_second > -1:
            end_second = interval.xmax
            break
    cropped_signal = signal[int(initial_second*frequency):int(end_second*frequency)]
    wavfile.write(
        os.path.join(
            DEST_AUDIOS_FOLDER,
            f"cropped_{file_name}.wav"
        ),
        frequency,
        cropped_signal
    )
    print(f"File {file_name} cropped successfully")

    intervals = text_grid[file_name]
    cropped_textgrid = textgrids.TextGrid()
    cropped_textgrid.xmin = 0
    cropped_textgrid.xmax = intervals[-1].xmax - initial_second
    tier = textgrids.Tier()
    cropped_textgrid[file_name] = tier
    for interval in intervals:
        if interval.xmin - initial_second >= 0:
            tier.append(
                textgrids.Interval(
                    interval.text,
                    interval.xmin - initial_second,
                    interval.xmax - initial_second
                )
            )

    cropped_textgrid.write(
        os.path.join(
            DEST_TRANSCRIPTION_FOLDER,
            f"cropped_{file_name}.TextGrid"
        )
    )



In [27]:
for file_name in os.listdir(TRANSCRIPTION_FOLDER):
    print(file_name)
    try:
        crop_audio(
            file_name.replace(".TextGrid", "")
        )
    except FileNotFoundError:
        print("File not found on", file_name)

F_74_1.TextGrid
Cropping file F_74_1
File F_74_1 cropped successfully
M_28_1.TextGrid
Cropping file M_28_1
File M_28_1 cropped successfully
F_63_1.TextGrid
Cropping file F_63_1
File F_63_1 cropped successfully
F_64_1.TextGrid
Cropping file F_64_1
File F_64_1 cropped successfully
F_56_1.TextGrid
Cropping file F_56_1
File F_56_1 cropped successfully
F_52_1.TextGrid
Cropping file F_52_1
File F_52_1 cropped successfully
F_27_1.TextGrid
Cropping file F_27_1
File F_27_1 cropped successfully
M_42_1.TextGrid
Cropping file M_42_1
File M_42_1 cropped successfully
F_61_1.TextGrid
Cropping file F_61_1
File F_61_1 cropped successfully
M_30_1.TextGrid
Cropping file M_30_1
File M_30_1 cropped successfully
M_05_1.TextGrid
Cropping file M_05_1
File M_05_1 cropped successfully
M_74_1.TextGrid
Cropping file M_74_1
File M_74_1 cropped successfully
M_19_1.TextGrid
Cropping file M_19_1
File M_19_1 cropped successfully
M_55_1.TextGrid
Cropping file M_55_1
File M_55_1 cropped successfully
F_73_1.TextGrid
Crop

In [28]:
crop_audio("F_69_1")

Cropping file F_69_1
File F_69_1 cropped successfully
