# Install the dependencies

In [1]:
!pip install pysrt

Collecting pysrt
  Downloading pysrt-1.1.2.tar.gz (104 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/104.4 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[90m╺[0m [32m102.4/104.4 kB[0m [31m3.7 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m104.4/104.4 kB[0m [31m2.5 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: pysrt
  Building wheel for pysrt (setup.py) ... [?25l[?25hdone
  Created wheel for pysrt: filename=pysrt-1.1.2-py3-none-any.whl size=13443 sha256=bd0385ac8194a84201d7efa822b74e7f7c3f9c77e45f72b5e2b2e9e5c2f7f1da
  Stored in directory: /root/.cache/pip/wheels/30/7f/e8/55de9a9b07302d9e7fe47c27910e3bea0c48536153e74bd7e6
Successfully built pysrt
Installing collected packages: pysrt
Successfully installed pysrt-1.1.2


# Connect to the drive

In [2]:
# Mount Google Drive at /content/drive; the audio, subtitle and output paths
# configured in this notebook all live under that mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# Import the libraries

In [3]:
# import whisperx
import pysrt
import subprocess
from pathlib import Path
import os
from datetime import datetime

# Set up the paths

In [4]:
# Locations on the mounted Google Drive: the source recording, its subtitle
# track (same stem, .srt extension) and the root folder for generated segments.
_drive_root = "/content/drive/MyDrive"
_recording_stem = "20131_M061_Vocals"

audio_file = f"{_drive_root}/Audio_Data/{_recording_stem}.wav"
srt_file = f"{_drive_root}/Audio_Data/{_recording_stem}.srt"
output_dir = f"{_drive_root}/Audio_Output"

# Get min duration

Here, we find the minimum gap between two consecutive SRT entries, measured from the end of one entry to the start of the next. This value is used later to derive the padding.

In [5]:
def calculate_min_duration(file_path):
    """Return the smallest gap, in seconds, between consecutive SRT entries.

    The gap is measured from the end of one entry to the start of the next.

    Args:
        file_path: Path of the SRT file, opened with ``pysrt.open``.

    Returns:
        The minimum gap in seconds as a float. The value is negative when two
        consecutive entries overlap, and ``float('inf')`` when the file holds
        fewer than two entries (there is no gap to measure).
    """
    entries = list(pysrt.open(file_path))

    # Work on the timestamps' millisecond ordinals directly. Converting through
    # datetime.time cannot represent timestamps of 24 hours or more, and
    # combining with two separate datetime.today() calls could straddle
    # midnight and skew the difference by a day.
    gaps = (
        (following.start.ordinal - previous.end.ordinal) / 1000
        for previous, following in zip(entries, entries[1:])
    )
    return min(gaps, default=float('inf'))

In [6]:
# Smallest end-to-start gap found in the subtitle file, in seconds.
min_duration = calculate_min_duration(srt_file)
print(f"Minimum duration between consecutive entries: {min_duration} seconds")

Minimum duration between consecutive entries: 0.02 seconds


# Segment the audio

In [7]:
def _format_ffmpeg_time(total_ms):
    """Render a millisecond count as the ``HH:MM:SS.mmm`` form passed to ffmpeg."""
    seconds, millis = divmod(max(0, int(total_ms)), 1000)
    minutes, seconds = divmod(seconds, 60)
    hours, minutes = divmod(minutes, 60)
    return f"{hours:02d}:{minutes:02d}:{seconds:02d}.{millis:03d}"


def segment_audio_by_subtitles(audio_file, srt_file, output_dir, padding=0.5):
    """Cut ``audio_file`` into one WAV per subtitle entry and write a metadata file.

    For every entry in ``srt_file`` ffmpeg copies the audio between the entry's
    start and its end plus ``padding`` into
    ``<output_dir>/<audio stem>/<audio stem>_segment_<n>.wav`` (n starts at 1).
    A ``metadata-<audio stem>.txt`` file is written to the same folder with one
    ``wavs/<segment file name>|<subtitle text>`` line per successfully created
    segment.

    Args:
        audio_file: Path of the audio file to cut.
        srt_file: Path of the SRT file, opened with ``pysrt.open``.
        output_dir: Root folder; a sub-folder named after the audio file's
            stem is created inside it if missing.
        padding: Seconds added to each entry's end time (truncated to whole
            milliseconds).

    Returns:
        None. Progress is reported with ``print``.
    """
    subtitles = pysrt.open(srt_file)

    # e.g. ".../20131_M009.wav" -> "20131_M009"
    base_name = os.path.splitext(os.path.basename(audio_file))[0]

    # All segments and the metadata file go into a folder named after the audio.
    new_folder_path = os.path.join(output_dir, base_name)
    os.makedirs(new_folder_path, exist_ok=True)

    padding_ms = int(padding * 1000)
    all_texts = []

    for i, subtitle in enumerate(subtitles):
        segment_name = f"{base_name}_segment_{i + 1}.wav"
        output_filename = f"{new_folder_path}/{segment_name}"

        # Build the timestamps from the millisecond ordinals so entries at or
        # beyond 24 hours do not fail in a datetime.time conversion.
        start_time = _format_ffmpeg_time(subtitle.start.ordinal)
        end_time_padded = _format_ffmpeg_time(subtitle.end.ordinal + padding_ms)

        # -y overwrites segments left by an earlier run and -nostdin keeps
        # ffmpeg from waiting on an interactive prompt inside the notebook.
        result = subprocess.run([
            "ffmpeg", "-nostdin", "-y",
            "-i", audio_file,
            "-ss", start_time,
            "-to", end_time_padded,
            "-c", "copy",
            output_filename,
        ])

        if result.returncode != 0:
            # Do not claim success or reference a file that was not produced.
            print(f"Segment {i + 1}: ffmpeg failed (exit code {result.returncode}), skipping {output_filename}")
            continue

        print(f"Segment {i + 1}: {output_filename} created")

        # Collapse line breaks inside the subtitle so every metadata entry
        # stays on a single "path|text" line.
        subtitle_text = " ".join(subtitle.text.split())
        all_texts.append(f"wavs/{segment_name}|{subtitle_text}")

    # Explicit UTF-8 so non-ASCII subtitle text is written the same everywhere.
    with open(f"{new_folder_path}/metadata-{base_name}.txt", "w", encoding="utf-8") as all_texts_file:
        all_texts_file.write("\n".join(all_texts))

In [8]:
# Make sure the output root (and any missing parent folders) exists; an
# already existing directory is left as is.
os.makedirs(output_dir, exist_ok=True)

In [9]:
# Fall back to a small positive gap whenever the measured minimum cannot be
# used to derive a padding: exactly 0.0 (back-to-back entries), negative
# (overlapping entries) or infinite (fewer than two entries in the SRT file).
FALLBACK_MIN_DURATION = 0.02  # seconds
if not (0.0 < min_duration < float("inf")):
    min_duration = FALLBACK_MIN_DURATION
print(min_duration)

0.02


In [10]:
# Cut the recording into one file per subtitle entry. Each segment's end is
# padded by twice the smallest gap between entries (in seconds); a fixed
# padding of 0.5 s was tested and gave worse results.
padding = min_duration * 2
segment_audio_by_subtitles(audio_file, srt_file, output_dir, padding=padding)

Segment 1: /content/drive/MyDrive/Audio_Output/20131_M061_Vocals/20131_M061_Vocals_segment_1.wav created
Segment 2: /content/drive/MyDrive/Audio_Output/20131_M061_Vocals/20131_M061_Vocals_segment_2.wav created
Segment 3: /content/drive/MyDrive/Audio_Output/20131_M061_Vocals/20131_M061_Vocals_segment_3.wav created
Segment 4: /content/drive/MyDrive/Audio_Output/20131_M061_Vocals/20131_M061_Vocals_segment_4.wav created
Segment 5: /content/drive/MyDrive/Audio_Output/20131_M061_Vocals/20131_M061_Vocals_segment_5.wav created
Segment 6: /content/drive/MyDrive/Audio_Output/20131_M061_Vocals/20131_M061_Vocals_segment_6.wav created
Segment 7: /content/drive/MyDrive/Audio_Output/20131_M061_Vocals/20131_M061_Vocals_segment_7.wav created
Segment 8: /content/drive/MyDrive/Audio_Output/20131_M061_Vocals/20131_M061_Vocals_segment_8.wav created
Segment 9: /content/drive/MyDrive/Audio_Output/20131_M061_Vocals/20131_M061_Vocals_segment_9.wav created
Segment 10: /content/drive/MyDrive/Audio_Output/20131_M