# Demo Video-Timestamps

In [None]:
from datetime import datetime
import os
import pathlib
import librosa
import soundfile as sf
import yt_dlp
from tqdm import tqdm

## Download Audio

In [None]:
def download_youtube_audio(url: str, output_dir: str) -> None:
    """
    Download the audio track of a YouTube video and save it as a WAV file.

    The best available audio stream is fetched with yt_dlp and handed to the
    ``FFmpegExtractAudio`` post-processor with ``wav`` as the preferred codec.
    The output file is placed in ``output_dir`` and named after the video title.

    Download and conversion failures are not raised: the error is printed and
    the function returns normally (best-effort behaviour).

    Args:
        url (str): The URL of the YouTube video.
        output_dir (str): The directory where the audio file will be saved.
            It is created, including missing parents, if it does not exist.

    Returns:
        None
    """
    # exist_ok=True replaces the check-then-create pattern, which could fail
    # if the directory appeared between the check and the creation.
    os.makedirs(output_dir, exist_ok=True)

    ydl_opts = {
        'format': 'bestaudio/best',
        'postprocessors': [{
            'key': 'FFmpegExtractAudio',
            'preferredcodec': 'wav',
        }],
        # yt_dlp expands %(title)s and %(ext)s itself when writing the file.
        'outtmpl': os.path.join(output_dir, '%(title)s.%(ext)s'),
    }

    try:
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            ydl.download([url])
        print(f"Audio saved in: {output_dir}")
    except Exception as e:
        # Deliberately broad: this is a notebook helper that reports the
        # failure instead of interrupting the session.
        print(f"An error occurred: {e}")

In [None]:
url = "https://www.youtube.com/watch?v=pBy1zgt0XPc"
AUDIO_DIR = "audio"  # Destination folder for the downloaded WAV; adjust as needed.

# The helper is named download_youtube_audio and requires an output directory.
download_youtube_audio(url, AUDIO_DIR)

## Convert Audio Timestamps

In [None]:
from datetime import datetime

def to_seconds(date_time: datetime.time, offset: float = 0.25) -> float:
    """
    Convert a time-of-day object to seconds since midnight, plus an offset.

    Args:
        date_time (datetime.time): Object exposing ``hour``, ``minute``,
            ``second`` and ``microsecond`` attributes.
        offset (float): Seconds added to the computed total. Defaults to 0.25,
            the shift this function has always applied; pass 0.0 to get the
            unshifted number of seconds.

    Returns:
        float: ``hour * 3600 + minute * 60 + second + microsecond / 1e6 + offset``,
        or 0.0 (after printing the error) when ``date_time`` lacks one of the
        required attributes.
    """
    try:
        return (
            date_time.hour * 3600
            + date_time.minute * 60
            + date_time.second
            + (date_time.microsecond / 1e6)
            + offset
        )
    except AttributeError as e:
        # Best-effort: report the bad input and fall back to 0.0 rather than
        # aborting the surrounding list comprehension or notebook cell.
        print(f"An error occurred: {e}")
        return 0.0

In [None]:
# Quick check: convert the current time of day to seconds.
current_time = datetime.now().time()
to_seconds(current_time)

## Split Audio Using SRT Timestamps

In [3]:
def split_audio(
    audio_dir: str,
    timestamps: list[tuple[float, float]],
    target_folder: str,
    sampling_rate: int = 16000,
) -> list[str]:
    """
    Split an audio file into multiple WAV chunks based on given timestamps.

    The audio is loaded with librosa at ``sampling_rate``, each (start, stop)
    pair is converted to sample indices, and every slice is written to
    ``<target_folder>/<audio stem>/<audio stem>_chunk_<NNNNN>.wav``.

    Args:
        audio_dir (str): The path to the audio file (a file, despite the name).
        timestamps (list[tuple[float, float]]): (start, stop) times in seconds.
            Negative times are clamped to 0.
        target_folder (str): The folder under which the per-file chunk
            directory is created.
        sampling_rate (int): Rate requested from the loader and used for the
            written chunks. Defaults to 16000.

    Returns:
        list[str]: One entry per successfully written chunk, in the form
        ``"<audio stem>/<chunk file name>"`` (i.e. relative to
        ``target_folder``). Empty if the audio file could not be loaded.
        Chunks that fail to write are reported and skipped.
    """
    try:
        y, sr = librosa.load(audio_dir, sr=sampling_rate)
    except Exception as e:
        print(f"An error occurred while loading the audio file: {e}")
        return []

    # Clamp at 0: a negative sample index would silently slice from the end
    # of the signal instead of the beginning.
    indices = [
        (max(0, int(start * sr)), max(0, int(stop * sr)))
        for start, stop in timestamps
    ]

    audio_name = pathlib.Path(audio_dir).stem
    dest_dir = os.path.join(target_folder, audio_name)
    os.makedirs(dest_dir, exist_ok=True)

    output_file_list = []
    for i, (start_index, end_index) in tqdm(enumerate(indices), total=len(indices)):
        chunk_name = f"{audio_name}_chunk_{i:05d}.wav"
        output_file = os.path.join(dest_dir, chunk_name)
        try:
            # Write with the rate reported by the loader so the file header
            # always matches the samples actually held in ``y``.
            sf.write(output_file, y[start_index:end_index], sr)
        except Exception as e:
            print(f"An error occurred while writing the audio file: {e}")
            continue
        output_file_list.append(f"{audio_name}/{chunk_name}")

    return output_file_list


In [None]:
SRT_DIR = "" # Add this

# NOTE(review): `pysrt` and `target_srt` are not imported/defined in the
# visible cells — confirm both are provided before running this cell.
# SRT_DIR is a plain string, so it is wrapped in Path before joining with `/`.
subs = pysrt.open(str(pathlib.Path(SRT_DIR) / target_srt))

# Filter once so the three lists are guaranteed to stay aligned index-by-index.
text_subs = [sub for sub in subs if sub.text != ""]
start_list = [to_seconds(sub.start.to_time()) for sub in text_subs]
end_list = [to_seconds(sub.end.to_time()) for sub in text_subs]
text_list = [sub.text for sub in text_subs]