In [1]:
import os
import shutil

from dotenv import load_dotenv

from utils import (
    create_srt_file,
    download_youtube_audio,
    process_chunks_and_collect_transcripts,
    split_audio_on_silence,
)


In [2]:
# Load environment variables (python-dotenv reads them from a local .env file
# when one is present).
load_dotenv()

# API key handed to process_chunks_and_collect_transcripts in the
# "Send API request" cell.
SARVAM_KEY = os.getenv('SARVAM_KEY')

# Fail fast: the key is only used after the audio has been downloaded and
# split, so a missing key would otherwise surface late, after that work is done.
if not SARVAM_KEY:
    raise RuntimeError(
        "SARVAM_KEY is not set. Add it to your .env file or export it "
        "in the environment before running this notebook."
    )

# Empty the audio chunks

In [None]:
def clear_directory(directory):
    """Delete the visible contents of ``directory`` but keep the directory itself.

    This is a portable replacement for the shell command ``rm -rf <directory>/*``:
    it does not depend on a POSIX shell and it raises on real failures instead
    of silently returning an exit status.

    Entries whose name starts with "." are left in place, because the shell
    glob ``*`` used by the original command did not match them either.

    Args:
        directory: Path of the folder to empty.

    Returns:
        The number of top-level entries that were removed. A directory that
        does not exist counts as already empty and yields 0.

    Raises:
        OSError: If an entry exists but cannot be removed.
    """
    if not os.path.isdir(directory):
        return 0

    removed = 0
    # os.listdir returns a snapshot, so deleting while looping is safe.
    for name in os.listdir(directory):
        if name.startswith('.'):
            continue
        path = os.path.join(directory, name)
        if os.path.isdir(path) and not os.path.islink(path):
            shutil.rmtree(path)
        else:
            # Regular files and symlinks (including symlinks to directories).
            os.remove(path)
        removed += 1
    return removed


# empty the audio_chunks folder
clear_directory('audio_chunks')

# Inputs

In [3]:
# take youtube link as input from user
# Surrounding whitespace (e.g. from a copy-paste) is stripped so it is not
# passed on to download_youtube_audio.
youtube_link = input("Enter the youtube link: ").strip()

# An empty entry can never be downloaded; stop here with a clear message
# instead of failing later inside the download step.
if not youtube_link:
    raise ValueError("No YouTube link was entered")

In [4]:
# take language from users
# hi-IN: Hindi, bn-IN: Bengali, kn-IN: Kannada, ml-IN: Malayalam, mr-IN: Marathi, od-IN: Odia, pa-IN: Punjabi, ta-IN: Tamil, te-IN: Telugu, gu-IN: Gujarati, en-IN: English
num_to_language_mapping = {
    0: 'Unknown',
    1: 'Hindi',
    2: 'Telugu',
    3: 'Malayalam',
    4: 'Kannada',
    5: 'Bengali',
    6: 'Marathi',
    7: 'Odia',
    8: 'Punjabi',
    9: 'Tamil',
    10: 'English',
    11: 'Gujarati'
}


def parse_language_choice(raw_value):
    """Turn the text typed by the user into a key of ``num_to_language_mapping``.

    Args:
        raw_value: The raw string returned by ``input()``.

    Returns:
        The chosen number when it is a valid key of the mapping, otherwise 0
        ("Unknown"). Non-numeric or empty input also yields 0 instead of
        raising, matching the fallback already applied to out-of-range numbers.
    """
    try:
        choice = int(raw_value)
    except (TypeError, ValueError):
        return 0
    return choice if choice in num_to_language_mapping else 0


def build_prompt(choice):
    """Build the hint text for the transcription request.

    Args:
        choice: A key of ``num_to_language_mapping``.

    Returns:
        ``None`` when the language is unknown (0), otherwise a sentence naming
        the selected language.
    """
    if choice == 0:
        return None
    return f"This is a {num_to_language_mapping[choice]} language audio clip from a Youtube Video"


# The menu is generated from the mapping so the two cannot drift apart.
menu_lines = ["Language of the video and the number to be entered:"]
for number, name in num_to_language_mapping.items():
    label = "Unknown language" if number == 0 else name
    menu_lines.append(f"    {label}: {number}")
print("\n" + "\n".join(menu_lines) + "\n")

language_num = parse_language_choice(input("Enter the language number: "))

# `prompt` is passed to process_chunks_and_collect_transcripts in the
# "Send API request" cell.
prompt = build_prompt(language_num)


print('The prompt to assist is:')
print(prompt)


Language of the video and the number to be entered:
    Unknow language: 0
    Hindi: 1
    Telugu: 2
    Malayalam: 3
    Kannada: 4
    Bengali: 5
    Marathi: 6
    Odia: 7
    Punjabi: 8
    Tamil: 9
    English: 10
    Gujarati: 11
      
The prompt to assist is:
This is a Malayalam language audio clip from a Youtube Video


# Download audio

In [5]:
# Ask the helper for an mp3 of the video's audio, stored under audio_files/.
downloaded_audio = download_youtube_audio(
    youtube_link,
    output_path='audio_files',
    audio_format='mp3',
)

# A falsy result is treated as a failed download and stops the notebook here.
if not downloaded_audio:
    raise Exception("Failed to download audio file")

print(f"Downloaded audio file path: {downloaded_audio}")

[youtube] Extracting URL: https://www.youtube.com/watch?v=Ap0Uwc4xH50
[youtube] Ap0Uwc4xH50: Downloading webpage
[youtube] Ap0Uwc4xH50: Downloading ios player API JSON
[youtube] Ap0Uwc4xH50: Downloading web creator player API JSON
[youtube] Ap0Uwc4xH50: Downloading m3u8 information
[info] Ap0Uwc4xH50: Downloading 1 format(s): 251
[download] Destination: audio_files/Kudumba Nakshatram - Not a Review ｜ Reeload Roast.webm
[download] 100% of    5.14MiB in 00:00:00 at 18.59MiB/s    
[ExtractAudio] Destination: audio_files/Kudumba Nakshatram - Not a Review ｜ Reeload Roast.mp3
Deleting original file audio_files/Kudumba Nakshatram - Not a Review ｜ Reeload Roast.webm (pass -k to keep)
Audio downloaded successfully: audio_files/Kudumba Nakshatram - Not a Review ｜ Reeload Roast.mp3
Downloaded audio file path: audio_files/Kudumba Nakshatram - Not a Review ｜ Reeload Roast.mp3


# Split audio into Chunks

In [6]:
# Guard for the case where this cell is run without a successful download.
if not downloaded_audio:
    raise Exception("Download audio file not found")

try:
    # NOTE(review): the tuning values below look like pydub-style settings
    # (lengths in milliseconds, threshold in dBFS) - confirm in utils.
    chunks = split_audio_on_silence(
        downloaded_audio,
        output_dir='audio_chunks',
        min_silence_len=1000,
        silence_thresh=-40,
        keep_silence=500
    )
except Exception as e:
    # `from e` records the original error as the direct cause, so the
    # traceback does not read as a second failure inside this handler.
    raise Exception(f"Failed to split audio into chunks: {e}") from e

Splitting audio into chunks based on silence...
Created chunk_1_1.mp3: 0.00s to 6.88s
Created chunk_1_2.mp3: 6.88s to 13.76s
Created chunk_1_3.mp3: 13.76s to 20.65s
Created chunk_1_4.mp3: 20.65s to 27.53s
Created chunk_1_5.mp3: 27.53s to 34.41s
Created chunk_1_6.mp3: 34.41s to 41.29s
Created chunk_1_7.mp3: 41.29s to 48.17s
Created chunk_1_8.mp3: 48.17s to 55.06s
Created chunk_1_9.mp3: 55.06s to 61.94s
Created chunk_1_10.mp3: 61.94s to 68.82s
Created chunk_1_11.mp3: 68.82s to 75.70s
Created chunk_1_12.mp3: 75.70s to 82.58s
Created chunk_1_13.mp3: 82.58s to 89.47s
Created chunk_1_14.mp3: 89.47s to 96.35s
Created chunk_1_15.mp3: 96.35s to 103.23s
Created chunk_1_16.mp3: 103.23s to 110.11s
Created chunk_1_17.mp3: 110.11s to 116.99s
Created chunk_1_18.mp3: 116.99s to 123.88s
Created chunk_1_19.mp3: 123.88s to 130.76s
Created chunk_1_20.mp3: 130.76s to 137.64s
Created chunk_1_21.mp3: 137.64s to 144.52s
Created chunk_1_22.mp3: 144.52s to 151.40s
Created chunk_1_23.mp3: 151.40s to 158.29s
Crea

# Send API request

In [7]:
# Send the chunks to the transcription helper together with the API key and
# the optional language prompt chosen in the "Inputs" section.
try:
    transcripts = process_chunks_and_collect_transcripts(chunks, SARVAM_KEY, prompt=prompt)
except Exception as e:
    # `from e` keeps the original error attached as the direct cause.
    raise Exception(f"Failed to collect transcripts: {e}") from e

Processing chunk: audio_chunks/chunk_1_1.mp3 from 0.00s to 6.88s
Transcription successful for chunk_1_1.mp3
Processing chunk: audio_chunks/chunk_1_2.mp3 from 6.88s to 13.76s
Transcription successful for chunk_1_2.mp3
Processing chunk: audio_chunks/chunk_1_3.mp3 from 13.76s to 20.65s
Transcription successful for chunk_1_3.mp3
Processing chunk: audio_chunks/chunk_1_4.mp3 from 20.65s to 27.53s
Transcription successful for chunk_1_4.mp3
Processing chunk: audio_chunks/chunk_1_5.mp3 from 27.53s to 34.41s
Transcription successful for chunk_1_5.mp3
Processing chunk: audio_chunks/chunk_1_6.mp3 from 34.41s to 41.29s
Transcription successful for chunk_1_6.mp3
Processing chunk: audio_chunks/chunk_1_7.mp3 from 41.29s to 48.17s
Transcription successful for chunk_1_7.mp3
Processing chunk: audio_chunks/chunk_1_8.mp3 from 48.17s to 55.06s
Transcription successful for chunk_1_8.mp3
Processing chunk: audio_chunks/chunk_1_9.mp3 from 55.06s to 61.94s
Transcription successful for chunk_1_9.mp3
Processing ch

# Create subtitle file

In [8]:
# Nothing to write when the transcription step produced no results.
if not transcripts:
    raise Exception("No transcripts found")

try:
    # NOTE(review): max_chars=42 is presumably the per-line subtitle length
    # limit - confirm against create_srt_file in utils.
    create_srt_file(transcripts, output_file='subtitles.srt', max_chars=42)
except Exception as e:
    # `from e` keeps the original error attached as the direct cause.
    raise Exception(f"Failed to create srt file: {e}") from e

SRT file created at subtitles.srt
