In [5]:
import os
import soundfile as sf
import whisper

In [3]:
# Input audio to process. This is a relative path, so it is resolved against
# the kernel's current working directory; edit it to point at another file.
audio_file = "song_odia.mp3"

In [4]:
# Sanity-check the input: the path must exist and soundfile must be able to
# open it. Problems are reported with a message rather than raised, so the
# notebook keeps running either way.
if not os.path.exists(audio_file):
    print("Error: File does not exist. Please check the file path.")
else:
    try:
        # Only the file's properties are queried here; no samples are read.
        with sf.SoundFile(audio_file) as audio:
            print(f"File '{audio_file}' loaded successfully.")
            print(f"Sample rate: {audio.samplerate}, Channels: {audio.channels}")
    except Exception as e:
        # Any failure to open/decode the file is shown to the user verbatim.
        print(f"Error loading file: {e}")

File 'song_odia.mp3' loaded succesfully.
Sample rate: 44100, Channels: 2


In [10]:
# Load the Whisper "large" checkpoint (not a small model: roughly a 2.9 GB download).
# Per the original author's note, "medium" would be sufficient here; "large" was
# chosen deliberately to try the best available model.
model = whisper.load_model("large")

100%|█████████████████████████████████████| 2.88G/2.88G [02:20<00:00, 22.0MiB/s]


In [11]:
# Detect the spoken language.
# Only Whisper's language-identification step is run, on the first 30-second
# window of the audio, instead of transcribing the whole file and throwing the
# text away just to read the language code.
print("Detecting Language...")
if model.is_multilingual:
    # Decode the file to a waveform and pad/trim it to one 30-second window.
    audio_clip = whisper.pad_or_trim(whisper.load_audio(audio_file))
    # The mel-bin count must match the loaded checkpoint, so take it from the model.
    mel = whisper.log_mel_spectrogram(audio_clip, n_mels=model.dims.n_mels).to(model.device)
    # detect_language returns (language_tokens, language_probs); for a single
    # spectrogram the probabilities are a dict of language code -> probability.
    lang_probs = model.detect_language(mel)[1]
    detected_lang = max(lang_probs, key=lang_probs.get)
else:
    # English-only checkpoints cannot identify languages; they always use "en".
    detected_lang = "en"

Detecting Language...


In [12]:
# Show the detected language code in upper case (e.g. "gu" -> "GU")
print(f"Detected Language: {detected_lang.upper()}")

Detected Language: GU


In [13]:
# Run Whisper in "translate" mode so the output text is English,
# whatever language is spoken in the audio.
print("Transcribing and translating to English...")
result = model.transcribe(
    audio=audio_file,
    task="translate",  # translate (to English) rather than transcribe verbatim
)

Transcribing and translating to English...


In [14]:
# Pull the text out of the result of the most recent model.transcribe call
# (`result["text"]` holds the full output as a single string)
transcript_text = result["text"]

In [15]:
# Persist the transcript as UTF-8 text; the relative path places it in the
# current working directory and overwrites any existing file of that name.
output_file = "transcript.txt"
with open(output_file, mode="w", encoding="utf-8") as transcript_fh:
    transcript_fh.write(transcript_text)

In [16]:
# Confirm completion and tell the user where the transcript was written
print(f"✅ Transcription completed! The transcript is saved as '{output_file}'.")

✅ Transcription completed! The transcript is saved as 'transcript.txt'.
