### Installing Whisper, googletrans, and translate with pip

In [1]:
! pip install git+https://github.com/openai/whisper.git -q

[K     |████████████████████████████████| 5.8 MB 22.1 MB/s 
[K     |████████████████████████████████| 182 kB 76.5 MB/s 
[K     |████████████████████████████████| 7.6 MB 59.4 MB/s 
[?25h  Building wheel for whisper (setup.py) ... [?25l[?25hdone


In [2]:
pip install googletrans==3.1.0a0

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting googletrans==3.1.0a0
  Downloading googletrans-3.1.0a0.tar.gz (19 kB)
Collecting httpx==0.13.3
  Downloading httpx-0.13.3-py3-none-any.whl (55 kB)
[K     |████████████████████████████████| 55 kB 3.6 MB/s 
Collecting hstspreload
  Downloading hstspreload-2022.12.1-py3-none-any.whl (1.5 MB)
[K     |████████████████████████████████| 1.5 MB 55.8 MB/s 
Collecting sniffio
  Downloading sniffio-1.3.0-py3-none-any.whl (10 kB)
Collecting httpcore==0.9.*
  Downloading httpcore-0.9.1-py3-none-any.whl (42 kB)
[K     |████████████████████████████████| 42 kB 1.5 MB/s 
[?25hCollecting rfc3986<2,>=1.3
  Downloading rfc3986-1.5.0-py2.py3-none-any.whl (31 kB)
Collecting h11<0.10,>=0.8
  Downloading h11-0.9.0-py2.py3-none-any.whl (53 kB)
[K     |████████████████████████████████| 53 kB 1.1 MB/s 
[?25hCollecting h2==3.*
  Downloading h2-3.2.0-py2.py3-none-any.whl (65 kB)
[K     |████████████

In [3]:
pip install translate

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting translate
  Downloading translate-3.6.1-py2.py3-none-any.whl (12 kB)
Collecting libretranslatepy==2.1.1
  Downloading libretranslatepy-2.1.1-py3-none-any.whl (3.2 kB)
Installing collected packages: libretranslatepy, translate
Successfully installed libretranslatepy-2.1.1 translate-3.6.1


### Importing Libraries

In [19]:
import os
import sys
import subprocess
import pandas as pd

### Loading Medium model from Whisper

In [20]:
import whisper

# The checkpoint name is kept in one place so it is easy to swap for another model.
WHISPER_MODEL_NAME = 'medium'
model = whisper.load_model(WHISPER_MODEL_NAME)

### Converting the MP4 File to an MP3 File

In [6]:
def video2mp3(video_file, output_ext="mp3"):
    """Convert a media file with ffmpeg, writing the result next to the input.

    The output path is the input path with its extension replaced by
    ``output_ext``. An existing file at that path is overwritten (``-y``).

    Args:
        video_file: Path of the file passed to ``ffmpeg -i``.
        output_ext: Extension (without the leading dot) of the file to create.

    Returns:
        The path of the output file.

    Raises:
        RuntimeError: If ffmpeg exits with a non-zero status. The message
            carries the last lines of ffmpeg's stderr.
    """
    stem, _ = os.path.splitext(video_file)
    audio_path = f"{stem}.{output_ext}"
    completed = subprocess.run(
        ["ffmpeg", "-y", "-i", video_file, audio_path],
        stdout=subprocess.DEVNULL,
        stderr=subprocess.PIPE,
    )
    # A failed conversion must not hand back a path to a file that was never
    # written; surface ffmpeg's own diagnostics instead.
    if completed.returncode != 0:
        log = (completed.stderr or b"").decode(errors="replace").strip()
        tail = "\n".join(log.splitlines()[-5:])
        raise RuntimeError(
            f"ffmpeg exited with status {completed.returncode} "
            f"while converting {video_file!r}:\n{tail}"
        )
    return audio_path

In [21]:
# Source video for the whole pipeline. This is a hard-coded absolute path, so it must be
# adjusted when the notebook runs somewhere the file does not live under /content.
input_video = "/content/ShortFilm.mp4"

In [8]:
# Convert the video with video2mp3's default output extension and keep the resulting path.
audio_file = video2mp3(video_file=input_video)

In [9]:
from IPython.display import Audio

# Pass the path as `filename=` so IPython does not have to guess whether a positional
# string is a file path, a URL, or raw audio data.
Audio(filename=audio_file)

### Using Whisper to Transcribe the Audio and Translate It to English

In [22]:
def translate(audio):
    """Run the loaded Whisper model on ``audio`` with the "translate" task.

    Args:
        audio: Audio input forwarded unchanged to ``model.transcribe``.

    Returns:
        The value returned by ``model.transcribe``. Later cells read its
        ``"segments"`` entry.
    """
    decode_options = dict(task="translate", beam_size=5, best_of=5)
    # Use the argument rather than the notebook-level `audio_file`, so the function
    # processes whatever the caller passes in.
    return model.transcribe(audio, **decode_options)

In [11]:
# Run the translate() helper on the extracted audio and keep its full output.
result = translate(audio=audio_file)

In [12]:
# Peek at the first segment to see which fields each entry carries.
result["segments"][0]

{'id': 0,
 'seek': 3000,
 'start': 30.0,
 'end': 32.96,
 'text': ' BATTLE ANIMATED',
 'tokens': [363, 43321, 2634, 5252, 6324, 2218, 4731],
 'temperature': 1.0,
 'avg_logprob': -2.941813514346168,
 'compression_ratio': 0.8490566037735849,
 'no_speech_prob': 0.10619298368692398}

In [13]:
# 'start' value of the first segment.
result["segments"][0]["start"]

30.0

In [14]:
# Number of segments in the result.
len(result["segments"])

167

### Creating a DataFrame

In [15]:
# Build three parallel lists with one stringified value per segment; they become the
# Start / End / Text columns of the DataFrame.
segments = result['segments']
start = [str(segment['start']) for segment in segments]
end = [str(segment['end']) for segment in segments]
text = [str(segment['text']) for segment in segments]

### Translating the Generated English Text to Hindi

In [16]:
from googletrans import Translator

t = Translator()

# One translate call per segment; the translated string is read from each response's `.text`.
Hindi = [
    t.translate(segment['text'], dest='hi').text
    for segment in result['segments']
]

In [17]:
# Each list is loaded as a labelled row, then transposed so the labels become column names.
column_names = ['Start', 'End', 'Text', 'Hindi']
df = pd.DataFrame([start, end, text, Hindi], index=column_names).T
df

Unnamed: 0,Start,End,Text,Hindi
0,30.0,32.96,BATTLE ANIMATED,लड़ाई एनिमेटेड
1,56.64,58.68,What are life's big fans for?,जीवन के बड़े प्रशंसक किस लिए हैं?
2,58.68,61.68,"If you are a friend, you are like Karna.","यदि आप मित्र हैं, तो आप कर्ण के समान हैं।"
3,61.68,63.68,"Whoever knows that he is defeated,","जो जानता है कि वह हार गया है,"
4,63.68,66.68,still he fights for you.,फिर भी वह तुम्हारे लिए लड़ता है।
...,...,...,...,...
162,872.68,875.68,You will see the way ahead on your own.,आगे का रास्ता आप खुद देखेंगे।
163,875.68,879.68,Say thank you to your bad time.,अपने बुरे समय को थैंक्यू कहें।
164,879.68,906.68,Because it is the one who has made you so str...,क्योंकि यह वही है जिसने आपको इतना मजबूत बनाया है।
165,909.68,911.68,Thank you for watching.,देखने के लिए धन्यवाद।


### Generating a Text file with TimeStamps and Hindi Subtitles

In [18]:
# Write one numbered entry per DataFrame row:
#   "<i>.     <start> -------> <end>", then the indented Hindi line and three newlines.
# encoding="utf-8" is explicit so the Devanagari text does not depend on the platform's
# default encoding. The `with` block closes the file even if a row fails.
with open("Hindi_Subtitles.txt", "w", encoding="utf-8") as file:
    rows = zip(df['Start'], df['End'], df['Hindi'])
    for i, (seg_start, seg_end, hindi) in enumerate(rows):
        file.write(f"{i}.     {seg_start} -------> {seg_end}\n       {hindi}\n\n\n")
