In [None]:
!pip install -U openai-whisper
!pip install python-docx

In [1]:
import whisper
import os
from glob import glob
from docx import Document
from datetime import timedelta

  from .autonotebook import tqdm as notebook_tqdm


In [9]:
# Show the model names this whisper installation reports as available.
whisper.available_models()

['tiny.en',
 'tiny',
 'base.en',
 'base',
 'small.en',
 'small',
 'medium.en',
 'medium',
 'large-v1',
 'large-v2',
 'large']

In [2]:
# Load the "large-v2" whisper model once; the transcription loop further down
# calls `model.transcribe(...)` on it.
model = whisper.load_model("large-v2")

In [4]:
def parse_segments(segments):
    """Join segment texts into one newline-terminated block of text.

    Args:
        segments: Iterable of mappings that each have a string under the
            ``'text'`` key.

    Returns:
        A single string in which every segment's text, with leading
        whitespace removed, is followed by ``'\\n'``. An empty iterable
        yields ``''``.
    """
    return ''.join(f"{entry['text'].lstrip()}\n" for entry in segments)

def _format_srt_timestamp(seconds):
    """Format a non-negative offset in seconds as an SRT ``HH:MM:SS,mmm`` stamp."""
    # Work in integer milliseconds so the fractional part survives and the
    # hour field is simply zero-padded (it keeps growing past 99 if needed).
    total_ms = max(0, round(float(seconds) * 1000))
    hours, remainder = divmod(total_ms, 3_600_000)
    minutes, remainder = divmod(remainder, 60_000)
    secs, millis = divmod(remainder, 1000)
    return f"{hours:02d}:{minutes:02d}:{secs:02d},{millis:03d}"


def save_srt(result, file_name):
    """Write the segments of a transcription result as an SRT subtitle file.

    The subtitle file is written next to ``file_name``, using the same path
    with the extension replaced by ``.srt``. The file is opened once in write
    mode, so calling this again for the same input replaces the previous
    output instead of appending duplicate cues to it.

    Args:
        result: Mapping with a ``'segments'`` list. Each segment must provide
            ``'id'`` (int; written 1-based), ``'start'`` and ``'end'``
            (offsets in seconds) and ``'text'`` (str).
        file_name: Path of the media file the transcription belongs to.

    Returns:
        None.
    """
    outfile, _ = os.path.splitext(file_name)
    srt_filename = f"{outfile}.srt"

    with open(srt_filename, 'w', encoding='utf-8') as srt_file:
        for segment in result['segments']:
            start_time = _format_srt_timestamp(segment['start'])
            end_time = _format_srt_timestamp(segment['end'])
            text = segment['text']
            # Drop a single leading space if present; startswith() is also
            # safe for an empty text, where indexing text[0] would raise.
            if text.startswith(' '):
                text = text[1:]
            segment_id = segment['id'] + 1
            srt_file.write(f"{segment_id}\n{start_time} --> {end_time}\n{text}\n\n")


In [5]:
# Transcribe every MP3 under videos/ and write an .srt and a .docx next to it.
# Note: after the loop, `result` holds the transcription of the last file only.
for file_name in glob(r'videos/*.mp3'):
    print(file_name)
    result = model.transcribe(file_name)

    # Subtitle file: <name>.srt beside the audio file.
    save_srt(result, file_name)

    # Word document: the transcript text as a single paragraph.
    outfile = os.path.splitext(file_name)[0]
    transcript_text = parse_segments(result['segments'])
    document = Document()
    document.add_paragraph(transcript_text)
    document.save(f"{outfile}.docx")


videos\A 10 Close Air Support.mp3
videos\ATC_audio_original_soft_static.mp3


# MP4 to MP3

In [None]:
!pip install moviepy

In [7]:
# Import only the name this cell uses instead of `import *`.
from moviepy.editor import VideoFileClip

# Extract the audio track of the MP4 and store it as an MP3 beside it.
# Paths are relative to the project folder, like in the other cells, so the
# notebook is not tied to one user's machine.
video = VideoFileClip('videos/ATC_audio.mp4')
try:
    video.audio.write_audiofile('videos/ATC_audio.mp3')
finally:
    # Release the reader that VideoFileClip keeps open on the source file.
    video.close()

MoviePy - Writing audio in c:\Users\linoa\Documents\Code\whisper_subtitles\./videos/ATC_audio.mp3


                                                                      

MoviePy - Done.




# Add subtitles to MP4

In [None]:


# TextClip and CompositeVideoClip are exported by moviepy.editor;
# moviepy.video.fx.all only provides effect functions, so importing the
# classes from there raises ImportError.
from moviepy.editor import CompositeVideoClip, TextClip, VideoFileClip

# Load video
video = VideoFileClip("videos/A 10 Close Air Support.mp4")

# Create a TextClip (subtitle)
subtitle = TextClip("Your subtitle here", fontsize=24, color='white')

# Position the subtitle at the bottom and for a duration of 10 seconds
subtitle = subtitle.set_position(('center', 'bottom')).set_duration(10)

# Overlay subtitle on video
final_video = CompositeVideoClip([video, subtitle])

# Write to a file
final_video.write_videofile("output_video.mp4")

In [None]:
from moviepy.editor import VideoFileClip, TextClip, CompositeVideoClip
import textwrap

# Load the video
video_path = "videos/A 10 Close Air Support.mp4"
video = VideoFileClip(video_path)

# Transcribe the audio that belongs to this video. The `result` variable left
# over from the transcription loop holds whichever file was processed last,
# which is not necessarily this one, so it must not be reused here.
audio_path = os.path.splitext(video_path)[0] + ".mp3"
transcript = model.transcribe(audio_path)['segments']

# Create one text clip per transcript segment
txt_clips = []
for segment in transcript:
    # Wrap long lines at 50 characters so the subtitle stays inside the frame.
    subtitle = "\n".join(textwrap.wrap(segment['text'], 50))
    txt_clip = TextClip(subtitle, fontsize=12, font="Amiri-Bold", kerning=1, bg_color='black', color='white')
    # Show the clip from the segment's start time for the segment's length.
    txt_clip = txt_clip.set_start(segment['start'])
    txt_clip = txt_clip.set_position((0.2, 0.8), relative=True).set_duration(segment['end'] - segment['start'])
    txt_clips.append(txt_clip)

# Overlay the subtitle clips on the video
final_video = CompositeVideoClip([video] + txt_clips)

# Save the final video
final_video.write_videofile("videos/output.mp4")

# Download videos from YouTube for testing

In [None]:
!pip install pytube


In [None]:
from pytube import YouTube

link = "https://www.youtube.com/watch?v=jdBzyAURWEI"
yt = YouTube(link)

# Restrict to progressive streams with an mp4 extension, order them by
# resolution from highest to lowest, and take the first one.
mp4_streams = yt.streams.filter(progressive=True, file_extension='mp4')
highest_first = mp4_streams.order_by('resolution').desc()
video = highest_first.first()

# Save the selected stream into the videos folder.
video.download('./videos/')