### Imports

In [1]:
from moviepy.editor import VideoFileClip
import whisper_timestamped as whisper
import json
from moviepy.video.io.ffmpeg_tools import ffmpeg_extract_subclip
from moviepy.editor import VideoFileClip, concatenate_videoclips
import os
### Set ROOT_DIR
# Working directory of the kernel when this cell runs. The extracted audio,
# the intermediate cut clips and the final video are all written under it.
ROOT_DIR = os.getcwd()

Importing the dtw module. When using in academic works please cite:
  T. Giorgino. Computing and Visualizing Dynamic Time Warping Alignments in R: The dtw Package.
  J. Stat. Soft., doi:10.18637/jss.v031.i07.



### Step 1: Extract audio from video 🎥→🔊

In [12]:
# NOTE(review): absolute, machine-specific path -- point this at your own footage.
video_path = "/Users/kamal/Movies/fillerwords_footage/introtake5.TS.mkv"

# Open the source video and dump its audio track to ROOT_DIR/myaudio.wav so
# that the transcription step can read it.
video = VideoFileClip(video_path)
try:
    # A clip without an audio track exposes `audio` as None; fail with a clear
    # message instead of an AttributeError on `None.write_audiofile`.
    if video.audio is None:
        raise ValueError(f"No audio track found in {video_path!r}")
    video.audio.write_audiofile(os.path.join(ROOT_DIR, "myaudio.wav"))
finally:
    # Release the ffmpeg reader held by the clip. The later cells shown in this
    # notebook only use `video_path`, not the open clip.
    video.close()

MoviePy - Writing audio in /Users/kamal/Documents/removefillers/myaudio.wav


                                                                     

MoviePy - Done.




### Step 2: Load Whisper model and transcribe 🎙️→📝

In [13]:
# Load the "tiny" Whisper model on the CPU, then read back the WAV file that
# Step 1 wrote under ROOT_DIR.
model = whisper.load_model("tiny", device="cpu")
audio = whisper.load_audio(os.path.join(ROOT_DIR, "myaudio.wav"))

# Transcribe as English with disfluency detection switched on; Step 3 looks
# for the "[*]" marker in the per-word text of this result.
result = whisper.transcribe(
    model,
    audio,
    language="en",
    detect_disfluencies=True,
)

print(result)

100%|██████████| 5107/5107 [00:02<00:00, 2219.28frames/s]

{'text': " So whenever I'm recording a new video, there are these super-and-like fiddle words that fill the speech. And the reason this occurs is that the brain needs to fill in the process with something. And it also usually occurs when it's the first take of the video and the script really hasn't settled in my head. Now these fiddle words, they are, you know, real annoyance for the audience. You want to figure that you can't unguide them. And for me, as a video producer, I also have to it's a really pain to add the out-of-the-figgered way they begin and really end and then cut out those parts of the video. So for this video, I thought it would be super cool to see whether I could code up a Python project that is able to detect these fiddle words. And also remove them so that I don't have to do that myself.", 'segments': [{'id': 0, 'seek': 0, 'start': 4.92, 'end': 8.74, 'text': " So whenever I'm recording a new video, there are these super-and-like", 'tokens': [50364, 407, 5699, 286, 




### Step 3: Get filler words along with split times 🤬~🕞

In [14]:
# Collect the [start, end] timestamps of every transcribed word whose text
# contains the "[*]" marker, walking the segments (and their words) in order.
filler_words = [
    [token["start"], token["end"]]
    for segment in result["segments"]
    for token in segment["words"]
    if "[*]" in token["text"]
]

In [15]:
# Display the collected [start, end] pairs of the detected filler words.
filler_words

[[6.28, 6.42],
 [7.16, 7.6],
 [16.34, 17.26],
 [21.12, 21.28],
 [22.66, 23.48],
 [27.06, 27.36],
 [29.2, 29.76],
 [32.6, 32.98],
 [37.3, 37.72],
 [38.64, 39.5],
 [46.88, 47.18]]

In [16]:
# Build the list of [start, end] intervals of the video to KEEP: the lead-in
# before the first filler word, the gaps between consecutive filler words, and
# the tail between the last filler word and the end of the last transcript
# segment.
# NOTE(review): the tail stops at the last transcribed segment, not at the end
# of the video file -- anything after it is dropped. Confirm this is intended.
final_end_time = result["segments"][-1]["end"]

split_times = []
keep_from = 0  # start of the interval currently being kept
for filler_start, filler_end in filler_words:
    # Only emit a non-empty gap: a filler at t=0 or back-to-back / overlapping
    # fillers would otherwise yield zero-length or negative intervals.
    if filler_start > keep_from:
        split_times.append([keep_from, filler_start])
    # max() keeps the cursor monotonic even if filler intervals overlap.
    keep_from = max(keep_from, filler_end)

# Tail after the last filler. When no filler was detected at all this is the
# single interval [0, final_end_time] instead of an IndexError.
if final_end_time > keep_from:
    split_times.append([keep_from, final_end_time])

In [17]:
# Display the [start, end] intervals that Step 4 will cut out of the source video.
split_times

[[0, 6.28],
 [6.42, 7.16],
 [7.6, 16.34],
 [17.26, 21.12],
 [21.28, 22.66],
 [23.48, 27.06],
 [27.36, 29.2],
 [29.76, 32.6],
 [32.98, 37.3],
 [37.72, 38.64],
 [39.5, 46.88],
 [47.18, 47.78]]

### Step 4: Split video into (filler-free) chunks 🤓

In [19]:
# Cut every kept interval out of the source video into its own numbered file
# (cutvid0.mp4, cutvid1.mp4, ...) inside ROOT_DIR.
# NOTE(review): in MoviePy 1.x ffmpeg_extract_subclip appears to copy the
# streams without re-encoding, so cut points may not be frame-accurate --
# verify against the produced clips.
for i, (start_time, end_time) in enumerate(split_times):
    chunk_path = ROOT_DIR + "/cutvid" + str(i) + ".mp4"
    ffmpeg_extract_subclip(video_path, start_time, end_time, targetname=chunk_path)

Moviepy - Running:
>>> "+ " ".join(cmd)
Moviepy - Command successful
Moviepy - Running:
>>> "+ " ".join(cmd)
Moviepy - Command successful
Moviepy - Running:
>>> "+ " ".join(cmd)
Moviepy - Command successful
Moviepy - Running:
>>> "+ " ".join(cmd)
Moviepy - Command successful
Moviepy - Running:
>>> "+ " ".join(cmd)
Moviepy - Command successful
Moviepy - Running:
>>> "+ " ".join(cmd)
Moviepy - Command successful
Moviepy - Running:
>>> "+ " ".join(cmd)
Moviepy - Command successful
Moviepy - Running:
>>> "+ " ".join(cmd)
Moviepy - Command successful
Moviepy - Running:
>>> "+ " ".join(cmd)
Moviepy - Command successful
Moviepy - Running:
>>> "+ " ".join(cmd)
Moviepy - Command successful
Moviepy - Running:
>>> "+ " ".join(cmd)
Moviepy - Command successful
Moviepy - Running:
>>> "+ " ".join(cmd)
Moviepy - Command successful


### Step 5: Merge videos into one final video 😃 ✅

In [20]:
# Re-open the chunks written in Step 4, in order, join them into a single
# video and write it to ROOT_DIR/final.mp4.
clips = []
final_clip = None
try:
    for i in range(len(split_times)):
        # os.path.join for consistency with how the other output paths in this
        # notebook are built; the resulting file names match Step 4's output.
        clips.append(VideoFileClip(os.path.join(ROOT_DIR, f"cutvid{i}.mp4")))

    final_clip = concatenate_videoclips(clips)
    final_clip.write_videofile(os.path.join(ROOT_DIR, "final.mp4"))
finally:
    # Each VideoFileClip keeps ffmpeg readers / file handles open until it is
    # closed; release them even if loading or writing fails part-way.
    if final_clip is not None:
        final_clip.close()
    for clip in clips:
        clip.close()

Moviepy - Building video /Users/kamal/Documents/removefillers/final.mp4.
MoviePy - Writing audio in finalTEMP_MPY_wvf_snd.mp3


                                                                    

MoviePy - Done.
Moviepy - Writing video /Users/kamal/Documents/removefillers/final.mp4



                                                                

Moviepy - Done !
Moviepy - video ready /Users/kamal/Documents/removefillers/final.mp4
