# Annotating Video text

Replace the YouTube video link in the following code cell with the link of the video you want to annotate.

In [1]:
!pip install youtube_dl -q

import youtube_dl

# YouTube video to fetch; swap in any other watch URL.
link = "https://www.youtube.com/watch?v=OZ--BAModfw"

ydl_opts = {}

# First pass: query the metadata only (download=False) to read the title.
with youtube_dl.YoutubeDL(ydl_opts) as ydl:
    info_dict = ydl.extract_info(link, download=False)
    video_title = info_dict.get('title')

# The output name is fixed; the title-based alternative would be
# f'./{video_title}.mp4'.
path = './news.mp4'

ydl_opts['outtmpl'] = path

# Second pass: download the video using the output template set above.
with youtube_dl.YoutubeDL(ydl_opts) as ydl:
    ydl.download([link])

[K     |████████████████████████████████| 1.9MB 8.2MB/s 
[?25h[youtube] OZ--BAModfw: Downloading webpage
[youtube] OZ--BAModfw: Downloading webpage
[download] Destination: ./news.f135.mp4
[download] 100% of 2.13MiB in 00:00
[download] Destination: ./news.mp4.f140
[download] 100% of 418.31KiB in 00:00
[ffmpeg] Merging formats into "./news.mp4"
Deleting original file ./news.f135.mp4 (pass -k to keep)
Deleting original file ./news.mp4.f140 (pass -k to keep)


In [2]:
!ls

news.mp4  sample_data


In [4]:
import imageio
from PIL import Image
import cv2
import numpy as np
import os
import subprocess
import matplotlib.pyplot as plt
import matplotlib.patches as patches
# import seaborn as sns
# from IPython.display import Video, display
import warnings
warnings.filterwarnings("ignore")

In [6]:
# Video("news.mp4")
# Left commented out: displaying the video inline on Colab crashes the runtime.

In [17]:
# File name of the video that the rest of the notebook processes.
video_name = "news.mp4"
video_name

'news.mp4'

In [21]:
# The path is simply the file name (relative to the working directory).
video_path = video_name

In [22]:
# FourCC code handed to the intermediate OpenCV writer.
VIDEO_CODEC = "MP4V"

video_name = os.path.basename(video_path)

# Open the source video and fail fast if OpenCV cannot read it. Without this
# check an unreadable file goes unnoticed and the writer below is built from
# whatever property values the unopened capture reports.
vidcap = cv2.VideoCapture(video_path)
if not vidcap.isOpened():
    raise IOError(f"Could not open video file: {video_path}")

# The labeled video reuses the source frame rate and frame size.
fps = vidcap.get(cv2.CAP_PROP_FPS)
width = int(vidcap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(vidcap.get(cv2.CAP_PROP_FRAME_HEIGHT))

# Frames are written to a temporary file first; `output_path` is the name of
# the final, re-encoded video.
output_path = "labeled_" + video_name
tmp_output_path = "tmp_" + output_path
output_video = cv2.VideoWriter(
    tmp_output_path,
    cv2.VideoWriter_fourcc(*VIDEO_CODEC),
    fps,
    (width, height),
)

In [23]:
# Sanity check: display the file name, frame rate, and frame height/width read above.
video_name, fps, height, width

('news.mp4', 25.0, 480, 640)

In [11]:
!pip install pytesseract -q
!sudo apt install tesseract-ocr -q
# ! apt install libtesseract-dev -q
!pip install tesseract -q
import pytesseract
import sys

  Building wheel for pytesseract (setup.py) ... [?25l[?25hdone
Reading package lists...
Building dependency tree...
Reading state information...
The following additional packages will be installed:
  tesseract-ocr-eng tesseract-ocr-osd
The following NEW packages will be installed:
  tesseract-ocr tesseract-ocr-eng tesseract-ocr-osd
0 upgraded, 3 newly installed, 0 to remove and 31 not upgraded.
Need to get 4,795 kB of archives.
After this operation, 15.8 MB of additional disk space will be used.
Get:1 http://archive.ubuntu.com/ubuntu bionic/universe amd64 tesseract-ocr-eng all 4.00~git24-0e00fe6-1.2 [1,588 kB]
Get:2 http://archive.ubuntu.com/ubuntu bionic/universe amd64 tesseract-ocr-osd all 4.00~git24-0e00fe6-1.2 [2,989 kB]
Get:3 http://archive.ubuntu.com/ubuntu bionic/universe amd64 tesseract-ocr amd64 4.00~git2288-10f4998a-2 [218 kB]
Fetched 4,795 kB in 2s (2,011 kB/s)
debconf: unable to initialize frontend: Dialog
debconf: (No usable dialog-like program is installed, so the dialo

In [24]:
def get_string(img):
    """Run Tesseract OCR on a single frame and return the recognized text.

    Args:
        img: Image array in BGR channel order (it is converted with
            ``cv2.COLOR_BGR2GRAY``), e.g. a frame from ``cv2.VideoCapture.read``.

    Returns:
        The string produced by ``pytesseract.image_to_string`` for the frame.
    """
    # Tesseract is given a single-channel grayscale image.
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # Dilation followed by erosion, meant to remove some noise.
    # NOTE(review): with a 1x1 kernel both steps look like no-ops; a larger
    # kernel may have been intended -- confirm before changing.
    kernel = np.ones((1, 1), np.uint8)
    gray = cv2.dilate(gray, kernel, iterations=1)
    gray = cv2.erode(gray, kernel, iterations=1)
    # The flag and its value must be separate tokens ('--psm 6'); the previous
    # '--psm6' is not a valid Tesseract option, so page segmentation mode 6
    # was never actually applied.
    return pytesseract.image_to_string(gray, lang='eng', config='--psm 6')

In [25]:
# Point pytesseract at the Tesseract executable.
pytesseract.pytesseract.tesseract_cmd = '/usr/bin/tesseract'

The following cell takes about 10 minutes to complete.

In [26]:
# Walk through the video frame by frame, OCR each frame, draw the recognized
# text and the detected text boxes onto it, and write it to the temporary video.
annotations = []  # recognized text, one entry per frame that contains text
frame = 0         # number of frames processed so far
try:
    while True:
        it_worked, img = vidcap.read()
        if not it_worked:
            # read() reported failure: no more frames to process.
            break
        frame += 1
        text = get_string(img)

        # Locate the text boxes on the untouched frame, *before* the caption
        # is drawn, so the overlay itself is not picked up as video text.
        ocr_data = pytesseract.image_to_data(img, output_type=pytesseract.Output.DICT)

        cv2.putText(img, text, (10, 400), cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0, 255, 255), thickness=1)

        found_text = False
        for i in range(len(ocr_data["level"])):
            # Depending on the pytesseract/Tesseract version the confidence is
            # an int, a float, or a numeric string; float() accepts all three,
            # whereas int() fails on strings such as "95.6".
            if float(ocr_data["conf"][i]) > 0:
                x, y = ocr_data["left"][i], ocr_data["top"][i]
                w, h = ocr_data["width"][i], ocr_data["height"][i]
                cv2.rectangle(img, (x, y), (x + w, y + h), (0, 255, 255), 2)
                found_text = True

        # Store the frame's text once, not once per detected box.
        if found_text:
            annotations.append(text)

        output_video.write(img)
finally:
    # Always free the capture and finalize the temporary file, even on error.
    vidcap.release()
    output_video.release()

# Re-encode the temporary file to H.264. check=True raises if ffmpeg fails, so
# the temporary video is only deleted after a successful conversion.
if os.path.exists(output_path):
    os.remove(output_path)
subprocess.run(
    ["ffmpeg", "-i", tmp_output_path, "-crf", "18", "-preset", "slow", "-vcodec", "libx264", output_path],
    check=True,
)
os.remove(tmp_output_path)

### Annotated video

The recognized captions are drawn onto the frames of the labeled video, and the same text is also stored in the `annotations` variable.

In [None]:
# Video("labeled_news.mp4")
# Left commented out: displaying the video inline on Colab crashes the runtime.

Now save the annotations to a file

In [27]:
# Write the string form of the annotations list to annotations.txt.
# OCR output can contain non-ASCII characters, so the encoding is set
# explicitly instead of relying on the platform default.
with open("annotations.txt", "w", encoding="utf-8") as output:
    output.write(str(annotations))