In [1]:
import cv2
import numpy as np
import pandas as pd
import time
from pytesseract import image_to_string
import matplotlib.pyplot as plt
import datetime

In [2]:
def euclidean_dist(A,B):
    """Return the Euclidean (L2) distance between two arrays.

    Both inputs are converted to float before subtracting. Without this,
    unsigned integer arrays (e.g. uint8 image frames) wrap around on
    ``A - B`` and on squaring, which silently yields a wrong distance.
    This mirrors the float conversion already done in ``mse``.

    Parameters
    ----------
    A, B : array-like
        Numeric inputs with the same (or broadcastable) shapes.

    Returns
    -------
    numpy.float64
        Square root of the sum of squared element-wise differences.
    """
    diff = np.asarray(A, dtype=float) - np.asarray(B, dtype=float)
    return np.sqrt(np.sum(diff ** 2))

def manhattan_dist(A,B):
    """Return the Manhattan (L1) distance between two arrays.

    Both inputs are converted to float before subtracting. Without this,
    unsigned integer arrays (e.g. uint8 image frames) wrap around on
    ``A - B`` (``0 - 20`` becomes ``236``), which silently yields a wrong
    distance. This mirrors the float conversion already done in ``mse``.

    Parameters
    ----------
    A, B : array-like
        Numeric inputs with the same (or broadcastable) shapes.

    Returns
    -------
    numpy.float64
        Sum of the absolute element-wise differences.
    """
    diff = np.asarray(A, dtype=float) - np.asarray(B, dtype=float)
    return np.sum(np.abs(diff))

def mse(imageA, imageB):
    """Mean squared error between two images.

    Both images are cast to float before subtraction, so integer pixel
    types do not overflow. The summed squared difference is divided by
    ``imageA.shape[0] * imageA.shape[1]`` (the first two dimensions only).

    Parameters
    ----------
    imageA, imageB : numpy.ndarray
        Arrays with at least two dimensions and matching shapes.

    Returns
    -------
    numpy.float64
        Sum of squared differences divided by rows * columns of ``imageA``.
    """
    first = imageA.astype("float")
    second = imageB.astype("float")
    squared_error_total = np.sum((first - second) ** 2)
    pixel_count = float(imageA.shape[0] * imageA.shape[1])
    return squared_error_total / pixel_count

## Shot identification: start time, end time, and number of subtopics within each topic

In [3]:
# Shot segmentation and OCR of topic / subtopic titles.
#
# Each frame's Canny edge map is compared (via mse) with the edge map of the
# current reference frame. When they differ strongly, the reference frame is
# OCR'd and classified as a topic slide or a subtopic slide from the amount
# of edge content in its top strip; the reference frame is then replaced.
#
# Results:
#   topic    : {frame number: [topic name, start time, end time, # subtopics]}
#   subtopic : {topic frame number: [subtopic name, ...]}

VIDEO_PATH = 'test1.mp4'
CANNY_LOW, CANNY_HIGH = 60, 120        # Canny hysteresis thresholds
SHOT_CHANGE_MSE = 1100                 # edge-map MSE above this => shot change
TOPIC_MAX_EDGE_SUM = 400000            # top-strip edge sum below this => topic slide
SUBTOPIC_MIN_EDGE_SUM = 800000         # top-strip edge sum strictly between these
SUBTOPIC_MAX_EDGE_SUM = 1500000        #   two bounds => subtopic slide

# Initialised before the capture is touched so the names exist (and are fresh
# on every re-run of the cell) even if opening the video fails.
flag = True          # not used in this cell
topic = dict()
subtopic = dict()
header = None        # frame-number key of the topic currently being filled
ts = -1              # -1: no topic yet, 0: topic without subtopic, 1: topic with subtopic(s)
stcount = 0          # number of subtopics recorded for the current topic
start_time = None    # HH:MM:SS of the most recent shot change

capture = cv2.VideoCapture(VIDEO_PATH)
try:
    if not capture.isOpened():
        raise IOError(f"Could not open video file {VIDEO_PATH!r}")

    fps = int(capture.get(cv2.CAP_PROP_FPS))

    ret,prev = capture.read()
    if not ret:
        # Fail with a clear message instead of an opaque cvtColor error on None
        raise IOError(f"Could not read the first frame of {VIDEO_PATH!r}")

    fnum = int(capture.get(cv2.CAP_PROP_POS_FRAMES))
    prev = cv2.cvtColor(prev,cv2.COLOR_BGR2GRAY)
    prev_canny = cv2.Canny(prev,CANNY_LOW,CANNY_HIGH)

    while capture.isOpened():
        # Position is sampled before the read, i.e. it belongs to the frame
        # that is about to be decoded
        msec = capture.get(cv2.CAP_PROP_POS_MSEC)
        ret1,cur = capture.read()
        fnum = int(capture.get(cv2.CAP_PROP_POS_FRAMES))

        if not ret1:
            break

        cur = cv2.cvtColor(cur, cv2.COLOR_BGR2GRAY)
        cur_canny = cv2.Canny(cur, CANNY_LOW, CANNY_HIGH)

        mean_square_error = mse(cur_canny,prev_canny)
        if mean_square_error <= SHOT_CHANGE_MSE:
            continue

        # thresholding for OCR; the returned threshold value is discarded so
        # it does not overwrite the frame-read status
        _,thresh = cv2.threshold(prev,127,255,cv2.THRESH_BINARY)

        # frame start time calculation
        start_time = time.strftime('%H:%M:%S', time.gmtime(msec/1000))
        # total edge response in the top strip of the reference frame
        finder = np.sum(prev_canny[0:80,40:940])

        if finder < TOPIC_MAX_EDGE_SUM:
            # strip so a whitespace-only OCR result is not taken as a name
            tname = image_to_string(thresh[80:452,10:952]).strip()

            if tname:
                # close the previous topic: append its end time and subtopic count
                if topic:
                    topic[header].extend([start_time, stcount])

                ts = 0 # topic
                stcount = 0
                # store topic name, start time in dictionary with frame number as key
                topic[fnum] = [tname, start_time]
                header = fnum

        elif SUBTOPIC_MIN_EDGE_SUM < finder < SUBTOPIC_MAX_EDGE_SUM:
            sname = image_to_string(thresh[0:80,40:940]).strip()
            if sname:
                if ts == 1:
                    # append subtopic name under the current topic's frame number
                    subtopic[header].append(sname)
                    stcount += 1
                elif ts == 0: # first subtopic of a new topic
                    subtopic[header] = [sname]
                    stcount = 1
                    ts = 1
                # ts == -1: no topic seen yet, so the subtopic is dropped

        # The reference frame only advances on a detected shot change
        prev = cur
        prev_canny = cur_canny
finally:
    # Release the capture even if OCR or decoding raised
    capture.release()

# Close the last topic: append its end time and subtopic count.
# NOTE(review): the end time used here is the time of the last detected shot
# change, not the end of the video - confirm this is intended.
if topic:
    topic[header].extend([start_time, stcount])

In [4]:
# Display the topic table, then the subtopics recorded under each topic.
print("\033[1mTopic details are as follows:\033[0m\n")

key = list(topic.keys())
v = list(topic.values())

# Column names are passed to the constructor rather than assigned afterwards:
# assigning four names to a frame built from an empty list raises ValueError,
# so the cell would crash whenever no topic was detected.
df = pd.DataFrame(v, columns=['Topic Name','St. Time','End time','# Subtopic'])
df.insert(0,'Frame #',key)

print(df.to_string(justify='right',index=False,col_space=15))

# Display subtopic information, grouped by the frame number of the owning topic.
# Distinct loop names keep `v` (the topic rows above) from being overwritten.
print("\n\033[1mSubtopic details are as follows:\033[0m\n")
for topic_frame, subtopic_names in subtopic.items():
    print(f'  frame number: {topic_frame}')
    for position, subtopic_name in enumerate(subtopic_names, start=1):
        print(f'\t{position} {subtopic_name}')
    print()

[1mTopic details are as follows:[0m

        Frame #                  Topic Name        St. Time        End time      # Subtopic
            229  Distribution of Name Space        00:00:07        00:04:13               6
           7629         DNS in the Internet        00:04:13        00:06:22               5
          11534             Name Resolution        00:06:22        00:11:47              12
          21308                DNS Messages        00:11:47        00:14:11               2

[1mSubtopic details are as follows:[0m

  frame number: 229
	1 Hierarchy of Name Servers
	2 Zones and Domains
	3 What is a Zone?
	4 wrm iemzepe?
	5 The Concept
	6 Zone Transfer

  frame number: 7629
	1 DNS in the Internet
	2 MMitthinlainst
	3 Generic Domains
	4 Country Domains
	5 Inverse Domain

  frame number: 11534
	1 Name Resolution Process
	2 Name Resolution Process (contd...)
	3 Name Resolution Process
	4 Name Resolution Process (contd...)
	5 Name Resolution Process (contd...)
	6 Hierarch