In [3]:
"""Importing libraries"""
import cv2
import numpy as np

In [2]:
!pip install SpeechRecognition
!pip install face-recognition

Collecting SpeechRecognition
  Downloading SpeechRecognition-3.10.0-py2.py3-none-any.whl (32.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m32.8/32.8 MB[0m [31m50.7 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: SpeechRecognition
Successfully installed SpeechRecognition-3.10.0


In [4]:
### Code to split a video into chunks of 10 secs each

import os
import json
from moviepy.video.io.VideoFileClip import VideoFileClip
import speech_recognition as sr

def split_and_save_video_chunks(video_path, chunk_length, output_dir):
    with VideoFileClip(video_path) as video:
        duration = int(video.duration)
        for i in range(0, duration, chunk_length):
            start = i
            end = min(i + chunk_length, duration)
            chunk = video.subclip(start, end)
            chunk_path = os.path.join(output_dir, f"chunk_{start:04d}_{end:04d}.mp4")
            chunk.write_videofile(chunk_path, codec="libx264")
            yield chunk, start, end

def extract_and_transcribe(video_clip, start, end, recognizer, output_dir):
    audio_path = os.path.join(output_dir, f"chunk_{start:04d}_{end:04d}.wav")
    video_clip.audio.write_audiofile(audio_path)

    with sr.AudioFile(audio_path) as source:
        audio_data = recognizer.record(source)
        try:
            text = recognizer.recognize_google(audio_data)
            return text
        except sr.UnknownValueError:
            return "Audio was not understood"
        except sr.RequestError:
            return "Request failed"

def process_videos_in_directory(directory):
    recognizer = sr.Recognizer()
    for filename in os.listdir(directory):
        if filename.endswith((".mp4",".mov")):
            video_path = os.path.join(directory, filename)
            video_name = os.path.splitext(filename)[0]
            # Creating the output directory outside the input directory
            output_dir = os.path.join(directory, video_name + '_chunks')
            os.makedirs(output_dir, exist_ok=True)

            all_transcripts = {}
            for video_clip, start, end in split_and_save_video_chunks(video_path, 10, output_dir):
                transcript = extract_and_transcribe(video_clip, start, end, recognizer, output_dir)
                all_transcripts[f"chunk_{start:04d}_{end:04d}"] = transcript

            json_path = os.path.join(output_dir, 'full_transcript.json')
            with open(json_path, 'w') as json_file:
                json.dump(all_transcripts, json_file, indent=4)

            print(f"Transcription for {video_name} completed and saved in {output_dir}")

# Usage
directory_path = "/content/"
process_videos_in_directory(directory_path)

Moviepy - Building video /content/capitalone_chunks/chunk_0000_0010.mp4.
MoviePy - Writing audio in chunk_0000_0010TEMP_MPY_wvf_snd.mp3




MoviePy - Done.
Moviepy - Writing video /content/capitalone_chunks/chunk_0000_0010.mp4





Moviepy - Done !
Moviepy - video ready /content/capitalone_chunks/chunk_0000_0010.mp4
MoviePy - Writing audio in /content/capitalone_chunks/chunk_0000_0010.wav




MoviePy - Done.
Moviepy - Building video /content/capitalone_chunks/chunk_0010_0020.mp4.
MoviePy - Writing audio in chunk_0010_0020TEMP_MPY_wvf_snd.mp3




MoviePy - Done.
Moviepy - Writing video /content/capitalone_chunks/chunk_0010_0020.mp4





Moviepy - Done !
Moviepy - video ready /content/capitalone_chunks/chunk_0010_0020.mp4
MoviePy - Writing audio in /content/capitalone_chunks/chunk_0010_0020.wav




MoviePy - Done.
Moviepy - Building video /content/capitalone_chunks/chunk_0020_0030.mp4.
MoviePy - Writing audio in chunk_0020_0030TEMP_MPY_wvf_snd.mp3




MoviePy - Done.
Moviepy - Writing video /content/capitalone_chunks/chunk_0020_0030.mp4





Moviepy - Done !
Moviepy - video ready /content/capitalone_chunks/chunk_0020_0030.mp4
MoviePy - Writing audio in /content/capitalone_chunks/chunk_0020_0030.wav




MoviePy - Done.
Moviepy - Building video /content/capitalone_chunks/chunk_0030_0040.mp4.
MoviePy - Writing audio in chunk_0030_0040TEMP_MPY_wvf_snd.mp3




MoviePy - Done.
Moviepy - Writing video /content/capitalone_chunks/chunk_0030_0040.mp4





Moviepy - Done !
Moviepy - video ready /content/capitalone_chunks/chunk_0030_0040.mp4
MoviePy - Writing audio in /content/capitalone_chunks/chunk_0030_0040.wav




MoviePy - Done.
Moviepy - Building video /content/capitalone_chunks/chunk_0040_0050.mp4.
MoviePy - Writing audio in chunk_0040_0050TEMP_MPY_wvf_snd.mp3




MoviePy - Done.
Moviepy - Writing video /content/capitalone_chunks/chunk_0040_0050.mp4





Moviepy - Done !
Moviepy - video ready /content/capitalone_chunks/chunk_0040_0050.mp4
MoviePy - Writing audio in /content/capitalone_chunks/chunk_0040_0050.wav




MoviePy - Done.
Moviepy - Building video /content/capitalone_chunks/chunk_0050_0054.mp4.
MoviePy - Writing audio in chunk_0050_0054TEMP_MPY_wvf_snd.mp3




MoviePy - Done.
Moviepy - Writing video /content/capitalone_chunks/chunk_0050_0054.mp4





Moviepy - Done !
Moviepy - video ready /content/capitalone_chunks/chunk_0050_0054.mp4
MoviePy - Writing audio in /content/capitalone_chunks/chunk_0050_0054.wav




MoviePy - Done.
Transcription for capitalone completed and saved in /content/capitalone_chunks
Moviepy - Building video /content/lunch_chunks/chunk_0000_0010.mp4.
MoviePy - Writing audio in chunk_0000_0010TEMP_MPY_wvf_snd.mp3




MoviePy - Done.
Moviepy - Writing video /content/lunch_chunks/chunk_0000_0010.mp4





Moviepy - Done !
Moviepy - video ready /content/lunch_chunks/chunk_0000_0010.mp4
MoviePy - Writing audio in /content/lunch_chunks/chunk_0000_0010.wav




MoviePy - Done.
Moviepy - Building video /content/lunch_chunks/chunk_0010_0020.mp4.
MoviePy - Writing audio in chunk_0010_0020TEMP_MPY_wvf_snd.mp3




MoviePy - Done.
Moviepy - Writing video /content/lunch_chunks/chunk_0010_0020.mp4





Moviepy - Done !
Moviepy - video ready /content/lunch_chunks/chunk_0010_0020.mp4
MoviePy - Writing audio in /content/lunch_chunks/chunk_0010_0020.wav


                                                                  

MoviePy - Done.




Moviepy - Building video /content/lunch_chunks/chunk_0020_0028.mp4.
MoviePy - Writing audio in chunk_0020_0028TEMP_MPY_wvf_snd.mp3




MoviePy - Done.
Moviepy - Writing video /content/lunch_chunks/chunk_0020_0028.mp4





Moviepy - Done !
Moviepy - video ready /content/lunch_chunks/chunk_0020_0028.mp4
MoviePy - Writing audio in /content/lunch_chunks/chunk_0020_0028.wav


                                                                  

MoviePy - Done.




Transcription for lunch completed and saved in /content/lunch_chunks
Moviepy - Building video /content/facerec-test_chunks/chunk_0000_0010.mp4.
MoviePy - Writing audio in chunk_0000_0010TEMP_MPY_wvf_snd.mp3




MoviePy - Done.
Moviepy - Writing video /content/facerec-test_chunks/chunk_0000_0010.mp4





Moviepy - Done !
Moviepy - video ready /content/facerec-test_chunks/chunk_0000_0010.mp4
MoviePy - Writing audio in /content/facerec-test_chunks/chunk_0000_0010.wav


                                                                    

MoviePy - Done.




Moviepy - Building video /content/facerec-test_chunks/chunk_0010_0012.mp4.
MoviePy - Writing audio in chunk_0010_0012TEMP_MPY_wvf_snd.mp3




MoviePy - Done.
Moviepy - Writing video /content/facerec-test_chunks/chunk_0010_0012.mp4





Moviepy - Done !
Moviepy - video ready /content/facerec-test_chunks/chunk_0010_0012.mp4
MoviePy - Writing audio in /content/facerec-test_chunks/chunk_0010_0012.wav


                                                       

MoviePy - Done.




Transcription for facerec-test completed and saved in /content/facerec-test_chunks
Moviepy - Building video /content/arthritis_chunks/chunk_0000_0010.mp4.
MoviePy - Writing audio in chunk_0000_0010TEMP_MPY_wvf_snd.mp3




MoviePy - Done.
Moviepy - Writing video /content/arthritis_chunks/chunk_0000_0010.mp4





Moviepy - Done !
Moviepy - video ready /content/arthritis_chunks/chunk_0000_0010.mp4
MoviePy - Writing audio in /content/arthritis_chunks/chunk_0000_0010.wav


                                                                  

MoviePy - Done.




Moviepy - Building video /content/arthritis_chunks/chunk_0010_0020.mp4.
MoviePy - Writing audio in chunk_0010_0020TEMP_MPY_wvf_snd.mp3




MoviePy - Done.
Moviepy - Writing video /content/arthritis_chunks/chunk_0010_0020.mp4





Moviepy - Done !
Moviepy - video ready /content/arthritis_chunks/chunk_0010_0020.mp4
MoviePy - Writing audio in /content/arthritis_chunks/chunk_0010_0020.wav




MoviePy - Done.
Moviepy - Building video /content/arthritis_chunks/chunk_0020_0021.mp4.
MoviePy - Writing audio in chunk_0020_0021TEMP_MPY_wvf_snd.mp3




MoviePy - Done.
Moviepy - Writing video /content/arthritis_chunks/chunk_0020_0021.mp4





Moviepy - Done !
Moviepy - video ready /content/arthritis_chunks/chunk_0020_0021.mp4
MoviePy - Writing audio in /content/arthritis_chunks/chunk_0020_0021.wav


                                                       

MoviePy - Done.




Transcription for arthritis completed and saved in /content/arthritis_chunks
Moviepy - Building video /content/pills_chunks/chunk_0000_0008.mp4.
MoviePy - Writing audio in chunk_0000_0008TEMP_MPY_wvf_snd.mp3




MoviePy - Done.
Moviepy - Writing video /content/pills_chunks/chunk_0000_0008.mp4





Moviepy - Done !
Moviepy - video ready /content/pills_chunks/chunk_0000_0008.mp4
MoviePy - Writing audio in /content/pills_chunks/chunk_0000_0008.wav


                                                                    

MoviePy - Done.




Transcription for pills completed and saved in /content/pills_chunks
Moviepy - Building video /content/optimize_chunks/chunk_0000_0010.mp4.
MoviePy - Writing audio in chunk_0000_0010TEMP_MPY_wvf_snd.mp3




MoviePy - Done.
Moviepy - Writing video /content/optimize_chunks/chunk_0000_0010.mp4





Moviepy - Done !
Moviepy - video ready /content/optimize_chunks/chunk_0000_0010.mp4
MoviePy - Writing audio in /content/optimize_chunks/chunk_0000_0010.wav




MoviePy - Done.
Moviepy - Building video /content/optimize_chunks/chunk_0010_0020.mp4.
MoviePy - Writing audio in chunk_0010_0020TEMP_MPY_wvf_snd.mp3




MoviePy - Done.
Moviepy - Writing video /content/optimize_chunks/chunk_0010_0020.mp4





Moviepy - Done !
Moviepy - video ready /content/optimize_chunks/chunk_0010_0020.mp4
MoviePy - Writing audio in /content/optimize_chunks/chunk_0010_0020.wav




MoviePy - Done.
Moviepy - Building video /content/optimize_chunks/chunk_0020_0030.mp4.
MoviePy - Writing audio in chunk_0020_0030TEMP_MPY_wvf_snd.mp3




MoviePy - Done.
Moviepy - Writing video /content/optimize_chunks/chunk_0020_0030.mp4





Moviepy - Done !
Moviepy - video ready /content/optimize_chunks/chunk_0020_0030.mp4
MoviePy - Writing audio in /content/optimize_chunks/chunk_0020_0030.wav




MoviePy - Done.
Moviepy - Building video /content/optimize_chunks/chunk_0030_0040.mp4.
MoviePy - Writing audio in chunk_0030_0040TEMP_MPY_wvf_snd.mp3




MoviePy - Done.
Moviepy - Writing video /content/optimize_chunks/chunk_0030_0040.mp4





Moviepy - Done !
Moviepy - video ready /content/optimize_chunks/chunk_0030_0040.mp4
MoviePy - Writing audio in /content/optimize_chunks/chunk_0030_0040.wav




MoviePy - Done.
Moviepy - Building video /content/optimize_chunks/chunk_0040_0050.mp4.
MoviePy - Writing audio in chunk_0040_0050TEMP_MPY_wvf_snd.mp3




MoviePy - Done.
Moviepy - Writing video /content/optimize_chunks/chunk_0040_0050.mp4





Moviepy - Done !
Moviepy - video ready /content/optimize_chunks/chunk_0040_0050.mp4
MoviePy - Writing audio in /content/optimize_chunks/chunk_0040_0050.wav




MoviePy - Done.
Moviepy - Building video /content/optimize_chunks/chunk_0050_0060.mp4.
MoviePy - Writing audio in chunk_0050_0060TEMP_MPY_wvf_snd.mp3




MoviePy - Done.
Moviepy - Writing video /content/optimize_chunks/chunk_0050_0060.mp4





Moviepy - Done !
Moviepy - video ready /content/optimize_chunks/chunk_0050_0060.mp4
MoviePy - Writing audio in /content/optimize_chunks/chunk_0050_0060.wav




MoviePy - Done.
Moviepy - Building video /content/optimize_chunks/chunk_0060_0063.mp4.
MoviePy - Writing audio in chunk_0060_0063TEMP_MPY_wvf_snd.mp3




MoviePy - Done.
Moviepy - Writing video /content/optimize_chunks/chunk_0060_0063.mp4





Moviepy - Done !
Moviepy - video ready /content/optimize_chunks/chunk_0060_0063.mp4
MoviePy - Writing audio in /content/optimize_chunks/chunk_0060_0063.wav


                                                       

MoviePy - Done.




Transcription for optimize completed and saved in /content/optimize_chunks


In [None]:
### This script processes video files to extract unique frames using ORB (Oriented FAST and Rotated BRIEF) for keypoint detection.
### It iterates through video files in specified directories, identifies distinct frames by comparing keypoints, and saves these unique frames as images in a new directory, creating a visual summary of the videos.

"""Importing the video"""
from google.colab.patches import cv2_imshow
list1 = ['arthritis','capitalone','facerec-test','lunch','optimize','pills']
directory_path = "/content/"
for entry in os.scandir(directory_path):
    if entry.is_dir() and entry.name.endswith("_chunks"):
        folder_path = os.path.join(directory_path, entry.name)
        for filename in os.listdir(folder_path):
            print(filename)
            if filename.endswith((".mp4", ".mov")):
                video_path = os.path.join(folder_path, filename)

                video_name, _ = os.path.splitext(filename)  # _ is used to ignore the extension part

          # Creating the output directory outside the input directory
#           output_dir = os.path.join(directory, '..', video_name + '_chunks')
                print(video_path)
                capture = cv2.VideoCapture(f'{video_path}')

                """Images array store the frame nos"""
                images = np.asarray([])
                ret, frame = capture.read()
                cap,cap2 = 0,0
                start = np.asarray((500,32))

                """Looping through all the frames to find the unique ones,
                we have used ORB a modified sift method for detection of keypoints in a particular frame
                and compared those to the other frames keypoints to discover the unique frames."""
                cnt = 0
                new_width = 600
                new_height = 400
                while(ret):
                    frame2 = frame
                    frame = cv2.cvtColor(frame,cv2.COLOR_BGR2GRAY)

                    orb = cv2.ORB_create()
                    kp = orb.detect(frame,None)
                    kp, des = orb.compute(frame,kp)

                    if des is None:
                        ret, frame = capture.read()
                        if cap2 == 0:
                            images = np.append(images,cap2)
                        if start is not None:
                            images = np.append(images,cap2)
                            start = None
                        cap2 += 1
                        continue

                    if cap == 0 :
                        start = des
                        images = np.append(images,cap2)
                        cnt +=1

                        if not os.path.exists(f'/content/snips/{entry.name}/{video_name}'):
                            # Create the folder
                            os.makedirs(f'/content/snips/{entry.name}/{video_name}')

                        filename = f"/content/snips/{entry.name}/{video_name}/{cnt}.jpg"


                        # Resize the image
                        frame2 = cv2.resize(frame2, (new_width, new_height))
                        cv2.imwrite(filename, frame2)
                        # cv2_imshow(frame2)
                    elif start.shape != des.shape :
                        s = 0
                        for i in range(min(start.shape[0],des.shape[0])):
                            s += np.sum(np.equal(start[i],des[i]))
                        if s < 200:
                            start = des
                            images = np.append(images,cap2)
                            cnt +=1
                            if not os.path.exists(f'/content/snips/{entry.name}/{video_name}'):
                            # Create the folder
                                os.makedirs(f'/content/snips/{entry.name}/{video_name}')

                            filename = f"/content/snips/{entry.name}/{video_name}/{cnt}.jpg"
                            frame2 = cv2.resize(frame2, (new_width, new_height))
                            cv2.imwrite(filename, frame2)
                            # cv2_imshow(frame2)
                    else:
                        s = np.sum(np.equal(start,des))
                        if s < 110:
                            start = des
                            images = np.append(images,cap2)
                            cnt +=1
                            if not os.path.exists(f'/content/snips/{entry.name}/{video_name}'):
                                # Create the folder
                                os.makedirs(f'/content/snips/{entry.name}/{video_name}')

                            filename = f"/content/snips/{entry.name}/{video_name}/{cnt}.jpg"
                            frame2 = cv2.resize(frame2, (new_width, new_height))
                            cv2.imwrite(filename, frame2)
                            # cv2_imshow(frame2)

                    ret, frame = capture.read()
                    cap += 1
                    cap2 += 1

                cv2.destroyAllWindows()
                capture.release()

In [None]:
import os
import zipfile
from google.colab import files

def zip_folder(folder_path, output_path):
    """Compress a folder to a zip file."""
    with zipfile.ZipFile(output_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
        for root, dirs, files in os.walk(folder_path):
            for file in files:
                file_path = os.path.join(root, file)
                zipf.write(file_path, os.path.relpath(file_path, os.path.join(folder_path, '..')))

# Specify the path of the folder you want to download
folder_path = '/content/snips'  # Replace with your folder path
zip_file = '/content/final.zip'

# Compress the folder
zip_folder(folder_path, zip_file)

# Download the compressed folder
files.download(zip_file)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
print("The total number of frames : ",cap,"\nThe number of unique frames extracted : ",len(imagesArr))

The total number of frames :  632 
The number of unique frames extracted :  51


In [None]:
### This code block is a Python script for facial recognition in videos; it identifies and
### records appearances of known individuals from a predefined list in video files, summarizing the results in JSON format.

import face_recognition
import cv2
import os
import numpy as np

# Load known face images and create encodings
known_faces_encodings = []
known_faces_names = []

# Load each known face image
known_face_files = ['/content/known_faces/biden.jpg', '/content/known_faces/brian.png', '/content/known_faces/krish.jpg', '/content/known_faces/raja.png','/content/known_faces/mike.png','/content/known_faces/sam.png', '/content/known_faces/saurav.jpeg', '/content/known_faces/trump.jpg']
for file in known_face_files:
    image = face_recognition.load_image_file(file)
    encoding = face_recognition.face_encodings(image)[0]
    known_faces_encodings.append(encoding)
    known_faces_names.append(os.path.splitext(os.path.basename(file))[0])


directory_path = "/content/"
for entry in os.scandir(directory_path):
    if entry.is_dir() and entry.name.endswith("_chunks"):
        folder_path = os.path.join(directory_path, entry.name)
        for filename in os.listdir(folder_path):

            if filename.endswith((".mp4", ".mov")):
                video_path = os.path.join(folder_path, filename)

                video_name, _ = os.path.splitext(filename)  # _ is used to ignore the extension part

          # Creating the output directory outside the input directory
#           output_dir = os.path.join(directory, '..', video_name + '_chunks')
                # Initialize some variables
                face_locations = []
                face_encodings = []
                face_names = []
                process_this_frame = True

                # Dictionary to hold appearance records
                appearance_records = {}
                video_capture = cv2.VideoCapture(f'{video_path}')
# Open the input video file
# video_capture = cv2.VideoCapture('/content/lunch.mp4')
                frame_number = 0

                # Loop over each frame of the video
                while video_capture.isOpened():
                    # Grab a single frame of video (and check if it's okay)
                    ret, frame = video_capture.read()
                    if not ret:
                        break

                    # Optionally skip frames to speed up processing
                    if frame_number % 5 == 0:  # Change 30 to the number of frames you want to skip
                        # Convert the image from BGR color (which OpenCV uses) to RGB color (which face_recognition uses)
                        # rgb_frame = frame[:, :, ::-1]
                        frame = cv2.resize(frame, (0, 0), fx=0.5, fy=0.5)
                        # Find all the faces and face encodings in the current frame of video
                        face_locations = face_recognition.face_locations(frame)
                        face_encodings = face_recognition.face_encodings(frame, face_locations)

                        face_names = []
                        for face_encoding in face_encodings:
                            # See if the face is a match for the known face(s)
                            matches = face_recognition.compare_faces(known_faces_encodings, face_encoding, tolerance=0.6)
                            name = "Unknown"

                            # Use the known face with the smallest distance to the new face
                            face_distances = face_recognition.face_distance(known_faces_encodings, face_encoding)
                            best_match_index = np.argmin(face_distances)
                            if matches[best_match_index]:
                                name = known_faces_names[best_match_index]

                            face_names.append(name)

                            # Add the appearance to the records
                            if name not in appearance_records:
                                appearance_records[name] = []
                            appearance_records[name].append(frame_number)

                    frame_number += 1

                # Release handle to the webcam
                video_capture.release()

                # Process the appearance records to get a summary
                appearance_summary = {name: len(frames) for name, frames in appearance_records.items() if name != "Unknown"}

                # Print the summary
                # for name, count in appearance_summary.items():


                appearance_summary

                video_appearance_summary = {}


                # Extract the chunk identifier from the video path
                # chunk_id = video_path.split('/')[-1].replace('.mp4', '')

                # # Get the appearance records for the chunk
                # appearances = appearance_records.get(chunk_id, {})

                # # Filter out the "Unknown" key and get the names
                names = [name for name in appearance_summary if name != "Unknown"]

                # Map the video path to the names
                video_appearance_summary[video_path] = names

                # Convert to JSON
                with open(f'/content/{entry.name}_{video_name}.json', 'w') as json_file:
                  json.dump(video_appearance_summary, json_file, indent=4)

                # # Output the JSON string or save it to a file
                # print(json_output)
