<a href="https://colab.research.google.com/github/teyang-lau/you-only-edit-once/blob/main/scripts/extract_frames.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Install the libraries needed to read videos from YouTube. Pinning `youtube-dl==2020.12.2` works around the "dislike count" error that otherwise makes `pafy.new(url)` fail and stops the notebook.

Source: https://stackoverflow.com/questions/70297028/i-have-a-problem-with-dislike-error-when-creating-an-pafy-new-url-perhaps-its

In [1]:
!pip install -q pafy
!pip install -q youtube-dl==2020.12.2

Import libraries

In [2]:
import os
import uuid
import cv2

In [None]:
import pafy
import youtube_dl

Extract frames from video

In [3]:
def video2frames(video_file, output_path, factor=1, youtube=False):
    """Extract frames from a video file or YouTube link at a fixed time interval.

    Every ``round(fps * factor)``-th frame is written to ``output_path`` as a
    JPEG named ``<uuid4>_<frame index>.jpg``. The output folder is created if
    it does not exist yet.

    Args:
        video_file (str): path to the video, or the YouTube URL when
            ``youtube`` is truthy
        output_path (str): path to output folder for storing extracted frames
        factor (int | float): how many seconds between extracted frames.
            1 = extract a frame every sec, 2 = extract a frame every 2 secs
        youtube (bool): whether to get the video directly from a YouTube link

    Returns:
        None

    Raises:
        ValueError: if the video opens but ``round(fps * factor)`` is smaller
            than 1 (the reported frame rate is 0, or ``factor`` is too small),
            so no sampling interval can be derived.

    Notes:
        * If the source cannot be opened, nothing is extracted and the
          function returns silently.
        * Reading is bounded by the frame count reported by the capture; when
          that count is 0 or negative only a single read is attempted.
    """
    os.makedirs(output_path, exist_ok=True)

    if youtube:
        # Imported lazily so local files can be processed without pafy, and so
        # the function does not depend on a separate import cell having run.
        import pafy

        best = pafy.new(video_file).getbest(preftype="mp4")
        source = best.url
    else:
        source = video_file

    vid = cv2.VideoCapture(source)
    try:
        if not vid.isOpened():
            return

        fps = round(vid.get(cv2.CAP_PROP_FPS))
        num_frames = int(vid.get(cv2.CAP_PROP_FRAME_COUNT))

        # Number of frames between two saved images; an integer step keeps the
        # modulo below exact even when `factor` is fractional.
        step = round(fps * factor)
        if step < 1:
            raise ValueError(
                'Cannot derive a frame interval from fps={} and factor={}'.format(fps, factor)
            )

        index = 0
        while vid.isOpened():
            success, img = vid.read()
            index += 1
            # extract every fps-th frame of the video, multiplied by the factor
            if success and index % step == 0:
                filename = '{}_{}.jpg'.format(uuid.uuid4(), index)
                cv2.imwrite(os.path.join(output_path, filename), img)
            # stop reading at end of video
            # need this as some frames return False success, so cannot
            # use success to break the while loop
            if index > num_frames:
                break
    finally:
        # Always free the capture, even if reading or writing raised.
        vid.release()

Extracting frames from youtube video

In [4]:
# YouTube link to sample frames from; the trailing `@param` marker is a Colab form annotation.
url = 'https://www.youtube.com/watch?v=qvPJsjUjOLo'      # @param {type:"string"}

In [5]:
# Save one frame for every 10 seconds of the YouTube video.
video2frames(
    video_file=url,
    output_path='./data/extracted_frames_2',
    factor=10,
    youtube=True,
)

fps: 30


In [9]:
!zip -r /content/extracted_frames_2.zip /content/data/extracted_frames_2

  adding: content/data/extracted_frames_2/ (stored 0%)
  adding: content/data/extracted_frames_2/19c8c10d-6eed-40d9-a81b-3e9d7ba498a3_1200.jpg (deflated 1%)
  adding: content/data/extracted_frames_2/bbfe7bfb-d2d9-4aac-86e6-00e7f9790a67_2700.jpg (deflated 0%)
  adding: content/data/extracted_frames_2/a568cd88-16c3-485f-bb37-f556674c8220_4200.jpg (deflated 0%)
  adding: content/data/extracted_frames_2/73964a9e-4b50-42f4-9f49-ea39c546cc86_3600.jpg (deflated 0%)
  adding: content/data/extracted_frames_2/10fecbae-6712-4c9a-9975-81065e474b1b_3000.jpg (deflated 0%)
  adding: content/data/extracted_frames_2/d45f7766-e71e-4109-bb78-30eb07e3a6a4_600.jpg (deflated 0%)
  adding: content/data/extracted_frames_2/22575c00-69b4-40c9-bde1-92998e001b53_3900.jpg (deflated 1%)
  adding: content/data/extracted_frames_2/52a14ecb-5811-40d2-99bf-5fb268d765a0_4800.jpg (deflated 2%)
  adding: content/data/extracted_frames_2/90195e03-a80e-4bf6-86d6-b3cf087d561b_2400.jpg (deflated 0%)
  adding: content/data/extra

In [10]:
# google.colab is imported in this cell rather than with the other imports;
# presumably because it only exists on Colab runtimes (confirm before moving it).
from google.colab import files
# Ask Colab to download the zip archive of extracted frames.
files.download("/content/extracted_frames_2.zip")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Function to extract frames from multiple videos

In [None]:
def multiple_video2frames(video_path, output_path, ignore_prev_vid=False, prev_vid_file=None, factor=1):
    """Extract frames from every video file in a folder.

    Each entry of ``video_path`` is passed to ``video2frames``. When
    ``ignore_prev_vid`` is set, entries already listed in ``prev_vid_file``
    are skipped and every newly processed entry is appended to that file.

    Args:
        video_path (str): path to folder containing all videos
        output_path (str): path to output folder for storing extracted frames
        ignore_prev_vid (bool): whether to skip videos that have already been
            extracted
        prev_vid_file (str): path to text file containing previously extracted
            video filenames, one per line. Created if it does not exist.
            Required when ``ignore_prev_vid`` is set.
        factor (int | float): seconds between extracted frames, forwarded to
            ``video2frames``

    Returns:
        None

    Raises:
        TypeError: if ``ignore_prev_vid`` is set but ``prev_vid_file`` is not
            a path (e.g. ``None`` or a bool from swapped arguments).
    """
    prev_vids = set()
    missing_newline = False

    if ignore_prev_vid:
        # Reject non-path values up front: open() would otherwise treat an
        # int/bool as a file descriptor instead of failing.
        if not isinstance(prev_vid_file, (str, bytes, os.PathLike)):
            raise TypeError(
                'prev_vid_file must be a path when ignore_prev_vid is set, got {!r}'.format(prev_vid_file)
            )
        # A missing log simply means nothing has been extracted yet.
        if os.path.exists(prev_vid_file):
            with open(prev_vid_file) as log_in:
                lines = log_in.readlines()
            prev_vids = {line.rstrip('\n') for line in lines}
            missing_newline = bool(lines) and not lines[-1].endswith('\n')

    # Sorted so the processing order is reproducible across runs.
    list_videos = sorted(os.listdir(video_path))
    print('Found {} videos'.format(len(list_videos)))

    vid_count = 0
    log_out = open(prev_vid_file, 'a') if ignore_prev_vid else None
    try:
        if log_out is not None and missing_newline:
            # Keep the first new entry from fusing with an unterminated last line.
            log_out.write('\n')

        for video in list_videos:
            # skip video if extracted before
            if video in prev_vids:
                continue
            # read and extract frames
            vid_count += 1
            print('Extracting Video {}'.format(vid_count))
            video2frames(os.path.join(video_path, video), output_path, factor=factor)
            # record the video so later runs can skip it; flush so progress
            # survives an interruption on a later video
            if log_out is not None:
                log_out.write(video + '\n')
                log_out.flush()
    finally:
        if log_out is not None:
            log_out.close()

    if vid_count > 0:
        print('Extraction Completed!')

    return

In [None]:
# The signature is (video_path, output_path, ignore_prev_vid, prev_vid_file);
# passing the last two by keyword keeps the flag and the log path from being swapped.
multiple_video2frames(
    '../data/videos',
    '../data/extracted_frames',
    ignore_prev_vid=True,
    prev_vid_file='../data/ignore_prev_vid.txt',
)