<a href="https://colab.research.google.com/github/teyang-lau/you-only-edit-once/blob/main/scripts/extract_frames.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Install the libraries required to read videos from YouTube. Pinning youtube-dl==2020.12.2 works around the 'dislike count' bug, which otherwise causes the code to terminate with an error.

Source: https://stackoverflow.com/questions/70297028/i-have-a-problem-with-dislike-error-when-creating-an-pafy-new-url-perhaps-its

In [1]:
!pip install -q pafy
!pip install -q youtube-dl==2020.12.2

Import libraries

In [13]:
import os
import uuid
import cv2
import pafy
import youtube_dl

Extract frames from video

In [43]:
def video2frames(video_file, output_path, factor=1, youtube=False):
    """Extract frames from a video file or YouTube video at a fixed time interval.

    Frames are written as JPEG files named ``<uuid4>_<frame index>.jpg`` inside
    ``output_path``, which is created if it does not already exist.

    Args:
        video_file (str): path to the video, or the video URL when ``youtube`` is truthy
        output_path (str): path to output folder for storing extracted frames
        factor (int or float): how many seconds to extract 1 frame.
            1 = extract a frame every sec, 2 = extract a frame every 2 secs
        youtube (bool): if truthy, resolve ``video_file`` through pafy and read its
            preferred mp4 stream instead of opening ``video_file`` directly

    Returns:
        None. If the video cannot be opened, or the frame interval works out to
        less than one frame (e.g. the reported fps or ``factor`` is 0), a message
        is printed and no frames are extracted.
    """
    os.makedirs(output_path, exist_ok=True)

    # Use plain truthiness so that any truthy flag takes the YouTube branch and
    # every other value falls back to opening the file directly.
    if youtube:
        video = pafy.new(video_file)
        best = video.getbest(preftype="mp4")
        source = best.url
    else:
        source = video_file
    vid = cv2.VideoCapture(source)

    try:
        if not vid.isOpened():
            print(f'Could not open video: {video_file}')
            return

        fps = round(vid.get(cv2.CAP_PROP_FPS))
        print(f'fps: {fps}')

        # number of frames between two saved frames; must be at least 1,
        # otherwise the modulo below would divide by zero
        step = round(fps * factor)
        if step < 1:
            print(f'Cannot extract frames: fps ({fps}) * factor ({factor}) is less than 1 frame')
            return

        index = 0
        while True:
            success, img = vid.read()
            if not success:
                break
            index += 1
            # extract every fps-th frame of the video, multiplied by a factor
            # factor of 1 = extract a frame every sec, 2 = extract a frame every 2 secs
            if index % step == 0:
                frame_name = f'{uuid.uuid4()}_{index}.jpg'
                frame_path = os.path.join(output_path, frame_name)
                # imwrite signals failure through its return value, not an exception
                if not cv2.imwrite(frame_path, img):
                    print(f'Failed to write frame {index} to {frame_path}')
    finally:
        # always free the capture, even if reading or writing raised
        vid.release()

Extract frames from a YouTube video

In [None]:
# YouTube video to sample frames from
url = "https://www.youtube.com/watch?v=ptygf0cPtUk"

# Save one frame per second of video into the output folder
video2frames(url, './data/extracted_frames_2', factor=1, youtube=True)

In [48]:
!zip -r /content/extracted_frames.zip /content/data/extracted_frames

  adding: content/data/extracted_frames_2/ (stored 0%)
  adding: content/data/extracted_frames_2/0aa7bff8-8017-40d3-84bb-056e8982496d_1110.jpg (deflated 3%)
  adding: content/data/extracted_frames_2/8d75d93c-3b57-44c0-90c1-be6e384c452f_1230.jpg (deflated 2%)
  adding: content/data/extracted_frames_2/bda8337a-1197-4214-b9f3-182534d07712_2880.jpg (deflated 0%)
  adding: content/data/extracted_frames_2/fb00f503-02e4-4c75-8e14-348fbf5f6a61_480.jpg (deflated 9%)
  adding: content/data/extracted_frames_2/904d39f5-032c-4976-bdbc-3c2d61821988_1200.jpg (deflated 0%)
  adding: content/data/extracted_frames_2/33fb4dbd-993e-4b32-9a1f-13f684958799_1710.jpg (deflated 2%)
  adding: content/data/extracted_frames_2/b93d2999-dccc-47ed-8bec-76217e0232bc_270.jpg (deflated 2%)
  adding: content/data/extracted_frames_2/1119aec1-16ce-4d0d-8e49-93684ef371cc_300.jpg (deflated 3%)
  adding: content/data/extracted_frames_2/ec593f8c-7d3c-4ae2-a824-32a01c04a5a6_2010.jpg (deflated 1%)
  adding: content/data/extract

In [49]:
# File helper from the google.colab package
from google.colab import files
# Download the zip archive created by the zip cell above
files.download("/content/extracted_frames.zip")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Function to extract frames from multiple videos

In [None]:
def multiple_video2frames(video_path, output_path, ignore_prev_vid=False, prev_vid_file=None):
    """Extract frames from every video in a folder.

    Each entry of ``video_path`` is passed to ``video2frames`` with its default
    settings. When ``ignore_prev_vid`` is truthy, filenames listed in
    ``prev_vid_file`` are skipped, and each newly processed filename is appended
    to that file right after its extraction finishes.

    Args:
        video_path (str): path to folder containing all videos
        output_path (str): path to output folder for storing extracted frames
        ignore_prev_vid (bool): whether to ignore previous videos that have already been extracted
        prev_vid_file (str): path to text file containing previously extracted video
            filenames, one per line. Required when ``ignore_prev_vid`` is truthy;
            it is created on first use if it does not exist.

    Raises:
        TypeError: if ``ignore_prev_vid`` is truthy but ``prev_vid_file`` is not a path.
    """
    prev_vids = set()
    if ignore_prev_vid:
        # open() would accept an int/bool as a raw file descriptor, so reject
        # anything that is not a real path before touching the filesystem
        if not isinstance(prev_vid_file, (str, bytes, os.PathLike)):
            raise TypeError(
                'prev_vid_file must be a path when ignore_prev_vid is set, got {!r}'.format(prev_vid_file)
            )
        # a missing tracking file just means nothing has been extracted yet
        if os.path.exists(prev_vid_file):
            with open(prev_vid_file) as tracking_file:
                prev_vids = {line.rstrip('\n') for line in tracking_file}

    list_videos = os.listdir(video_path)
    print('Found {} videos'.format(len(list_videos)))

    vid_count = 0
    for video in list_videos:
        # skip video if extracted before (prev_vids stays empty when not ignoring)
        if video in prev_vids:
            continue
        # read and extract frame
        vid_count += 1
        print('Extracting Video {}'.format(vid_count))
        video2frames(os.path.join(video_path, video), output_path)
        # record the video name immediately, and close the file each time, so
        # progress is saved on disk even if a later video fails
        if ignore_prev_vid:
            with open(prev_vid_file, 'a') as tracking_file:
                tracking_file.write(video + '\n')

    if vid_count > 0:
        print('Extraction Completed!')

In [None]:
# Keyword arguments keep the flag and the tracking-file path from being swapped
multiple_video2frames('../data/videos',
                      '../data/extracted_frames',
                      ignore_prev_vid=True,
                      prev_vid_file='../data/ignore_prev_vid.txt')