In [5]:
# boilerplate imports, trim later
import math
from random import sample
from random import randint
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import matplotlib.pyplot as plt
from livelossplot import PlotLosses
import cv2
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Dataset Loading

In [16]:
def load_video(filepath, start_frame=0, end_frame=-1, scale_percent=50):
    """Load a contiguous range of frames from a video as downscaled RGB images.

    Args:
        filepath: Path of the video file, passed to ``cv2.VideoCapture``.
        start_frame: Index of the first frame to load (0-based). Negative
            values are treated as 0; values past ``end_frame`` yield an
            empty result.
        end_frame: Index one past the last frame to load. Any negative value
            (``-1`` by default) or a value beyond the reported frame count
            means "until the end of the video".
        scale_percent: Size of the returned frames as a percentage of the
            source resolution (the default 50 halves width and height).

    Returns:
        ``uint8`` array of shape ``(n, height, width, 3)`` in RGB channel
        order. ``n`` is normally ``end_frame - start_frame`` but is smaller
        when decoding stops early, so every returned row holds real data.
    """
    video = cv2.VideoCapture(filepath)
    try:
        # Frame size (to size the array) and number of frames.
        frame_height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
        frame_width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
        frame_count = max(int(video.get(cv2.CAP_PROP_FRAME_COUNT)), 0)

        # https://www.tutorialkart.com/opencv/python/opencv-python-resize-image/ resizing image
        width = int(frame_width * scale_percent / 100)
        height = int(frame_height * scale_percent / 100)
        dim = (width, height)  # cv2.resize takes (width, height)

        # Clamp the requested range to what the video reports.
        if end_frame < 0 or end_frame > frame_count:
            end_frame = frame_count
        start_frame = min(max(start_frame, 0), end_frame)
        total = end_frame - start_frame

        frames = np.empty((total, height, width, 3), np.dtype('uint8'))

        # Reading starts at frame 0 unless the capture is positioned first.
        if start_frame > 0:
            video.set(cv2.CAP_PROP_POS_FRAMES, start_frame)

        loaded = 0
        for _ in range(total):
            success, img = video.read()
            if not success:
                break
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            # Reduce image size to help model memory. Index by the number of
            # frames stored so far, not by the absolute frame number.
            frames[loaded] = cv2.resize(img, dim, interpolation=cv2.INTER_AREA)
            loaded += 1
    finally:
        video.release()

    # Drop rows that were never written (np.empty leaves them uninitialised).
    return frames[:loaded]

def load_video_random_frames(filepath, number_of_frames, scale_percent=50):
    """Load distinct, randomly chosen frames from a video as downscaled RGB images.

    Args:
        filepath: Path of the video file, passed to ``cv2.VideoCapture``.
        number_of_frames: How many distinct frames to sample.
        scale_percent: Size of the returned frames as a percentage of the
            source resolution (the default 50 halves width and height).

    Returns:
        ``uint8`` array of shape ``(n, height, width, 3)`` in RGB channel
        order, with frames in the random order they were drawn. ``n`` equals
        ``number_of_frames`` unless some frames could not be decoded; those
        are skipped so every returned row holds real data.

    Raises:
        ValueError: If ``number_of_frames`` is negative or larger than the
            frame count reported for the video.
    """
    video = cv2.VideoCapture(filepath)
    try:
        # Frame size (to size the array) and number of frames.
        frame_height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
        frame_width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
        frame_count = max(int(video.get(cv2.CAP_PROP_FRAME_COUNT)), 0)

        # Asking for more distinct frames than exist can never succeed; fail
        # loudly instead of searching forever for an unused index.
        if number_of_frames < 0 or number_of_frames > frame_count:
            raise ValueError(
                f"cannot sample {number_of_frames} distinct frames from a "
                f"video reporting {frame_count} frames"
            )

        # https://www.tutorialkart.com/opencv/python/opencv-python-resize-image/ resizing image
        width = int(frame_width * scale_percent / 100)
        height = int(frame_height * scale_percent / 100)
        dim = (width, height)  # cv2.resize takes (width, height)

        frames = np.empty((number_of_frames, height, width, 3), np.dtype('uint8'))

        loaded = 0
        # sample() draws without replacement, so indices are unique.
        for index in sample(range(frame_count), number_of_frames):
            video.set(cv2.CAP_PROP_POS_FRAMES, index)
            success, img = video.read()
            if not success:
                # Skip frames that fail to decode; the others are still usable.
                continue
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            # Reduce image size to help model memory.
            frames[loaded] = cv2.resize(img, dim, interpolation=cv2.INTER_AREA)
            loaded += 1
    finally:
        video.release()

    # Drop rows that were never written (np.empty leaves them uninitialised).
    return frames[:loaded]

In [17]:
# Relative paths to the source videos (one under datasets/game, three under
# datasets/movie).
MAFIA_FILEPATH = "datasets/game/MafiaVideogame.mp4"
GODFATHER_FILEPATH = "datasets/movie/TheGodfather.mp4"
IRISHMAN_FILEPATH = "datasets/movie/TheIrishman.mp4"
SOPRANOS_FILEPATH = "datasets/movie/TheSopranos.mp4"

# Sample 10 random frames from the Mafia video. The calls for the other three
# videos are disabled; as written they omit the required `number_of_frames`
# argument and would raise a TypeError if simply uncommented.
MAFIA_FRAMES = load_video_random_frames(MAFIA_FILEPATH,10)
#GODFATHER_FRAMES = load_video_random_frames(GODFATHER_FILEPATH)
#IRISHMAN_FRAMES = load_video_random_frames(IRISHMAN_FILEPATH)
#SOPRANOS_FRAMES = load_video_random_frames(SOPRANOS_FILEPATH)

# NOTE(review): the block below is a bare string literal used to disable the
# OpenCV preview loop, not a real comment. If it is the last expression of its
# notebook cell it will presumably be echoed as the cell's output.
"""
# view frames
count = 0
for frame in MAFIA_FRAMES:
    cv2.namedWindow(f'frame {count}')
    cv2.imshow(f'frame {count}', frame)
    cv2.waitKey(0)
    cv2.destroyWindow(f'frame {count}')
    count+=1
"""