In [1]:
!pip install tensorflow==1.13.1
!pip install FFmpeg
!pip install ffmpeg-python

In [474]:
import random, time, ffmpeg
import numpy as np
from math import ceil
import threading
import cv2

In [499]:
# Experiment configuration: training schedule, clip geometry, dataset
# locations and data-loader sizing, kept in one dictionary that is passed
# around to the helper functions.
model_settings = {
    'max_steps': 10000,
    'batch_size': 25,
    'frames_per_batch': 16,
    'video_fps': 12,
    'crop_size': 112,
    'num_gpu': 1,
    'channels': 3,
    'checkpoint_dir': './checkpoints',
    # 'model_read_dir' : './models/s1m_mod.model',
    # 'model_save_dir' : './models/C3D_1.model',
    'moving_decay': 0.9999,
    'weight_decay': 0.00005,
    'dropout': 0.5,
    'learning_rate': 1e-4,
    'checkpoints': 100,
    'data_home': '../datasets/UCF-101/',
    'train_test_loc': '../datasets/UCF-ActionRecognitionSplits',
    'train_file_name': '/trainlist01.txt',
    'test_file_name': '/testlist01.txt',
    'num_thread': 8,
    'queue_size': 1000,
    'read_pretrained_model': True,
}

# Derived entries: the effective batch across all GPUs and the
# (frames, height, width, channels) shape of a single clip.
model_settings.update(
    total_batch=model_settings['batch_size'] * model_settings['num_gpu'],
    input_shape=(
        (model_settings['frames_per_batch'],)
        + (model_settings['crop_size'],) * 2
        + (model_settings['channels'],)
    ),
)

# Load the stored mean array and give it the shape of one clip.
mean_dir = '../datasets/PreprocessData/' + 'crop_mean.npy'
model_settings['np_mean'] = np.reshape(np.load(mean_dir), model_settings['input_shape'])

In [500]:
# Read the training split and store a shuffled (paths, labels) pair in the
# settings dictionary.
# NOTE: get_data_dir and shuffle_list are defined in a later cell of this
# notebook, so that cell has to be executed before this one.
train_dir_locations = model_settings['train_test_loc'] + model_settings['train_file_name']
dir_videos, label_clips = get_data_dir(train_dir_locations)
# Pass the list that was just read; the previous code referenced the
# undefined name `dir_clips`, which raises NameError on a fresh kernel.
model_settings['train_list'] = shuffle_list(dir_videos, label_clips)

In [523]:
# Reads train/test filenames from provided splits
# Returns video paths and their labels as two parallel lists
def get_data_dir(filename):
    """Parse a split file whose lines have the form ``<video path> <label>``.

    Blank lines are skipped and surrounding whitespace is ignored. The label
    is taken to be the last whitespace-separated token of the line, so a
    video path that itself contains spaces is kept intact.

    Args:
        filename: Path of the split file to read.

    Returns:
        A tuple ``(dir_videos, label_videos)``: the video paths as strings
        and the matching labels as ints, in file order.

    Raises:
        ValueError: If a non-empty line has no label token, or the label
            cannot be converted to ``int``.
    """
    dir_videos, label_videos = [], []
    with open(filename, 'r') as input_file:
        for line_number, line in enumerate(input_file, start=1):
            entry = line.strip()
            if not entry:
                # Tolerate empty lines (e.g. a trailing blank line).
                continue
            # Split once from the right so only the final token is the label.
            fields = entry.rsplit(None, 1)
            if len(fields) != 2:
                raise ValueError(
                    'Line %d of %s has no label: %r' % (line_number, filename, entry))
            file_name, label = fields
            dir_videos.append(file_name)
            label_videos.append(int(label))
    return dir_videos, label_videos


# Shuffles video paths along with their labels
def shuffle_list(dir_videos, label_videos, seed=None):
    """Return the paths and labels reordered by one shared random permutation.

    The inputs are not modified. The module-level ``random`` generator is
    re-seeded with ``seed`` as part of the call.

    Args:
        dir_videos: Sequence of video paths.
        label_videos: Sequence of labels, indexable at every position that
            ``dir_videos`` has.
        seed: Seed for the shuffle. When ``None`` (the default) the current
            time is used, read at call time. The previous default of
            ``time.time()`` was evaluated only once, when the function was
            defined, so every default call reused the same seed.

    Returns:
        A tuple ``(shuffled_video_dirs, shuffled_labels)`` of two new lists.
    """
    if seed is None:
        seed = time.time()
    video_indices = list(range(len(dir_videos)))
    random.seed(seed)
    random.shuffle(video_indices)
    shuffled_video_dirs = [dir_videos[i] for i in video_indices]
    shuffled_labels = [label_videos[i] for i in video_indices]
    return shuffled_video_dirs, shuffled_labels
    
# Given a video path (relative to data_home) it reads the video,
# extracts the frames, and does the preprocessing operations
def read_clip(dirname, model_settings):
    """Decode one randomly positioned, centre-cropped clip from a video file.

    The file is probed with ffprobe, a random start position is chosen, and
    ffmpeg then resamples the window to ``video_fps``, crops the largest
    centred square, scales it to ``crop_size`` x ``crop_size`` and, with
    probability one half, flips it horizontally.

    Args:
        dirname: Video path relative to ``model_settings['data_home']``.
        model_settings: Mapping providing ``'data_home'``,
            ``'frames_per_batch'``, ``'video_fps'`` and ``'crop_size'``.

    Returns:
        A ``uint8`` array of shape ``(n, crop_size, crop_size, 3)`` with
        ``n <= frames_per_batch``; ``n`` is smaller when ffmpeg yields fewer
        frames than requested. Mean subtraction is not applied here.

    Raises:
        ValueError: If the probed file contains no video stream.
    """
    dirname = model_settings['data_home'] + dirname
    frames_per_batch = model_settings['frames_per_batch']
    video_fps = model_settings['video_fps']
    crop_size = model_settings['crop_size']
    # Drawn before the start frame so the order of random draws is unchanged.
    horizontal_flip = random.random()

    probe = ffmpeg.probe(dirname)
    # Pick the video stream explicitly rather than assuming it is stream 0.
    video_info = next(
        (stream for stream in probe["streams"]
         if stream.get("codec_type") == "video"),
        None)
    if video_info is None:
        raise ValueError('No video stream found in ' + dirname)
    video_width = video_info["width"]
    video_height = video_info["height"]
    video_duration = float(video_info["duration"])
    num_frame = int(video_info["nb_frames"])

    # Number of source frames left over once the sampling window is removed.
    rand_max = int(num_frame - ((num_frame / video_duration) * (frames_per_batch / video_fps)))
    # Clamp the upper bound: for clips shorter than the window rand_max is
    # <= 0 and random.randint would raise on the empty range.
    start_frame = random.randint(0, max(rand_max - 1, 0))
    video_start = (video_duration / num_frame) * start_frame
    # One extra output frame of margin is requested; the result is trimmed below.
    clip_seconds = (frames_per_batch + 1) / video_fps

    # Offsets and side length of the largest centred square.
    x_pos = max(video_width - video_height, 0) // 2
    y_pos = max(video_height - video_width, 0) // 2
    crop_size1 = min(video_height, video_width)

    # Input video, limited to the sampled time window
    ff = ffmpeg.input(dirname, ss=video_start, t=clip_seconds)
    # Resample to the target frame rate
    ff = ffmpeg.filter(ff, 'fps', video_fps)
    # Crop
    ff = ffmpeg.crop(ff, x_pos, y_pos, crop_size1, crop_size1)
    # Subsample
    ff = ffmpeg.filter(ff, 'scale', crop_size, crop_size)
    # Horizontal flip with some probability
    if horizontal_flip > 0.5:
        ff = ffmpeg.hflip(ff)
    # Output raw RGB frames on stdout
    ff = ffmpeg.output(ff, 'pipe:',
                       format='rawvideo',
                       pix_fmt='rgb24')
    # Run process in quiet mode
    out, _ = ffmpeg.run(ff, capture_stdout=True, quiet=True)
    # Extract to numpy array: one row of crop_size*crop_size*3 bytes per frame
    video = np.frombuffer(out, np.uint8). \
        reshape([-1, crop_size, crop_size, 3])

    # Keep at most the configured number of frames (was hard-coded to 16).
    return video[:frames_per_batch]


def read_clips(dirnames, model_settings):
    """Read several clips with ``read_clip`` and return them in input order.

    The decoded clips used to be thrown away; they are now returned so the
    function can be used for loading as well as for timing. Callers that
    ignore the return value are unaffected.

    Args:
        dirnames: Iterable of video paths, each passed to ``read_clip``.
        model_settings: Settings mapping forwarded unchanged to ``read_clip``.

    Returns:
        A list with one ``read_clip`` result per entry of ``dirnames``.
    """
    return [read_clip(dirname, model_settings) for dirname in dirnames]

In [524]:
# Echo the current value of `dirname`. No cell above this point assigns it,
# so this only works when another cell that sets `dirname` has already run.
dirname

'Biking/v_Biking_g10_c06.avi'

In [531]:
# Smoke test: decode a single clip picked by position from the split list.
sample_index = 1500
dirname = dir_videos[sample_index]
print(dirname)
video = read_clip(dirname, model_settings)

Bowling/v_Bowling_g17_c05.avi


In [476]:
# Benchmark: decode a handful of clips on several threads at once and time
# the whole run.
num_threads = model_settings['num_thread']  # replaces the hard-coded 8
clips_per_thread = 5

# Give each thread its own consecutive slice of the split list. The paths are
# passed through unchanged: read_clip prepends model_settings['data_home']
# itself, so prepending it here as well produced a doubled, wrong path.
dirnames_threads = [
    dir_videos[i * clips_per_thread:(i + 1) * clips_per_thread]
    for i in range(num_threads)
]

threads = [
    threading.Thread(target=read_clips, args=(dirnames, model_settings))
    for dirnames in dirnames_threads
]

time0 = time.time()
for thread in threads:
    thread.start()

# Wait for every worker before stopping the clock.
for thread in threads:
    thread.join()
print('Time diff:', time.time() - time0)

Time diff: 0.6578550338745117


In [438]:
# Step-by-step version of the sampling logic in read_clip, kept at cell level
# so the intermediate values can be inspected. The names defined here are
# read by the ffmpeg cell that follows.
index = 150
dirname = model_settings['data_home'] + dir_videos[index]
frames_per_batch = model_settings['frames_per_batch']
video_fps = model_settings['video_fps']
crop_size = model_settings['crop_size']
np_mean = model_settings['np_mean']
horizontal_flip = random.random()

probe = ffmpeg.probe(dirname)
video_info = probe["streams"][0]
video_width = video_info["width"]
video_height = video_info["height"]
video_duration = float(video_info["duration"])
num_frame = int(video_info["nb_frames"])

# Number of source frames left over once the sampling window is removed.
rand_max = int(num_frame - ((num_frame / video_duration) * (frames_per_batch / video_fps)))
# Clamp the upper bound: for clips shorter than the window rand_max is <= 0
# and random.randint would raise on the empty range.
start_frame = random.randint(0, max(rand_max - 1, 0))
end_frame = ceil(start_frame + (num_frame / video_duration) * frames_per_batch / video_fps + 1)
video_start = (video_duration / num_frame) * start_frame
video_end = video_start + ((frames_per_batch+1) / video_fps) 
print(end_frame-start_frame, video_start, video_end)

# Offsets and side length of the largest centred square.
x_pos = max(video_width - video_height, 0) // 2
y_pos = max(video_height - video_width, 0) // 2
crop_size1 = min(video_height, video_width)


35 1.6 3.0166666666666666


In [441]:
# Runs the ffmpeg pipeline using the values computed in the sampling cell
# (dirname, video_start, video_end, x_pos, y_pos, crop_size1, ...).
# Input video, limited to the sampled time window
ff = ffmpeg.input(dirname, ss=video_start, t=video_end-video_start)
# Trim video
#ff = ff.trim(end_frame='50')
# Resample to the target frame rate
ff = ffmpeg.filter(ff, 'fps', video_fps)
# Crop
ff = ffmpeg.crop(ff, x_pos, y_pos, crop_size1, crop_size1)
# Subsample
ff = ffmpeg.filter(ff, 'scale', crop_size, crop_size)
# Horizontal flip with some probability
if horizontal_flip > 0.5:
    ff = ffmpeg.hflip(ff)
# Output raw RGB frames on stdout
ff = ffmpeg.output(ff, 'pipe:',
                   format='rawvideo',
                   pix_fmt='rgb24')
# Run Process in quiet mode
out, _ = ffmpeg.run(ff, capture_stdout=True, quiet=True)
# Extract to numpy array
video = np.frombuffer(out, np.uint8). \
    reshape([-1, crop_size, crop_size, 3])

# Keep the configured number of frames (was hard-coded to 16) and subtract
# the mean clip. The result dtype follows numpy promotion with np_mean
# (presumably floating point -- confirm against crop_mean.npy).
video = video[:frames_per_batch] - np_mean
#print(video.shape)


(16, 112, 112, 3)


In [None]:
# Unexecuted variant of the sampling cell: same probing and start-frame
# selection, without the time-window computation.
index = 150
dirname = model_settings['data_home'] + dir_videos[index]
frames_per_batch = model_settings['frames_per_batch']
video_fps = model_settings['video_fps']
crop_size = model_settings['crop_size']
np_mean = model_settings['np_mean']
horizontal_flip = random.random()

probe = ffmpeg.probe(dirname)
video_info = probe["streams"][0]
video_width = video_info["width"]
video_height = video_info["height"]
video_duration = float(video_info["duration"])
num_frame = int(video_info["nb_frames"])

# Number of source frames left over once the sampling window is removed.
rand_max = int(num_frame - ((num_frame / video_duration) * (frames_per_batch / video_fps)))
# Clamp the upper bound: for clips shorter than the window rand_max is <= 0
# and random.randint would raise on the empty range.
start_frame = random.randint(0, max(rand_max - 1, 0))
end_frame = ceil(start_frame + (num_frame / video_duration) * frames_per_batch / video_fps + 1)
#end_frame = min(end_frame, num_frame)

# Offsets and side length of the largest centred square.
x_pos = max(video_width - video_height, 0) // 2
y_pos = max(video_height - video_width, 0) // 2
crop_size1 = min(video_height, video_width)