In [None]:
import os
import glob

# Root of the per-video tree: frame directories are built further down as
# HOME_PATH/<video name>/frames_6fps/.
HOME_PATH = '/home/shubhams/Hercules/kidstube-data/video_splits/'

## Collect the frame directory of every video

In [None]:
# Directory holding the source videos; the part of each entry's name before
# the first '.' is used as the video name.
VIDEOS_PATH = '/home/shubhams/Hercules/kidstube-data/videos/'

# os.listdir returns entries in arbitrary order, may list several files that
# share one stem, and may list dot-files whose stem is empty. Collect the
# unique, non-empty stems and sort them so every run visits the same
# directories in the same order and no video is processed twice.
video_names = sorted(
    set(entry.split('.')[0] for entry in os.listdir(VIDEOS_PATH)) - {''}
)

# One frames directory per video: HOME_PATH/<video name>/frames_6fps/
frame_directories = [
    os.path.join(HOME_PATH, video_name, 'frames_6fps/')
    for video_name in video_names
]

In [None]:
from PIL import Image
import numpy as np

def read_image(img_path):
    """Load an image file into a NumPy array.

    Parameters
    ----------
    img_path : str
        Path of the image to read.

    Returns
    -------
    numpy.ndarray
        The result of ``np.asarray`` on the opened PIL image, or a
        one-element zero array of shape ``(1,)`` when ``img_path`` is not an
        existing regular file.
    """
    if not os.path.isfile(img_path):
        # Placeholder for a missing frame; note that it is 1-D, not
        # image-shaped.
        return np.zeros((1,))
    return np.asarray(Image.open(img_path))

In [None]:
def chunks(l, n):
    """Lazily split ``l`` into consecutive slices of length ``n``.

    The final slice is shorter when ``len(l)`` is not a multiple of ``n``.
    ``l`` must support ``len`` and slicing; each yielded piece has whatever
    type slicing ``l`` produces.
    """
    size = len(l)
    for lo in range(0, size, n):
        hi = lo + n
        yield l[lo:hi]

In [None]:
import re

def natural_sort(l):
    """Return the strings in ``l`` sorted in natural (human) order.

    Runs of ASCII digits compare by numeric value and everything else
    compares case-insensitively, so ``'frame2.jpg'`` sorts before
    ``'frame10.jpg'``.

    Parameters
    ----------
    l : iterable of str
        Strings to sort.

    Returns
    -------
    list of str
        A new sorted list; ``l`` itself is not modified.
    """
    def alphanum_key(key):
        # re.split with a capturing group alternates text / digit-run pieces,
        # so exactly the odd-indexed pieces are runs of ASCII digits.
        # Converting by position (instead of str.isdigit) keeps non-ASCII
        # digit characters such as superscripts as text: int() cannot parse
        # some of them, and converting others would mix int and str at the
        # same key position.
        pieces = re.split('([0-9]+)', key)
        return [int(piece) if idx % 2 else piece.lower()
                for idx, piece in enumerate(pieces)]

    return sorted(l, key=alphanum_key)

## Initialise VGG16 Model

In [None]:
import torch
import torchvision.models as models
import torch.nn as nn

# Run on the GPU whenever one is visible to torch.
use_cuda = torch.cuda.is_available()

# Keep only the `features` stack of the pretrained VGG16 and finish with a
# global max-pool, so each input image yields one value per channel.
vgg16 = models.vgg16(pretrained=True)
layers = list(vgg16.features.children()) + [nn.AdaptiveMaxPool2d(1)]
modified_vgg16 = nn.Sequential(*layers)

# The network is used purely as a fixed feature extractor: freeze every
# weight and switch to inference mode.
for param in modified_vgg16.parameters():
    param.requires_grad = False
modified_vgg16.eval()

if use_cuda:
    modified_vgg16.cuda()

print(modified_vgg16)

In [None]:
from torch.autograd import Variable
import torchvision.transforms as transforms

# Per-channel statistics applied after ToTensor.
# NOTE(review): these look like the usual ImageNet mean/std - confirm.
CHANNEL_MEAN = [0.485, 0.456, 0.406]
CHANNEL_STD = [0.229, 0.224, 0.225]

normalize = transforms.Normalize(mean=CHANNEL_MEAN, std=CHANNEL_STD)

# Full preprocessing pipeline: array -> tensor -> channel-wise normalisation.
normalizer = transforms.Compose([
    transforms.ToTensor(),
    normalize,
])

def get_vgg_features_from_frame(frame_paths, frames_per_chunk=6):
    """Compute one pooled VGG16 feature vector per frame for a group of frames.

    Every frame is loaded with ``read_image``, preprocessed with
    ``normalizer``, and the resulting batch is run through
    ``modified_vgg16`` in a single forward pass.

    Parameters
    ----------
    frame_paths : sequence of str
        Paths of the frames to process. Must be non-empty, and all frames
        must produce tensors of the same size so they can be batched.
    frames_per_chunk : int, optional
        Minimum number of rows in the result. When fewer frames are given,
        all-zero rows are appended until this many rows exist. Defaults to 6,
        the value that was previously hard-coded.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(max(len(frame_paths), frames_per_chunk), channels)``
        where ``channels`` is the channel dimension of the network output.
    """
    tensor_list = [normalizer(read_image(frame_path)) for frame_path in frame_paths]
    # Stack along a new leading axis to form the (batch, C, H, W) input.
    frame_tensors = Variable(torch.stack(tensor_list, 0))
    if use_cuda:
        frame_tensors = frame_tensors.cuda()

    frame_features = modified_vgg16(frame_tensors)
    # Collapse the trailing singleton spatial dimensions left by the pooling.
    frame_features = frame_features.view(frame_features.shape[0], frame_features.shape[1])
    np_frame_features = frame_features.cpu().data.numpy()

    # Zero-pad short groups at the end in one step (dtype is preserved).
    missing = frames_per_chunk - np_frame_features.shape[0]
    if missing > 0:
        np_frame_features = np.pad(
            np_frame_features, ((0, missing), (0, 0)), mode='constant')
    return np_frame_features

## Save data as HDF5 Datasets

In [None]:
import h5py

def save_checkpoint(frame_data, video_ids, path='/home/shubhams/Hercules/kidstube-data/processed/aggregate_1_sec/frames_features.hdf5'):
    """Append feature chunks and their video ids to an HDF5 file.

    The file holds two resizable, gzip-compressed datasets: ``frames`` with
    rows of shape ``(6, 512)`` and ``vids`` with variable-length strings.
    Missing datasets (and the parent directory) are created on first use; new
    rows are appended after the existing ones. The resulting shape of
    ``frames`` is printed.

    Parameters
    ----------
    frame_data : array-like
        New feature rows; converted with ``np.array`` and expected to have
        shape ``(n, 6, 512)``. May be empty.
    video_ids : array-like of str
        Video id for each new row. May be empty.
    path : str, optional
        Location of the HDF5 file.
    """
    out_dir = os.path.dirname(path)
    # dirname is '' for a bare filename, and os.makedirs('') would raise.
    if out_dir and not os.path.exists(out_dir):
        os.makedirs(out_dir)

    frame_data = np.array(frame_data)
    video_ids = np.array(video_ids)
    new_frame_shape = frame_data.shape[0]
    new_vids_shape = video_ids.shape[0]

    with h5py.File(path, 'a', libver='latest') as f:
        # Look the datasets up independently so a file that contains only one
        # of them is completed instead of failing on a duplicate create.
        created = False
        if 'frames' in f:
            frame_dset = f['frames']
        else:
            frame_dset = f.create_dataset('frames', shape=(0, 6, 512), maxshape=(None, 6, 512), compression='gzip')
            created = True
        if 'vids' in f:
            vids_dset = f['vids']
        else:
            vids_dset = f.create_dataset('vids', shape=(0, ), maxshape=(None, ), compression='gzip', dtype=h5py.special_dtype(vlen=str))
            created = True
        if created:
            # Objects cannot be created once SWMR mode is on, so enable it
            # only after both datasets exist.
            f.swmr_mode = True

        # Write from the previous end of each dataset. A negative slice such
        # as dset[-n:] would select the whole dataset when n == 0, so empty
        # input is skipped and explicit offsets are used.
        if new_frame_shape:
            frame_start = frame_dset.shape[0]
            frame_dset.resize(frame_start + new_frame_shape, axis=0)
            frame_dset[frame_start:] = frame_data
        if new_vids_shape:
            vids_start = vids_dset.shape[0]
            vids_dset.resize(vids_start + new_vids_shape, axis=0)
            vids_dset[vids_start:] = video_ids

        print(frame_dset.shape)

## Extract VGG16 features for every video, one second (6 frames) at a time

In [None]:
# Walk every frames directory, group its frames into chunks of 6 and extract
# features for each chunk that has more than 3 frames.
for directory in frame_directories:
    features, vids = [], []
    # Directories end in '<video name>/frames_6fps/', so the third component
    # from the end (after the empty trailing piece) is the video name.
    video_id = directory.split(os.sep)[-3]
    sorted_frames = natural_sort(glob.glob(directory + '*.jpg'))
    ctr = 0
    for second_frames in chunks(sorted_frames, 6):
        if len(second_frames) <= 3:
            # Too few frames in this chunk; skip it.
            continue
        features.append(get_vgg_features_from_frame(second_frames))
        vids.append(video_id)
        print(ctr)
        ctr += 1
    # NOTE(review): persisting is currently disabled, so the features
    # computed for each directory are discarded.
#     save_checkpoint(features, vids)

# FIN