In [75]:
import numpy as np
import progressbar 
import random 
import os
import cv2
import h5py
import pandas as pd

from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import LabelBinarizer

import keras

from keras.applications import imagenet_utils
from keras.applications import Xception

from keras.layers import Input
from keras.layers import GlobalAveragePooling2D
from keras.models import Model
from keras.models import model_from_json
from keras.preprocessing.image import img_to_array
from keras.preprocessing.image import ImageDataGenerator
from keras.preprocessing.sequence import pad_sequences

import matplotlib.pyplot as plt
%matplotlib inline

from helpers import HDF5DatasetWriter
from helpers import Utils

In [76]:
input_path = "../input/datasets/UCF-101-hdf5/no-sequence-xception-imagenet-features.hdf5"
output_path = "../input/datasets/UCF-101-hdf5/fixed-sequence-xception-imagenet-features-shuffle.hdf5"

In [77]:
db = h5py.File(input_path)
x = db["images"][:]
y = db["labels"][:]

In [78]:
y[0]

array(['0', 'ApplyEyeMakeupg01'], dtype=object)

In [79]:
data = []
current_sequence_name = []
next_sequence_name = y[0][1]
img_sequence = []
img_count = 0
img_max = 0
img_min = 60
for i, img in enumerate(x):
    sequence_name = y[i][1]
    current_sequence_name = sequence_name
    if current_sequence_name == next_sequence_name:
        img_count = img_count + 1
        if img_count <= 20:
            img_sequence.append(img)
    else:
        next_sequence_name = current_sequence_name
        data.append({'imgs':np.array(img_sequence), 'label':y[i][0]})
        img_sequence = []
        if img_count < img_min:
            img_min = img_count
        if img_count > img_max:
            img_max = img_count
        img_count = 0

In [80]:
np.random.shuffle(data)
data = np.array(data)

In [81]:
data.shape

(1249,)

In [82]:
data[0]['imgs'].shape

(20, 2048)

In [83]:
data[0]['label']

'13'

In [84]:
x, y = np.array([t["imgs"] for t in data]), [t["label"] for t in data]

lb = LabelBinarizer()
y = lb.fit_transform(y)

In [85]:
x.shape

(1249, 20, 2048)

In [86]:
y.shape

(1249, 50)

In [87]:
def export_data(x, y, output_path, batch_size, buffer_size):
    dataset = HDF5DatasetWriter(x.shape, y.shape, output_path, bufSize = buffer_size)
    
    widgets = ["Exporting Data: ", progressbar.Percentage(), " ", progressbar.Bar(), " ", progressbar.ETA()]
    pbar = progressbar.ProgressBar(maxval=x.shape[0], widgets=widgets).start()
    
    for i in np.arange(0, x.shape[0], batch_size):
        batchData = x[i:i + batch_size]
        batchLabels = y[i:i + batch_size]
        dataset.add(batchData, batchLabels)
        pbar.update(i)
    dataset.close()
    pbar.finish()

In [88]:
export_data(x, y, output_path, 32, 1000)

Exporting Data: 100% |##########################################| Time: 0:00:00


In [93]:
db = h5py.File(output_path)

x = db["images"][:]
y = db["labels"][:]

In [94]:
x.shape

(1249, 20, 2048)

In [95]:
y.shape

(1249, 50)

In [96]:
y[0]

array(['0', '0', '0', '0', '0', '1', '0', '0', '0', '0', '0', '0', '0',
       '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0',
       '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0',
       '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0'], dtype=object)