In [22]:
import numpy as np
import progressbar 
import random 
import os
import cv2
import h5py
import pandas as pd

from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import LabelBinarizer

import keras

from keras.applications import imagenet_utils
from keras.applications import Xception

from keras.layers import Input
from keras.layers import GlobalAveragePooling2D
from keras.models import Model
from keras.models import model_from_json
from keras.preprocessing.image import img_to_array
from keras.preprocessing.image import ImageDataGenerator
from keras.preprocessing.sequence import pad_sequences

import matplotlib.pyplot as plt
%matplotlib inline

from helpers import HDF5DatasetWriter
from helpers import Utils

In [23]:
import tensorflow as tf

# Ask TensorFlow to grow its GPU memory allocation on demand
# (allow_growth) and open a session with that configuration.
gpu_options = tf.GPUOptions(allow_growth=True)
config = tf.ConfigProto(gpu_options=gpu_options)
session = tf.Session(config=config)

In [24]:
# Directory that holds one sub-folder of frames per action class.
root = 'datasets/UCF-101-frames-resize/'

# Hand-picked subset of action classes (only referenced by the optional,
# currently commented-out, category filter further down).
include = [
    'ApplyEyeMakeup',
    'ApplyLipstick',
    'Archery',
    'BabyCrawling',
    'BalanceBeam',
    'BandMarching',
    'BaseballPitch',
    'Basketball',
    'BasketballDunk',
    'BenchPress',
    'Biking',
    'Billiards',
]

In [25]:
# HDF5 file that receives the extracted features and their labels.
output_path = (
    "../input/datasets/UCF-101-hdf5/no-sequence-xception-imagenet-features.hdf5"
)

In [6]:
# Upper bound on how many class folders are loaded in this run.
MAX_CATEGORIES = 70

# Only the immediate sub-folders of `root` are classes. Sorting makes the
# class index assigned later (the position inside `categories`) reproducible:
# directory listing order is otherwise filesystem-dependent.
full_categories = sorted(
    os.path.join(root, entry)
    for entry in os.listdir(root)
    if os.path.isdir(os.path.join(root, entry))
)
# To restrict the run to the hand-picked classes in `include` instead:
#categories = [c for c in full_categories if c in [os.path.join(root, e) for e in include]]
categories = full_categories[:MAX_CATEGORIES]
categories

['datasets/UCF-101-frames-resize/ApplyEyeMakeup',
 'datasets/UCF-101-frames-resize/ApplyLipstick',
 'datasets/UCF-101-frames-resize/Archery',
 'datasets/UCF-101-frames-resize/BabyCrawling',
 'datasets/UCF-101-frames-resize/BalanceBeam',
 'datasets/UCF-101-frames-resize/BandMarching',
 'datasets/UCF-101-frames-resize/BaseballPitch',
 'datasets/UCF-101-frames-resize/Basketball',
 'datasets/UCF-101-frames-resize/BasketballDunk',
 'datasets/UCF-101-frames-resize/BenchPress',
 'datasets/UCF-101-frames-resize/Biking',
 'datasets/UCF-101-frames-resize/Billiards',
 'datasets/UCF-101-frames-resize/BlowDryHair',
 'datasets/UCF-101-frames-resize/BlowingCandles',
 'datasets/UCF-101-frames-resize/BodyWeightSquats',
 'datasets/UCF-101-frames-resize/Bowling',
 'datasets/UCF-101-frames-resize/BoxingPunchingBag',
 'datasets/UCF-101-frames-resize/BoxingSpeedBag',
 'datasets/UCF-101-frames-resize/BreastStroke',
 'datasets/UCF-101-frames-resize/BrushingTeeth',
 'datasets/UCF-101-frames-resize/CleanAndJerk

In [7]:
def get_image(img_path):
    """Read an image file from disk with OpenCV.

    Parameters
    ----------
    img_path : str
        Path of the image file to read.

    Returns
    -------
    The decoded image exactly as returned by ``cv2.imread``.

    Raises
    ------
    IOError
        If ``cv2.imread`` returns ``None`` (it does not raise by itself when
        a file is missing or cannot be decoded).
    """
    # NOTE(review): OpenCV's imread yields BGR channel order -- confirm that
    # the downstream preprocessing expects that ordering.
    img = cv2.imread(img_path)
    if img is None:
        # Fail here, with the offending path, instead of letting a None
        # "image" surface later as an obscure shape/dtype error.
        raise IOError("Could not read image: {!r}".format(img_path))
    return img

In [8]:
# File extensions (lower-case) that count as frames.
IMAGE_EXTENSIONS = ('.jpg', '.png', '.jpeg')

# One record per frame: {'img': pixel array, 'label': [class index, video name]}.
data = []
for c, category in enumerate(categories):
    # Sorted so the frame order (and therefore the row order of x / y) is
    # the same on every run.
    images = sorted(
        os.path.join(dp, f)
        for dp, dn, filenames in os.walk(category)
        for f in filenames
        if os.path.splitext(f)[1].lower() in IMAGE_EXTENSIONS
    )
    for img_path in images:
        # Frame files are named like v_ApplyEyeMakeup_g06_c04_3.jpg. Parse the
        # file name itself rather than a fixed position in the path, so the
        # result does not depend on how many components `root` has.
        parts = os.path.basename(img_path).split('_')
        name = parts[1] + parts[2]          # e.g. 'ApplyEyeMakeupg06'
        img = get_image(img_path)
        # np.array([c, name]) mixes an int and a str, so both label fields
        # end up stored as strings.
        data.append({'img': np.array(img), 'label': np.array([c, name])})

In [9]:
# Collect all frames into one array and all [class index, name] labels into another.
x = np.array([record["img"] for record in data])
y = np.array([record["label"] for record in data])

In [10]:
# Peek at the first label row: [class index, video name], both held as strings.
y[0]

array(['0', 'ApplyEyeMakeupg01'], 
      dtype='<U21')

In [11]:
# Size the network input from the first loaded frame.
first_frame = data[0]['img']
input_tensor = Input(shape=first_frame.shape)

In [12]:
# Xception base with ImageNet weights and without its top (include_top=False),
# wired to the input tensor defined above.
cnn_model = Xception(
    include_top=False,
    weights='imagenet',
    input_tensor=input_tensor,
)

# The base is only used for inference here: mark every layer non-trainable.
for layer in cnn_model.layers:
    layer.trainable = False

In [13]:
# Append global average pooling to the base output and wrap the result in a
# model that maps an input frame to its pooled feature vector.
pooling_layer = GlobalAveragePooling2D()
cnn_model_output = cnn_model.output
cnn_bottleneck = pooling_layer(cnn_model_output)
cnn_bottleneck_model = Model(
    inputs=cnn_model.input,
    outputs=cnn_bottleneck,
)

In [14]:
#cnn_bottleneck_model.summary()

In [15]:
# Smoke test: push a single frame through the bottleneck model and check the
# shape of what comes out.
sample_frame_path = "datasets/UCF-101-frames-resize/ApplyEyeMakeup/v_ApplyEyeMakeup_g06_c04_3.jpg"
image = np.expand_dims(img_to_array(get_image(sample_frame_path)), axis=0)
features = cnn_bottleneck_model.predict(image)
features.shape

(1, 2048)

In [16]:
def _flat_feature_dim(model, default = 2048):
    """Return the flattened per-sample output size of ``model``, or ``default`` if it cannot be determined."""
    shape = getattr(model, "output_shape", None)
    dims = shape[1:] if isinstance(shape, tuple) else ()
    if dims and all(isinstance(d, (int, np.integer)) for d in dims):
        return int(np.prod(dims))
    return default


def extract_feature(model, x, y, output_path, batch_size = 32, buffer_size = 1000,
                    preprocess_fn = None):
    """Run ``model`` over ``x`` in batches and store outputs and labels via ``HDF5DatasetWriter``.

    Parameters
    ----------
    model : object
        Must provide ``predict(batch, batch_size=...)``. Each sample's output
        is flattened to one row before it is stored.
    x : array-like
        Images, indexed along the first axis (``x.shape[0]`` samples).
    y : array-like
        Labels aligned with ``x``; ``y.shape`` is handed to the writer as the
        label dimensions.
    output_path : str
        Destination passed to ``HDF5DatasetWriter``.
    batch_size : int
        Number of samples sent to ``model.predict`` at a time.
    buffer_size : int
        Passed to ``HDF5DatasetWriter`` as ``bufSize``.
    preprocess_fn : callable, optional
        Applied to each float image batch before prediction. Defaults to
        ``imagenet_utils.preprocess_input``, i.e. the previous behaviour.
        NOTE(review): Keras documents a model-specific ``preprocess_input``
        for Xception, and frames read with OpenCV are BGR -- confirm which
        preprocessing this pipeline should use and pass it here.

    Returns
    -------
    None. The results are written through the dataset writer.
    """
    if preprocess_fn is None:
        preprocess_fn = imagenet_utils.preprocess_input

    num_samples = x.shape[0]
    # Row width follows the model's output size instead of a fixed 2048.
    dataset = HDF5DatasetWriter((num_samples, _flat_feature_dim(model)), y.shape,
                                output_path, bufSize = buffer_size)

    widgets = ["Extracting Features: ", progressbar.Percentage(), " ", progressbar.Bar(), " ", progressbar.ETA()]
    pbar = progressbar.ProgressBar(maxval=num_samples, widgets=widgets).start()

    try:
        for i in np.arange(0, num_samples, batch_size):
            batchData = x[i:i + batch_size]
            batchLabels = y[i:i + batch_size]
            # np.stack builds a fresh batch array, so preprocessing can never
            # modify the caller's images in place.
            batchImages = np.stack([img_to_array(image) for image in batchData])
            batchImages = preprocess_fn(batchImages)
            features = model.predict(batchImages, batch_size=batch_size)
            # One flat row per sample (a no-op for already 2-D outputs).
            features = features.reshape((features.shape[0], -1))
            dataset.add(features, batchLabels)
            # Report completed samples, capped at the total.
            pbar.update(min(i + batch_size, num_samples))
    finally:
        # Close the writer even when a batch fails part-way through.
        dataset.close()
    pbar.finish()

In [17]:
# Run the extraction over every loaded frame and write the result to `output_path`.
extract_feature(
    model=cnn_bottleneck_model,
    x=x,
    y=y,
    output_path=output_path,
    batch_size=32,
    buffer_size=1000,
)

Extracting Features: 100% |#####################################| Time: 0:01:10


In [18]:
# Re-open the feature file explicitly read-only (the default mode differs
# between h5py versions) and load both datasets fully into memory; the file
# is closed again as soon as the block exits.
# NOTE: from here on `x` / `y` hold the stored features and labels, no longer
# the raw frames built earlier in the notebook.
with h5py.File(output_path, "r") as db:
    x = db["images"][:]
    y = db["labels"][:]

In [19]:
# Dimensions of the array read back from the "images" dataset.
x.shape

(66376, 2048)

In [20]:
# Dimensions of the array read back from the "labels" dataset.
y.shape

(66376, 2)

In [21]:
# First label row as read back from the HDF5 file.
y[0]

array(['0', 'ApplyEyeMakeupg01'], dtype=object)