In [1]:
import sys
import os
from os import path
import time
from time import sleep

import tensorflow as tf
import numpy as np
# import cv2
# import matplotlib.pyplot as plt
import pylab as plt

from tqdm import tqdm, trange, tqdm_notebook, tnrange
import glob
import time
import pandas as pd
import h5py
import pickle as pkl
import subprocess as sp

# Process-wide environment for TensorFlow/CUDA: enumerate GPUs by PCI bus id,
# expose only device 0 to this kernel, and set the TF C++ log level to 3.
os.environ.update({
    "CUDA_DEVICE_ORDER": "PCI_BUS_ID",
    "CUDA_VISIBLE_DEVICES": '0',
    'TF_CPP_MIN_LOG_LEVEL': '3',
})

## Config

In [2]:
# Put the TF-Slim checkout at the front of sys.path so that its `nets` and
# `preprocessing` packages can be imported below.
slim_dir = "/mnt/workspace/models/research/slim/"
sys.path.insert(0, slim_dir)

from nets import vgg
from preprocessing import vgg_preprocessing

# Side length of the square input that the slim VGG-16 definition advertises.
image_size = vgg.vgg_16.default_image_size

# Locations of the raw videos, the per-class label entries, the pretrained
# checkpoint and the directory that receives the extracted features.
dataset_dir = "/mnt/workspace/datasets/UCF-101/"
label_dir = "/mnt/workspace/datasets/ucf101/ucf24/labels/"
checkpoints_dir = "/mnt/workspace/models/checkpoints/"
extracted_features_dir = "/mnt/workspace/ebrnn-tf/extracted_features/"

# Number of frames pushed through the network per session run.
batch_size = 16

# Per-channel means subtracted from the resized frames, in R, G, B order.
# Frames are decoded by ffmpeg with `-pix_fmt rgb24` and the graph subtracts
# means[j] from channel j, so the list must follow the RGB channel order (the
# same order TF-Slim's vgg_preprocessing uses). The previous B, G, R ordering
# subtracted the blue mean from the red channel and vice versa.
means = [123.68, 116.779, 103.939]

## Define graph 

In [3]:
slim = tf.contrib.slim

with tf.Graph().as_default():
    # Fixed-size batch of raw uint8 frames (240x320, 3 channels).
    input_batch = tf.placeholder(dtype=tf.uint8, shape=(batch_size, 240, 320, 3))
    print(input_batch)

    # Resize to the network's input size, then subtract the configured mean
    # from each of the three channels and stitch the channels back together.
    resized_images = tf.image.resize_images(input_batch, [image_size, image_size])
    normalized_images = tf.concat(
        axis=3,
        values=[channel - channel_mean
                for channel, channel_mean
                in zip(tf.split(axis=3, num_or_size_splits=3, value=resized_images), means)])

    # Build VGG-16 in inference mode and pick out the tensors of interest.
    with slim.arg_scope(vgg.vgg_arg_scope()):
        outputs, end_points = vgg.vgg_16(normalized_images, num_classes=1000, is_training=False)
        final_conv = end_points['vgg_16/conv5/conv5_3']
        fc7 = end_points['vgg_16/fc7']
        probablities = tf.nn.softmax(outputs)
        for tensor in (probablities, fc7, final_conv):
            print(tensor)

    # Restore the pretrained weights into a fresh session; the session is kept
    # open because later cells run the graph through it.
    checkpoint_path = os.path.join(checkpoints_dir, 'vgg_16.ckpt')
    init_fn = slim.assign_from_checkpoint_fn(checkpoint_path, slim.get_model_variables('vgg_16'))
    sess = tf.Session()
    init_fn(sess)

Tensor("Placeholder:0", shape=(16, 240, 320, 3), dtype=uint8)
Tensor("Softmax:0", shape=(16, 1000), dtype=float32)
Tensor("vgg_16/fc7/Relu:0", shape=(16, 1, 1, 4096), dtype=float32)
Tensor("vgg_16/conv5/conv5_3/Relu:0", shape=(16, 14, 14, 512), dtype=float32)
INFO:tensorflow:Restoring parameters from /mnt/workspace/models/checkpoints/vgg_16.ckpt


## Read data and extract features

### Some Functions

In [4]:
def read_video(path, height=240, width=320):
    """Decode a video file into an array of raw RGB frames via ffmpeg.

    ffmpeg is asked to write the video to stdout as a headerless stream of
    `rgb24` frames, which is then cut into fixed-size frames. The command
    does not rescale, so `height` and `width` must match the resolution
    ffmpeg emits for this video; otherwise frame boundaries are misaligned.

    Args:
        path: Path of the video file to decode.
        height: Frame height in pixels (default 240).
        width: Frame width in pixels (default 320).

    Returns:
        A uint8 numpy array of shape (num_frames, height, width, 3). A video
        that yields no complete frame gives an array of shape
        (0, height, width, 3).
    """
    command = ['ffmpeg',
               '-i', path,
               '-f', 'image2pipe',
               '-pix_fmt', 'rgb24',
               '-vcodec', 'rawvideo', '-']
    frame_bytes = height * width * 3
    pipe = sp.Popen(command, stdout=sp.PIPE, bufsize=10**8)
    video_frames = []
    try:
        while True:
            # Read exactly one frame worth of bytes; a short read means the
            # stream has ended (any trailing partial frame is discarded).
            raw_image = pipe.stdout.read(frame_bytes)
            if len(raw_image) != frame_bytes:
                break
            image = np.frombuffer(raw_image, dtype='uint8')
            video_frames.append(image.reshape((height, width, 3)))
    finally:
        # Always release the pipe and reap the ffmpeg process so repeated
        # calls do not accumulate open file descriptors or zombie processes.
        pipe.stdout.close()
        pipe.wait()

    if not video_frames:
        return np.zeros((0, height, width, 3), dtype=np.uint8)
    # np.asarray copies the read-only frombuffer views into one writable array.
    return np.asarray(video_frames, dtype=np.uint8)

In [None]:
def extract_features(video_path):
    """Run every frame of a video through VGG-16 and store the fc7 features.

    Frames are fed to the session in chunks of `batch_size`. The input
    placeholder has a fixed leading dimension, so a final short chunk is
    zero-padded up to `batch_size` and the outputs for the padding rows are
    discarded. The features are written to
    `<extracted_features_dir>/<video file name without extension>.h5`
    under the dataset name "fc7" and are also returned.

    Args:
        video_path: Path of the video file to process.

    Returns:
        A float32 numpy array of shape (num_frames, feature_dim), where
        feature_dim is the flattened size of one fc7 output.
    """
    video_frames = read_video(video_path)
    file_length = np.shape(video_frames)[0]

    batch_features = []
    for start, end in zip(range(0, file_length, batch_size),
                          range(batch_size, file_length + batch_size, batch_size)):
        batch = video_frames[start:end]
        num_valid = batch.shape[0]
        if num_valid < batch_size:
            # Pad the last chunk so it matches the placeholder's fixed shape.
            padding = np.zeros((batch_size - num_valid,) + batch.shape[1:], dtype=batch.dtype)
            batch = np.concatenate([batch, padding], axis=0)
        feats = sess.run(fc7, feed_dict={input_batch: batch})
        # Flatten the per-frame output and drop the rows produced by padding.
        batch_features.append(feats.reshape((batch_size, -1))[:num_valid])

    if batch_features:
        features = np.concatenate(batch_features, axis=0)
    else:
        # No frames decoded: still produce a well-shaped, empty result.
        features = np.zeros((0, fc7.get_shape().as_list()[-1]), dtype=np.float32)

    # Name the output after the video file itself; formatting the full input
    # path into the output path would point into a non-existent directory.
    # NOTE(review): assumes video file names are unique across class folders.
    video_name = os.path.splitext(os.path.basename(video_path))[0]
    if not os.path.isdir(extracted_features_dir):
        os.makedirs(extracted_features_dir)
    with h5py.File(os.path.join(extracted_features_dir, video_name + ".h5"), "w") as f:
        f.create_dataset("fc7", data=features)
    return features

In [None]:
# Every entry directly under label_dir names one class; strip the directory
# prefix so that only the bare entry names remain, in sorted order.
label_paths = sorted(glob.glob(label_dir + "*"))
labels = [label_path.replace(label_dir, "") for label_path in label_paths]
print(labels)

In [None]:
from IPython.display import clear_output

# Walk every class, then every video of that class (sorted by path), and
# extract its features. The outer bar tracks classes; the inner bar tracks the
# videos of the current class and is removed once that class is finished.
for label in tqdm_notebook(labels, desc='Total progress'):
    class_videos = sorted(glob.glob(dataset_dir + label + "/*"))
    for video_path in tqdm_notebook(class_videos, desc=label, leave=False):
        features = extract_features(video_path)

In [None]:
# NOTE(review): unfinished scratch cell. The loop header below has no body, so
# this cell does not parse as written. It pairs up (start, end) batch
# boundaries over the frames; zip stops at the shorter range, so the extra
# trailing element of the first range is never used.
# NOTE(review): `video_frames` is not assigned at notebook top level in any
# visible cell above this one, so a fresh-kernel run would not find it here.
for start, end in zip(range(0, video_frames.shape[0] + batch_size, batch_size),
                      range(batch_size, video_frames.shape[0] + batch_size, batch_size)):
    

In [None]:
from datasets import imagenet
# Fetch slim's readable ImageNet label names and print the whole collection
# for inspection. NOTE(review): the same lookup is rebuilt in the final cell,
# and this print is large; consider displaying only a few entries.
names = imagenet.create_readable_names_for_imagenet_labels()
print(names)

In [None]:
# Scratch array of zeros shaped (16, 720, 1280, 3). NOTE(review): this does not
# match the (batch_size, 240, 320, 3) input placeholder; in the visible cells it
# is only used for a shape print.
a = np.zeros((16, 720, 1280, 3), dtype=np.uint8)

In [None]:
# Compare the scratch array's shape with that of the decoded frames.
# NOTE(review): `video_frames` is not assigned at notebook top level in any
# visible cell above this one; this cell relies on leftover kernel state.
print(np.shape(a))
print(np.shape(video_frames))

In [None]:
# Classify the first batch of frames. The placeholder's leading dimension is
# batch_size, so the slice is taken with batch_size rather than a literal 16 to
# keep the two in step if the configuration changes.
# NOTE(review): `video_frames` must already exist at top level with at least
# batch_size frames; no visible cell above assigns it outside a function.
video_frames = video_frames[:batch_size, :, :, :]
probablities_batch, feats = sess.run([probablities, fc7], feed_dict={input_batch: video_frames})

In [None]:
# Show the shape of the softmax output returned by the session run.
print(np.shape(probablities_batch))

In [None]:
# Print, for each frame of the batch, the index of its highest-probability
# class (the first such index when several entries tie).
for i in range(batch_size):
    print(int(np.argmax(probablities_batch[i])))

In [None]:
# Class probabilities of the first frame, and the class indices ordered from
# most to least probable. A stable sort keeps tied entries in index order.
probabilities = probablities_batch[0]
sorted_inds = np.argsort(-probabilities, kind='mergesort').tolist()

In [None]:
from datasets import imagenet
names = imagenet.create_readable_names_for_imagenet_labels()
# Report the five most probable classes of the first frame. The name lookup is
# offset by one relative to the network's output index, hence `index+1`.
for index in sorted_inds[:5]:
    print('Probability %0.2f%% => [%s]' % (probabilities[index] * 100, names[index+1]))