In [1]:
import numpy as np
import cv2
import time

import torch
from torch.autograd import Variable

from os.path import join
from glob import glob

import skimage.io as io
from skimage.transform import resize

from C3D_model import C3D

In [2]:
# Number of consecutive video frames grouped into one clip for the network.
frames = 16
# When True the model and its inputs are moved to the GPU; otherwise inference runs on the CPU.
cuda = False

In [3]:
def get_blocc(vid):
    """
    Reads the next clip of `frames` consecutive frames from a video and
    converts it into an input tensor for the network.

    Each frame is resized to 112x200 (h, w) and cropped centrally to
    112x112; the clip is then rearranged to (batch, ch, fr, h, w).

    Parameters
    ----------
    vid: cv2.VideoCapture
        an opened capture. It is advanced by `frames` frames.

    Returns
    -------
    torch.Tensor
        float32 tensor of shape (1, ch, frames, 112, 112). Pixel values keep
        their original range (no rescaling to [0, 1]).

    Raises
    ------
    EOFError
        if the capture cannot deliver `frames` more frames (end of video
        or a failed read).
    """
    height, width, crop = 112, 200, 112
    left = (width - crop) // 2  # 44: start column of the central crop

    arr = []
    for i in range(frames):
        ok, frame = vid.read()
        # read() signals end-of-stream / decode failure with (False, None);
        # fail here with a clear message instead of deep inside resize().
        if not ok or frame is None:
            raise EOFError(
                "could only read {} of {} frames for the next clip".format(i, frames))
        arr.append(frame)

    # NOTE(review): cv2 returns frames in BGR channel order -- confirm this is
    # the order the pretrained weights expect.
    # mode / anti_aliasing are pinned to the values currently in effect so the
    # preprocessing does not silently change when skimage switches its defaults.
    blocc = np.array([
        resize(frame, output_shape=(height, width), mode='constant',
               anti_aliasing=False, preserve_range=True)
        for frame in arr
    ])

    blocc = blocc[:, :, left:left + crop, :]  # crop centrally
    blocc = blocc.transpose(3, 0, 1, 2)  # ch, fr, h, w
    blocc = np.expand_dims(blocc, axis=0)  # batch axis
    blocc = np.float32(blocc)
    blocc = torch.from_numpy(blocc)

    return blocc

In [4]:
def read_labels_from_file(filepath):
    """
    Loads the Sport1M class names, one per line, from a text file.

    Parameters
    ----------
    filepath: str
        path of the labels file.

    Returns
    -------
    list
        the lines of the file in order, each with surrounding whitespace removed.
    """
    names = []
    with open(filepath, 'r') as handle:
        # Iterating the handle yields the same lines readlines() would return.
        for raw in handle:
            names.append(raw.strip())
    return names

In [5]:
def predict(filename, max_clips=11):
    """
    Runs the pretrained C3D network over the first clips of a video and
    prints the five most probable labels for every clip.

    The video is consumed in consecutive, non-overlapping clips of `frames`
    frames. For each clip the inference time, the 1-based clip index and the
    top-5 predictions are printed.

    Parameters
    ----------
    filename: str
        path of the video to classify.
    max_clips: int
        upper bound on the number of clips to process (default 11).

    Returns
    -------
    None

    Raises
    ------
    IOError
        if the video cannot be opened.
    """
    # get network pretrained model
    net = C3D()
    # NOTE: torch.load unpickles the file -- only load weights from a trusted source.
    net.load_state_dict(torch.load('c3d.pickle'))

    if cuda:
        net.cuda()
    net.eval()

    # read labels once; they do not change between clips
    labels = read_labels_from_file('labels.txt')

    vid = cv2.VideoCapture(filename)
    if not vid.isOpened():
        raise IOError("could not open video: {}".format(filename))

    try:
        # Never ask for more complete clips than the video holds. Some
        # containers do not report a frame count; then only max_clips limits
        # the loop.
        total_frames = int(vid.get(cv2.CAP_PROP_FRAME_COUNT))
        if total_frames > 0:
            n_clips = min(max_clips, total_frames // frames)
        else:
            n_clips = max_clips

        # perform prediction
        for ind in range(1, n_clips + 1):
            X = get_blocc(vid)
            X = Variable(X)
            if cuda:
                X = X.cuda()

            start_time = time.time()
            prediction = net(X)
            prediction = prediction.data.cpu().numpy()
            print("Time taken for prediction =", time.time()-start_time)
            print(ind)

            # print top predictions
            top_inds = prediction[0].argsort()[::-1][:5]  # reverse sort and take five largest items
            print('\nTop 5:')
            for i in top_inds:
                print('{:.5f} {}'.format(prediction[0][i], labels[i]))
    finally:
        # free the decoder even if reading or inference fails
        vid.release()

In [6]:
# Wall-clock time of the complete predict() call: model loading plus inference on every clip.
start_time = time.time()
predict('./vid_data/cycling.mp4')
elapsed = time.time() - start_time
print("Total time taken = ", elapsed)

  warn("The default mode, 'constant', will be changed to 'reflect' in "
  warn("Anti-aliasing will be enabled by default in skimage 0.15 to "
  probs = self.softmax(logits)


Time taken for prediction = 0.7318611145019531
1

Top 5:
0.20809 road bicycle racing
0.11677 cycling
0.11445 supermoto
0.07423 cross-country cycling
0.04752 isle of man tt
Time taken for prediction = 0.6861779689788818
2

Top 5:
0.26924 road bicycle racing
0.14372 supermoto
0.06851 endurance racing (motorsport)
0.06372 cycling
0.06336 motorcycle racing
Time taken for prediction = 0.6745047569274902
3

Top 5:
0.43744 road bicycle racing
0.08998 cycling
0.06943 supermoto
0.04711 endurance racing (motorsport)
0.03954 duathlon
Time taken for prediction = 0.6841106414794922
4

Top 5:
0.15460 road bicycle racing
0.12969 cycling
0.10011 supermoto
0.07300 streetluge
0.03624 longboarding
Time taken for prediction = 0.6905381679534912
5

Top 5:
0.15867 streetluge
0.07571 longboarding
0.05909 orienteering
0.05532 cross-country skiing
0.05489 road bicycle racing
Time taken for prediction = 0.6834263801574707
6

Top 5:
0.19499 streetluge
0.14353 longboarding
0.10169 freeboard (skateboard)
0.05193 s