In [1]:
import os
import io
import time
import builtins
from torch import save, load
import torch.optim
import torch.nn.parallel
from torch.nn import functional as F

from models import load_model, load_transform, load_categories
from utils import extract_frames, CloudFileProxy

from cloudbutton import Pool, CloudStorage
from cloudbutton.util import get_uuid

In [2]:
ROOT_URL = 'http://moments.csail.mit.edu/moments_models'
WEIGHTS_FILE = 'moments_RGB_resnet50_imagenetpretrained.pth.tar'

# Download pretrained resnet50 model/weights
if not os.access(WEIGHTS_FILE, os.R_OK):
    os.system('wget ' + '/'.join([ROOT_URL, WEIGHTS_FILE]))

In [3]:
LOCAL_EXEC = False
INPUT_DATA_DIR = 'momentsintime/input_data'
CONCURRENCY = 100

In [4]:
if LOCAL_EXEC:
    initargs = {}
    weights_location = '/dev/shm/' + WEIGHTS_FILE
    video_locations = [os.path.abspath(os.path.join(INPUT_DATA_DIR, name)) 
                        for name in os.listdir(INPUT_DATA_DIR)]
    open = builtins.open
else:
    RUNTIME = 'dhak/pywren-runtime-pytorch:3.6'
    initargs = {
        'runtime': RUNTIME,
        'runtime_memory': 1024
        }
    weights_location = 'momentsintime/models/' + WEIGHTS_FILE
    cloud_storage = CloudStorage()
    #cloud_storage.put_data(key='momentsintime/input_data/juggling.mp4', data=open('momentsintime/input_data/juggling.mp4', 'rb'))
    video_locations = cloud_storage.list_tmp_data(prefix=INPUT_DATA_DIR)
    open = CloudFileProxy(cloud_storage)

In [None]:
with builtins.open(WEIGHTS_FILE, 'rb') as f_in:
    with open(weights_location, 'wb') as f_out:
        f_out.write(f_in.read())

In [5]:
NUM_SEGMENTS = 16

# Get dataset categories
categories = load_categories()

# Load the video frame transform
transform = load_transform()

def predict_video(open, weights_location, video_locations):
    with open(weights_location, 'rb') as f:
        model = load_model(f)
    model.eval()

    results = []
    local_video_loc = 'video_to_predict_{}.mp4'.format(get_uuid())

    for video_loc in video_locations:
        start = time.time()
        with open(video_loc, 'rb') as f_in:
            with builtins.open(local_video_loc, 'wb') as f_out:
                f_out.write(f_in.read())

        # Obtain video frames
        frames = extract_frames(local_video_loc, NUM_SEGMENTS)

        # Prepare input tensor [num_frames, 3, 224, 224]
        input_v = torch.stack([transform(frame) for frame in frames])

        # Make video prediction
        with torch.no_grad():
            logits = model(input_v)
            h_x = F.softmax(logits, 1).mean(dim=0)
            probs, idx = h_x.sort(0, True)

        # Output the prediction
        output = dict(key=video_loc, result={})
        for i in range(0, 5):
            output['predictions'][categories[idx[i]]] = round(float(probs[i]), 5)
        output['whole_duration'] = time.time() - start
        results.append(output)

    return output

In [7]:
with Pool(initargs=initargs) as pool:
    iterable = [(open, weights_location, video_locations[n::CONCURRENCY]) 
                for n in range(CONCURRENCY) if n < len(video_locations)]
    res = pool.map_async(func=predict_video, iterable=iterable)
    print(res.get())


PyWren v1.5.2 init for IBM Cloud Functions - Namespace: pol23btr%40gmail.com_dev - Region: eu_gb
ExecutorID 043bf6/1 | JobID M000 - Selected Runtime: dhak/pywren-runtime-pytorch:3.6 - 1024MB 
ExecutorID 043bf6/1 | JobID M000 - Uploading function and data - Total: 168.1KiB
0/|/ 0/1ExecutorID 043bf6/1 | JobID M000 - Starting function invocation: predict_video()  - Total: 1 activations
ExecutorID 043bf6/1 - Getting results...
1/|/ 1/1[{'key': 'momentsintime/input_data/juggling.mp4', 'result': {'juggling': 0.99872, 'flipping': 0.00027, 'catching': 0.00016, 'child+speaking': 6e-05, 'child+singing': 5e-05}, 'whole_duration': 1.9780809879302979}]
ExecutorID 043bf6/1 - Cleaning temporary data



In [None]:
def predict_video(open, weights_location, video_locations):
    with builtins.open(weights_location, 'rb') as f:
        model = load_model(f)
    model.eval()

    results = []
    local_video_loc = 'video_to_predict_{}.mp4'.format(get_uuid())

    for video_loc in video_locations:
        start = time.time()
        with open(video_loc, 'rb') as f_in:
            with builtins.open(local_video_loc, 'wb') as f_out:
                f_out.write(f_in.read())

        # Obtain video frames
        frames = extract_frames(local_video_loc, NUM_SEGMENTS)

        # Prepare input tensor [num_frames, 3, 224, 224]
        input_v = torch.stack([transform(frame) for frame in frames])

        # Make video prediction
        with torch.no_grad():
            logits = model(input_v)
            h_x = F.softmax(logits, 1).mean(dim=0)
            probs, idx = h_x.sort(0, True)

        # Output the prediction
        output = dict(key=video_loc, result={})
        for i in range(0, 5):
            output['predictions'][categories[idx[i]]] = round(float(probs[i]), 5)
        output['whole_duration'] = time.time() - start
        results.append(output)

    return output

In [None]:
CUSTOM_RUNTIME = 'dhak/pywren-runtime-resnet'
weights_location = '/momentsintime/model_weights'

initargs = {
    'runtime': CUSTOM_RUNTIME, 
    'runtime_memory': 1024
    }
with Pool(initargs=initargs) as pool:
    iterable = [(open, weights_location, video_locations[n::CONCURRENCY]) 
                for n in range(CONCURRENCY) if n < len(video_locations)]
    res = pool.map_async(func=predict_video, iterable=iterable)
    print(res.get())


In [None]:
# Clean
if os.path.isfile(WEIGHTS_FILE):
    os.remove(WEIGHTS_FILE)

if LOCAL_EXEC:
    if os.path.isfile(weights_location):
        os.remove(weights_location)
else:
    cloud_storage.delete_cobject(key=weights_location)