# Test Speeds from Rutgers Server vs Azure Blob

#### Build a list of frames to process using the dbcamhd.json database

In [None]:
import numpy as np
import pandas as pd
import pycamhd as camhd
# Load the video database: 'dbcamhd.json' is read as line-delimited JSON
# records into a DataFrame, and the last rows are displayed as a quick check.
dbcamhd = pd.read_json(
    'dbcamhd.json',
    lines=True,
    orient='records',
)
dbcamhd.tail()

In [None]:
# Select one video (row `fileindex` of the database) and pull out the fields
# used by the rest of the notebook.
fileindex = 2064
filename = dbcamhd.filename[fileindex]
timestamp = dbcamhd.timestamp[fileindex]
frame_count = dbcamhd.frame_count[fileindex]

# Sample `n_images` evenly spaced frame numbers between frame 750 and
# 6000 frames before the end of the video.
# NOTE(review): the 750 / 6000 margins presumably trim the start and end of
# the recording -- confirm the intent.
n_images = 4000
# Pass the integer *type* as dtype. The original passed `np.int64()`, a scalar
# instance with value 0, which only worked through NumPy's `.dtype` fallback.
frame_numbers = np.linspace(750, frame_count - 6000, n_images, dtype=np.int64)
filename

#### Create timestamps for frames

In [None]:
from datetime import datetime

# One datetime per sampled frame: the video's start time (`timestamp`, the
# database entry for `fileindex`) plus the frame's offset in seconds.
# `timestamp` is reused rather than re-reading the pandas Series on every
# iteration.
# NOTE(review): 29.95 is used as the frames-per-second divisor -- confirm it
# matches the video's real frame rate.
# NOTE(review): datetime.fromtimestamp() returns naive *local* time, so the
# values depend on the timezone of the machine running the notebook.
timestamps = [
    datetime.fromtimestamp(timestamp + frame_number / 29.95)
    for frame_number in frame_numbers
]
timestamps[0:5]

#### Set up Rutgers Dask array and Xarray

In [None]:
from dask import delayed
import dask.array as da
import xarray as xr

In [None]:
# The moov atom is fetched once for the file and handed to every
# camhd.get_frame call below.
moov_atom = camhd.get_moov_atom(filename)

# Build one lazy Dask array per frame. Nothing is downloaded here: each
# entry wraps a delayed camhd.get_frame call declared as a
# (1080, 1920, 3) uint8 image, with a leading length-1 axis added so the
# frames can later be concatenated along it.
delayed_frames = []
for frame_number in frame_numbers:
    lazy_frame = delayed(camhd.get_frame)(filename, frame_number, 'rgb24', moov_atom)
    frame_array = da.from_delayed(lazy_frame, shape=(1080, 1920, 3), dtype=np.uint8)
    delayed_frames.append(frame_array[np.newaxis, :, :, :])
delayed_frames[0]

In [None]:
# Join the per-frame arrays along axis 0 and wrap the result in an xarray
# Dataset with a single 'video' variable whose 'time' coordinate is the
# list of frame timestamps.
rutgers_video = da.concatenate(delayed_frames, axis=0)
ds_rutgers = xr.DataArray(
    rutgers_video,
    dims=['time', 'y', 'x', 'channel'],
    coords={'time': timestamps},
).to_dataset(name='video')
ds_rutgers

#### Start a Dask cluster

In [None]:
# Create a Dask cluster on Kubernetes, requesting 32 workers.
# The bare `cluster` expression at the end displays the cluster object in
# the notebook.
from dask_kubernetes import KubeCluster
cluster = KubeCluster(n_workers=32)
cluster

In [None]:
# Attach a distributed client to the cluster created above; the bare
# `client` expression displays it in the notebook.
from dask.distributed import Client
client = Client(cluster)
client

#### Compute the time-average of all images using Rutgers server and plot

In [None]:
%%time
# Average the 'video' variable over the 'time' dimension. `.load()` forces
# the lazy computation to run, so the time reported by %%time includes
# retrieving every frame behind ds_rutgers.
mean_image = ds_rutgers.video.mean(dim='time').load()

In [None]:
# Cast the averaged image to 64-bit integers and draw it with imshow
# (presumably so the values are treated as 0-255 RGB -- confirm).
# The trailing ';' suppresses the notebook's text output for the cell.
mean_image.astype('i8').plot.imshow();

#### Create a list of Azure blobs to process

In [None]:
# One blob URL per sampled frame: the video's timestamp, a dash, then the
# frame number zero-padded to eight digits.
blob_url_template = 'https://camhd.blob.core.windows.net/prores/%i-%08.0f'
blob_urls = [
    blob_url_template % (timestamp, frame_number)
    for frame_number in frame_numbers
]
blob_urls[0]

#### Define a function to get a frame from an Azure blob

In [None]:
import requests
def azure_get_frame(blob_url, timeout=60):
    """Download a single frame blob over HTTP and decode it as 'rgb24'.

    Parameters
    ----------
    blob_url : str
        URL of the blob that holds one encoded frame.
    timeout : float or tuple, optional
        Forwarded to ``requests.get``. Seconds to wait for the server
        before giving up, so a stalled connection cannot block a worker
        indefinitely. Pass ``None`` to wait forever (the previous behavior).

    Returns
    -------
    The result of ``camhd.decode_frame_data(content, 'rgb24')``.
    NOTE(review): the notebook uses it as a (1080, 1920, 3) uint8 image --
    confirm against pycamhd.

    Raises
    ------
    requests.exceptions.HTTPError
        If the server answers with a 4xx/5xx status.
    requests.exceptions.Timeout
        If the server does not respond within ``timeout``.
    """
    blob = requests.get(blob_url, timeout=timeout)
    # Fail loudly on an HTTP error instead of passing an error page to the
    # frame decoder, where it would surface as an obscure decode failure.
    blob.raise_for_status()
    return camhd.decode_frame_data(blob.content, 'rgb24')

In [None]:
# Smoke test: fetch and decode only the first blob.
test = azure_get_frame(blob_urls[0])

In [None]:
# Show the Python type of the decoded test frame.
type(test)

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt

In [None]:
# Display the decoded test frame as an image.
plt.imshow(test)

#### Set up Azure Dask array

In [None]:
# Build one lazy Dask array per blob URL. Each entry wraps a delayed
# azure_get_frame call declared as a (1080, 1920, 3) uint8 image, with a
# leading length-1 axis added so the frames can later be concatenated
# along it.
delayed_frames = []
for blob_url in blob_urls:
    lazy_frame = delayed(azure_get_frame)(blob_url)
    frame_array = da.from_delayed(lazy_frame, shape=(1080, 1920, 3), dtype=np.uint8)
    delayed_frames.append(frame_array[np.newaxis, :, :, :])
delayed_frames[0]

In [None]:
# Join the per-frame arrays along axis 0 and wrap the result in an xarray
# Dataset with a single 'video' variable whose 'time' coordinate is the
# list of frame timestamps.
azure_video = da.concatenate(delayed_frames, axis=0)
ds_azure = xr.DataArray(
    azure_video,
    dims=['time', 'y', 'x', 'channel'],
    coords={'time': timestamps},
).to_dataset(name='video')
ds_azure

#### Compute the time-average of all images using Azure blob and plot

In [None]:
%%time
# Average the 'video' variable over the 'time' dimension. `.load()` forces
# the lazy computation to run, so the time reported by %%time includes
# retrieving every frame behind ds_azure. This overwrites the earlier
# `mean_image`.
mean_image = ds_azure.video.mean(dim='time').load()

In [None]:
# Cast the averaged image to 64-bit integers and draw it with imshow
# (presumably so the values are treated as 0-255 RGB -- confirm).
# The trailing ';' suppresses the notebook's text output for the cell.
mean_image.astype('i8').plot.imshow();