# Xarray and CamHD Data Example

#### Imports

In [None]:
%matplotlib inline
import pycamhd.pycamhd as camhd
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import xarray as xr

#### Load a set of frames using pycamhd

In [None]:
# Load the CamHD file index: one JSON record per line, with at least the
# `filename` and `frame_count` columns that are read below.
dbcamhd = pd.read_json('dbcamhd.json', orient="records", lines=True)

# Pick a single video out of the index table.
fileindex = 2064
filename = dbcamhd['filename'][fileindex]
frame_count = dbcamhd['frame_count'][fileindex]

# Sample `n_images` evenly spaced frame numbers, starting at frame 750 and
# stopping 6000 frames before the end of the file.
n_images = 1000
# Pass the dtype class, not an instance: `np.int64()` creates a throwaway
# scalar 0 and relies on NumPy coercing that scalar into a dtype.
frame_numbers = np.linspace(750, frame_count - 6000, n_images, dtype=np.int64)

In [None]:
#%%time
#frames = [] # fastest to append the frames into a list of ndarrays
#for frame_number in frame_numbers:
#    frames.append(camhd.get_frame(filename, frame_number, 'rgb24'))

#### Load frames using pycamhd and dask
Here we employ a Dask cluster and Delayed functions to speed up the fetching of images from the raw data server. Scale the cluster up to ~20 (Standard_D2_v3) nodes to fit 32 workers into the cluster.

In [None]:
# Create a Dask cluster through dask_kubernetes, requesting 32 workers.
from dask_kubernetes import KubeCluster
cluster = KubeCluster(n_workers=32)
# Bare expression: the notebook renders the cluster object as the cell output.
cluster

In [None]:
# `delayed` and `compute` are used by the frame-fetching cells further down.
from dask import delayed, compute
from dask.distributed import Client
# Connect a distributed client to the `cluster` object.
client = Client(cluster)
# Bare expression: the notebook renders the client as the cell output.
client

#### Create Xarray DataArray out of Dask delayed functions

In [None]:
import dask.array as dsa

In [None]:
# Build one lazy dask array per requested frame. Each camhd.get_frame call is
# wrapped in `delayed`, and from_delayed is told up front which shape and
# dtype to expect from it.
frame_shape = (1080, 1920, 3)
delayed_frames = []
for frame_number in frame_numbers:
    lazy_frame = delayed(camhd.get_frame)(filename, frame_number, 'rgb24')
    frame_array = dsa.from_delayed(lazy_frame, shape=frame_shape, dtype=np.uint8)
    # Prepend a length-1 axis so the frames can be joined along axis 0 later.
    delayed_frames.append(frame_array[None, :, :, :])

In [None]:
# Inspect the first lazy frame (bare expression, so the notebook displays it).
delayed_frames[0]

In [None]:
# Join the per-frame arrays along their leading axis into one dask array.
all_data = dsa.concatenate(delayed_frames, axis=0)
# Bare expression: display the resulting array's summary.
all_data

In [None]:
# Wrap the dask array in an xarray Dataset holding one variable, 'video'.
# The 'time' coordinate is generated here as consecutive calendar days
# starting 2016-01-01, one per frame; it is not read from the video file.
time_index = pd.date_range(start='2016-01-01', periods=all_data.shape[0])
video = xr.DataArray(
    all_data,
    dims=['time', 'y', 'x', 'channel'],
    coords={'time': time_index},
)
ds = video.to_dataset(name='video')
ds

In [None]:
# Average the video over the 'time' dimension and load the result into memory.
mean_image = ds.video.mean(dim='time').load()

In [None]:
# Cast the averaged image to 64-bit integers ('i8') and draw it with imshow.
# NOTE(review): presumably the cast is there so the RGB values are treated as
# 0-255 integers rather than floats — confirm.
mean_image.astype('i8').plot.imshow()

In [None]:
# Write the dataset to 'test.nc' (a relative path).
# NOTE(review): if `ds` is the 1000-frame, 1080x1920x3 uint8 dataset, this is
# roughly 6 GB of pixel data — confirm that is intended.
ds.to_netcdf('test.nc')

In [None]:
%%time
delayed_frames = []
for frame_number in frame_numbers:
    delayed_frames.append(delayed(camhd.get_frame)(filename, frame_number, 'rgb24'))
frames = compute(*delayed_frames)

In [None]:
# Display the current contents of `delayed_frames`.
delayed_frames

#### Show results of benchmark testing
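We ran the above cells with n_images from 4 to 512, and with n_workers from 0 to 128. Here are the results from this investigation: each row of the table is a value of n_images, each column is a value of n_workers, and each entry is the elapsed time in seconds.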

In [None]:
# Benchmark timings. Each row is a frame count (`n_images`); each
# integer-labelled column is a worker count. Values are elapsed seconds, and
# NaN marks a combination with no recorded value.
# `np.nan` is used rather than `np.NaN`: the capitalised alias was removed in
# NumPy 2.0 and raises AttributeError there.
bench_s = pd.DataFrame(
    {'n_images': [4, 8, 16, 32, 64, 128, 256, 512],
     0: [8.8, 16.4, 34.2, 68.0, 143.0, 300.0, 615.0, 1189.0],
     2: [2.6, 6.3, 8.9, 18.8, 37.7, 76.0, 156.0, np.nan],
     4: [2.4, 2.5, 4.7, 9.4, 20.0, 38.9, 76.0, np.nan],
     8: [2.4, 2.5, 2.7, 5.1, 11.1, 20.5, 40.1, np.nan],
     16: [2.4, 2.4, 2.6, 3.3, 5.9, 12.0, 22.1, np.nan],
     32: [2.3, 2.4, 2.6, 3.5, 4.5, 9.1, 13.7, np.nan],
     64: [2.5, 2.5, 2.8, 3.4, 4.5, 7.0, 24.9, np.nan],
     96: [2.6, 2.6, 2.9, 4.0, 5.9, 7.7, 22.5, 25.0],
     128: [2.4, 2.4, 2.8, 3.7, 4.5, 9.5, 13.5, 26.8],
    })
# Index by frame count so row-wise arithmetic can use the index directly.
# Reassignment is preferred over `inplace=True`.
bench_s = bench_s.set_index('n_images')
bench_s

In [None]:
# Convert timings to throughput: divide each row by its frame count to get
# seconds per frame, then take the reciprocal to get frames per second.
frames_per_run = bench_s.index.to_series()
seconds_per_frame = bench_s.div(frames_per_run, axis=0)
bench_fps = 1 / seconds_per_frame
bench_fps

In [None]:
# Plot throughput against the number of frames, one line per column of
# bench_fps, with a dot marker on every measured point.
plot_style = dict(figsize=(12, 8), marker='.', markersize=10)
ax = bench_fps.plot(**plot_style)
# Trailing semicolons keep the notebook from echoing the return values.
ax.set_xlabel('Number of Frames');
ax.set_ylabel('Frames Per Second');

#### Convert list to Xarray DataArray

In [None]:
# Turn the computed frames into a labelled 4-D DataArray with dimensions
# (frame, row, column, channel). The sampled frame numbers label the 'frame'
# axis and 'r'/'g'/'b' label the channels.
channel = ['r', 'g', 'b']

frame_stack = np.array(frames)
axis_coords = [frame_numbers, range(1080), range(1920), channel]
da = xr.DataArray(
    frame_stack,
    coords=axis_coords,
    dims=['frame', 'row', 'column', 'channel'],
)
da

In [None]:
# Show the sampled frame closest to frame number 12600.
# Writing `da['frame' == 12600]` evaluates the comparison first; a str never
# equals an int, so it becomes `da[False]` and never selects by frame number.
# `method='nearest'` is used because `frame_numbers` comes from np.linspace
# and need not contain 12600 exactly.
da.sel(frame=12600, method='nearest').plot.imshow()

In [None]:
# Dump `frame` to 'frame_rgb.txt', formatting every value with '%u'.
# NOTE(review): `frame` is not assigned in any cell visible in this notebook —
# confirm where it is supposed to come from.
# NOTE(review): np.savetxt accepts only 1-D or 2-D arrays; if `frame` is a
# (rows, cols, 3) RGB image this call will raise — verify.
np.savetxt('frame_rgb.txt', frame, fmt='%u')

In [None]:
# Fetch ten consecutive frames (4000-4009) one at a time, without dask.
frames = []
for i in range(4000, 4010):
    print(i)
    # Request frame `i`. Passing `frame_number` here would reuse the stale
    # value left over from an earlier loop and fetch one frame ten times.
    frames.append(camhd.get_frame(filename, i, 'rgb24'))

In [None]:
# `frames` is a plain Python sequence of arrays and has no `.shape`
# attribute; np.shape reports the dimensions it would have as an array.
np.shape(frames)

In [None]:
# Convert the sequence of frames into a single ndarray.
test = np.array(frames)

In [None]:
# Scratch check: display the Python type of `test`.
type(test)

In [None]:
# Scratch check: display the dimensions of `test`.
test.shape

In [None]:
# NOTE: this cell held an unfinished assignment (`ds = ` with no right-hand
# side), which is a SyntaxError. It is kept only as a comment so the cell
# parses; `ds` is assigned in the cell that builds the dataset from `data`.
# ds =

In [None]:
# Scratch check of range() with a step: evaluates to [10, 12, 14, 16, 18].
list(range(10, 20, 2))

In [None]:
# Scratch check: display the dimensions of the first fetched frame.
frames[0].shape

In [None]:
# Scratch check: how many frames are held in `frames`.
len(frames)

In [None]:
# Convert the sequence of frames into a single ndarray and display its
# dimensions; `data.shape[0]` is used as the number of time steps below.
data = np.array(frames)
data.shape

In [None]:
# Build the same single-variable Dataset ('video'), this time from the
# in-memory ndarray `data`. The 'time' coordinate is generated here as
# consecutive calendar days starting 2016-01-01, one per frame.
time_index = pd.date_range(start='2016-01-01', periods=data.shape[0])
video = xr.DataArray(
    data,
    dims=['time', 'y', 'x', 'channel'],
    coords={'time': time_index},
)
ds = video.to_dataset(name='video')
ds

In [None]:
# imshow draws a single image, but `da` is 4-D (frame, row, column, channel).
# Select one frame first so a 3-D RGB array is plotted; the first frame is
# shown here.
da.isel(frame=0).plot.imshow()