# ProRes Frames to Zarr Example
Here we write the original ProRes-compressed frames into a Zarr array and show how to work with this database using lazy operations.

#### Imports

In [None]:
%matplotlib inline
import pycamhd.pycamhd as camhd
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import xarray as xr

#### Create a list of frames to analyze using the dbcamhd.json database

In [None]:
# Load the file database from line-delimited JSON (one record per line)
# and preview its last rows.
dbcamhd = pd.read_json('dbcamhd.json', orient="records", lines=True)
dbcamhd.tail()

In [None]:
# Pick one video file from the database and choose evenly spaced frames from it.
fileindex = 2064  # index of the video's row in dbcamhd
filename = dbcamhd.filename[fileindex]
frame_count = dbcamhd.frame_count[fileindex]
n_images = 1000  # number of frames to sample
# Sample between frame 750 and 6000 frames before the end of the file.
# Pass the int64 *type* as dtype (not an instance such as np.int64()) so the
# sampled positions are stored as integer frame numbers.
frame_numbers = np.linspace(750, frame_count - 6000, n_images, dtype=np.int64)
filename

In [None]:
# Preview the first ten selected frame numbers.
frame_numbers[0:10]

#### Create timestamps for frames

In [None]:
from datetime import datetime

In [None]:
# Build one datetime per selected frame: the file's timestamp plus the frame's
# offset in seconds. datetime.fromtimestamp yields naive local-time datetimes.
# NOTE(review): dbcamhd.timestamp is presumably the video start in epoch
# seconds, and 29.95 is used as the frames-per-second rate - confirm both.
video_start = dbcamhd.timestamp[fileindex]
timestamps = [
    datetime.fromtimestamp(video_start + frame_number/29.95)
    for frame_number in frame_numbers
]

In [None]:
# Preview the first five frame timestamps.
timestamps[0:5]

#### Create Xarray Dataset out of Dask delayed functions
Here, instead of calling `get_frame`, we call `get_frame_data`, which returns the raw ProRes-encoded frame. The encoded data has a compression ratio of about 10:1, so storage costs will be lower and transfer rates (frames/s) should be higher.

In [None]:
from dask import delayed
import dask.array as da

In [None]:
def get_frame_data_array(filename, frame_number, moov_atom):
    """Fetch one raw encoded frame and wrap it in a (1, 1) bytes array.

    Parameters
    ----------
    filename : video file to read from, passed through to
        ``camhd.get_frame_data``.
    frame_number : number of the frame to fetch.
    moov_atom : moov atom of the file (as returned by
        ``camhd.get_moov_atom``), passed through to ``camhd.get_frame_data``.

    Returns
    -------
    numpy.ndarray
        Array of dtype 'bytes' holding the encoded frame as its single
        element. Two leading axes are added so the result has shape (1, 1)
        (assuming ``get_frame_data`` returns a single bytes object).
    """
    # Call get_frame_data with the arguments; wrapping the function object
    # itself would never read any frame data.
    frame_data = camhd.get_frame_data(filename, frame_number, moov_atom)
    return np.array(frame_data, dtype='bytes')[None, None]

In [None]:
# Read the moov atom once and pass it to every delayed frame fetch.
moov_atom = camhd.get_moov_atom(filename)
# One lazy (1, 1) bytes array per selected frame; nothing is fetched until
# the arrays are computed.
delayed_frame_data = [
    da.from_delayed(
        delayed(get_frame_data_array)(filename, frame_number, moov_atom),
        shape=(1,1),
        dtype='bytes',
    )
    for frame_number in frame_numbers
]

In [None]:
# Inspect the first lazy single-frame dask array.
delayed_frame_data[0]

In [None]:
# Join the per-frame lazy arrays along the first axis, then wrap the result
# in a Dataset whose single variable is named 'video'.
video = da.concatenate(delayed_frame_data, axis=0)
ds = xr.DataArray(video).to_dataset(name='video')
ds

#### Start a Dask cluster

In [None]:
# Create a Dask cluster on Kubernetes, requesting 32 workers.
from dask_kubernetes import KubeCluster
cluster = KubeCluster(n_workers=32)
cluster

In [None]:
# Connect a distributed client to the cluster created above.
from dask import delayed, compute
from dask.distributed import Client
client = Client(cluster)
client

#### Use to_zarr() to save to Azure Blob Storage 

In [None]:
import zarr
import xarray as xr
from azure_credentials import account_name, account_key

In [None]:
# Zarr store backed by Azure Blob Storage; credentials come from the local
# azure_credentials module.
# NOTE(review): the positional arguments are presumably container, blob prefix,
# account name and account key - confirm against the installed zarr version.
absstore = zarr.storage.ABSStore('rte-pangeo-data', 'test_tjc.zarr', account_name, account_key)

In [None]:
# Destructive: removes whatever is already stored under this store so the
# write below starts from an empty location.
absstore.rmdir() # delete the zarr group before attempting write

In [None]:
%%time
# Write the dataset to the zarr store; this is the step that evaluates the
# delayed frame fetches.
ds.to_zarr(absstore)

In [None]:
%%time
# Discard the original dataset and re-open it from the zarr store.
del ds
ds = xr.open_zarr(absstore)
ds

In [None]:
# Display the current dataset.
ds

In [None]:
# Load the first stored element into memory as a numpy array.
# `.values` is a property, not a method: calling it would try to call the
# returned array and raise TypeError.
test = ds.video[0].values
type(test)

In [None]:
# Convert the first stored element to a plain dictionary. to_dict() is a
# DataArray method, so call it on the DataArray itself rather than on the
# numpy array extracted from it (numpy arrays have no to_dict).
asdf = ds.video[0].to_dict()

In [None]:
# Fetch the encoded data for one of the selected frames directly from the
# video file. Note that this rebinds `test`, replacing any earlier value.
test = camhd.get_frame_data(filename, frame_numbers[10])

In [None]:
# Decode the encoded frame using the 'rgb24' format and display it as an image.
plt.imshow(camhd.decode_frame_data(test, 'rgb24'))