# Create small netcdf
We import the arrays generated in `firesmoke_make_metadata.ipynb`, as well as additional metadata from the original netCDF file.

## Import necessary libraries

In [1]:
# to open original netcdfs and new netcdf with idx url
import xarray as xr
import numpy as np
# for connecting OpenVisus framework to xarray
# from https://github.com/sci-visus/openvisuspy, 
from openvisuspy.xarray_backend import OpenVisusBackendEntrypoint

## Load the original metadata from one dispersion.nc file

Get path to a netcdf file containing metadata

In [2]:
# ******* THIS IS WHEN RUNNING FROM CANADA1 **************
# Directory holding the source netCDF files (path as laid out on the CANADA1
# host; adjust it when running anywhere else).
firesmoke_dir = "/opt/wired-data/firesmoke/final_union_set"
# A single dispersion file from that directory; it is opened below as the
# source of the original metadata.
firesmoke_file = f"{firesmoke_dir}/dispersion_2025316_154209.nc"

In [3]:
# Open the reference dispersion file; its variables and global attributes
# are the starting point for the small netCDF built in this notebook.
ds = xr.open_dataset(filename_or_obj=firesmoke_file)

# Show the dataset summary as the cell output.
ds

Drop the TFLAGs and create a new TFLAGs variable with all 39k timesteps. 
Drop the PM25 variable: it is the variable of interest and is already stored in the IDX file.
Drop the attributes that are unique to this file, except `grid_attrs`: we keep the ones from this file because they are the grid attributes used across all timesteps in our IDX file.

In [4]:
# Remove TFLAG (re-created later from our own array) and PM25 (the array that
# is stored by idx) in a single call.
# ref: https://docs.xarray.dev/en/stable/generated/xarray.Dataset.drop_vars.html
ds_new = ds.drop_vars(["TFLAG", "PM25"])

# Grid attributes are kept on ds_new exactly as they appear in this file.
grid_attrs = {'XORIG', 'YORIG', 'XCELL', 'YCELL', 'NCOLS', 'NROWS', 'XCENT', 'YCENT'}

# Every other global attribute of the source file is removed here; these are
# the attributes that get re-added from the .npy metadata further down.
attrs_desired = ds.attrs.keys()
attrs_desired_non_grid = [name for name in attrs_desired if name not in grid_attrs]

for attr in attrs_desired_non_grid:
    ds_new.attrs.pop(attr)

# At this point ds_new holds no variables, only the grid attributes.
ds_new

## Set attributes for new NetCDF using metadata from `firesmoke_make_metadata.ipynb`

Import metadata from `.npy` files generated by `firesmoke_make_metadata_idxv5.ipynb`

In [5]:
# Arrays saved by the metadata notebook: the time flags and the resampled
# indicator (both are attached to ds_new along the 'time' dimension below).
tflags = np.load("firesmoke_v5-tflags.npy")
resamp = np.load("firesmoke_v5-resamp.npy")

# One entry per non-grid attribute. Each entry starts as an empty placeholder
# so that a failed load leaves a well-defined (empty) value rather than a
# missing key.
attr_data = {attr: [] for attr in attrs_desired_non_grid}
for attr in attrs_desired_non_grid:
    try:
        attr_data[attr] = np.load(f"firesmoke_v5-{attr}.npy")
    except FileNotFoundError as e:
        print(f".npy file for attribute '{attr}' not found: {e}")
    except Exception as e:
        # Loading stays best-effort, but a file that exists and cannot be read
        # (corrupt data, refused pickle, permissions, ...) must not be reported
        # as "not found".
        print(f".npy file for attribute '{attr}' could not be loaded: {e}")

Assign TFLAGs of `ds_new`.

In [6]:
# Give the time flags the same three-axis layout as UBC's original TFLAG by
# inserting a length-1 'VAR' axis in the middle, and store them as int32 as in
# the original netcdf.
# A reshape to an explicit target shape is used instead of np.expand_dims so
# that re-running this cell is harmless: expand_dims would add one more axis on
# every run and the three-dimension assignment below would then fail.
tflags = tflags.reshape(tflags.shape[0], 1, tflags.shape[-1]).astype(np.int32)

# create dimensions for these arrays of metadata, stay as close as possible to UBC style dimension naming convention
# ref: https://docs.xarray.dev/en/latest/generated/xarray.Dataset.assign.html
ds_new = ds_new.assign({'TFLAG': (['time', 'VAR', 'DATE-TIME'], tflags)})

Assign variable indicating if timesteps were resampled from 1061x381 to 1081x381 or not.

In [7]:
# Attach the resampled indicator array as a variable along 'time'.
ds_new = ds_new.assign(resampled=(['time'], resamp))

Check which attrs vary across time and which are the same across all timesteps.

In [8]:
# Sort every non-grid attribute into "same value for all timesteps" or
# "changes across timesteps" by comparing all of its entries with the first.
attrs_timevarying = []
attrs_constant = []
# Attributes whose .npy data is empty (e.g. the load failed and left the empty
# placeholder); indexing [0] on those would raise an IndexError.
attrs_missing = []

for attr in attrs_desired_non_grid:
    values = attr_data[attr]
    if np.size(values) == 0:
        # Nothing to classify: leave it out of both lists and report it below.
        attrs_missing.append(attr)
    elif np.all(values == values[0]):
        attrs_constant.append(attr)
    else:
        attrs_timevarying.append(attr)

print("Constant across time:", attrs_constant)
print("Vary across time:", attrs_timevarying)
# Only printed when something was skipped, so the usual output is unchanged.
if attrs_missing:
    print("No data loaded (skipped):", attrs_missing)

Constant across time: ['IOAPI_VERSION', 'EXEC_ID', 'FTYPE', 'TSTEP', 'NTHIK', 'NLAYS', 'NVARS', 'GDTYP', 'P_ALP', 'P_BET', 'P_GAM', 'VGTYP', 'VGTOP', 'VGLVLS', 'GDNAM', 'UPNAM', 'VAR-LIST', 'FILEDESC', 'HISTORY']
Vary across time: ['CDATE', 'CTIME', 'WDATE', 'WTIME', 'SDATE', 'STIME']


For attributes that are constant across time, add them as global attributes in `ds_new`.

In [9]:
# A constant attribute needs only one copy: store its first entry as a
# global attribute of ds_new.
ds_new.attrs.update({attr: attr_data[attr][0] for attr in attrs_constant})

For attributes varying across time, add attribute data as a new variable using 'time' as the dimension.

In [10]:
# Each time-varying attribute becomes its own variable along 'time'.
ds_new = ds_new.assign(
    {attr: (['time'], attr_data[attr]) for attr in attrs_timevarying}
)

Now add a DataArray for the variable 'PM25' to define the dimensions to use for this variable.

In [11]:
# Zero-filled placeholder with the shape of a single time slice, carrying the
# attributes of the original PM25 variable.
grid_shape = (ds.NROWS, ds.NCOLS)
pm25_var = xr.DataArray(
    data=np.zeros(grid_shape),
    dims=['ROW', 'COL'],
    attrs=ds['PM25'].attrs,
)

# PM25 is added strictly to record its dimensionality, since this
# dimensionality info is not available to xarray from the idx file.
ds_new['PM25'] = pm25_var

### Add idx_url as an attribute, then we're ready to save our tiny netcdf.

In [12]:
# Location of the idx dataset.
idx_url = "http://66.70.176.206:10800/mod_visus?dataset=firesmoke&cached=1"

# Record that location on the dataset as the global attribute 'idx_url'.
ds_new = ds_new.assign_attrs(idx_url=idx_url)

In [13]:
# Output path of the new tiny netcdf file (relative to the working directory).
new_nc = "firesmoke.nc"

# Write the assembled metadata dataset to disk as the tiny netcdf.
ds_new.to_netcdf(path=new_nc)

In [14]:
# Re-open the file just written with plain xarray (no custom engine) and
# display it as a sanity check.
xr.open_dataset(filename_or_obj=new_nc)

## Test tiny netcdf

## Load tiny netcdf file with backend_v3 and pass timestamps above as timesteps

In [15]:
# Open the tiny netcdf through the OpenVisus xarray backend and display the
# resulting dataset.
final_ds = xr.open_dataset(filename_or_obj=new_nc, engine=OpenVisusBackendEntrypoint)
final_ds

ov.LoadDataset(http://66.70.176.206:10800/mod_visus?dataset=firesmoke&cached=1)
PM25
Adding field  PM25 shape  [39546, 381, 1081, 21] dtype  float32 labels  ['time', 'ROW', 'COL', 'resolution'] Max Resolution  20
