# Create .PNG images of all timesteps in IDX firesmoke dataset

## Import necessary libraries

In [None]:
import numpy as np # <1>
import os # <2>
import requests # <3>
import xarray as xr # <4>
from openvisuspy.xarray_backend import OpenVisusBackendEntrypoint # <5>
import time # <6>
import datetime # <6>
import pandas as pd # <7>
import matplotlib # <8>
import matplotlib.pyplot as plt # <8>
import cartopy.crs as ccrs # <8>
import pickle # <9>
os.environ["VISUS_CACHE"]="./visus_cache_can_be_erased" # <10>
from tqdm import tqdm # <11>
from OpenVisus import * # <12>

1. For numerical work
2. For accessing the file system
3. For downloading the latest firesmoke NetCDF
4. For loading NetCDF files and metadata
5. For connecting the OpenVisus framework to xarray (from [openvisuspy](https://github.com/sci-visus/openvisuspy))
6. Used for processing NetCDF time data
7. Used for indexing via metadata
8. For plotting
9. For exporting the dictionary of issue files at the end of the notebook
10. Stores the OpenVisus cache in the local directory
11. Accessory, used to generate a progress bar for running for loops
12. For importing OpenVisus functions

### In this section, we load our data using `xr.open_dataset`.

In [None]:
# Fetch the small metadata-only NetCDF file, store it locally, then open it
# through the OpenVisus xarray backend.
url = 'https://github.com/sci-visus/NSDF-WIRED/raw/main/data/firesmoke_metadata_recent.nc' # <1>

# A timeout keeps the cell from hanging forever on a stalled connection.
response = requests.get(url, timeout=60) # <2>
# Fail loudly on HTTP errors (404, 500, ...) instead of writing an error page
# to disk and getting a confusing failure later when xarray tries to parse it.
response.raise_for_status()
local_netcdf = 'firesmoke_metadata.nc' # <3>
with open(local_netcdf, 'wb') as f: # <4>
    f.write(response.content)

ds = xr.open_dataset(local_netcdf, engine=OpenVisusBackendEntrypoint) # <5>

1. URL of the tiny NetCDF file
2. Download the file using `requests`
3. Local filename for the NetCDF file
4. Write the downloaded content to the local file system
5. Open the tiny NetCDF file with xarray and the OpenVisus backend

## Calculate derived metadata using original metadata above to create coordinates
### This is required to allow for indexing of data via metadata

#### Calculate latitude and longitude grid

In [None]:
# Grid description read from the dataset: origin, cell size and cell counts
# along the x (column) and y (row) directions.
xorig, yorig = ds.XORIG, ds.YORIG
xcell, ycell = ds.XCELL, ds.YCELL
ncols, nrows = ds.NCOLS, ds.NROWS

# One evenly spaced coordinate per column/row, starting at the origin and
# advancing by one cell size per step.
longitude = np.linspace(start=xorig, stop=xorig + xcell * (ncols - 1), num=ncols)
latitude = np.linspace(start=yorig, stop=yorig + ycell * (nrows - 1), num=nrows)

#### Using calculated latitude and longitude, create coordinates allowing for indexing data using lat/lon

In [None]:
# Attach the computed coordinate arrays to the existing ROW/COL dimensions.
ds.coords['lat'] = ('ROW', latitude) # <1>
ds.coords['lon'] = ('COL', longitude) # <2>

# Make lat/lon the dimension names so the data can be indexed by coordinate.
ds = ds.swap_dims(COL='lon', ROW='lat') # <3>

1. Create coordinates for latitude based on the `ROW` dimension
2. Create coordinates for longitude based on the `COL` dimension
3. Replace the `COL` and `ROW` dimensions with the newly calculated longitude and latitude arrays

## Get timestamps to label video frames
Need to use `idx_calls` generated during conversion

In [None]:
# NOTE: pickle can execute arbitrary code while loading; only open a
# 'idx_calls_v4.pkl' that you produced yourself.
with open('idx_calls_v4.pkl', 'rb') as pkl_file: # <1>
    idx_calls = pickle.load(pkl_file) # <1>

1. Load idx_calls from file

##### Build a list of the tflags as pandas timestamps

In [None]:
# One pandas Timestamp per entry of idx_calls, in the same order.
timestamps = [] # <1>

for idx_call in idx_calls: # <2>
    # Element 2 of each call is converted to a Timestamp
    # (presumably the tflag of that timestep -- confirm against the conversion step).
    tflag = idx_call[2]
    timestamps.append(pd.Timestamp(tflag)) # <3>

1. Initialize an empty list to store pandas timestamps
2. Loop through the `idx_calls` to process each call
3. Convert the `tflags` to pandas `Timestamp` and store in the `timestamps` list

## Create the video

In [None]:
# Render one PNG per timestep of PM25 and record every timestep that fails.
data_resolution = 0 # <1>
folder = "/usr/sci/scratch_nvme/arleth/dump/idx_frames" # <2>
# savefig does not create missing directories; without this every frame
# would fail when the folder does not exist yet.
os.makedirs(folder, exist_ok=True)

my_norm = "log" # <3>
my_extent = [np.min(longitude), np.max(longitude), np.min(latitude), np.max(latitude)] # <3>
my_aspect = 'auto' # <3>
my_origin = 'lower' # <3>
my_cmap = 'hot' # <3>

# Maps the timestamp of each failed timestep to the data that was being
# plotted (None when the data itself could not be loaded).
issue_files = {} # <4>

for i in tqdm(range(len(idx_calls))): # <5>
    data_array_at_time = ds['PM25'].loc[i, :, :, data_resolution] # <6>

    # Pre-bind so the except/finally clauses can always refer to these names.
    data = None
    my_fig = None
    try: # <7>
        # Materialise the selection as a plain array; this is the array that
        # gets plotted and, on failure, stored in issue_files.
        data = data_array_at_time.values
        my_fig, my_plt = plt.subplots(figsize=(15, 6), subplot_kw=dict(projection=ccrs.PlateCarree()))
        plot = my_plt.imshow(data, norm=my_norm, extent=my_extent, aspect=my_aspect, origin=my_origin, cmap=my_cmap)
        my_fig.colorbar(plot, location='right', label='ug/m^3')
        my_plt.coastlines()
        my_plt.gridlines(draw_labels=True)
        my_fig.suptitle(f'Ground level concentration of PM2.5 microns and smaller {timestamps[i]}\n')
        my_plt.text(0.5, -0.1, 'IDX Data', ha='center', va='center', transform=my_plt.transAxes)

        my_fig.savefig(f"{folder}/frames{i:05d}.png", dpi=280) # <8>
    except Exception as exc: # <9>
        # `Exception` (not a bare except) so Ctrl-C can still stop the loop.
        print(f"Could not create frame {i} ({timestamps[i]}): {exc!r}") # <9>
        issue_files[timestamps[i]] = data # <9>
    finally:
        # Always release the figure, even on failure, so memory does not grow
        # with the number of timesteps.
        if my_fig is not None:
            plt.close(my_fig)

1. Set the resolution level of the `PM25` data to max
2. Directory in which to save the frames
3. Set parameters for creating visualization of each timestep with matplotlib.
4. Dictionary to keep track of files with 'issues'.
5. For every timestep, create a visualization of the firesmoke at that time.
6. Get the PM2.5 values by providing 4 indexers: the timestep index `i`, two colons that select all lat and lon indices, and the resolution level.
7. Try creating the visualization or catch exceptions accordingly.
8. Save images to file.
9. Handle any exception: record the timestep in the issue dictionary, using its timestamp (`timestamps[i]`) as key.

In [None]:
# Persist the failed timesteps so they can be inspected after the run.
with open('new_idx_issues.pkl', 'wb') as issues_file: # <1>
    pickle.dump(issue_files, issues_file) # <1>

1. Save 'issue_files' to review