# Create .PNG images of all timesteps in IDX firesmoke dataset

## Import necessary libraries

In [None]:
import numpy as np # <1>
import os # <2>
import requests # <3>
import xarray as xr # <4>
from openvisuspy.xarray_backend import OpenVisusBackendEntrypoint # <5>
import time # <6>
import datetime # <6>
import pandas as pd # <7>
import matplotlib # <8>
import matplotlib.pyplot as plt # <8>
import cartopy.crs as ccrs # <8>
import pickle # <9>
os.environ["VISUS_CACHE"]="./visus_cache_can_be_erased" # <10>
from tqdm import tqdm # <11>
from OpenVisus import * # <12>

1. For numerical work
2. For accessing the file system
3. For downloading the latest firesmoke NetCDF
4. For loading NetCDF files and metadata
5. For connecting the OpenVisus framework to xarray (from [openvisuspy](https://github.com/sci-visus/openvisuspy))
6. Used for processing NetCDF time data
7. Used for indexing via metadata
8. For plotting
9. For exporting the dictionary of issue files at the end of the notebook
10. Stores the OpenVisus cache in the local directory
11. Accessory, used to generate a progress bar for running for loops
12. For importing OpenVisus functions

### In this section, we load our data using `xr.open_dataset`.

In [None]:
# Download the small NetCDF file that carries the dataset's metadata, save it
# locally, and open it through the OpenVisus xarray backend.
url = 'https://github.com/sci-visus/NSDF-WIRED/raw/main/data/firesmoke_metadata.nc' # <1>

# A timeout keeps the cell from hanging forever on a stalled connection.
response = requests.get(url, timeout=60) # <2>
# Stop here on an HTTP error (404, 5xx, ...) instead of writing the error page
# to disk and failing later inside xr.open_dataset with a misleading message.
response.raise_for_status()
local_netcdf = 'firesmoke_metadata.nc' # <3>
with open(local_netcdf, 'wb') as f: # <4>
    f.write(response.content)

ds = xr.open_dataset(local_netcdf, engine=OpenVisusBackendEntrypoint) # <5>

1. URL of the tiny NetCDF file
2. Download the file using `requests`
3. Local filename for the NetCDF file
4. Write the downloaded content to the local file system
5. Open the tiny NetCDF file with xarray and the OpenVisus backend

## Calculate derived metadata using original metadata above to create coordinates
### This is required to allow for indexing of data via metadata

#### Calculate latitude and longitude grid

In [None]:
# Grid description stored as attributes on the dataset:
# origin, cell size and cell count along each axis.
xorig, yorig = ds.XORIG, ds.YORIG
xcell, ycell = ds.XCELL, ds.YCELL
ncols, nrows = ds.NCOLS, ds.NROWS

# Last coordinate along each axis: origin plus (count - 1) cell widths.
lon_stop = xorig + xcell * (ncols - 1)
lat_stop = yorig + ycell * (nrows - 1)

# One evenly spaced coordinate per column / per row.
longitude = np.linspace(xorig, lon_stop, ncols)
latitude = np.linspace(yorig, lat_stop, nrows)

#### Using calculated latitude and longitude, create coordinates allowing for indexing data using lat/lon

In [None]:
# Attach the computed grids as coordinates on the existing index dimensions.
ds.coords['lat'] = ('ROW', latitude) # <1>
ds.coords['lon'] = ('COL', longitude) # <2>

# Make lat/lon the dimensions themselves so the data can be indexed by them.
dims_to_coords = {'COL': 'lon', 'ROW': 'lat'}
ds = ds.swap_dims(dims_to_coords) # <3>

1. Create coordinates for latitude based on the `ROW` dimension
2. Create coordinates for longitude based on the `COL` dimension
3. Replace the `COL` and `ROW` dimensions with the newly calculated longitude and latitude arrays

## Create the frames

In [None]:
def parse_tflag(tflag):
    """
    Convert a TFLAG pair into a datetime object.

    :param list tflag: two integers; the 1st encodes the date as YYYYDDD
        (year followed by day of year), the 2nd encodes the time as HHMMSS
    :return: the corresponding naive datetime.datetime
    :raises ValueError: if the decoded fields do not form a valid datetime
    """
    yyyyddd = int(tflag[0]) # <1>
    year, day_of_year = divmod(yyyyddd, 1000) # <1>

    # Day 1 is January 1st, hence the "- 1" offset.
    calendar_day = datetime.datetime(year, 1, 1) + datetime.timedelta(days=day_of_year - 1) # <2>

    hhmmss = int(tflag[1]) # <3>
    hours, mmss = divmod(hhmmss, 10000) # <3>
    minutes, seconds = divmod(mmss, 100) # <3>

    # NOTE(review): the year comes from the flag, not from calendar_day, so a
    # day-of-year past the end of the year does not roll over into the next year.
    return datetime.datetime(year, calendar_day.month, calendar_day.day, hours, minutes, seconds) # <4>

1. Obtain year and day of year from tflag[0] (date)
2. Create datetime object representing date
3. Obtain hour, mins, and secs from tflag[1] (time)
4. Create final datetime object

In [None]:
# Rendering options shared by every frame drawn with matplotlib
my_cmap = 'hot' # <1>
my_origin = 'lower' # <1>
my_aspect = 'auto' # <1>
my_norm = "log" # <1>
# Image bounds in data coordinates: [min lon, max lon, min lat, max lat]
my_extent = [longitude.min(), longitude.max(), latitude.min(), latitude.max()] # <1>

# Resolution index used as the last selector when reading `PM25`
data_resolution = 0 # <2>
# Output directory; frame filenames are appended to this string as-is
save_dir = "/usr/sci/scratch_nvme/arleth/dump/idx_frames/parallel/" # <3>
# Timesteps that could not be rendered
issue_files = {} # <4>

def create_frame_catch_issues(tstep):
    '''
    For given integer timestep, get the PM2.5 prediction and save a PNG of it,
    or report the timestep as an 'issue' when the frame cannot be produced.

    :param tstep: The integer timestep we want to visualize.
    :return: None when the frame was written successfully; otherwise the tuple
        (timestamp, data) for the timestep that failed, so the caller can
        record it.
    '''
    data_array_at_time = ds['PM25'].loc[tstep, :, :, data_resolution] # <5>
    # Parsed once, before the try block: both the plot title and the issue
    # report need the timestamp, and a failure to parse it propagates to the
    # caller exactly as it did when the handler re-parsed it.
    t = pd.Timestamp(parse_tflag(ds.TFLAG[tstep][0]))

    my_fig = None
    try: # <6>
        my_fig, my_plt = plt.subplots(figsize=(15, 6), subplot_kw=dict(projection=ccrs.PlateCarree()))
        plot = my_plt.imshow(data_array_at_time, norm=my_norm, extent=my_extent, aspect=my_aspect, origin=my_origin, cmap=my_cmap)
        my_fig.colorbar(plot, location='right', label='ug/m^3')
        my_plt.coastlines()
        my_plt.gridlines(draw_labels=True)
        my_fig.suptitle(f'Ground level concentration of PM2.5 microns and smaller {t}\n')
        my_plt.text(0.5, -0.1, 'IDX Data', ha='center', va='center', transform=my_plt.transAxes)

        # Save through the figure object rather than pyplot's "current figure".
        my_fig.savefig(save_dir + "frames%05d.png" % tstep, dpi=280) # <7>
    except Exception as err: # <8>
        # `Exception` (not a bare except) lets KeyboardInterrupt/SystemExit
        # through; the cause is printed so the failure can be diagnosed.
        print(f"issue! {t} ({type(err).__name__}: {err})") # <8>
        return t, data_array_at_time # <8>
    finally:
        # Release the figure on both the success and the failure path so
        # failed frames do not accumulate open figures in the worker.
        if my_fig is not None:
            plt.close(my_fig)
    return None

1. Set parameters for creating visualization of each timestep with matplotlib.
2. Set the resolution level of the `PM25` data to max
3. Directory in which the frames are saved
4. Dictionary to keep track of files with 'issues'.
5. Get PM2.5 values for given timestep, data resolution, and for all available latitudes and longitudes.
6. Try creating the visualization or catch exceptions accordingly.
7. Save image to file.
8. If an exception is raised, print the timestamp `t` and return it together with the data so the failure can be recorded as an 'issue'.

In [None]:
import multiprocessing  # used below but absent from the notebook's import cell

# Render the frames in parallel, one timestep per task.
with multiprocessing.Pool() as pool: # <1>
    start_time = time.time() # <2>
    issues = pool.map(create_frame_catch_issues, np.arange(0, 30))
    # End the timer and print the elapsed time
    end_time = time.time()
    print(f'Total elapsed time: {end_time - start_time}')

# pool.map returns one entry per timestep: None for a frame that was written,
# or a (timestamp, data) tuple for one that failed. Record the failures in
# `issue_files` so they are available for saving and review afterwards.
for issue in issues:
    if issue is not None:
        issue_time, issue_data = issue
        issue_files[issue_time] = issue_data

1. Create a pool of worker processes and map `create_frame_catch_issues` over the timesteps.
2. Start a timer to measure how long it takes.

In [None]:
# Persist the dictionary of problematic timesteps so it can be inspected later.
issues_path = 'new_idx_issues.pkl'
with open(issues_path, 'wb') as issues_file: # <1>
    pickle.dump(issue_files, issues_file) # <1>

1. Save `issue_files` to disk for later review