# Create .PNG images of all timesteps in IDX firesmoke dataset

## Import necessary libraries

In [1]:
# for numerical work
import numpy as np

# for accessing file system
import os

# for downloading latest firesmoke netCDF
import requests

# for loading netcdf files, for metadata
import xarray as xr
# for connecting OpenVisus framework to xarray
# from https://github.com/sci-visus/openvisuspy
from openvisuspy.xarray_backend import OpenVisusBackendEntrypoint

# Used for processing netCDF time data
import time
import datetime

# Used for indexing via metadata
import pandas as pd

# for plotting
import matplotlib
import matplotlib.pyplot as plt
import cartopy.crs as ccrs

# for exporting the dictionary of issue files at the end of notebook
import pickle

# Point the VISUS_CACHE environment variable at a folder relative to the
# current working directory, so the OpenVisus cache is stored locally
# (the folder name indicates it can be deleted afterwards)
import os
os.environ["VISUS_CACHE"]="./visus_cache_can_be_erased"

# Accessory, used to generate progress bar for running for loops
# from tqdm.notebook import tqdm
# import ipywidgets
# import jupyterlab_widgets
from tqdm import tqdm

from OpenVisus import *

### In this section, we load our data using `xr.open_dataset`.

In [2]:
# URL of the tiny NetCDF file that holds the firesmoke metadata
url = 'https://github.com/sci-visus/NSDF-WIRED/raw/main/data/firesmoke_metadata.nc'

# Download the file using requests.
# The timeout keeps a stalled connection from hanging the notebook forever, and
# raise_for_status() stops an HTTP error page from being written to disk and
# later handed to xarray as if it were a NetCDF file.
response = requests.get(url, timeout=60)
response.raise_for_status()
local_netcdf = 'firesmoke_metadata.nc'
with open(local_netcdf, 'wb') as f:
    f.write(response.content)

# open the downloaded NetCDF with xarray through the OpenVisus backend
ds = xr.open_dataset(local_netcdf, engine=OpenVisusBackendEntrypoint)

ov.LoadDataset(http://atlantis.sci.utah.edu/mod_visus?dataset=UBC_fire_smoke_BSC&cached=1)
PM25
Adding field  PM25 shape  [27357, 381, 1081, 21] dtype  float32 labels  ['time', 'ROW', 'COL', 'resolution'] Max Resolution  20


## Calculate derived metadata using original metadata above to create coordinates
### This is required to allow for indexing of data via metadata

#### Calculate latitude and longitude grid

In [3]:
# Read the grid description from the dataset: origin, cell size and
# number of cells along each axis
xorig, yorig = ds.XORIG, ds.YORIG
xcell, ycell = ds.XCELL, ds.YCELL
ncols, nrows = ds.NCOLS, ds.NROWS

# Coordinate of the last column / row: origin plus (count - 1) cell steps
lon_last = xorig + xcell * (ncols - 1)
lat_last = yorig + ycell * (nrows - 1)

# One evenly spaced coordinate value per column (longitude) and per row (latitude)
longitude = np.linspace(xorig, lon_last, ncols)
latitude = np.linspace(yorig, lat_last, nrows)

#### Using calculated latitude and longitude, create coordinates allowing for indexing data using lat/lon

In [4]:
# Attach the computed latitude / longitude arrays as coordinates on the
# existing ROW / COL dimensions (credit: Aashish Panta)
for coord_name, (dim_name, coord_values) in (
    ('lat', ('ROW', latitude)),
    ('lon', ('COL', longitude)),
):
    ds.coords[coord_name] = (dim_name, coord_values)

# Make lon and lat the dimensions in place of COL and ROW, so the data can be
# indexed by the new coordinates (credit: Aashish Panta)
ds = ds.swap_dims({'COL': 'lon', 'ROW': 'lat'})

## Create the frames

In [5]:
def parse_tflag(tflag):
    """
    Return the tflag as a datetime object

    :param list tflag: a list of two int32, the 1st representing the date as
        YYYYDDD (year followed by day of year) and the 2nd representing the
        time as HHMMSS
    :return: datetime.datetime for the encoded date and time
    :raises ValueError: if the day of year does not exist in the encoded year,
        or if the hour, minute or second field is out of range
    """
    # obtain year and day of year from tflag[0] (date)
    date_code = int(tflag[0])
    year = date_code // 1000  # first 4 digits of tflag[0]
    day_of_year = date_code % 1000  # last 3 digits of tflag[0]

    if day_of_year < 1:
        raise ValueError(f'day of year {day_of_year} is out of range for year {year}')

    # create datetime object representing date
    final_date = datetime.datetime(year, 1, 1) + datetime.timedelta(days=day_of_year - 1)

    # A day of year past the end of the year rolls into the following year.
    # Reject it rather than silently pairing the new month/day with the old year.
    if final_date.year != year:
        raise ValueError(f'day of year {day_of_year} is out of range for year {year}')

    # obtain hour, mins, and secs from tflag[1] (time); named time_code so the
    # `time` module is not shadowed inside this function
    time_code = int(tflag[1])
    hours = time_code // 10000  # first 2 digits of tflag[1]
    minutes = (time_code % 10000) // 100  # 3rd and 4th digits of tflag[1]
    seconds = time_code % 100  # last 2 digits of tflag[1]

    # create final datetime object; replace() raises ValueError on bad time fields
    return final_date.replace(hour=hours, minute=minutes, second=seconds)

In [7]:
# set parameters for indexing data
data_resolution = 0
save_dir = "/usr/sci/scratch_nvme/arleth/dump/idx_frames/serial/"
# number of leading timesteps to render; raise this to cover more of the dataset
num_frames = 30

# make sure the output folder exists up front; without it every save would
# fail and each timestep would just be recorded as an issue
os.makedirs(save_dir, exist_ok=True)

# set parameters for creating visualization of each timestep with matplotlib
my_norm = "log"
my_extent = [np.min(longitude), np.max(longitude), np.min(latitude), np.max(latitude)]
my_aspect = 'auto'
my_origin = 'lower'
my_cmap = 'hot'

# to keep track of timesteps that fail to be visualized into .PNG
issue_files = {}
# why each of those timesteps failed (repr of the exception), same keys as issue_files
issue_reasons = {}

# Start a timer to measure how long the conversion takes
start_time = time.time()
# for the first `num_frames` timesteps create visualization of firesmoke at time
for tstep in tqdm(np.arange(0, num_frames)):
    # get PM25 values and provide 4 values, the colons mean select all lat and lon indices
    data_array_at_time = ds['PM25'].loc[tstep, :, :, data_resolution]

    # create visualization using matplotlib and cartopy geography lines,
    # catch exceptions accordingly
    t = None
    my_fig = None
    try:
        t = pd.Timestamp(parse_tflag(ds.TFLAG[tstep][0]))
        my_fig, my_plt = plt.subplots(figsize=(15, 6), subplot_kw=dict(projection=ccrs.PlateCarree()))
        plot = my_plt.imshow(data_array_at_time, norm=my_norm, extent=my_extent, aspect=my_aspect, origin=my_origin, cmap=my_cmap)
        my_fig.colorbar(plot,location='right', label='ug/m^3')
        my_plt.coastlines()
        my_plt.gridlines(draw_labels=True)
        my_fig.suptitle(f'Ground level concentration of PM2.5 microns and smaller {t}\n')
        # add caption showing this is from IDX dataset
        my_plt.text(0.5, -0.1, 'IDX Data', ha='center', va='center', transform=my_plt.transAxes)

        # save visualization as a .PNG to our folder
        my_fig.savefig(os.path.join(save_dir, "frames%05d.png" % tstep), dpi=280)
    except Exception as exc:
        # Exception (not a bare except) so Ctrl-C / kernel interrupt still stops the loop.
        # Key by timestamp when it was parsed; fall back to the timestep index when
        # parsing the timestamp was itself the failure.
        issue_key = t if t is not None else int(tstep)
        issue_files[issue_key] = data_array_at_time
        issue_reasons[issue_key] = repr(exc)
    finally:
        # close the figure whether or not saving succeeded, so failed frames
        # do not accumulate open figures in memory
        if my_fig is not None:
            plt.close(my_fig)
# End the timer and print the elapsed time
end_time = time.time()
print(f'Total elapsed time: {end_time - start_time}')

  0%|          | 0/30 [00:00<?, ?it/s]

Using Max Resolution:  20
Time: 0, max_resolution: 20, logic_box=(0, 1081, 0, 381), field: PM25


  3%|▎         | 1/30 [00:02<01:24,  2.91s/it]

Using Max Resolution:  20
Time: 1, max_resolution: 20, logic_box=(0, 1081, 0, 381), field: PM25


  7%|▋         | 2/30 [00:05<01:20,  2.87s/it]

Using Max Resolution:  20
Time: 2, max_resolution: 20, logic_box=(0, 1081, 0, 381), field: PM25


 10%|█         | 3/30 [00:08<01:16,  2.83s/it]

Using Max Resolution:  20
Time: 3, max_resolution: 20, logic_box=(0, 1081, 0, 381), field: PM25


 13%|█▎        | 4/30 [00:11<01:12,  2.80s/it]

Using Max Resolution:  20
Time: 4, max_resolution: 20, logic_box=(0, 1081, 0, 381), field: PM25


 17%|█▋        | 5/30 [00:14<01:14,  2.98s/it]

Using Max Resolution:  20
Time: 5, max_resolution: 20, logic_box=(0, 1081, 0, 381), field: PM25


 20%|██        | 6/30 [00:17<01:09,  2.89s/it]

Using Max Resolution:  20
Time: 6, max_resolution: 20, logic_box=(0, 1081, 0, 381), field: PM25


 23%|██▎       | 7/30 [00:20<01:05,  2.84s/it]

Using Max Resolution:  20
Time: 7, max_resolution: 20, logic_box=(0, 1081, 0, 381), field: PM25


 27%|██▋       | 8/30 [00:22<01:01,  2.81s/it]

Using Max Resolution:  20
Time: 8, max_resolution: 20, logic_box=(0, 1081, 0, 381), field: PM25


 30%|███       | 9/30 [00:25<00:58,  2.80s/it]

Using Max Resolution:  20
Time: 9, max_resolution: 20, logic_box=(0, 1081, 0, 381), field: PM25


 33%|███▎      | 10/30 [00:28<00:57,  2.86s/it]

Using Max Resolution:  20
Time: 10, max_resolution: 20, logic_box=(0, 1081, 0, 381), field: PM25


 37%|███▋      | 11/30 [00:31<00:53,  2.82s/it]

Using Max Resolution:  20
Time: 11, max_resolution: 20, logic_box=(0, 1081, 0, 381), field: PM25


 40%|████      | 12/30 [00:34<00:50,  2.81s/it]

Using Max Resolution:  20
Time: 12, max_resolution: 20, logic_box=(0, 1081, 0, 381), field: PM25


 43%|████▎     | 13/30 [00:36<00:47,  2.80s/it]

Using Max Resolution:  20
Time: 13, max_resolution: 20, logic_box=(0, 1081, 0, 381), field: PM25


 47%|████▋     | 14/30 [00:39<00:44,  2.79s/it]

Using Max Resolution:  20
Time: 14, max_resolution: 20, logic_box=(0, 1081, 0, 381), field: PM25


 50%|█████     | 15/30 [00:42<00:42,  2.86s/it]

Using Max Resolution:  20
Time: 15, max_resolution: 20, logic_box=(0, 1081, 0, 381), field: PM25


 53%|█████▎    | 16/30 [00:45<00:39,  2.84s/it]

Using Max Resolution:  20
Time: 16, max_resolution: 20, logic_box=(0, 1081, 0, 381), field: PM25


 57%|█████▋    | 17/30 [00:48<00:36,  2.83s/it]

Using Max Resolution:  20
Time: 17, max_resolution: 20, logic_box=(0, 1081, 0, 381), field: PM25


 60%|██████    | 18/30 [00:51<00:33,  2.82s/it]

Using Max Resolution:  20
Time: 18, max_resolution: 20, logic_box=(0, 1081, 0, 381), field: PM25


 63%|██████▎   | 19/30 [00:53<00:31,  2.82s/it]

Using Max Resolution:  20
Time: 19, max_resolution: 20, logic_box=(0, 1081, 0, 381), field: PM25


 67%|██████▋   | 20/30 [00:56<00:28,  2.81s/it]

Using Max Resolution:  20
Time: 20, max_resolution: 20, logic_box=(0, 1081, 0, 381), field: PM25


 70%|███████   | 21/30 [00:59<00:25,  2.81s/it]

Using Max Resolution:  20
Time: 21, max_resolution: 20, logic_box=(0, 1081, 0, 381), field: PM25


 73%|███████▎  | 22/30 [01:02<00:22,  2.81s/it]

Using Max Resolution:  20
Time: 22, max_resolution: 20, logic_box=(0, 1081, 0, 381), field: PM25


 77%|███████▋  | 23/30 [01:05<00:19,  2.80s/it]

Using Max Resolution:  20
Time: 23, max_resolution: 20, logic_box=(0, 1081, 0, 381), field: PM25


 80%|████████  | 24/30 [01:07<00:16,  2.81s/it]

Using Max Resolution:  20
Time: 24, max_resolution: 20, logic_box=(0, 1081, 0, 381), field: PM25


 83%|████████▎ | 25/30 [01:10<00:14,  2.81s/it]

Using Max Resolution:  20
Time: 25, max_resolution: 20, logic_box=(0, 1081, 0, 381), field: PM25


 87%|████████▋ | 26/30 [01:13<00:11,  2.82s/it]

Using Max Resolution:  20
Time: 26, max_resolution: 20, logic_box=(0, 1081, 0, 381), field: PM25


 90%|█████████ | 27/30 [01:16<00:08,  2.92s/it]

Using Max Resolution:  20
Time: 27, max_resolution: 20, logic_box=(0, 1081, 0, 381), field: PM25


 93%|█████████▎| 28/30 [01:19<00:05,  2.88s/it]

Using Max Resolution:  20
Time: 28, max_resolution: 20, logic_box=(0, 1081, 0, 381), field: PM25


 97%|█████████▋| 29/30 [01:22<00:02,  2.86s/it]

Using Max Resolution:  20
Time: 29, max_resolution: 20, logic_box=(0, 1081, 0, 381), field: PM25


100%|██████████| 30/30 [01:25<00:00,  2.84s/it]

Total elapsed time: 85.07665729522705





In [None]:
# write 'issue_files' to disk so the failed timesteps can be reviewed later
with open('new_idx_issues.pkl', 'wb') as issues_out:
    pickle.dump(issue_files, issues_out)

In [None]:
# read the pickled dictionary back (it was written by this notebook, so it is
# a trusted file) and report how many timesteps it holds
with open('new_idx_issues.pkl', 'rb') as issues_in:
    new_idx_issues = pickle.load(issues_in)
print(f'len of new_idx_issues.pkl = {len(new_idx_issues)}')