# Create .PNG images of all timesteps listed in `idx_calls`, loading the data from netCDF files


## Import necessary libraries

In [1]:
# to parallelize frame creation for timesteps
import multiprocessing

# for numerical work
import numpy as np

# for accessing file system
import os

# for loading netcdf files, for metadata
import xarray as xr

# Used for processing netCDF time data
import time
import datetime

# Used for indexing via metadata
import pandas as pd

# for plotting
import matplotlib
import matplotlib.pyplot as plt
import cartopy.crs as ccrs

# for exporting the dictionary of issue files at the end of notebook
import pickle

# Accessory, used to generate progress bar for running for loops
# from tqdm.notebook import tqdm
# import ipywidgets
# import jupyterlab_widgets
from tqdm import tqdm

## Get path to original firesmoke data

In [2]:
# NOTE: this location is only valid when running from atlantis.sci.
# Root directory of all netCDF firesmoke data; both names point at the same path.
firesmoke_dir = "/usr/sci/cedmav/data/firesmoke"
netcdf_dir = firesmoke_dir

In [3]:
# Dataset metadata (collected by hand): forecast IDs with the first and last
# date available for each, as YYYYMMDD strings.
ids = ["BSC18CA12-01", "BSC00CA12-01", "BSC06CA12-01", "BSC12CA12-01"]
start_dates = ["20210304", "20210304", "20210304", "20210303"]
end_dates = ["20240627", "20240627", "20240627", "20240627"]

# Map each forecast ID to its date range.
id_dates = {
    dataset_id: {"start_date": first_day, "end_date": last_day}
    for dataset_id, first_day, last_day in zip(ids, start_dates, end_dates)
}

### In this section, we load metadata from 381x1041 and 381x1081 files using `xr.open_dataset`.

In [4]:
# Paths to one "small" and one "big" file, both taken from the same forecast ID
file_s = f'{netcdf_dir}/{ids[1]}/dispersion_20210304.nc'
file_b = f'{netcdf_dir}/{ids[1]}/dispersion_20240101.nc'

# Open both files so their metadata can be inspected in the next cells
ds_s, ds_b = (xr.open_dataset(nc_path) for nc_path in (file_s, file_b))

In [5]:
# Show the dataset opened from the "small" file to inspect its metadata
ds_s

In [6]:
# Show the dataset opened from the "big" file to inspect its metadata
ds_b

## Calculate derived metadata using original metadata above to create coordinates
### We'll use this for creating our visualizations

#### Calculate latitude and longitude grid for each set of files' metadata

In [7]:
def _grid_axes(ds):
    """
    Build the longitude and latitude axes described by a dataset's grid attributes
    :param ds: dataset exposing XORIG, YORIG, XCELL, YCELL, NCOLS and NROWS attributes
    :return: tuple (longitudes, latitudes) of evenly spaced 1-D numpy arrays
    """
    lons = np.linspace(ds.XORIG, ds.XORIG + ds.XCELL * (ds.NCOLS - 1), ds.NCOLS)
    lats = np.linspace(ds.YORIG, ds.YORIG + ds.YCELL * (ds.NROWS - 1), ds.NROWS)
    return lons, lats


# One pair of axes per grid size: the "small" file and the "big" file
longitude_s, latitude_s = _grid_axes(ds_s)
longitude_b, latitude_b = _grid_axes(ds_b)

#### The timestamps used in the files may not be intuitive. The following utility functions convert a file's TFLAG entry to a datetime (`parse_tflag`) and return the desired pandas timestamp based on your date and time of interest (`get_timestamp`).

##### When you index the data at a desired time, use `get_timestamp` to get the timestamp you need to index with.

In [8]:
def parse_tflag(tflag):
    """
    Return the tflag as a datetime object
    :param list tflag: a list of two int32, the 1st representing date (YYYYDDD:
        year followed by day of year) and 2nd representing time (HHMMSS)
    :return: datetime.datetime for the encoded date and time
    :raises ValueError: if the time digits do not form a valid hour, minute or second
    """
    # split tflag[0] into year (leading digits) and day of year (last 3 digits)
    year, day_of_year = divmod(int(tflag[0]), 1000)

    # create datetime object representing date; day 1 is January 1st
    final_date = datetime.datetime(year, 1, 1) + datetime.timedelta(days=day_of_year - 1)

    # split tflag[1] into hours, minutes and seconds
    # (named time_of_day so the `time` module is not shadowed)
    time_of_day = int(tflag[1])
    hours, remainder = divmod(time_of_day, 10000)
    minutes, seconds = divmod(remainder, 100)

    # Attach the time to final_date itself so the year stays consistent with the
    # month/day even when the day of year rolls over into the following year.
    return final_date.replace(hour=hours, minute=minutes, second=seconds)

In [9]:
def get_timestamp(year, month, day, hour):
    """
    return a pandas timestamp using the given date-time arguments
    :param int year: year
    :param int month: month
    :param int day: day
    :param int hour: hour
    :return: pandas.Timestamp at the start of the given hour
    :raises ValueError: if the arguments do not form a valid date and hour
    """
    # Build the datetime first so invalid dates are rejected, then wrap it
    return pd.Timestamp(datetime.datetime(year, month, day, hour))

## Load the sequence of calls that says which data slice to render at each time step

In [10]:
# Load idx_calls from file
# NOTE(review): pickle.load can execute arbitrary code while loading; only use a
# pickle file produced by a trusted run of the companion notebook.
with open('idx_calls_v4.pkl', 'rb') as f:
    idx_calls = pickle.load(f)

print(f"there's {len(idx_calls)} frames to make")

# Append each call's position in the sequence to the call itself; the frame
# rendering function reads it back as the frame number (call[4]).
for frame_num, call in enumerate(idx_calls):
    call.append(frame_num)

there's 27357 frames to make


## Create the frames

In [11]:
# directory to save frames
folder = "/usr/sci/scratch_nvme/arleth/dump/netcdf_frames/parallel/"
# Create the directory up front: if it were missing, every save in the worker
# processes would fail and each frame would be reported as an "issue".
os.makedirs(folder, exist_ok=True)

# set parameters for creating visualization of each timestep with matplotlib
my_norm = "log"
# [lon_min, lon_max, lat_min, lat_max] for each of the two grid sizes
my_extent_s = [np.min(longitude_s), np.max(longitude_s), np.min(latitude_s), np.max(latitude_s)]
my_extent_b = [np.min(longitude_b), np.max(longitude_b), np.min(latitude_b), np.max(latitude_b)]
my_aspect = 'auto'
my_origin = 'lower'
my_cmap = 'hot'

# to keep track of timesteps that fail to be rendered into a .PNG
issue_files = {}

In [12]:
def create_frame_from_call(call):
    """
    Render one timestep of PM2.5 data as a .PNG frame saved in `folder`.

    :param list call: instructions for the frame, laid out as
        [curr_id, file_str, date, tstep_idx, frame_num]; the date entry is not
        used here because the timestamp is re-read from the file's TFLAG
    :return: None when the frame was saved, otherwise the tuple
        (timestamp, data_at_time) so the caller can record the failure
    """
    # get instructions from call
    curr_id = call[0]
    curr_file = call[1]
    tstep_index = call[3]
    frame_num = call[4]

    # Open the current file with xarray; the context manager closes it again so
    # worker processes do not accumulate open file handles.
    with xr.open_dataset(f'{netcdf_dir}/{curr_id}/{curr_file}') as ds:
        pm25 = ds['PM25']
        # the last axis of PM25 is used to tell the two grid sizes apart
        n_cols = pm25.shape[3]

        # Select the timestep BEFORE squeezing: squeezing first would also drop
        # the time axis of a single-timestep file and index the wrong axis.
        # This also reads only the requested timestep instead of the whole array.
        data_at_time = np.squeeze(pm25[tstep_index].values)

        # get the timestamp for titling our plot
        t = pd.Timestamp(parse_tflag(ds['TFLAG'].values[tstep_index][0]))

    my_fig = None
    try:
        my_fig, my_plt = plt.subplots(figsize=(15, 6), subplot_kw=dict(projection=ccrs.PlateCarree()))
        # extent is either with the 381x1041 lons/lats or 381x1081 lons/lats
        curr_extent = my_extent_s if n_cols == 1041 else my_extent_b
        plot = my_plt.imshow(data_at_time, norm=my_norm, extent=curr_extent, aspect=my_aspect, origin=my_origin, cmap=my_cmap)
        my_fig.colorbar(plot, location='right', label='ug/m^3')
        my_plt.coastlines()
        my_plt.gridlines(draw_labels=True)
        # add a title with the time information
        my_fig.suptitle(f'Ground Level Concentration of PM2.5 Microns and Smaller\n{t}')

        # add an additional caption for context
        my_plt.text(0.5, -0.1, 'Original NetCDF Data', ha='center', va='center', transform=my_plt.transAxes)

        # save the visualization as a frame
        my_fig.savefig(folder + "frames%05d.png" % frame_num, dpi=280)
    except Exception as err:
        # Report the failure with its cause, and hand the timestep back so it
        # can be added to the issues dictionary by the caller.
        print(f"issue! {t}")
        print(f"    {type(err).__name__}: {err}")
        return t, data_at_time
    finally:
        # Always release the figure, including when plotting or saving failed
        if my_fig is not None:
            plt.close(my_fig)
    return None

In [13]:
 # create frames, capturing issues 
# Render the frames in a pool of worker processes; `issues` receives one entry
# per call, in call order (None for frames that were saved successfully).
with multiprocessing.Pool() as pool:
    # time the rendering itself (pool start-up is not included)
    start_time = time.time()
    print('starting')
    # NOTE(review): only the first 30 calls are rendered here, not the full
    # list; confirm whether this is a leftover trial run.
    issues = pool.map(create_frame_from_call, idx_calls[:30])
    print('done!')
    # report how long the rendering took, in seconds
    end_time = time.time()
    print(f'Total elapsed time: {end_time - start_time}')

starting
done!
Total elapsed time: 5.630750894546509


In [14]:
# Each entry of `issues` is None for a frame that was saved, or a
# (timestamp, data_at_time) tuple for a frame that failed to render.
for issue in issues:
    if issue is not None:
        issue_time, issue_data = issue
        issue_files[issue_time] = issue_data

In [15]:
# Show the collected issues; an empty dict means no frame was recorded as failed
issue_files

{}

In [None]:
# save 'issue_files' to review
with open('new_netcdf_issues.pkl', 'wb') as f:
    pickle.dump(issue_files, f)

In [None]:
# Read the saved issues back as a sanity check and report how many there are
with open('new_netcdf_issues.pkl', 'rb') as issues_in:
    new_netcdf_issues = pickle.load(issues_in)
print(f'len of new_netcdf_issues.pkl = {len(new_netcdf_issues)}')