# Create PNG images of every timestep listed in `idx_calls`, loading the data from netCDF files


## Import necessary libraries

In [None]:
import numpy as np # <1>
import os # <2>
import xarray as xr # <3>
import time # <4>
import datetime # <4>
import pandas as pd # <5>
import matplotlib # <6>
import matplotlib.pyplot as plt # <6>
import cartopy.crs as ccrs # <6>
import pickle # <7>
from tqdm import tqdm # <8>

1. For numerical work
2. For accessing file system
3. For loading NetCDF files, for metadata
4. Used for processing netCDF time data
5. Used for indexing via metadata
6. For plotting
7. For importing `idx_calls_v4.pkl` and for exporting the dictionary of issue files at the end of the notebook
8. Accessory, used to generate progress bar for running for loops

## Get path to original firesmoke data

In [None]:
# Root directory of the firesmoke netCDF data. The cells below build paths of
# the form `<netcdf_dir>/<forecast id>/<file name>` from it.
netcdf_dir = "/usr/sci/cedmav/data/firesmoke"

In [None]:
# Three parallel lists: position i of each list describes the same forecast
# dataset (its ID and the first/last date of its record).
ids = ["BSC18CA12-01", "BSC00CA12-01", "BSC06CA12-01", "BSC12CA12-01"] # <1>
start_dates = ["20210304", "20210304", "20210304", "20210303"] # <1>
end_dates = ["20240627", "20240627", "20240627", "20240627"] # <1>

# Pair the lists positionally with zip instead of indexing by range(len(...)).
# Result: {forecast id: {"start_date": ..., "end_date": ...}}, in `ids` order.
id_dates = { # <2>
    forecast_id: {"start_date": first_day, "end_date": last_day}
    for forecast_id, first_day, last_day in zip(ids, start_dates, end_dates)
}

1. Date ranges for each smoke forecast dataset.
2. Create a dictionary mapping each dataset ID to its start and end dates, using the information above

### In this section, we load metadata from 381x1041 and 381x1081 files using `xr.open_dataset`.

In [None]:
# One representative file per grid size, both taken from the same forecast ID
# (ids[1]) but from different dates. `_s` = small grid, `_b` = big grid; the
# plotting loop further down pairs 1041-column data with the `_s` extent.
file_s = f'{netcdf_dir}/{ids[1]}/dispersion_20210304.nc' # <1>
file_b = f'{netcdf_dir}/{ids[1]}/dispersion_20240101.nc' # <1>

# These datasets are only used for their grid attributes (XORIG, YORIG, XCELL,
# YCELL, NCOLS, NROWS) when the longitude/latitude axes are computed below.
ds_s = xr.open_dataset(file_s) # <2>
ds_b = xr.open_dataset(file_b) # <2>

1. Path to small and big files
2. Open metadata of each file

## Calculate derived metadata using original metadata above to create coordinates
### We'll use this for creating our visualizations

#### Calculate latitude and longitude grid for each set of files' metadata

In [None]:
def _grid_axis(origin, cell_size, n_cells):
    """
    Return `n_cells` evenly spaced coordinates that start at `origin` and
    advance by `cell_size` per cell.
    """
    return np.linspace(origin, origin + cell_size * (n_cells - 1), n_cells)


# Axes for the small-grid files, built from that dataset's grid attributes.
longitude_s = _grid_axis(ds_s.XORIG, ds_s.XCELL, ds_s.NCOLS)
latitude_s = _grid_axis(ds_s.YORIG, ds_s.YCELL, ds_s.NROWS)

# Same construction for the big-grid files.
longitude_b = _grid_axis(ds_b.XORIG, ds_b.XCELL, ds_b.NCOLS)
latitude_b = _grid_axis(ds_b.YORIG, ds_b.YCELL, ds_b.NROWS)

#### The timestamps used in the files may not be intuitive. The following utility function returns the desired pandas timestamp based on your date and time of interest. 

##### When you index the data at a desired time, use this function to get the timestamp you need to index.

In [None]:
def parse_tflag(tflag):
    """
    Return the tflag as a datetime object

    The date part is decoded as YYYYDDD (year followed by day of year) and the
    time part as HHMMSS.

    :param list tflag: a list of two int32, the 1st representing date and 2nd representing time
    :return: naive ``datetime.datetime`` for the decoded date and time
    :raises ValueError: if the decoded fields do not form a valid date/time
        (for example an hour of 24 or more)
    """
    date = int(tflag[0]) # <1>
    year, day_of_year = divmod(date, 1000) # first 4 digits / last 3 digits of tflag[0] # <1>

    final_date = datetime.datetime(year, 1, 1) + datetime.timedelta(days=day_of_year - 1) # <2>

    hhmmss = int(tflag[1]) # <3>
    hours, remainder = divmod(hhmmss, 10000) # first 2 digits of tflag[1] # <3>
    minutes, seconds = divmod(remainder, 100) # 3rd-4th and last 2 digits of tflag[1] # <3>

    # Build on final_date rather than on the raw `year`: when the day of year
    # rolls the date into a different calendar year, final_date already holds
    # the right year, whereas recombining `year` with final_date's month/day
    # would silently return a date in the wrong year.
    full_datetime = final_date.replace(hour=hours, minute=minutes, second=seconds) # <4>
    return full_datetime

1. Obtain year and day of year from tflag[0] (date)
2. Create datetime object representing date
3. Obtain hour, mins, and secs from tflag[1] (time)
4. Create final datetime object

In [None]:
def get_timestamp(year, month, day, hour):
    """
    Build the pandas timestamp for a calendar date and hour.

    The matching TFLAG pair (YYYYDDD, HHMMSS) is also worked out along the
    way to show how the same instant is encoded; it is not returned or used.

    :param int year: year
    :param int month: month
    :param int day: day
    :param int hour: hour
    :return: ``pd.Timestamp`` for the given year, month, day and hour
    """
    moment = datetime.datetime(year, month, day, hour) # <1>

    fields = moment.timetuple() # <2>
    yyyy, ordinal_day = fields.tm_year, fields.tm_yday # <2>
    hh, mm, ss = fields.tm_hour, fields.tm_min, fields.tm_sec # <2>

    tflag0 = 1000 * yyyy + ordinal_day # <3>
    tflag1 = 10000 * hh + 100 * mm + ss # <3>

    return pd.Timestamp(moment) # <4>

1. Convert year, month, day, and hour to a datetime object
2. Extract components from the datetime object
3. Compute tflag[0] and tflag[1]
4. Return the Pandas Timestamp object

## Import the sequence of data slices: which file and time step to load for each frame

In [None]:
# Each entry of idx_calls is read positionally by the frame loop below:
# [0] forecast ID (sub-directory), [1] netCDF file name, [2] date,
# [3] index of the timestep inside that file.
# NOTE(review): unpickling runs code embedded in the file; only load a pickle
# that you produced yourself.
with open('idx_calls_v4.pkl', 'rb') as f: # <1>
    idx_calls = pickle.load(f) # <1>

1. Load idx_calls from file

## Create the video frames

In [None]:
folder = "/usr/sci/scratch_nvme/arleth/dump/netcdf_frames" # <1>

my_norm = "log" # <2>
my_extent_s = [np.min(longitude_s), np.max(longitude_s), np.min(latitude_s), np.max(latitude_s)]# <2>
my_extent_b = [np.min(longitude_b), np.max(longitude_b), np.min(latitude_b), np.max(latitude_b)]# <2>
my_aspect = 'auto'# <2>
my_origin = 'lower'# <2>
my_cmap = 'hot'# <2>

issue_files = {} # <3>

frame_num = 0 # <4>

for call in tqdm(idx_calls): # <5>
    curr_id = call[0] # <6>
    curr_file = call[1] # <6>
    curr_date = call[2] # <6>
    tstep_index = call[3] # <6>

    # Open the file in a `with` block so it is closed again on every iteration
    # instead of accumulating open datasets over the whole run.
    with xr.open_dataset(f'{netcdf_dir}/{curr_id}/{curr_file}') as ds: # <7>
        pm25 = ds['PM25'] # <8>
        pm25_shape = pm25.shape

        # Pick the timestep first and squeeze afterwards: only this timestep is
        # read into memory, and a time axis of length 1 cannot be squeezed away
        # before it is indexed.
        data_at_time = np.squeeze(pm25[tstep_index].values) # <9>

        t = pd.Timestamp(parse_tflag(ds['TFLAG'].values[tstep_index][0])) # <10>

    my_fig = None
    try: # <11>
        my_fig, my_plt = plt.subplots(figsize=(15, 6), subplot_kw=dict(projection=ccrs.PlateCarree()))
        curr_extent = my_extent_s if pm25_shape[3] == 1041 else my_extent_b # <12>
        plot = my_plt.imshow(data_at_time, norm=my_norm, extent=curr_extent, aspect=my_aspect, origin=my_origin, cmap=my_cmap)
        my_fig.colorbar(plot,location='right', label='ug/m^3')
        my_plt.coastlines()
        my_plt.gridlines(draw_labels=True)
        my_fig.suptitle(f'Ground Level Concentration of PM2.5 Microns and Smaller\n{t}') # <13>

        my_plt.text(0.5, -0.1, 'Original NetCDF Data', ha='center', va='center', transform=my_plt.transAxes) # <14>

        my_fig.savefig(folder + "/frames%05d.png" % frame_num, dpi=280) # <15>
    except Exception as err: # <17>
        # `except Exception` (not a bare `except`) so that Ctrl-C / kernel
        # interrupts still stop the loop instead of being logged as an issue.
        print(f"issue! {t}: {err!r}") # <17>
        issue_files[t] = data_at_time # <17>
        continue # <17>
    finally:
        # Runs on success and on failure, so a figure is never left open when
        # plotting or saving raises.
        if my_fig is not None:
            plt.close(my_fig) # <16>
    # NOTE(review): the `continue` above skips this increment, so a failed
    # timestep does not consume a frame number and the saved files stay
    # consecutively numbered. Confirm this is the numbering the companion idx
    # frames are expected to line up with.
    frame_num = frame_num + 1 # <18>

1. Directory in which the frames are saved
2. Set parameters for creating visualization of each timestep with matplotlib.
3. Dictionary to keep track of files with 'issues'.
4. To track what frame we're on in the following loop.
5. For all timesteps create visualization of firesmoke at time.
6. Get instructions from call.
7. Open the current file with xarray.
8. Get the PM25 values, squeeze out empty axis.
9. Get PM2.5 values at tstep_index and visualize them.
10. Get the timestamp for titling our plot by parsing the `TFLAG` entry at `tstep_index`.
11. Catch exceptions accordingly.
12. Extent is either with the 381x1041 lons/lats or 381x1081 lons/lats.
13. Add a title with the time information.
14. Add an additional caption for context.
15. Save the visualization as a frame.
16. Close the figure after saving.
17. Print exception if one is found and save issue in issue dictionary using timestamp `t` as key.
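18. Advance the frame counter after a frame has been saved. Note that the exception handler ends with `continue`, so a failed timestep does not advance the counter and the frame numbers stay consecutive; keep this in mind when aligning with the idx script.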

In [None]:
# Persist the timesteps that could not be plotted so they can be inspected
# later: keys are the pandas timestamps, values the PM25 arrays for those times.
with open('new_netcdf_issues.pkl', 'wb') as f: # <1>
    pickle.dump(issue_files, f) # <1>

1. Save 'issue_files' to review