# Create .PNG images of all timesteps in ECCC PM2.5 dataset

## Import libraries and data

In [1]:
# for numerical work
import numpy as np
import itertools

# for accessing file system
import os

# for loading netcdf files, for metadata
import xarray as xr
# for connecting OpenVisus framework to xarray
# from https://github.com/sci-visus/openvisuspy, 
from openvisuspy.xarray_backend import OpenVisusBackendEntrypoint

# Used for processing netCDF time data
import time
import datetime
import requests
# Used for indexing via metadata
import pandas as pd

# for plotting
import matplotlib
import matplotlib.pyplot as plt
from mpl_toolkits.basemap import Basemap
import cartopy.crs as ccrs
import cartopy.io.img_tiles as cimgt

# for parallelization
import multiprocessing

# Store the OpenVisus cache in the local directory (the folder name signals it can be deleted)
import os
os.environ["VISUS_CACHE"]="./visus_cache_can_be_erased"
# NOTE(review): an empty CURL_CA_BUNDLE is a common workaround that makes `requests`
# skip TLS certificate verification -- confirm this is intentional for this data source.
os.environ['CURL_CA_BUNDLE'] = ''

### Import ECCC 2021 and 2022 Data

In [2]:
# Load the ECCC PM2.5 table into a DataFrame. Later cells read its 'Date//Date',
# 'Latitude//Latitude' and 'Longitude//Longitude' columns, plus one value column
# per hour named '0' through '23'.
# NOTE(review): the path is relative, so the CSV must sit in the working directory.
df_eccc = pd.read_csv('PM25_2021_2022.csv')

In [3]:
# this date has the max PM2.5 value
# all_dates[15744]
# df_eccc.iloc[150714]

In [18]:
# maximum value found in each hourly column ('0' .. '23'), in hour order
maxes = [np.max(df_eccc[str(hour)].values) for hour in range(0, 24)]

### Create set of all hours to query for

In [5]:
# every distinct day string present in the table, sorted
eccc_dates = np.sort(df_eccc['Date//Date'].unique())
# expand each day into its 24 hourly timestamps (day-major, hour-minor order)
all_dates = [
    pd.Timestamp(f'{day} {hour}:00:00')
    for day in eccc_dates
    for hour in range(24)
]

In [6]:
# preview the first four (index, timestamp) pairs
for numbered_date in enumerate(all_dates[:4]):
    print(numbered_date)

(0, Timestamp('2021-01-01 00:00:00'))
(1, Timestamp('2021-01-01 01:00:00'))
(2, Timestamp('2021-01-01 02:00:00'))
(3, Timestamp('2021-01-01 03:00:00'))


### Plot ECCC Smoke Emissions over time

In [7]:
# per-row longitude, latitude and date arrays taken straight from the table
# (note: this rebinds eccc_dates, which earlier held the sorted unique days,
# to the full per-row date column)
eccc_lons, eccc_lats, eccc_dates = (
    df_eccc[column].values
    for column in ('Longitude//Longitude', 'Latitude//Latitude', 'Date//Date')
)

In [25]:
# set parameters for creating visualization of each timestep with matplotlib
# color normalization handed to scatter(); "log" selects a logarithmic color scale
my_norm = "log"
# map bounds as [lon_min, lon_max, lat_min, lat_max], taken from the data's own coordinate range
my_extent = [np.min(eccc_lons), np.max(eccc_lons), np.min(eccc_lats), np.max(eccc_lats)]
# NOTE(review): my_aspect and my_origin are not read by create_frame_catch_issues
# (it passes 'auto' literally and never uses an origin) -- possibly leftovers; confirm.
my_aspect = 'auto'
my_origin = 'lower'
# colormap and color-scale limits handed to scatter() as cmap / vmin / vmax
my_cmap = 'hot'
my_vmin = 1e-1
my_vmax = 1500
# figure size passed to plt.subplots(figsize=...)
fig_w, fig_h = 15, 6
# output folder for the PNG frames; the file name is appended directly to this string.
# NOTE(review): hard-coded absolute path -- presumably must already exist; confirm per machine.
save_dir = "/usr/sci/scratch_nvme/arleth/frames/eccc_frames/"

# google map tile parameters
# style passed to cimgt.GoogleTiles and zoom level passed to add_image()
tile_style = 'satellite'
tile_zoom = 5

def create_frame_catch_issues(frame_date_tuple):
    """Render one hourly PM2.5 map over satellite tiles and save it as a numbered PNG.

    Parameters
    ----------
    frame_date_tuple : tuple
        ``(frame_num, date)`` where ``frame_num`` is the integer used in the
        output file name and ``date`` is the timestamp (day and hour) to plot.

    Returns
    -------
    None
        Nothing is returned. Despite the function's name, errors are not
        caught here; any exception propagates to the caller.

    Notes
    -----
    Reads the module-level ``df_eccc`` table and the plotting settings
    (``my_extent``, ``my_cmap``, ``my_norm``, ``my_vmin``, ``my_vmax``,
    ``fig_w``, ``fig_h``, ``tile_style``, ``tile_zoom``, ``save_dir``).
    The figure is always closed, even when plotting or saving fails, so
    long-lived pool workers do not accumulate open figures.
    """
    # frame number to save PNG as and date to visualize
    frame_num = frame_date_tuple[0]
    date = frame_date_tuple[1]

    # select this day's rows once; the hourly values live in a column named after the hour
    day_rows = df_eccc[df_eccc['Date//Date'] == date.strftime('%Y-%m-%d')]
    curr_vals = day_rows[f'{date.hour}'].values
    curr_lats = day_rows['Latitude//Latitude'].values
    curr_lons = day_rows['Longitude//Longitude'].values

    tiles = cimgt.GoogleTiles(style=tile_style, cache=True)

    # the axes use the tile source's projection; data coordinates are lon/lat (PlateCarree)
    fig, ax = plt.subplots(figsize=(fig_w, fig_h), subplot_kw=dict(projection=tiles.crs))
    try:
        ax.set_extent(my_extent, crs=ccrs.PlateCarree())
        ax.set_aspect('auto')
        ax.gridlines(draw_labels=True)

        ax.add_image(tiles, tile_zoom)
        points = ax.scatter(curr_lons, curr_lats, c=curr_vals, cmap=my_cmap,
                            norm=my_norm, s=5, transform=ccrs.PlateCarree(),
                            vmin=my_vmin, vmax=my_vmax)

        fig.suptitle(f'Ground level concentration of PM2.5 microns and smaller {date}\n')
        # add caption showing this is from ECCC dataset (positioned just below the axes)
        fig.text(0.5, -0.1, 'ECCC Data', ha='center', va='center', transform=ax.transAxes)

        fig.colorbar(points, location='right', label='ug/m^3')
        # save this specific figure (not whichever is "current") as a .PNG in our folder
        fig.savefig(os.path.join(save_dir, "frames%010d.png" % frame_num), dpi=280)
    finally:
        # release the figure whether or not rendering succeeded
        plt.close(fig)

In [26]:
# number of worker processes in the pool
proc_lim = 40
# create frames, capturing issues
with multiprocessing.Pool(processes=proc_lim) as pool:
    # Start a timer to measure how long the conversion takes
    start_time = time.time()
    print('starting')
    # Only two timesteps are rendered here: the first one and index 15744 (an earlier
    # note in this notebook marks that index as the date with the max PM2.5 value).
    # enumerate() supplies the frame number used in each PNG file name.
    # `issues` receives one return value per frame; create_frame_catch_issues has no
    # return statement, so every entry is None and a failing frame raises from map().
    # NOTE(review): the title says "all timesteps" -- presumably enumerate(all_dates)
    # is the full run; confirm intent.
    issues = pool.map(create_frame_catch_issues, enumerate([all_dates[0], all_dates[15744]]))
    print('done!')
    # End the timer and print the elapsed time
    end_time = time.time()
    print(f'Total elapsed time: {end_time - start_time}')

starting
done!
Total elapsed time: 13.636043548583984
