## **Step 1: Importing the libraries**
### Please be sure to have libraries installed

In [1]:
# for numerical work
import numpy as np

# for accessing file system
import os

# for loading netcdf files, for metadata
import xarray as xr
# for connecting OpenVisus framework to xarray
# from https://github.com/sci-visus/openvisuspy, 
from openvisuspy.xarray_backend import OpenVisusBackendEntrypoint

# Used for processing netCDF time data
import time
import datetime
import requests
# Used for indexing via metadata
import pandas as pd

# for plotting
import matplotlib.pyplot as plt
import cartopy.crs as ccrs


# Stores the OpenVisus cache in the local directory
import os
os.environ["VISUS_CACHE"]="./visus_cache_can_be_erased"
# NOTE(review): an empty CURL_CA_BUNDLE is commonly used to switch off TLS certificate
# verification in HTTP clients that honor this variable -- confirm this is intended
os.environ['CURL_CA_BUNDLE'] = ''

## **Step 2: Reading the data & metadata from file**
### In this section, we load our data using `xr.open_dataset`.

In [2]:
# path to tiny NetCDF (remote source and local copy)
url = 'https://github.com/sci-visus/NSDF-WIRED/raw/main/data/firesmoke_metadata.nc'
local_netcdf = 'data/firesmoke_metadata.nc'

# Download the file using requests, but only when no local copy exists yet,
# so a fresh checkout can run this cell and re-runs do not hit the network again
if not os.path.exists(local_netcdf):
    os.makedirs(os.path.dirname(local_netcdf), exist_ok=True)
    response = requests.get(url, timeout=60)
    # fail loudly instead of writing an HTML error page to disk as if it were NetCDF
    response.raise_for_status()
    with open(local_netcdf, 'wb') as f:
        f.write(response.content)

# open tiny netcdf with xarray and OpenVisus backend
ds = xr.open_dataset(local_netcdf, engine=OpenVisusBackendEntrypoint)

ov.LoadDataset(http://atlantis.sci.utah.edu/mod_visus?dataset=UBC_fire_smoke_BSC&cached=1)
PM25
Adding field  PM25 shape  [27357, 381, 1081, 21] dtype  float32 labels  ['time', 'ROW', 'COL', 'resolution'] Max Resolution  20


In [3]:
ds

#### Data Variables Description
| Attribute         | Description                                                                                                           |
|-------------------|-----------------------------------------------------------------------------------------------------------------------|
| PM25              | The concentration of particulate matter (PM2.5) for each time step, layer, row, and column in the spatial grid.       |
| TFLAG             | The date and time of each data point.                                                                                 |
| wrf_arw_init_time | The time at which this prediction's weather forecast was initiated.                                                   |
| resampled         | Whether this timestamp was resampled from a 381x1041 to 381x1081 grid or not.                                         |
| CDATE             | The creation date of the data point, in YYYYDDD format.                                                                  |
| CTIME             | The creation time of the data point, in HHMMSS format.                                                                   |
| WDATE             | The date for which the weather forecast is initiated, in YYYYDDD format.                                              |
| WTIME             | The time for which the weather forecast is initiated, in HHMMSS format.                                               |
| SDATE             | The date for which the smoke forecast is initiated, in YYYYDDD format.                                                |
| STIME             | The time for which the smoke forecast is initiated, in HHMMSS format.                                                 |

## **Step 2.5, Calculate derived metadata using original metadata above to create coordinates**
### This is required to allow for indexing of data via metadata

#### Calculate latitude and longitude grid

In [4]:
# Read the grid description from the dataset attributes
# (presumably origin, cell size and cell counts of the grid -- names suggest so)
xorig, yorig = ds.XORIG, ds.YORIG
xcell, ycell = ds.XCELL, ds.YCELL
ncols, nrows = ds.NCOLS, ds.NROWS

# One evenly spaced value per column / row: start at the origin and
# advance one cell width per step, (count - 1) steps in total
longitude = np.linspace(xorig, xorig + xcell * (ncols - 1), ncols)
latitude = np.linspace(yorig, yorig + ycell * (nrows - 1), nrows)

# Report array sizes and value ranges as a sanity check
print(
    "Size of longitude & latitude arrays:",
    f'np.size(longitude) = {np.size(longitude)}',
    f'np.size(latitude) = {np.size(latitude)}\n',
    "Min & Max of longitude and latitude arrays:",
    f'longitude: min = {np.min(longitude)}, max = {np.max(longitude)}',
    f'latitude: min = {np.min(latitude)}, max = {np.max(latitude)}',
    sep="\n",
)

Size of longitude & latitude arrays:
np.size(longitude) = 1081
np.size(latitude) = 381

Min & Max of longitude and latitude arrays:
longitude: min = -160.0, max = -51.99999839067459
latitude: min = 32.0, max = 70.00000056624413


#### Using calculated latitude and longitude, create coordinates allowing for indexing data using lat/lon

In [5]:
# Create coordinates for lat and lon (credit: Aashish Panta)
# Each new coordinate is attached to the existing ROW / COL dimension it describes.
ds.coords['lat'] = ('ROW', latitude)
ds.coords['lon'] = ('COL', longitude)

# Replace col and row dimensions with newly calculated lon and lat arrays (credit: Aashish Panta)
# NOTE(review): ds is rebound here; presumably re-running this cell without re-running
# the load cell fails because ROW / COL are no longer dimensions -- confirm
ds = ds.swap_dims({'COL': 'lon', 'ROW': 'lat'})

#### Create coordinates allowing for indexing data using timestamp

##### First, convert tflags to timestamps that are compatible with xarray

In [6]:
def parse_tflag(tflag):
    """
    Return the tflag as a datetime object
    :param list tflag: a list of two int32, the 1st representing date (YYYYDDD)
        and the 2nd representing time (HHMMSS)
    :return: datetime.datetime for the given date and time
    :raises ValueError: if the hour, minute or second field is out of range
    """
    # split tflag[0] (date) into its first 4 digits (year) and last 3 digits (day of year)
    year, day_of_year = divmod(int(tflag[0]), 1000)

    # split tflag[1] (time) into hours (first 2 digits), minutes (3rd and 4th) and seconds (last 2)
    hours, remainder = divmod(int(tflag[1]), 10000)
    minutes, seconds = divmod(remainder, 100)

    # Jan 1 plus (day_of_year - 1) days gives the calendar date
    calendar_date = datetime.datetime(year, 1, 1) + datetime.timedelta(days=day_of_year - 1)

    # Set the time of day on that same date. Building the result from calendar_date
    # (rather than from `year` plus calendar_date's month/day) keeps the year correct
    # when day_of_year runs past the end of the year.
    return calendar_date.replace(hour=hours, minute=minutes, second=seconds)

##### Return an array of the tflags as pandas timestamps

In [7]:
# raw TFLAG values, one entry per timestep
tflag_values = ds['TFLAG'].values

# parse element 0 of every entry (the pair handed to parse_tflag)
# and wrap the resulting datetime in a pandas timestamp
timestamps = [pd.Timestamp(parse_tflag(entry[0])) for entry in tflag_values]

# peek at the first 3 timestamps
timestamps[:3]

[Timestamp('2021-03-04 00:00:00'),
 Timestamp('2021-03-04 01:00:00'),
 Timestamp('2021-03-04 02:00:00')]

In [8]:
# set coordinates to each timestep with these pandas timestamps
# (this is what lets a later cell select along 'time' with .loc and a pd.Timestamp)
ds.coords['time'] = ('time', timestamps)

#### The timestamps may not be intuitive. The following utility function returns the desired pandas timestamp based on your date and time of interest. 

##### When you index the data at a desired time, use this function to get the timestamp you need to index.

In [9]:
def get_timestamp(year, month, day, hour):
    """
    return a pandas timestamp using the given date-time arguments
    :param int year: year
    :param int month: month
    :param int day: day
    :param int hour: hour
    :return: pd.Timestamp for the given date at the given hour (minutes and seconds are 0)
    :raises ValueError: if the arguments do not form a valid date-time
    """
    # Validate and combine the components via datetime, then hand the result to pandas
    return pd.Timestamp(datetime.datetime(year, month, day, hour))

## **Step 3:  Select a `data_slice`**
### This section shows you how to load the data you want. 

#### You can index the data using indices, timestamps\*, latitude & longitude, and by desired resolution\*\*.
\*Not setting any time means the first timestep available is selected.  
\*\*Not setting quality means full data resolution is selected.

---

##### In this case, let's get all available firesmoke data for March 5, 2021 00:00:00 and the time and date for which its weather and smoke forecast were initiated.

In [10]:
# Preview the dataset with automatically chosen chunk sizes.
# The result is only displayed as this cell's output; it is not assigned back to ds.
ds.chunk("auto")

Unnamed: 0,Array,Chunk
Bytes,881.45 GiB,125.04 MiB
Shape,"(27357, 381, 1081, 21)","(116, 116, 116, 21)"
Dask graph,9440 chunks in 2 graph layers,9440 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 881.45 GiB 125.04 MiB Shape (27357, 381, 1081, 21) (116, 116, 116, 21) Dask graph 9440 chunks in 2 graph layers Data type float32 numpy.ndarray",27357  1  21  1081  381,

Unnamed: 0,Array,Chunk
Bytes,881.45 GiB,125.04 MiB
Shape,"(27357, 381, 1081, 21)","(116, 116, 116, 21)"
Dask graph,9440 chunks in 2 graph layers,9440 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,213.73 kiB,213.73 kiB
Shape,"(27357, 1, 2)","(27357, 1, 2)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,int32 numpy.ndarray,int32 numpy.ndarray
"Array Chunk Bytes 213.73 kiB 213.73 kiB Shape (27357, 1, 2) (27357, 1, 2) Dask graph 1 chunks in 2 graph layers Data type int32 numpy.ndarray",2  1  27357,

Unnamed: 0,Array,Chunk
Bytes,213.73 kiB,213.73 kiB
Shape,"(27357, 1, 2)","(27357, 1, 2)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,int32 numpy.ndarray,int32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,213.73 kiB,213.73 kiB
Shape,"(27357, 1, 2)","(27357, 1, 2)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,int32 numpy.ndarray,int32 numpy.ndarray
"Array Chunk Bytes 213.73 kiB 213.73 kiB Shape (27357, 1, 2) (27357, 1, 2) Dask graph 1 chunks in 2 graph layers Data type int32 numpy.ndarray",2  1  27357,

Unnamed: 0,Array,Chunk
Bytes,213.73 kiB,213.73 kiB
Shape,"(27357, 1, 2)","(27357, 1, 2)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,int32 numpy.ndarray,int32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,26.72 kiB,26.72 kiB
Shape,"(27357,)","(27357,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,bool numpy.ndarray,bool numpy.ndarray
"Array Chunk Bytes 26.72 kiB 26.72 kiB Shape (27357,) (27357,) Dask graph 1 chunks in 2 graph layers Data type bool numpy.ndarray",27357  1,

Unnamed: 0,Array,Chunk
Bytes,26.72 kiB,26.72 kiB
Shape,"(27357,)","(27357,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,bool numpy.ndarray,bool numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,106.86 kiB,106.86 kiB
Shape,"(27357,)","(27357,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,int32 numpy.ndarray,int32 numpy.ndarray
"Array Chunk Bytes 106.86 kiB 106.86 kiB Shape (27357,) (27357,) Dask graph 1 chunks in 2 graph layers Data type int32 numpy.ndarray",27357  1,

Unnamed: 0,Array,Chunk
Bytes,106.86 kiB,106.86 kiB
Shape,"(27357,)","(27357,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,int32 numpy.ndarray,int32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,106.86 kiB,106.86 kiB
Shape,"(27357,)","(27357,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,int32 numpy.ndarray,int32 numpy.ndarray
"Array Chunk Bytes 106.86 kiB 106.86 kiB Shape (27357,) (27357,) Dask graph 1 chunks in 2 graph layers Data type int32 numpy.ndarray",27357  1,

Unnamed: 0,Array,Chunk
Bytes,106.86 kiB,106.86 kiB
Shape,"(27357,)","(27357,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,int32 numpy.ndarray,int32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,106.86 kiB,106.86 kiB
Shape,"(27357,)","(27357,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,int32 numpy.ndarray,int32 numpy.ndarray
"Array Chunk Bytes 106.86 kiB 106.86 kiB Shape (27357,) (27357,) Dask graph 1 chunks in 2 graph layers Data type int32 numpy.ndarray",27357  1,

Unnamed: 0,Array,Chunk
Bytes,106.86 kiB,106.86 kiB
Shape,"(27357,)","(27357,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,int32 numpy.ndarray,int32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,106.86 kiB,106.86 kiB
Shape,"(27357,)","(27357,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,int32 numpy.ndarray,int32 numpy.ndarray
"Array Chunk Bytes 106.86 kiB 106.86 kiB Shape (27357,) (27357,) Dask graph 1 chunks in 2 graph layers Data type int32 numpy.ndarray",27357  1,

Unnamed: 0,Array,Chunk
Bytes,106.86 kiB,106.86 kiB
Shape,"(27357,)","(27357,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,int32 numpy.ndarray,int32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,106.86 kiB,106.86 kiB
Shape,"(27357,)","(27357,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,int32 numpy.ndarray,int32 numpy.ndarray
"Array Chunk Bytes 106.86 kiB 106.86 kiB Shape (27357,) (27357,) Dask graph 1 chunks in 2 graph layers Data type int32 numpy.ndarray",27357  1,

Unnamed: 0,Array,Chunk
Bytes,106.86 kiB,106.86 kiB
Shape,"(27357,)","(27357,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,int32 numpy.ndarray,int32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,106.86 kiB,106.86 kiB
Shape,"(27357,)","(27357,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,int32 numpy.ndarray,int32 numpy.ndarray
"Array Chunk Bytes 106.86 kiB 106.86 kiB Shape (27357,) (27357,) Dask graph 1 chunks in 2 graph layers Data type int32 numpy.ndarray",27357  1,

Unnamed: 0,Array,Chunk
Bytes,106.86 kiB,106.86 kiB
Shape,"(27357,)","(27357,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,int32 numpy.ndarray,int32 numpy.ndarray


In [1]:
ds['PM25'][0]

NameError: name 'get_pd_timestamp' is not defined

In [11]:
# select timestamp
# NOTE(review): my_timestamp1 is not used by any of the visible cells; only my_timestamp2 is
my_timestamp1 = get_timestamp(2023, 9, 26, 5)
my_timestamp2 = get_timestamp(2021, 3, 5, 0)

# select resolution, let's use full resolution since data isn't too big at one time slice
# data resolution can be -19 for lowest res and 0 for highest res
data_resolution = 0

# get PM25 values and provide 4 values, the colons mean select all lat and lon indices
# index order is (time, lat, lon, resolution)
data_array_at_time = ds['PM25'].loc[my_timestamp2, :, :, data_resolution]

In [12]:
data_array_at_time

In [18]:
# data_stacked_index = data_array_at_time.stack(lat_lon=["lat", "lon"])
data_stakced_index = ds['PM25'][0].stack(lat_lon=["lat", "lon"])
lats = data_stacked_index.lat.values
lons = data_stacked_index.lon.values
vals = data_stacked_index.values

Using Max Resolution:  20
Time: 0, max_resolution: 20, logic_box=(0, 1081, 0, 381), field: PM25


In [19]:
data_stakced_index

In [25]:
data_stacked_index.lat_lon.values[0]

(32.0, -160.0)

## Try contourf

In [None]:
# # Let's use matplotlib's imshow, since our data is on a grid
# # ref: https://matplotlib.org/stable/api/_as_gen/matplotlib.pyplot.imshow.html

# # Initialize a figure and plot, so we can customize figure and plot of data
# # ref: https://matplotlib.org/stable/api/_as_gen/matplotlib.pyplot.subplots.html
# my_fig, my_plt = plt.subplots(figsize=(15, 6), subplot_kw=dict(projection=ccrs.PlateCarree()))

# # Let's set some parameters to get the visualization we want
# # ref: https://matplotlib.org/stable/api/_as_gen/matplotlib.pyplot.imshow.html

# # color PM25 values on a log scale, since values are small
# my_norm = "log" 
# # ***this will number our x and y axes based on the longitude latitude range***
# my_extent = [np.min(lons), np.max(lons), np.min(lats), np.max(lats)]
# # ensure the aspect ratio of our plot fits all data, matplotlib can do this automatically
# my_aspect = 'auto'
# # tell matplotlib, our origin is the lower-left corner
# my_origin = 'lower'
# # select a colormap for our plot and the color bar on the right
# my_cmap = 'Oranges'

# # create our plot using imshow
# plot = plt.contourf(lons, lats, vals, levels=10, norm=my_norm, cmap=my_cmap, extend='both')

# # draw coastlines
# my_plt.coastlines()

# # draw latitude longitude lines
# # ref: https://scitools.org.uk/cartopy/docs/latest/gallery/gridlines_and_labels/gridliner.html
# my_plt.gridlines(draw_labels=True)

# # add a colorbar to our figure, based on the plot we just made above
# my_fig.colorbar(plot,location='right', label='ug/m^3')

# # # Add metadata as text annotations
# # metadata_text = (
# #     f'resampled: {resampled.values}\n'
# #     f'SDATE: {sdate.values}\n'
# #     f'STIME: {stime.values}\n'
# #     f'WDATE: {wdate.values}\n'
# #     f'WTIME: {wtime.values}'
# # )

# # # Place metadata text on the plot
# # my_plt.text(0.02, 0.02, metadata_text, transform=my_plt.transAxes,
# #             fontsize=12, verticalalignment='bottom', bbox=dict(facecolor='white', alpha=0.8))

# # # Set x and y axis labels on our ax
# # my_fig.supxlabel('Longitude')
# # my_fig.supylabel('Latitude')

# # # Set title of our figure
# # my_fig.suptitle('Ground level concentration of PM2.5 microns and smaller')

# # # Set title of our plot as the timestamp of our data
# # my_plt.set_title(f'{my_timestamp}')

# # Show the resulting visualization
# plt.show()

## Try ipyopenlayers

In [None]:
from ipyopenlayers import Map, HeatmapLayer,RasterTileLayer

zoom_start = 3
map_start = [-106, 51] # start in canada
radius = 1
blur = 2
# Create a map centered at map_start with zoom level zoom_start
m = Map(center=map_start, zoom=zoom_start)

# Add a raster tile layer to the map
layer=RasterTileLayer()
m.add_layer(layer)

# Define data points for the heatmap: [latitude, longitude, weight]
# NOTE(review): map_start above looks lon-first while these points are lat-first --
# confirm the ordering HeatmapLayer expects
data_points = list(zip(lats, lons, vals))

# Create a HeatmapLayer with specified blur, radius, and data points
heatmap_layer = HeatmapLayer(
    points=data_points,
    blur=blur,
    radius=radius
)

# Add the HeatmapLayer to the map
m.add_layer(heatmap_layer)

# Display the map
m