This code will generate the feature vector, using sub grids to calculate the overlap

In [1]:
import pandas as pd
#pd.set_option('display.max_rows', None)
import geopandas as gpd
import matplotlib.pyplot as plt
import os
import cartopy.crs as ccrs
import cartopy
import numpy as np
import netCDF4 as nc
np.set_printoptions(threshold=100000)
from shapely.geometry import Polygon, Point, MultiPoint
from shapely.ops import cascaded_union
from datetime import datetime, timedelta
import time
import warnings
warnings.filterwarnings('ignore')
from joblib import Parallel, delayed

##  Step 0. Throw out fires that have 0 viirs detects on any day. (fires are already filtered to be in the right lat/lon range

In [2]:
# load in the polygons and attributes
fire_polygons = gpd.read_file('fire_polygons_only.geojson')
fire_attributes = pd.read_csv('fire_polygons_attributes.csv')
incidents = pd.read_csv('unique_fires.csv')
print(len(fire_polygons), len(fire_attributes), len(incidents))

#throw out fires that have 0 days with detects
daily_drop_inds = []
incident_drop_inds = []
for item in incidents['Incident Number']:
    pts = fire_attributes['Number of VIIRS points'].values[fire_attributes['Incident Number']==item]
    if len(pts[pts==0])==len(pts):
        #print(item, 'oops all zeros')
        daily_drop_inds = np.append(daily_drop_inds, np.where(fire_attributes['Incident Number']==item)[0])
        incident_drop_inds = np.append(incident_drop_inds, np.where(incidents['Incident Number']==item)[0])

daily_drop_inds = [int(p) for p in daily_drop_inds]
#print(daily_drop_inds)

fire_attributes = fire_attributes.drop(labels= daily_drop_inds)
fire_attributes = fire_attributes.drop(labels='Unnamed: 0', axis=1)
fire_polygons = fire_polygons.drop(labels = daily_drop_inds)
incidents = incidents.drop(labels = incident_drop_inds)
print(len(fire_polygons), len(fire_attributes), len(incidents))

fire_attributes

22280 22280 1354
19137 19137 1036


Unnamed: 0,Incident Number,Fire Name,Current Day,Lat Fire,Lon Fire,Number of VIIRS points
5,10662684.0,PAINTED WAGON,2019-04-08,33.786944,-112.753333,3.0
6,10662684.0,PAINTED WAGON,2019-04-09,33.786944,-112.753333,0.0
7,10662684.0,PAINTED WAGON,2019-04-10,33.786944,-112.753333,0.0
8,10663171.0,LONE MOUNTAIN,2019-05-26,33.808056,-105.738611,0.0
9,10663171.0,LONE MOUNTAIN,2019-05-27,33.808056,-105.738611,0.0
...,...,...,...,...,...,...
22275,11980587.0,CREEK 5,2020-12-25,33.402646,-117.266795,16.0
22276,11980587.0,CREEK 5,2020-12-26,33.402646,-117.266795,0.0
22277,11980587.0,CREEK 5,2020-12-27,33.402646,-117.266795,0.0
22278,11980587.0,CREEK 5,2020-12-28,33.402646,-117.266795,0.0


## 1. Figure out the start and end date we will use for each fire

In [None]:
# first check what fraction of the fire lifetimes are 0 days
for item in incidents['Incident Number']:
    pts = fire_attributes['Number of VIIRS points'].values[fire_attributes['Incident Number']==item]
    #print(len(np.where(pts==0)[0]))
    print('The fire lifetime is ', len(np.where(pts!=0)[0])*100/len(pts), '% nonzero detections')

In [3]:
keep_days = []
for item in incidents['Incident Number']:
    pts = fire_attributes['Number of VIIRS points'].values[fire_attributes['Incident Number']==item]
    dys = fire_attributes['Current Day'].values[fire_attributes['Incident Number']==item]
    nonzero = np.where(pts!=0)[0]
    first_day = dys[nonzero[0]]
    last_day = dys[nonzero[len(nonzero)-1]]
    keep_days = np.append(keep_days, np.where((fire_attributes['Incident Number']==item)&(fire_attributes['Current Day']>=first_day)&(fire_attributes['Current Day']<=last_day))[0])

keep_days = [int(p) for p in keep_days]
print(len(keep_days))
fire_attributes = fire_attributes.iloc[keep_days, :]
fire_polygons = fire_polygons.iloc[keep_days, :]
fire_attributes = fire_attributes.reset_index(drop=True) #reset the indices beecause we dropped some
fire_polygons = fire_polygons.reset_index(drop=True) #reset the indices beecause we dropped some

fire_polygons

8161


Unnamed: 0,geometry
0,"POLYGON ((-112.75455 33.78632, -112.76012 33.7..."
1,"POLYGON ((-105.73275 33.80235, -105.73861 33.8..."
2,"POLYGON ((-105.73275 33.80235, -105.73861 33.8..."
3,"POLYGON ((-105.73275 33.80235, -105.73861 33.8..."
4,"POLYGON ((-105.73275 33.80235, -105.73861 33.8..."
...,...
8156,"POLYGON ((-112.11669 34.11898, -112.12564 34.1..."
8157,"POLYGON ((-117.03702 33.90632, -117.04484 33.9..."
8158,"POLYGON ((-117.27595 33.37881, -117.27495 33.3..."
8159,"POLYGON ((-117.29549 33.36558, -117.29713 33.4..."


## 2. Loop through the days and get the raw features (temp at different pressure levels, wind, pressure, precip, terrain info) and the labels (QFED PM2.5 from the next day)

In [6]:
# THIS WAY IS PRETTY SLOW

#pre-allocate the data frame
num_days = len(fire_attributes)
fire_features = gpd.GeoDataFrame({'Incident Number': np.nan*np.zeros(num_days), 
                          'Fire Name': np.nan*np.zeros(num_days), 
                         'Current Day': np.nan*np.zeros(num_days),
                        'Lat Fire': np.nan*np.zeros(num_days), 
                     'Lon Fire': np.nan*np.zeros(num_days), 
                         'Number of VIIRS points': np.nan*np.zeros(num_days),
                        'TLML': np.nan*np.zeros(num_days), 
                        'QLML': np.nan*np.zeros(num_days),
                                  'SPEEDLML': np.nan*np.zeros(num_days),
                                  'PS': np.nan*np.zeros(num_days),
            'geometry': np.nan*np.zeros(num_days)}) # pre-allocate the dataframe

fire_features['Incident Number'] = fire_attributes['Incident Number'].values
fire_features['Fire Name'] = fire_attributes['Fire Name'].values
fire_features['Current Day'] = fire_attributes['Current Day'].values
fire_features['Lat Fire'] = fire_attributes['Lat Fire'].values
fire_features['Lon Fire'] = fire_attributes['Lon Fire'].values
fire_features['Number of VIIRS points'] = fire_attributes['Number of VIIRS points'].values
fire_features['geometry'] = fire_polygons['geometry'].values

varis = {'WESTUS_MERRA2_400.inst1_2d_lfo_Nx.':['TLML', 'QLML', 'SPEEDLML', 'PS']}
locs = {'WESTUS_MERRA2_400.inst1_2d_lfo_Nx.':['lat', 'lon']}
subfolders = {'/MERRA2/'}
file_suffix = '.nc4'

for ii in range(num_days): #loop over all the days
    #get the day
    year = fire_features['Current Day'][ii][0:4]
    month = fire_features['Current Day'][ii][5:7]
    day = fire_features['Current Day'][ii][8:10]
    #print(year, month, day)
    
    for folder in subfolders:
        path = '/data2/lthapa/'+str(year)+folder
        #print(path)
        
        #THIS COULD BE A SEPARATE HELPER FUNCTION
        for file_prefix in varis.keys(): # for each dataset
            path = path+file_prefix+year+month+day+file_suffix
            print(path)
            dataset = nc.Dataset(path)
            
            #get the lat and lon for the dataset
            loc_vals = []
            for loc in locs.values():
                loc_vals.append(loc)
            loc_vals = loc_vals[0]
            lat = dataset[loc_vals[0]][:]
            lon = dataset[loc_vals[1]][:]
            #turn it into a meshgrid
            LON, LAT = np.meshgrid(lon, lat)
            #get the full variable, put it to daily resolution
            for value in varis.values(): 
                for var in value:
                    variable = dataset[var][:]
                    variable = np.nanmean(variable, axis=0) #average across the day
                    #print(variable.shape, LON.shape, LAT.shape)
            
                    #find the intersection
                    overlaps = calculate_overlaps(LAT, LON, fire_features['geometry'][ii])
                    variable_weighted = np.multiply(variable,overlaps)
                    #save it to the dataframe
                    fire_features.loc[ii, var] = np.nansum(variable_weighted)
fire_features

/data2/lthapa/2019/MERRA2/WESTUS_MERRA2_400.inst1_2d_lfo_Nx.20190408.nc4
/data2/lthapa/2019/MERRA2/WESTUS_MERRA2_400.inst1_2d_lfo_Nx.20190609.nc4
/data2/lthapa/2019/MERRA2/WESTUS_MERRA2_400.inst1_2d_lfo_Nx.20190610.nc4
/data2/lthapa/2019/MERRA2/WESTUS_MERRA2_400.inst1_2d_lfo_Nx.20190611.nc4
/data2/lthapa/2019/MERRA2/WESTUS_MERRA2_400.inst1_2d_lfo_Nx.20190612.nc4
/data2/lthapa/2019/MERRA2/WESTUS_MERRA2_400.inst1_2d_lfo_Nx.20190613.nc4
/data2/lthapa/2019/MERRA2/WESTUS_MERRA2_400.inst1_2d_lfo_Nx.20190614.nc4
/data2/lthapa/2019/MERRA2/WESTUS_MERRA2_400.inst1_2d_lfo_Nx.20190615.nc4
/data2/lthapa/2019/MERRA2/WESTUS_MERRA2_400.inst1_2d_lfo_Nx.20190616.nc4
/data2/lthapa/2019/MERRA2/WESTUS_MERRA2_400.inst1_2d_lfo_Nx.20190617.nc4
/data2/lthapa/2019/MERRA2/WESTUS_MERRA2_400.inst1_2d_lfo_Nx.20190618.nc4
/data2/lthapa/2019/MERRA2/WESTUS_MERRA2_400.inst1_2d_lfo_Nx.20190619.nc4
/data2/lthapa/2019/MERRA2/WESTUS_MERRA2_400.inst1_2d_lfo_Nx.20190620.nc4
/data2/lthapa/2019/MERRA2/WESTUS_MERRA2_400.inst1_2

/data2/lthapa/2019/MERRA2/WESTUS_MERRA2_400.inst1_2d_lfo_Nx.20190721.nc4
/data2/lthapa/2019/MERRA2/WESTUS_MERRA2_400.inst1_2d_lfo_Nx.20190715.nc4
/data2/lthapa/2019/MERRA2/WESTUS_MERRA2_400.inst1_2d_lfo_Nx.20190525.nc4
/data2/lthapa/2019/MERRA2/WESTUS_MERRA2_400.inst1_2d_lfo_Nx.20190713.nc4
/data2/lthapa/2019/MERRA2/WESTUS_MERRA2_400.inst1_2d_lfo_Nx.20190714.nc4
/data2/lthapa/2019/MERRA2/WESTUS_MERRA2_400.inst1_2d_lfo_Nx.20190715.nc4
/data2/lthapa/2019/MERRA2/WESTUS_MERRA2_400.inst1_2d_lfo_Nx.20190716.nc4
/data2/lthapa/2019/MERRA2/WESTUS_MERRA2_400.inst1_2d_lfo_Nx.20190717.nc4
/data2/lthapa/2019/MERRA2/WESTUS_MERRA2_400.inst1_2d_lfo_Nx.20190718.nc4
/data2/lthapa/2019/MERRA2/WESTUS_MERRA2_400.inst1_2d_lfo_Nx.20190719.nc4
/data2/lthapa/2019/MERRA2/WESTUS_MERRA2_400.inst1_2d_lfo_Nx.20190720.nc4
/data2/lthapa/2019/MERRA2/WESTUS_MERRA2_400.inst1_2d_lfo_Nx.20190721.nc4
/data2/lthapa/2019/MERRA2/WESTUS_MERRA2_400.inst1_2d_lfo_Nx.20190722.nc4
/data2/lthapa/2019/MERRA2/WESTUS_MERRA2_400.inst1_2

/data2/lthapa/2019/MERRA2/WESTUS_MERRA2_400.inst1_2d_lfo_Nx.20190723.nc4
/data2/lthapa/2019/MERRA2/WESTUS_MERRA2_400.inst1_2d_lfo_Nx.20190723.nc4
/data2/lthapa/2019/MERRA2/WESTUS_MERRA2_400.inst1_2d_lfo_Nx.20190723.nc4
/data2/lthapa/2019/MERRA2/WESTUS_MERRA2_400.inst1_2d_lfo_Nx.20190803.nc4
/data2/lthapa/2019/MERRA2/WESTUS_MERRA2_400.inst1_2d_lfo_Nx.20190804.nc4
/data2/lthapa/2019/MERRA2/WESTUS_MERRA2_400.inst1_2d_lfo_Nx.20190805.nc4
/data2/lthapa/2019/MERRA2/WESTUS_MERRA2_400.inst1_2d_lfo_Nx.20190806.nc4
/data2/lthapa/2019/MERRA2/WESTUS_MERRA2_400.inst1_2d_lfo_Nx.20190807.nc4
/data2/lthapa/2019/MERRA2/WESTUS_MERRA2_400.inst1_2d_lfo_Nx.20190808.nc4
/data2/lthapa/2019/MERRA2/WESTUS_MERRA2_400.inst1_2d_lfo_Nx.20190809.nc4
/data2/lthapa/2019/MERRA2/WESTUS_MERRA2_400.inst1_2d_lfo_Nx.20190810.nc4
/data2/lthapa/2019/MERRA2/WESTUS_MERRA2_400.inst1_2d_lfo_Nx.20190811.nc4
/data2/lthapa/2019/MERRA2/WESTUS_MERRA2_400.inst1_2d_lfo_Nx.20190812.nc4
/data2/lthapa/2019/MERRA2/WESTUS_MERRA2_400.inst1_2

/data2/lthapa/2019/MERRA2/WESTUS_MERRA2_400.inst1_2d_lfo_Nx.20190827.nc4
/data2/lthapa/2019/MERRA2/WESTUS_MERRA2_400.inst1_2d_lfo_Nx.20190828.nc4
/data2/lthapa/2019/MERRA2/WESTUS_MERRA2_400.inst1_2d_lfo_Nx.20190829.nc4
/data2/lthapa/2019/MERRA2/WESTUS_MERRA2_400.inst1_2d_lfo_Nx.20190830.nc4
/data2/lthapa/2019/MERRA2/WESTUS_MERRA2_400.inst1_2d_lfo_Nx.20190831.nc4
/data2/lthapa/2019/MERRA2/WESTUS_MERRA2_400.inst1_2d_lfo_Nx.20190901.nc4
/data2/lthapa/2019/MERRA2/WESTUS_MERRA2_400.inst1_2d_lfo_Nx.20190902.nc4
/data2/lthapa/2019/MERRA2/WESTUS_MERRA2_400.inst1_2d_lfo_Nx.20190903.nc4
/data2/lthapa/2019/MERRA2/WESTUS_MERRA2_400.inst1_2d_lfo_Nx.20190904.nc4
/data2/lthapa/2019/MERRA2/WESTUS_MERRA2_400.inst1_2d_lfo_Nx.20190905.nc4
/data2/lthapa/2019/MERRA2/WESTUS_MERRA2_400.inst1_2d_lfo_Nx.20191028.nc4
/data2/lthapa/2019/MERRA2/WESTUS_MERRA2_400.inst1_2d_lfo_Nx.20190803.nc4
/data2/lthapa/2019/MERRA2/WESTUS_MERRA2_400.inst1_2d_lfo_Nx.20191024.nc4
/data2/lthapa/2019/MERRA2/WESTUS_MERRA2_400.inst1_2

/data2/lthapa/2019/MERRA2/WESTUS_MERRA2_400.inst1_2d_lfo_Nx.20190819.nc4
/data2/lthapa/2019/MERRA2/WESTUS_MERRA2_400.inst1_2d_lfo_Nx.20190820.nc4
/data2/lthapa/2019/MERRA2/WESTUS_MERRA2_400.inst1_2d_lfo_Nx.20190821.nc4
/data2/lthapa/2019/MERRA2/WESTUS_MERRA2_400.inst1_2d_lfo_Nx.20190822.nc4
/data2/lthapa/2019/MERRA2/WESTUS_MERRA2_400.inst1_2d_lfo_Nx.20190823.nc4
/data2/lthapa/2019/MERRA2/WESTUS_MERRA2_400.inst1_2d_lfo_Nx.20190824.nc4
/data2/lthapa/2019/MERRA2/WESTUS_MERRA2_400.inst1_2d_lfo_Nx.20190825.nc4
/data2/lthapa/2019/MERRA2/WESTUS_MERRA2_400.inst1_2d_lfo_Nx.20190826.nc4
/data2/lthapa/2019/MERRA2/WESTUS_MERRA2_400.inst1_2d_lfo_Nx.20190827.nc4
/data2/lthapa/2019/MERRA2/WESTUS_MERRA2_400.inst1_2d_lfo_Nx.20190828.nc4
/data2/lthapa/2019/MERRA2/WESTUS_MERRA2_400.inst1_2d_lfo_Nx.20190829.nc4
/data2/lthapa/2019/MERRA2/WESTUS_MERRA2_400.inst1_2d_lfo_Nx.20190830.nc4
/data2/lthapa/2019/MERRA2/WESTUS_MERRA2_400.inst1_2d_lfo_Nx.20190831.nc4
/data2/lthapa/2019/MERRA2/WESTUS_MERRA2_400.inst1_2

/data2/lthapa/2019/MERRA2/WESTUS_MERRA2_400.inst1_2d_lfo_Nx.20191003.nc4
/data2/lthapa/2019/MERRA2/WESTUS_MERRA2_400.inst1_2d_lfo_Nx.20191004.nc4
/data2/lthapa/2019/MERRA2/WESTUS_MERRA2_400.inst1_2d_lfo_Nx.20191005.nc4
/data2/lthapa/2019/MERRA2/WESTUS_MERRA2_400.inst1_2d_lfo_Nx.20191006.nc4
/data2/lthapa/2019/MERRA2/WESTUS_MERRA2_400.inst1_2d_lfo_Nx.20191007.nc4
/data2/lthapa/2019/MERRA2/WESTUS_MERRA2_400.inst1_2d_lfo_Nx.20191008.nc4
/data2/lthapa/2019/MERRA2/WESTUS_MERRA2_400.inst1_2d_lfo_Nx.20191009.nc4
/data2/lthapa/2019/MERRA2/WESTUS_MERRA2_400.inst1_2d_lfo_Nx.20191010.nc4
/data2/lthapa/2019/MERRA2/WESTUS_MERRA2_400.inst1_2d_lfo_Nx.20191011.nc4
/data2/lthapa/2019/MERRA2/WESTUS_MERRA2_400.inst1_2d_lfo_Nx.20191012.nc4
/data2/lthapa/2019/MERRA2/WESTUS_MERRA2_400.inst1_2d_lfo_Nx.20191013.nc4
/data2/lthapa/2019/MERRA2/WESTUS_MERRA2_400.inst1_2d_lfo_Nx.20190904.nc4
/data2/lthapa/2019/MERRA2/WESTUS_MERRA2_400.inst1_2d_lfo_Nx.20190905.nc4
/data2/lthapa/2019/MERRA2/WESTUS_MERRA2_400.inst1_2

/data2/lthapa/2019/MERRA2/WESTUS_MERRA2_400.inst1_2d_lfo_Nx.20190705.nc4
/data2/lthapa/2019/MERRA2/WESTUS_MERRA2_400.inst1_2d_lfo_Nx.20190706.nc4
/data2/lthapa/2019/MERRA2/WESTUS_MERRA2_400.inst1_2d_lfo_Nx.20190707.nc4
/data2/lthapa/2019/MERRA2/WESTUS_MERRA2_400.inst1_2d_lfo_Nx.20190708.nc4
/data2/lthapa/2019/MERRA2/WESTUS_MERRA2_400.inst1_2d_lfo_Nx.20190709.nc4
/data2/lthapa/2019/MERRA2/WESTUS_MERRA2_400.inst1_2d_lfo_Nx.20190710.nc4
/data2/lthapa/2019/MERRA2/WESTUS_MERRA2_400.inst1_2d_lfo_Nx.20190711.nc4
/data2/lthapa/2019/MERRA2/WESTUS_MERRA2_400.inst1_2d_lfo_Nx.20190712.nc4
/data2/lthapa/2019/MERRA2/WESTUS_MERRA2_400.inst1_2d_lfo_Nx.20190713.nc4
/data2/lthapa/2019/MERRA2/WESTUS_MERRA2_400.inst1_2d_lfo_Nx.20190624.nc4
/data2/lthapa/2019/MERRA2/WESTUS_MERRA2_400.inst1_2d_lfo_Nx.20190820.nc4
/data2/lthapa/2019/MERRA2/WESTUS_MERRA2_400.inst1_2d_lfo_Nx.20191007.nc4
/data2/lthapa/2019/MERRA2/WESTUS_MERRA2_400.inst1_2d_lfo_Nx.20191008.nc4
/data2/lthapa/2019/MERRA2/WESTUS_MERRA2_400.inst1_2

/data2/lthapa/2019/MERRA2/WESTUS_MERRA2_400.inst1_2d_lfo_Nx.20190912.nc4
/data2/lthapa/2019/MERRA2/WESTUS_MERRA2_400.inst1_2d_lfo_Nx.20190913.nc4
/data2/lthapa/2019/MERRA2/WESTUS_MERRA2_400.inst1_2d_lfo_Nx.20190914.nc4
/data2/lthapa/2019/MERRA2/WESTUS_MERRA2_400.inst1_2d_lfo_Nx.20190915.nc4
/data2/lthapa/2019/MERRA2/WESTUS_MERRA2_400.inst1_2d_lfo_Nx.20190916.nc4
/data2/lthapa/2019/MERRA2/WESTUS_MERRA2_400.inst1_2d_lfo_Nx.20190802.nc4
/data2/lthapa/2019/MERRA2/WESTUS_MERRA2_400.inst1_2d_lfo_Nx.20190803.nc4
/data2/lthapa/2019/MERRA2/WESTUS_MERRA2_400.inst1_2d_lfo_Nx.20190804.nc4
/data2/lthapa/2019/MERRA2/WESTUS_MERRA2_400.inst1_2d_lfo_Nx.20190805.nc4
/data2/lthapa/2019/MERRA2/WESTUS_MERRA2_400.inst1_2d_lfo_Nx.20190806.nc4
/data2/lthapa/2019/MERRA2/WESTUS_MERRA2_400.inst1_2d_lfo_Nx.20190807.nc4
/data2/lthapa/2019/MERRA2/WESTUS_MERRA2_400.inst1_2d_lfo_Nx.20190808.nc4
/data2/lthapa/2019/MERRA2/WESTUS_MERRA2_400.inst1_2d_lfo_Nx.20190809.nc4
/data2/lthapa/2019/MERRA2/WESTUS_MERRA2_400.inst1_2

/data2/lthapa/2019/MERRA2/WESTUS_MERRA2_400.inst1_2d_lfo_Nx.20190901.nc4
/data2/lthapa/2019/MERRA2/WESTUS_MERRA2_400.inst1_2d_lfo_Nx.20190902.nc4
/data2/lthapa/2019/MERRA2/WESTUS_MERRA2_400.inst1_2d_lfo_Nx.20190903.nc4
/data2/lthapa/2019/MERRA2/WESTUS_MERRA2_400.inst1_2d_lfo_Nx.20190904.nc4
/data2/lthapa/2019/MERRA2/WESTUS_MERRA2_400.inst1_2d_lfo_Nx.20190905.nc4
/data2/lthapa/2019/MERRA2/WESTUS_MERRA2_400.inst1_2d_lfo_Nx.20190820.nc4
/data2/lthapa/2019/MERRA2/WESTUS_MERRA2_400.inst1_2d_lfo_Nx.20190816.nc4
/data2/lthapa/2019/MERRA2/WESTUS_MERRA2_400.inst1_2d_lfo_Nx.20190817.nc4
/data2/lthapa/2019/MERRA2/WESTUS_MERRA2_400.inst1_2d_lfo_Nx.20190818.nc4
/data2/lthapa/2019/MERRA2/WESTUS_MERRA2_400.inst1_2d_lfo_Nx.20190819.nc4
/data2/lthapa/2019/MERRA2/WESTUS_MERRA2_400.inst1_2d_lfo_Nx.20190820.nc4
/data2/lthapa/2019/MERRA2/WESTUS_MERRA2_400.inst1_2d_lfo_Nx.20190821.nc4
/data2/lthapa/2019/MERRA2/WESTUS_MERRA2_400.inst1_2d_lfo_Nx.20190822.nc4
/data2/lthapa/2019/MERRA2/WESTUS_MERRA2_400.inst1_2

KeyboardInterrupt: 

In [95]:
# dictionaries that track the variables, dimensions, times and levels
varis = {'/MERRA2/WESTUS_MERRA2_400.inst1_2d_lfo_Nx.*.nc4':['TLML', 'QLML', 'SPEEDLML', 'PS'], 
        '/MERRA2/WESTUS_MERRA2_400.inst3_3d_asm_Np.*.nc4': ['T', 'QV'],
         '/static_maps/static_map_1km.nc4': ['fccs', 'slp', 'asp'],
        '/QFED/WESTUS_qfed2.emis_pm25.006.*.nc4':['biomass']}

#0=lat, 1=lon, 2=time, 3=level 
dims = {'/MERRA2/WESTUS_MERRA2_400.inst1_2d_lfo_Nx.*.nc4':['lat', 'lon', 'time'], 
       '/MERRA2/WESTUS_MERRA2_400.inst3_3d_asm_Np.*.nc4':['lat', 'lon', 'time', 'lev'],
        '/static_maps/static_map_1km.nc4': ['lat', 'lon'],
       '/QFED/WESTUS_qfed2.emis_pm25.006.*.nc4':['lat', 'lon', 'time']}

times = {'TLML':12, 'QLML':12, 'SPEEDLML':12, 'PS':12, 'T':12, 'QV':12, 'biomass':12}
levels = {'T':[500, 700], 'QV':[700]} 
file_suffix = '.nc4'

#pre-allocate the data frame
num_days = len(fire_attributes)
fire_features = gpd.GeoDataFrame({'Incident Number': np.nan*np.zeros(num_days), 'Fire Name': np.nan*np.zeros(num_days), 
                             'Current Day': np.nan*np.zeros(num_days), 'Lat Fire': np.nan*np.zeros(num_days), 
                             'Lon Fire': np.nan*np.zeros(num_days), 'Number of VIIRS points': np.nan*np.zeros(num_days),
                            'TLML_12Z': np.nan*np.zeros(num_days), 'QLML_12Z': np.nan*np.zeros(num_days),
                            'SPEEDLML_12Z': np.nan*np.zeros(num_days),'PS_12Z': np.nan*np.zeros(num_days),
                            'T_12Z_700mb': np.nan*np.zeros(num_days),'T_12Z_500mb': np.nan*np.zeros(num_days),
                            'QV_12Z_700mb': np.nan*np.zeros(num_days), 'fccs': np.nan*np.zeros(num_days),
                            'slp':np.nan*np.zeros(num_days), 'asp':np.nan*np.zeros(num_days),
                            'biomass_12Z': np.nan*np.zeros(num_days),'geometry': np.nan*np.zeros(num_days)}) # pre-allocate the dataframe
#'fccs': np.nan*np.zeros(num_days),'slp': np.nan*np.zeros(num_days),'asp': np.nan*np.zeros(num_days), 
fire_features['Incident Number'] = fire_attributes['Incident Number'].values
fire_features['Fire Name'] = fire_attributes['Fire Name'].values
fire_features['Current Day'] = fire_attributes['Current Day'].values
fire_features['Lat Fire'] = fire_attributes['Lat Fire'].values
fire_features['Lon Fire'] = fire_attributes['Lon Fire'].values
fire_features['Number of VIIRS points'] = fire_attributes['Number of VIIRS points'].values
fire_features['geometry'] = fire_polygons['geometry'].values

#print(fire_features)
unique_days = np.unique(fire_features['Current Day'].values)
print(len(unique_days))
for dd in range(len(unique_days)):

    date=unique_days[dd]
    yr = date[0:4]
    mo = date[5:7]
    dy = date[8:10]
    print(yr, mo,dy)
    #find the polygons for the given day

    for i in varis : #loop over the file names
        #get the path to the file
        path = '/data2/lthapa/'+yr+i
        if '*' in path:
            path = path.replace('*', yr+mo+dy)
        if np.datetime64(date)>=np.datetime64('2020-09-01') and np.datetime64(date)<=np.datetime64('2020-09-30'):
            path = path.replace('400', '401')
            
        #load the file in as a netcdf dataset
        print(path)
        dat = nc.Dataset(path)
        ndims = len(dims[i])
        if ndims ==2: #lat and lon
            lat = dat[dims[i][0]][:]
            lon = dat[dims[i][1]][:]
        elif ndims == 3: #lat, lon, and time
            lat = dat[dims[i][0]][:]
            lon = dat[dims[i][1]][:]
            t = dat[dims[i][2]][:]
        elif ndims ==4: #lat,lon,time,and level
            lat = dat[dims[i][0]][:]
            lon = dat[dims[i][1]][:]
            t = dat[dims[i][2]][:]
            lev = dat[dims[i][3]][:]
        if len(lat.shape)==1:
            lon, lat = np.meshgrid(lon, lat)
            
        #calculate the areas of the lat/lon grid for the DATASET
        lat_corners, lon_corners = calculate_grid_cell_corners(lat, lon)
        inds = np.where(fire_features['Current Day']==date)[0]
        polys = fire_features['geometry'].iloc[inds].values
        
        for p in range(len(inds)):
            tic = time.time()
            #add a halo to the polygon
            poly = polys[p]
            poly_buff = poly.buffer(1)
            # get the bounds of the buffered polygon
            w,s,e,n = poly_buff.bounds
            #print(w,s,e,n)
            s_ind = np.unique(np.argmin(np.abs(lat-s), axis=0))[0]
            n_ind = np.unique(np.argmin(np.abs(lat-n), axis=0))[0]
            w_ind = np.unique(np.argmin(np.abs(lon-w), axis=1))[0]
            e_ind = np.unique(np.argmin(np.abs(lon-e), axis=1))[0]
            #print(w_ind,s_ind,e_ind,n_ind)
            #lat_sub_inds, lon_sub_inds = np.where((lat>=s)&(lat<=n)&(lon>=w)&(lon<=e))
            #print(lat_sub_inds, lon_sub_inds)
            #remove indices = 0, lat.shape[0], and lat.shape[1]
            lastrow = lat.shape[0]-1
            lastcol = lat.shape[1]-1
        
            #print(lastrow, lastcol)
            

           # print(lat_sub_inds, lon_sub_inds)
    
            
            #tic = time.time()
            overlaps = calculate_overlaps(lat,lon,lat_corners,lon_corners,s_ind,n_ind,w_ind,e_ind, poly) #calculate the overlap between the poly and the lat/lon grid
            print(np.where(overlaps!=0))
            #toc = time.time()
            #print('Time elapsed for calculating overlaps for polygon:', toc-tic)
            for j in varis[i]: #loop over the variable names, which are lists
                var = dat[j][:]
                #print('the dimensions of', j, 'are', var.shape, 'and', j, 'has', ndims, 'dimensions')
                #select the times and levels we need
                if ndims ==2: #lat and lon
                    # calculate overlap
                    #print(var.shape)
                    var_weighted = np.multiply(var,overlaps)
                    var_ave = np.nansum(var_weighted)
                    name_in_table = j
                    fire_features.loc[inds[p],name_in_table] = var_ave
                    
                elif ndims == 3: #lat, lon, and time
                    #select the needed times (12Z)
                    if j in times.keys():
                        ind_time = np.argmin(np.abs(times[j]*60-t))
                        #filter the variable by time
                        var = var[ind_time, :, :]
                        #print(var.shape)
                        var_weighted = np.multiply(var,overlaps)
                        var_ave = np.nansum(var_weighted)
                        name_in_table = j+'_'+str(times[j])+'Z'
                        fire_features.loc[inds[p],name_in_table] = var_ave

                elif ndims ==4: #lat,lon,time,and level
                    #select the needed times and levels
                    if j in times.keys():
                        ind_time = np.argmin(np.abs(times[j]*60-t))
                        #filter the variable by time
                        var = var[ind_time, :, :, :]
                        #filter by levels
                        if j in levels.keys(): #if there are levels, loop through them
                            for l in levels[j]:
                                ind_lev = np.argmin(np.abs(l-lev))
                                var_weighted = np.multiply(var[ind_lev, :, :], overlaps)
                                var_ave = np.nansum(var_weighted)
                                name_in_table = j+'_'+str(times[j])+'Z_'+str(l)+'mb'
                                fire_features.loc[inds[p],name_in_table] = var_ave
            toc = time.time()
            print('Time to process polygon=',toc-tic)
        dat.close() 
print(inds[p])
fire_features.loc[inds[p],:]

442
2019 02 17
/data2/lthapa/2019/MERRA2/WESTUS_MERRA2_400.inst1_2d_lfo_Nx.20190217.nc4
(array([4]), array([17]))
Time to process polygon= 0.011193990707397461
/data2/lthapa/2019/MERRA2/WESTUS_MERRA2_400.inst3_3d_asm_Np.20190217.nc4
(array([4]), array([17]))
Time to process polygon= 0.03892874717712402
/data2/lthapa/2019/static_maps/static_map_1km.nc4
(array([202, 202, 203, 203, 204, 204, 205, 205, 206, 206, 207]), array([935, 936, 935, 936, 935, 936, 935, 936, 935, 936, 935]))
Time to process polygon= 2.782203435897827
/data2/lthapa/2019/QFED/WESTUS_qfed2.emis_pm25.006.20190217.nc4
(array([18]), array([105]))
Time to process polygon= 0.02783679962158203
2019 02 18
/data2/lthapa/2019/MERRA2/WESTUS_MERRA2_400.inst1_2d_lfo_Nx.20190218.nc4
(array([4]), array([17]))
Time to process polygon= 0.01071929931640625
/data2/lthapa/2019/MERRA2/WESTUS_MERRA2_400.inst3_3d_asm_Np.20190218.nc4
(array([4]), array([17]))
Time to process polygon= 0.03887629508972168
/data2/lthapa/2019/static_maps/static_

(array([307, 308, 308, 309, 309]), array([1088, 1088, 1089, 1088, 1089]))
Time to process polygon= 2.998851776123047
/data2/lthapa/2019/QFED/WESTUS_qfed2.emis_pm25.006.20190427.nc4
(array([27]), array([122]))
Time to process polygon= 0.030521631240844727
2019 05 01
/data2/lthapa/2019/MERRA2/WESTUS_MERRA2_400.inst1_2d_lfo_Nx.20190501.nc4
(array([23]), array([5]))
Time to process polygon= 0.011146783828735352
/data2/lthapa/2019/MERRA2/WESTUS_MERRA2_400.inst3_3d_asm_Np.20190501.nc4
(array([23]), array([5]))
Time to process polygon= 0.03864264488220215
/data2/lthapa/2019/static_maps/static_map_1km.nc4
(array([1294, 1294, 1295, 1295]), array([284, 285, 284, 285]))
Time to process polygon= 2.976022481918335
/data2/lthapa/2019/QFED/WESTUS_qfed2.emis_pm25.006.20190501.nc4
(array([116]), array([32]))
Time to process polygon= 0.03059554100036621
2019 05 02
/data2/lthapa/2019/MERRA2/WESTUS_MERRA2_400.inst1_2d_lfo_Nx.20190502.nc4
(array([23]), array([5]))
Time to process polygon= 0.011180877685546

(array([1298, 1298, 1299, 1299, 1299, 1300, 1300]), array([222, 223, 222, 223, 224, 222, 223]))
Time to process polygon= 2.968374252319336
(array([1785, 1785, 1786]), array([732, 733, 732]))
Time to process polygon= 2.943058967590332
/data2/lthapa/2019/QFED/WESTUS_qfed2.emis_pm25.006.20190508.nc4
(array([116]), array([25]))
Time to process polygon= 0.035538673400878906
(array([160]), array([82]))
Time to process polygon= 0.026918888092041016
2019 05 09
/data2/lthapa/2019/MERRA2/WESTUS_MERRA2_400.inst1_2d_lfo_Nx.20190509.nc4
(array([23]), array([4]))
Time to process polygon= 0.011332273483276367
/data2/lthapa/2019/MERRA2/WESTUS_MERRA2_400.inst3_3d_asm_Np.20190509.nc4
(array([23]), array([4]))
Time to process polygon= 0.038439273834228516
/data2/lthapa/2019/static_maps/static_map_1km.nc4
(array([1298, 1299, 1299]), array([222, 222, 223]))
Time to process polygon= 2.996182441711426
/data2/lthapa/2019/QFED/WESTUS_qfed2.emis_pm25.006.20190509.nc4
(array([116]), array([25]))
Time to process 

(array([478, 479]), array([1207, 1207]))
Time to process polygon= 2.9633078575134277
/data2/lthapa/2019/QFED/WESTUS_qfed2.emis_pm25.006.20190526.nc4
(array([43]), array([136]))
Time to process polygon= 0.03577566146850586
2019 05 27
/data2/lthapa/2019/MERRA2/WESTUS_MERRA2_400.inst1_2d_lfo_Nx.20190527.nc4
(array([9]), array([22]))
Time to process polygon= 0.011104345321655273
/data2/lthapa/2019/MERRA2/WESTUS_MERRA2_400.inst3_3d_asm_Np.20190527.nc4
(array([9]), array([22]))
Time to process polygon= 0.03767585754394531
/data2/lthapa/2019/static_maps/static_map_1km.nc4
(array([478, 479]), array([1207, 1207]))
Time to process polygon= 2.7112481594085693
/data2/lthapa/2019/QFED/WESTUS_qfed2.emis_pm25.006.20190527.nc4
(array([43]), array([136]))
Time to process polygon= 0.03340864181518555
2019 05 28
/data2/lthapa/2019/MERRA2/WESTUS_MERRA2_400.inst1_2d_lfo_Nx.20190528.nc4
(array([9]), array([22]))
Time to process polygon= 0.010660409927368164
/data2/lthapa/2019/MERRA2/WESTUS_MERRA2_400.inst3_

Time to process polygon= 0.012441873550415039
/data2/lthapa/2019/static_maps/static_map_1km.nc4
(array([474, 474, 475, 475, 475, 476, 476, 476, 476, 476, 477, 477, 477,
       477, 477, 478, 478, 478, 478, 478, 478, 479, 479, 479, 479, 479,
       480, 480, 480, 480, 481, 481, 481, 481, 482, 482, 482, 482, 483,
       483]), array([1206, 1207, 1206, 1207, 1208, 1205, 1206, 1207, 1208, 1209, 1205,
       1206, 1207, 1208, 1209, 1205, 1206, 1207, 1208, 1209, 1210, 1205,
       1206, 1207, 1208, 1209, 1205, 1206, 1207, 1208, 1204, 1205, 1206,
       1207, 1204, 1205, 1206, 1207, 1204, 1205]))
Time to process polygon= 2.8844804763793945
(array([387, 387, 388, 388, 388, 389, 389, 389, 390, 390, 390, 391, 391,
       391, 391, 392]), array([1214, 1215, 1214, 1215, 1216, 1214, 1215, 1216, 1214, 1215, 1216,
       1214, 1215, 1216, 1217, 1217]))
Time to process polygon= 2.7345311641693115
(array([301, 302, 302]), array([1408, 1407, 1408]))
Time to process polygon= 2.7444801330566406
/data2/lth

KeyboardInterrupt: 

In [72]:
fire_features
#therest = fire_features[['Incident Number', 'Fire Name', 'Current Day','Lat Fire', 'Lon Fire','Number of VIIRS points', 'TLML_12Z', 'QLML_12Z','SPEEDLML_12Z','PS_12Z','T_12Z_700mb','T_12Z_500mb','QV_12Z_700mb', 'biomass_12Z']]



#therest.to_csv('fire_features.csv')

Unnamed: 0,Incident Number,Fire Name,Current Day,Lat Fire,Lon Fire,Number of VIIRS points,TLML_12Z,QLML_12Z,SPEEDLML_12Z,PS_12Z,T_12Z_700mb,T_12Z_500mb,QV_12Z_700mb,fccs,slp,asp,biomass_12Z,geometry
0,10662684.0,PAINTED WAGON,2019-04-08,33.786944,-112.753333,3.0,,,,,,,,,,,,"POLYGON ((-112.75455 33.78632, -112.76012 33.7..."
1,10663171.0,LONE MOUNTAIN,2019-06-09,33.808056,-105.738611,2.0,,,,,,,,,,,,"POLYGON ((-105.73275 33.80235, -105.73861 33.8..."
2,10663171.0,LONE MOUNTAIN,2019-06-10,33.808056,-105.738611,0.0,,,,,,,,,,,,"POLYGON ((-105.73275 33.80235, -105.73861 33.8..."
3,10663171.0,LONE MOUNTAIN,2019-06-11,33.808056,-105.738611,0.0,,,,,,,,,,,,"POLYGON ((-105.73275 33.80235, -105.73861 33.8..."
4,10663171.0,LONE MOUNTAIN,2019-06-12,33.808056,-105.738611,0.0,,,,,,,,,,,,"POLYGON ((-105.73275 33.80235, -105.73861 33.8..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8156,11979237.0,Trail,2020-11-02,34.147222,-112.117778,2.0,,,,,,,,,,,,"POLYGON ((-112.11669 34.11898, -112.12564 34.1..."
8157,11980186.0,SANDERSON,2020-12-13,33.889444,-117.070833,11.0,,,,,,,,,,,,"POLYGON ((-117.03702 33.90632, -117.04484 33.9..."
8158,11980587.0,CREEK 5,2020-12-24,33.402646,-117.266795,2.0,,,,,,,,,,,,"POLYGON ((-117.27595 33.37881, -117.27495 33.3..."
8159,11980587.0,CREEK 5,2020-12-25,33.402646,-117.266795,16.0,,,,,,,,,,,,"POLYGON ((-117.29549 33.36558, -117.29713 33.4..."


## Helper Functions

In [5]:
#this function calculates the area of each grid in a given grid cell
# LAT and LON should be 2d
def calculate_grid_cell_corners(LAT, LON):
    #we will assume the very edges of the polygons don't touch the boundary of the domain
    lat_corners = (LAT[0:(lat.shape[0]-1),  0:(lat.shape[1])-1] + LAT[1:(lat.shape[0]), 1:(lat.shape[1])])/2
    lon_corners = (LON[0:(lat.shape[0]-1),  0:(lat.shape[1])-1] + LON[1:(lat.shape[0]), 1:(lat.shape[1])])/2
    return lat_corners, lon_corners
    """for ii in range(1,LAT.shape[0]-1):
        for jj in range(1, LAT.shape[1]-1):
            cell_lat = LAT[ii, jj]
            cell_lon = LON[ii, jj]
            SW_lat[ii,jj] = LAT[ii,jj] + (LAT[ii-1,jj-1] - LAT[ii,jj])/2
            SW_lon[ii,jj] = LON[ii,jj] + (LON[ii-1,jj-1] - LON[ii,jj])/2
            SE_lat[ii,jj] = LAT[ii,jj] + (LAT[ii-1,jj-1] - LAT[ii,jj])/2
            SE_lon[ii,jj] = LON[ii,jj] + (LON[ii,jj] - LON[ii-1,jj-1])/2
            NW_lat[ii,jj] = LAT[ii,jj] + (LAT[ii,jj] - LAT[ii-1,jj-1])/2
            NW_lon[ii,jj] = LON[ii,jj] + (LON[ii-1,jj-1] - LON[ii,jj])/2
            NE_lat[ii,jj] = LAT[ii,jj] + (LAT[ii,jj] - LAT[ii-1,jj-1])/2
            NE_lon[ii,jj] = LON[ii,jj] + (LON[ii,jj] - LON[ii-1,jj-1])/2
    return  SW_lat, SW_lon, SE_lat, SE_lon, NW_lat, NW_lon, NE_lat, NE_lon"""

In [88]:
# this function calculates the overlap between the given shape (fire_shape) and given grid (LAT, LON). 
#LAT and LON should be 2D

def calculate_overlaps(lat, lon, lat_corners, lon_corners, lat_start, lat_stop, lon_start, lon_stop, fire_shape):
    overlaps = np.zeros(lat.shape)
    # buffer the fire shape so it has a non zero area
    if fire_shape.area ==0:
        fire_shape = fire_shape.buffer(0.01)
    # find the intersection between the first day's polygon and the  grid
    for ii in range(lat_start, lat_stop):
        for jj in range(lon_start, lon_stop):
            #print('ctr', lat[ii,jj], lon[ii,jj])
            """ SW_lat = lat_corners[ii-1, jj-1]
            SW_lon = lon_corners[ii-1, jj-1]
            SE_lat = lat_corners[ii-1, jj-1]
            SE_lon = lon_corners[ii-1, jj]
            NW_lat = lat_corners[ii, jj-1]
            NW_lon = lon_corners[ii, jj-1]
            NE_lat = lat_corners[ii, jj]
            NE_lon = lon_corners[ii, jj]"""
            poly_cell = Polygon([(lon_corners[ii, jj-1],lat_corners[ii, jj-1]), (lon_corners[ii, jj],lat_corners[ii, jj]), (lon_corners[ii-1, jj],lat_corners[ii-1, jj-1]), (lon_corners[ii-1, jj-1], lat_corners[ii-1, jj-1])])
            overlap_cell = fire_shape.intersection(poly_cell)
            #overlaps are the fraction of the POLYGON in each grid cell
            overlaps[ii,jj] = overlap_cell.area/fire_shape.area
    
    return overlaps

In [None]:
#geoseries way
        #load in the relevant geometry array
        geojson_path = path.replace('.'+yr+mo+dy+'.nc4', '_grid_polygons.geojson')
        geojson_path = geojson_path.replace('/data2/lthapa/'+yr+'/MERRA2/', './')
        geojson_path = geojson_path.replace('/data2/lthapa/'+yr+'/QFED/', './')
        print(geojson_path)
        grid_polys = gpd.read_file(geojson_path)
        
inds = np.where(grid_polys['geometry'].intersection(poly).area)[0]
            areas = grid_polys['geometry'].intersection(poly).area
            #print(areas[inds])
            #print(areas[inds]/poly.area)
            toc=time.time()
            print('Time elapsed for calculating overlaps for polygon:', toc-tic)
