## Imports

In [49]:
import pandas as pd
pd.set_option('display.max_rows', None)
import geopandas as gpd
import matplotlib.pyplot as plt
from matplotlib import path
import os
import cartopy.crs as ccrs
import cartopy.feature as cfeature
import cartopy
import numpy as np
import netCDF4 as nc
np.set_printoptions(threshold=100000)
from shapely.geometry import Polygon, Point, MultiPoint
from shapely.ops import cascaded_union, unary_union, transform
from datetime import datetime, timedelta
import datetime
import math
from scipy.ndimage.interpolation import shift
import shapely.wkt
from shapely.validation import explain_validity,make_valid
import xarray as xr
import pygeos as pg
import time
import seaborn as sns
from my_functions import sat_vap_press, vap_press, hot_dry_windy, haines

from joblib import Parallel, delayed
import multiprocessing

## 

In [15]:
def make_merra_file_namelist(time):
    base_filename = '/data2/lthapa/YEAR/MERRA2/WESTUS_MERRA2_400.inst1_2d_lfo_Nx.FULLDATE.nc4'
    base_filename_list = np.repeat(base_filename, len(time))

    
    for jj in range(len(time)):
        base_filename_list[jj] = base_filename_list[jj].replace('YEAR',time[jj].strftime('%Y')).\
                                    replace('FULLDATE',time[jj].strftime('%Y%m%d'))
        if (time[jj].strftime('%Y%m')=='202009'):
            base_filename_list[jj] = base_filename_list[jj].replace('400','401')
    return base_filename_list

In [98]:
def merra_timeseries(df,day_start_hour):
    df_merra = pd.DataFrame({'day': np.zeros(len(df)),'temp':np.zeros(len(df)), 'vpd':np.zeros(len(df)), 
                             'wind':np.zeros(len(df)),'hd0w0':np.zeros(len(df)), 'hd1w0':np.zeros(len(df)),
                             'hd2w0':np.zeros(len(df)),'hd3w0':np.zeros(len(df)), 'hd4w0':np.zeros(len(df)),
                             'hd5w0':np.zeros(len(df))})
    #load in the grid
    merra_grid = gpd.read_file('MERRA_GRID.geojson')
    merra_grid = merra_grid.to_crs(epsg=3347) #put into lambert conformal conic 
    
    #do the intersection, not with a for loop!
    fire_merra_intersection = gpd.overlay(df, merra_grid, how='intersection',keep_geom_type=False)
    fire_merra_intersection['grid intersection area (ha)'] =fire_merra_intersection['geometry'].area/10000
    fire_merra_intersection['weights'] = fire_merra_intersection['grid intersection area (ha)']/fire_merra_intersection['fire area (ha)'] 
    
    
    #loop over all of the days we have intersections
    times_intersect = np.unique(fire_merra_intersection[str(day_start_hour)+ 'Z Start Day'].values)
    times_utc = np.unique(fire_merra_intersection['UTC Day'].values)
    
    count = 0
    for today in times_intersect:
        print(type(today))
        print(np.datetime64(today+ ' '+str(day_start_hour)+':00:00'))
        #get the time
        df_sub = fire_merra_intersection.iloc[np.where(fire_merra_intersection[str(day_start_hour)+ 'Z Start Day'].values==today)]
        df_sub = df_sub.set_index([str(day_start_hour)+ 'Z Start Day', 'lat', 'lon'])
        intersection_sub = df_sub.to_xarray() #polygon and weights for today

        times_back = pd.date_range(start=np.datetime64(today)-np.timedelta64(5,'D'), end=np.datetime64(today)+np.timedelta64(1,'D'))
        print(times_back)
        files_back = make_merra_file_namelist(times_back)
        
        #load in all the merra files associated with this lookback window
        dat_merra = xr.open_mfdataset(files_back,concat_dim='time',combine='nested',compat='override', coords='all')
    
        #add the derived data (svp, vp, vpd)
        dat_merra=dat_merra.assign(ESAT=sat_vap_press(dat_merra.TLML))
        dat_merra=dat_merra.assign(E=vap_press(dat_merra.QLML, dat_merra.TLML))
        dat_merra=dat_merra.assign(VPD=dat_merra.ESAT-dat_merra.E)
        
        merra_daily_mean = dat_merra.resample(time='24H',base=day_start_hour, label='left').mean(dim='time') #take the daily mean        
        merra_daily_mean_region = merra_daily_mean.sel(lat = np.unique(intersection_sub['lat'].values),
                                  lon = np.unique(intersection_sub['lon'].values)) #get the location of the overlaps
        
        hd0 = np.nansum((merra_daily_mean_region['VPD'].sel(time=np.datetime64(today+ ' '+str(day_start_hour)+':00:00')).values)*(intersection_sub['weights'].values))
        hd1 = np.nansum((merra_daily_mean_region['VPD'].sel(time=np.datetime64(today+ ' '+str(day_start_hour)+':00:00')-np.timedelta64(1,'D')).values)*
                     (intersection_sub['weights'].values))
        hd2 = np.nansum((merra_daily_mean_region['VPD'].sel(time=np.datetime64(today+ ' '+str(day_start_hour)+':00:00')-np.timedelta64(2,'D')).values)*
                     (intersection_sub['weights'].values))
        hd3 = np.nansum((merra_daily_mean_region['VPD'].sel(time=np.datetime64(today+ ' '+str(day_start_hour)+':00:00')-np.timedelta64(3,'D')).values)*
                     (intersection_sub['weights'].values))
        hd4 = np.nansum((merra_daily_mean_region['VPD'].sel(time=np.datetime64(today+ ' '+str(day_start_hour)+':00:00')-np.timedelta64(4,'D')).values)*
                     (intersection_sub['weights'].values))
        hd5 = np.nansum((merra_daily_mean_region['VPD'].sel(time=np.datetime64(today+ ' '+str(day_start_hour)+':00:00')-np.timedelta64(5,'D')).values)*
                     (intersection_sub['weights'].values))
        w = np.nansum((merra_daily_mean_region['SPEEDLML'].sel(time=np.datetime64(today+ ' '+str(day_start_hour)+':00:00')).values)*(intersection_sub['weights'].values))
        t = np.nansum((merra_daily_mean_region['TLML'].sel(time=np.datetime64(today+ ' '+str(day_start_hour)+':00:00')).values)*(intersection_sub['weights'].values))
        
        df_merra.iloc[count,:] = [today+ ' '+str(day_start_hour)+':00:00',t,hd0,w,hd0*w,hd1*w,hd2*w,hd3*w,hd4*w,hd5*w]
        dat_merra.close()
        count =count+1
    return df_merra
    

In [99]:
#all fires
#fire_incidents = ['BOBCAT', 'DOLAN', 'HOLIDAY FARM','CREEK', 'LAKE', 'CAMERON PEAK', 'PINE GULCH', 'WILLIAMS FLATS', 'SHADY','PEDRO MOUNTAIN', 'WALKER', '204 COW']

#2020 fires
#fire_incidents = ['AUGUST COMPLEX','BOBCAT', 'DOLAN', 'HOLIDAY FARM','CREEK', 'LAKE', 'CAMERON PEAK', 'PINE GULCH']
fire_incidents = ['LAKE']
start_time = 12
path_poly = '/data2/lthapa/ML_daily/fire_polygons/'
for jj in range(len(fire_incidents)):
    
    print(fire_incidents[jj])
    fire_daily = gpd.read_file(path_poly+fire_incidents[jj].lower().replace(' ', '_')+'_VIIRS_daily_'+str(start_time)+'Z_day_start.geojson') #polygons and attributes
    
    #get rid of rows/cols we don't need
    fire_daily=fire_daily.drop(columns=['Current Overpass'])
    fire_daily = fire_daily.drop(np.where(fire_daily['geometry']==None)[0])
    fire_daily['fire area (ha)'] = fire_daily['geometry'].area/10000 #hectares
    fire_daily.set_geometry(col='geometry', inplace=True) #designate the geometry column
    fire_daily = fire_daily.rename(columns={'Current Day':'UTC Day', 'Local Day': str(start_time)+ 'Z Start Day'})
    
    #merra
    me = merra_timeseries(fire_daily,12)
    print(me)
    #me.to_csv('./fire_features/'+fire_incidents[jj].lower().replace(' ', '_')+'_Daily_MERRA_Moving_Average_2.csv') #daily averages

LAKE
<class 'str'>
2020-08-12T12:00:00
DatetimeIndex(['2020-08-07', '2020-08-08', '2020-08-09', '2020-08-10',
               '2020-08-11', '2020-08-12', '2020-08-13'],
              dtype='datetime64[ns]', freq='D')
<class 'str'>
2020-08-13T12:00:00
DatetimeIndex(['2020-08-08', '2020-08-09', '2020-08-10', '2020-08-11',
               '2020-08-12', '2020-08-13', '2020-08-14'],
              dtype='datetime64[ns]', freq='D')
<class 'str'>
2020-08-14T12:00:00
DatetimeIndex(['2020-08-09', '2020-08-10', '2020-08-11', '2020-08-12',
               '2020-08-13', '2020-08-14', '2020-08-15'],
              dtype='datetime64[ns]', freq='D')
<class 'str'>
2020-08-15T12:00:00
DatetimeIndex(['2020-08-10', '2020-08-11', '2020-08-12', '2020-08-13',
               '2020-08-14', '2020-08-15', '2020-08-16'],
              dtype='datetime64[ns]', freq='D')
<class 'str'>
2020-08-16T12:00:00
DatetimeIndex(['2020-08-11', '2020-08-12', '2020-08-13', '2020-08-14',
               '2020-08-15', '2020-08-16', '20

In [53]:
times_back = pd.date_range(start=np.datetime64('2020-08-12'), freq='H', periods=24*2)
print(times_back)


ds = xr.Dataset(
    data_vars=dict(
        test=([ "time"], np.linspace(0,len(times_back)),
    ),
    coords=dict(
        time=times_back,
    )
    ))
print(ds)

DatetimeIndex(['2020-08-12 00:00:00', '2020-08-12 01:00:00',
               '2020-08-12 02:00:00', '2020-08-12 03:00:00',
               '2020-08-12 04:00:00', '2020-08-12 05:00:00',
               '2020-08-12 06:00:00', '2020-08-12 07:00:00',
               '2020-08-12 08:00:00', '2020-08-12 09:00:00',
               '2020-08-12 10:00:00', '2020-08-12 11:00:00',
               '2020-08-12 12:00:00', '2020-08-12 13:00:00',
               '2020-08-12 14:00:00', '2020-08-12 15:00:00',
               '2020-08-12 16:00:00', '2020-08-12 17:00:00',
               '2020-08-12 18:00:00', '2020-08-12 19:00:00',
               '2020-08-12 20:00:00', '2020-08-12 21:00:00',
               '2020-08-12 22:00:00', '2020-08-12 23:00:00',
               '2020-08-13 00:00:00', '2020-08-13 01:00:00',
               '2020-08-13 02:00:00', '2020-08-13 03:00:00',
               '2020-08-13 04:00:00', '2020-08-13 05:00:00',
               '2020-08-13 06:00:00', '2020-08-13 07:00:00',
               '2020-08-

TypeError: variable 'coords' has invalid type <class 'dict'>

In [112]:
fire_daily = gpd.read_file('./fire_polygons/lake_VIIRS_daily.geojson')
fire_daily_latlon = fire_daily.to_crs(epsg=4326)

#load in the merra grid
merra_grid = xr.open_dataset('HRRR_GRID.nc')


#for each fire_daily polygon
for ii in range(1):#len(fire_daily)):
    #get the bounds
    bounds = fire_daily_latlon['geometry'].iloc[ii].bounds
    print(bounds)
    [rows,cols] = np.where((merra_grid.LAT_CTR>bounds[1])&
                    (merra_grid.LAT_CTR<bounds[3])&
                    (merra_grid.LON_CTR>bounds[0])&
                    (merra_grid.LON_CTR<bounds[2]))
    #print(rows,cols)
    
    if rows.size==0:
        print('empty!')
        lat_middle = (bounds[1]+bounds[3])/2
        lon_middle = (bounds[0]+bounds[2])/2

        distance = np.sqrt((merra_grid.LAT_CTR-lat_middle)**2+(merra_grid.LON_CTR-lon_middle)**2)
        row_min_location,col_min_location = np.where(distance ==np.min(distance))
        rows = np.append(rows,row_min_location)
        cols = np.append(cols,col_min_location)
    
    print(rows,cols)
    
    #make a geodataframe (in paralell of the rows and cols)
    tic = time.time()
    results = Parallel(n_jobs=6)(delayed(build_one_gridcell)
                                 (merra_grid['LAT_COR'].values, merra_grid['LON_COR'].values,
                                  merra_grid['LAT_CTR'].values, merra_grid['LON_CTR'].values,i,j) 
                                 for i in rows for j in cols)
    toc = time.time()
    print(toc-tic)
    df_grid=gpd.GeoDataFrame(results)
    df_grid.columns = ['lat', 'lon', 'row', 'col', 'geometry']
    df_grid.set_geometry(col='geometry',inplace=True,crs='EPSG:4326') #need to say it's in lat/lon before transform to LCC
    df_grid=df_grid.to_crs(epsg=3347)
    #print(df_grid)
    
    fire_today = gpd.GeoDataFrame(fire_daily.iloc[ii:ii+1,:])
    fire_today.set_geometry(col='geometry',inplace=True)
    #print(fire_today)
    
    #intersect the polygon with the grid subset
    print(gpd.overlay(fire_today, df_grid, how='intersection',keep_geom_type=False))
    
    
#make the geodataframe
#do the intersection

merra_grid.close()

(-118.57547009085407, 34.629992417780734, -118.45018028129323, 34.71835637672822)
[457 457 457 457 458 458 458 458 459 459 459 460] [259 260 261 262 259 260 261 262 260 261 262 260]
3.8383052349090576
    Current Day  Incident Number Fire Name  Current Overpass   Lat Fire  \
0    2020-08-13       11773470.0      LAKE            2100.0  34.678611   
1    2020-08-13       11773470.0      LAKE            2100.0  34.678611   
2    2020-08-13       11773470.0      LAKE            2100.0  34.678611   
3    2020-08-13       11773470.0      LAKE            2100.0  34.678611   
4    2020-08-13       11773470.0      LAKE            2100.0  34.678611   
5    2020-08-13       11773470.0      LAKE            2100.0  34.678611   
6    2020-08-13       11773470.0      LAKE            2100.0  34.678611   
7    2020-08-13       11773470.0      LAKE            2100.0  34.678611   
8    2020-08-13       11773470.0      LAKE            2100.0  34.678611   
9    2020-08-13       11773470.0      LAKE       

In [47]:
#makes and saves a geodataframe of a grid given the center and corner points for that grid as 2D matrices
def build_one_gridcell(LAT_COR, LON_COR, LAT_CTR, LON_CTR, ii,jj):
    #print(ii,jj,count)
    #print(LAT_CTR[ii,jj], LON_CTR[ii,jj]) #ctr
    sw = (LON_COR[ii, jj],LAT_COR[ii, jj]) #SW
    se =(LON_COR[ii, jj+1],LAT_COR[ii, jj+1]) #SE
    nw = (LON_COR[ii+1, jj],LAT_COR[ii+1, jj]) #NW
    ne = (LON_COR[ii+1, jj+1],LAT_COR[ii+1, jj+1]) #NE
            
    poly_cell = Polygon([sw,nw,ne,se])
    
    return LAT_CTR[ii,jj], LON_CTR[ii,jj],ii,jj,poly_cell

In [101]:
fire_today = gpd.GeoDataFrame(fire_daily.iloc[0:1,:])
print(fire_today)
print(df_grid)
print(gpd.overlay(fire_today,df_grid, how='intersection',keep_geom_type=False))

  Current Day  Incident Number Fire Name  Current Overpass   Lat Fire  \
0  2020-08-13       11773470.0      LAKE            2100.0  34.678611   

     Lon Fire  Number of NEW VIIRS points  NEW FRP  \
0 -118.451944                       132.0   1644.5   

                                            geometry  
0  MULTIPOLYGON (((3641986.585 323212.339, 364197...  
    lat     lon  row  col                                           geometry
0  34.5 -118.75    6    9  POLYGON ((-119.06250 34.25000, -119.06250 34.7...
Empty GeoDataFrame
Columns: [Incident Number, Number of NEW VIIRS points, Lat Fire, row, col, NEW FRP, Current Day, geometry, lat, Current Overpass, lon, Fire Name, Lon Fire]
Index: []
