## Imports

In [1]:
import pandas as pd
pd.set_option('display.max_rows', None)
import geopandas as gpd
import matplotlib.pyplot as plt
from matplotlib import path
import os
import cartopy.crs as ccrs
import cartopy.feature as cfeature
import cartopy
import numpy as np
import netCDF4 as nc
np.set_printoptions(threshold=100000)
from shapely.geometry import Polygon, Point, MultiPoint
from shapely.ops import cascaded_union, unary_union, transform
from datetime import datetime, timedelta
import math
from scipy.ndimage.interpolation import shift
import shapely.wkt
from shapely.validation import explain_validity,make_valid
import xarray as xr
import pygeos as pg
import time
import seaborn as sns
from my_functions import sat_vap_press, vap_press, hot_dry_windy, haines

from timezonefinder import TimezoneFinder
import pytz
import time

from joblib import Parallel, delayed
import multiprocessing

from os.path import exists


## 

In [None]:
def make_merra_file_namelist(time):
    base_filename = '/data2/lthapa/YEAR/MERRA2/WESTUS_MERRA2_400.inst1_2d_lfo_Nx.FULLDATE.nc4'
    base_filename_list = np.repeat(base_filename, len(time))

    
    for jj in range(len(time)):
        base_filename_list[jj] = base_filename_list[jj].replace('YEAR',time[jj].strftime('%Y')).\
                                    replace('FULLDATE',time[jj].strftime('%Y%m%d'))
        if (time[jj].strftime('%Y%m')=='202009'):
            base_filename_list[jj] = base_filename_list[jj].replace('400','401')
    return base_filename_list

In [None]:
def merra_timeseries(df,day_start_hour):
    df_merra = pd.DataFrame({'day': np.zeros(len(df)),'temp':np.zeros(len(df)), 'vpd':np.zeros(len(df)), 
                             'wind':np.zeros(len(df)),'hd0w0':np.zeros(len(df)), 'hd1w0':np.zeros(len(df)),
                             'hd2w0':np.zeros(len(df)),'hd3w0':np.zeros(len(df)), 'hd4w0':np.zeros(len(df)),
                             'hd5w0':np.zeros(len(df))})
    #load in the grid
    merra_grid = gpd.read_file('MERRA_GRID.geojson')
    merra_grid = merra_grid.to_crs(epsg=3347) #put into lambert conformal conic 
    
    #do the intersection, not with a for loop!
    fire_merra_intersection = gpd.overlay(df, merra_grid, how='intersection',keep_geom_type=False)
    fire_merra_intersection['grid intersection area (ha)'] =fire_merra_intersection['geometry'].area/10000
    fire_merra_intersection['weights'] = fire_merra_intersection['grid intersection area (ha)']/fire_merra_intersection['fire area (ha)'] 
    
    
    #loop over all of the days we have intersections
    times_intersect = np.unique(fire_merra_intersection[str(day_start_hour)+ 'Z Start Day'].values)
    times_utc = np.unique(fire_merra_intersection['UTC Day'].values)
    
    count = 0
    for today in times_intersect:
        print(type(today))
        print(np.datetime64(today+ ' '+str(day_start_hour)+':00:00'))
        #get the time
        df_sub = fire_merra_intersection.iloc[np.where(fire_merra_intersection[str(day_start_hour)+ 'Z Start Day'].values==today)]
        df_sub = df_sub.set_index([str(day_start_hour)+ 'Z Start Day', 'lat', 'lon'])
        intersection_sub = df_sub.to_xarray() #polygon and weights for today

        times_back = pd.date_range(start=np.datetime64(today)-np.timedelta64(5,'D'), end=np.datetime64(today)+np.timedelta64(1,'D'))
        print(times_back)
        files_back = make_merra_file_namelist(times_back)
        
        #load in all the merra files associated with this lookback window
        dat_merra = xr.open_mfdataset(files_back,concat_dim='time',combine='nested',compat='override', coords='all')
    
        #add the derived data (svp, vp, vpd)
        dat_merra=dat_merra.assign(ESAT=sat_vap_press(dat_merra.TLML))
        dat_merra=dat_merra.assign(E=vap_press(dat_merra.QLML, dat_merra.TLML))
        dat_merra=dat_merra.assign(VPD=dat_merra.ESAT-dat_merra.E)
        
        merra_daily_mean = dat_merra.resample(time='24H',base=day_start_hour, label='left').mean(dim='time') #take the daily mean        
        merra_daily_mean_region = merra_daily_mean.sel(lat = np.unique(intersection_sub['lat'].values),
                                  lon = np.unique(intersection_sub['lon'].values)) #get the location of the overlaps
        
        hd0 = np.nansum((merra_daily_mean_region['VPD'].sel(time=np.datetime64(today+ ' '+str(day_start_hour)+':00:00')).values)*(intersection_sub['weights'].values))
        hd1 = np.nansum((merra_daily_mean_region['VPD'].sel(time=np.datetime64(today+ ' '+str(day_start_hour)+':00:00')-np.timedelta64(1,'D')).values)*
                     (intersection_sub['weights'].values))
        hd2 = np.nansum((merra_daily_mean_region['VPD'].sel(time=np.datetime64(today+ ' '+str(day_start_hour)+':00:00')-np.timedelta64(2,'D')).values)*
                     (intersection_sub['weights'].values))
        hd3 = np.nansum((merra_daily_mean_region['VPD'].sel(time=np.datetime64(today+ ' '+str(day_start_hour)+':00:00')-np.timedelta64(3,'D')).values)*
                     (intersection_sub['weights'].values))
        hd4 = np.nansum((merra_daily_mean_region['VPD'].sel(time=np.datetime64(today+ ' '+str(day_start_hour)+':00:00')-np.timedelta64(4,'D')).values)*
                     (intersection_sub['weights'].values))
        hd5 = np.nansum((merra_daily_mean_region['VPD'].sel(time=np.datetime64(today+ ' '+str(day_start_hour)+':00:00')-np.timedelta64(5,'D')).values)*
                     (intersection_sub['weights'].values))
        w = np.nansum((merra_daily_mean_region['SPEEDLML'].sel(time=np.datetime64(today+ ' '+str(day_start_hour)+':00:00')).values)*(intersection_sub['weights'].values))
        t = np.nansum((merra_daily_mean_region['TLML'].sel(time=np.datetime64(today+ ' '+str(day_start_hour)+':00:00')).values)*(intersection_sub['weights'].values))
        
        df_merra.iloc[count,:] = [today+ ' '+str(day_start_hour)+':00:00',t,hd0,w,hd0*w,hd1*w,hd2*w,hd3*w,hd4*w,hd5*w]
        dat_merra.close()
        count =count+1
    return df_merra
    

In [None]:
#all fires
#fire_incidents = ['BOBCAT', 'DOLAN', 'HOLIDAY FARM','CREEK', 'LAKE', 'CAMERON PEAK', 'PINE GULCH', 'WILLIAMS FLATS', 'SHADY','PEDRO MOUNTAIN', 'WALKER', '204 COW']

#2020 fires
#fire_incidents = ['AUGUST COMPLEX','BOBCAT', 'DOLAN', 'HOLIDAY FARM','CREEK', 'LAKE', 'CAMERON PEAK', 'PINE GULCH']
fire_incidents = ['LAKE']
start_time = 12
path_poly = '/data2/lthapa/ML_daily/fire_polygons/'
for jj in range(len(fire_incidents)):
    
    print(fire_incidents[jj])
    fire_daily = gpd.read_file(path_poly+fire_incidents[jj].lower().replace(' ', '_')+'_VIIRS_daily_'+str(start_time)+'Z_day_start.geojson') #polygons and attributes
    
    #get rid of rows/cols we don't need
    fire_daily=fire_daily.drop(columns=['Current Overpass'])
    fire_daily = fire_daily.drop(np.where(fire_daily['geometry']==None)[0])
    fire_daily['fire area (ha)'] = fire_daily['geometry'].area/10000 #hectares
    fire_daily.set_geometry(col='geometry', inplace=True) #designate the geometry column
    fire_daily = fire_daily.rename(columns={'Current Day':'UTC Day', 'Local Day': str(start_time)+ 'Z Start Day'})
    
    #merra
    me = merra_timeseries(fire_daily,12)
    print(me)
    #me.to_csv('./fire_features/'+fire_incidents[jj].lower().replace(' ', '_')+'_Daily_MERRA_Moving_Average_2.csv') #daily averages

In [None]:
times_back = pd.date_range(start=np.datetime64('2020-08-12'), freq='H', periods=24*2)
print(times_back)


ds = xr.Dataset(
    data_vars=dict(
        test=([ "time"], np.linspace(0,len(times_back)),
    ),
    coords=dict(
        time=times_back,
    )
    ))
print(ds)

In [None]:
print(multiprocessing.cpu_count())


In [None]:
fire_daily = gpd.read_file('./fire_polygons/lake_VIIRS_daily.geojson')
fire_daily_latlon = fire_daily.to_crs(epsg=4326)

#load in the merra grid
merra_grid = xr.open_dataset('RAVE_GRID.nc')


#for each fire_daily polygon
for ii in range(1):#len(fire_daily)):
    #get the bounds
    bounds = fire_daily_latlon['geometry'].iloc[ii].bounds
    print(bounds)
    [rows,cols] = np.where((merra_grid.LAT_CTR>bounds[1])&
                    (merra_grid.LAT_CTR<bounds[3])&
                    (merra_grid.LON_CTR>bounds[0])&
                    (merra_grid.LON_CTR<bounds[2]))
    #print(rows,cols)
    
    if rows.size==0:
        print('empty!')
        lat_middle = (bounds[1]+bounds[3])/2
        lon_middle = (bounds[0]+bounds[2])/2

        distance = np.sqrt((merra_grid.LAT_CTR-lat_middle)**2+(merra_grid.LON_CTR-lon_middle)**2)
        row_min_location,col_min_location = np.where(distance ==np.min(distance))
        rows = np.append(rows,row_min_location)
        cols = np.append(cols,col_min_location)
    
    rows=np.arange(rows[0]-1,rows[len(rows)-1]+2,1)
    cols=np.arange(cols[0]-1,cols[len(cols)-1]+2,1)

    print(rows,cols)
    
    #make a geodataframe (in paralell of the rows and cols)
    tic = time.time()
    results = Parallel(n_jobs=6)(delayed(build_one_gridcell)
                                 (merra_grid['LAT_COR'].values, merra_grid['LON_COR'].values,
                                  merra_grid['LAT_CTR'].values, merra_grid['LON_CTR'].values,i,j) 
                                 for i in rows for j in cols)
    toc = time.time()
    print(toc-tic)
    df_grid=gpd.GeoDataFrame(results)
    df_grid.columns = ['lat', 'lon', 'row', 'col', 'geometry']
    df_grid.set_geometry(col='geometry',inplace=True,crs='EPSG:4326') #need to say it's in lat/lon before transform to LCC
    df_grid=df_grid.to_crs(epsg=3347)
    #print(df_grid)
    
    fire_today = gpd.GeoDataFrame(fire_daily.iloc[ii:ii+1,:])
    fire_today.set_geometry(col='geometry',inplace=True)
    #print(fire_today)
    
    #intersect the polygon with the grid subset
    print(gpd.overlay(fire_today, df_grid, how='intersection',keep_geom_type=False))
    
    
#make the geodataframe
#do the intersection

merra_grid.close()

In [None]:
#makes and saves a geodataframe of a grid given the center and corner points for that grid as 2D matrices
def build_one_gridcell(LAT_COR, LON_COR, LAT_CTR, LON_CTR, ii,jj):
    #print(ii,jj,count)
    #print(LAT_CTR[ii,jj], LON_CTR[ii,jj]) #ctr
    sw = (LON_COR[ii, jj],LAT_COR[ii, jj]) #SW
    se =(LON_COR[ii, jj+1],LAT_COR[ii, jj+1]) #SE
    nw = (LON_COR[ii+1, jj],LAT_COR[ii+1, jj]) #NW
    ne = (LON_COR[ii+1, jj+1],LAT_COR[ii+1, jj+1]) #NE
            
    poly_cell = Polygon([sw,nw,ne,se])
    
    return LAT_CTR[ii,jj], LON_CTR[ii,jj],ii,jj,poly_cell

In [None]:
fire_today = gpd.GeoDataFrame(fire_daily.iloc[0:1,:])
print(fire_today)
print(df_grid)
print(gpd.overlay(fire_today,df_grid, how='intersection',keep_geom_type=False))

In [None]:
def make_file_namelist(time,base_filename):
    filename_list = np.array([])
    times_back_used = np.array([])
    for jj in range(len(time)):
        fname = base_filename.replace('YYYY',time[jj].strftime('%Y')).\
                                replace('MM',time[jj].strftime('%m')).\
                                replace('DD',time[jj].strftime('%d')).\
                                replace('HH',time[jj].strftime('%H'))
        if exists(fname):
            filename_list = np.append(filename_list,fname)
            times_back_used = np.append(times_back_used,time[jj])
    return filename_list, times_back_used

In [None]:
times = pd.date_range(start=np.datetime64('2020-09-10')-np.timedelta64(5,'D'), end=np.datetime64('2020-09-10')+
                                   np.timedelta64(1,'D'),freq='D')
print(times)

print(times[0].strftime('%Y%m%d%H'))



base_filename_hrrr = '/data2/lthapa/ML_daily/pygraf/Processed_HRRR_YYYYMMDDHH.nc'
base_filename_rave = '/data2/lthapa/YYYY/AprYYYY_to_OctYYYY/Hourly_Emissions_FV3_13km_YYYYMMDD0000_YYYYMMDD2300xr.nc'
base_filename_merra = '/data2/lthapa/YYYY/MERRA2/WESTUS_MERRA2_400.inst1_2d_lfo_Nx.YYYYMMDD.nc4'


#make_file_namelist(times,base_filename_hrrr)
#make_file_namelist(times,base_filename_rave)
make_file_namelist(times,base_filename_merra)

In [None]:
string = 'hello'
print(string)

replaced = string.replace('j','q')
print(replaced)

In [None]:
q = pd.read_excel('../Query2.xlsx')

In [None]:
print(q.iloc[0:2])

# Building the code to get sit-209 data from the database

In [2]:
sit209_data=pd.read_csv('../Query2.txt')
sit209_data.columns = ['INC209R_IDENTIFIER','INCIDENT_NAME','REPORT_FROM_DATE','REPORT_TO_DATE',
              'RESTYP_IDENTIFIER', 'RESOURCE_QUANTITY','RESOURCE_PERSONNEL','CODE_NAME',
              'INC_IDENTIFIER','PCT_CONTAINED_COMPLETED'] 
print(sit209_data.iloc[0:4])

   INC209R_IDENTIFIER  INCIDENT_NAME      REPORT_FROM_DATE  \
0            11683718  Dogwood Trail  1/8/2020 12:30:00 PM   
1            11683718  Dogwood Trail  1/8/2020 12:30:00 PM   
2            11683732  Dogwood Trail  1/9/2020 12:30:00 PM   
3            11683732  Dogwood Trail  1/9/2020 12:30:00 PM   

         REPORT_TO_DATE  RESTYP_IDENTIFIER  RESOURCE_QUANTITY  \
0   1/8/2020 9:15:00 PM            9429934                2.0   
1   1/8/2020 9:15:00 PM            9429953                NaN   
2  1/10/2020 8:15:00 AM            9429928                2.0   
3  1/10/2020 8:15:00 AM            9429934                2.0   

   RESOURCE_PERSONNEL       CODE_NAME  INC_IDENTIFIER  PCT_CONTAINED_COMPLETED  
0                   2  Engine, Type 6        11683717                     90.0  
1                   0        Overhead        11683717                     90.0  
2                   2           Dozer        11683717                     96.0  
3                   2  Engine, Type 6  

In [18]:
#2020 fires
#fire_incidents = ['AUGUST COMPLEX','BOBCAT', 'DOLAN', 'HOLIDAY FARM','CREEK', 'LAKE', 'CAMERON PEAK', 'PINE GULCH']
#fire_incidents=['BOBCAT', 'DOLAN', 'HOLIDAY FARM','CREEK', 'LAKE', 'CAMERON PEAK', 'PINE GULCH']
fire_incidents = ['LAKE']

path_poly = '/data2/lthapa/ML_daily/fire_polygons/'
suffix_poly = 'Z_day_start.geojson'
day_start_hour=12
for jj in range(len(fire_incidents)):
    fire_name = fire_incidents[jj].lower().replace(' ','_')
    print(path_poly+fire_name+'_VIIRS_daily_'+str(day_start_hour)+suffix_poly)
    fire_daily = gpd.read_file(path_poly+fire_name+'_VIIRS_daily_'+str(day_start_hour)+suffix_poly)
    fire_daily=fire_daily.drop(columns=['Current Overpass'])
    fire_daily = fire_daily.drop(np.where(fire_daily['geometry']==None)[0])
    fire_daily['fire area (ha)'] = fire_daily['geometry'].area/10000 #hectares
    fire_daily.set_geometry(col='geometry', inplace=True) #designate the geometry column
    fire_daily = fire_daily.rename(columns={'Current Day':'UTC Day', 'Local Day': str(day_start_hour)+ 'Z Start Day'})
    
    fire_daily = fire_daily.iloc[np.array(fire_daily['UTC Day'].values,dtype='datetime64')<=np.datetime64('2020-10-31'),:]

    #print(sit209_data[sit209_data['INC_IDENTIFIER']==fire_daily['Incident Number'].iloc[0]])
    #get the fire incident number, lat, and lon
    incident_number = fire_daily['Incident Number'].iloc[0]
    fire_lat = fire_daily['Lat Fire'].iloc[0]
    fire_lon = fire_daily['Lon Fire'].iloc[0]
    print(incident_number, fire_lat, fire_lon)
    
    sit209_data_fire = sit209_data[sit209_data['INC_IDENTIFIER']==incident_number]
    
    #do the time zone conversion
    obj=TimezoneFinder() #initialize the timezone finder
    tz = obj.timezone_at(lng=fire_lon, lat=fire_lat) #get the timezone
    local = pytz.timezone(tz)
    utc = pytz.utc
    
    
    #put the start and end times in local time
    loc_dt_start = [local.localize(datetime.strptime(date, '%m/%d/%Y %H:%M:%S %p')) for date in sit209_data_fire['REPORT_FROM_DATE'].values]
    loc_dt_end = [local.localize(datetime.strptime(date, '%m/%d/%Y %H:%M:%S %p')) for date in sit209_data_fire['REPORT_TO_DATE'].values]
    
    #put them in UTC time
    utc_dt_start = [time_start.astimezone(utc) for time_start in loc_dt_start]
    utc_dt_end = [time_end.astimezone(utc) for time_end in loc_dt_end]
    
    start_day = pd.to_datetime(utc_dt_start[0]).strftime('%Y-%m-%d')+' '+str(day_start_hour)+':00'
    
    
    #reassign to UTC time, this DOES keep track of daylight savings (eg +7 is used for PDT, +8 is used for PST)
    sit209_data_fire['Report Start UTC'] = pd.to_datetime(utc_dt_start)
    sit209_data_fire['Report End UTC'] = pd.to_datetime(utc_dt_end)
    sit209_data_fire['Timezone']= tz
    
    #localise the index
    sit209_data_fire = sit209_data_fire.set_index(['Report Start UTC']).tz_localize(None)
    #print(sit209_data_fire.iloc[0:4])
    
    
    ## do the 12z-12z day grouping, based on the UTC times
    start_day_utc = str(utc_dt_start[0])
    start_datetime_utc = np.datetime64(start_day_utc[0:10]+'T'+str(day_start_hour).zfill(2)+':00')   
    #sit209_data_fire = sit209_data_fire.resample('24H',origin=start_datetime_utc)

    personnel = sit209_data_fire['RESOURCE_PERSONNEL'].resample('24H',origin=start_datetime_utc).sum().reset_index()
    percent_contained = sit209_data_fire['PCT_CONTAINED_COMPLETED'].resample('24H',origin=start_datetime_utc).mean().reset_index()
    
    df_sit209 = pd.concat([percent_contained,personnel.drop(columns='Report Start UTC')],axis=1)
    df_sit209.columns=['day', 'percent_contained', 'personnel']
    df_sit209['day'] = pd.to_datetime(df_sit209['day'].values).strftime('%Y-%m-%d')
    inds = df_sit209['day'].isin(fire_daily[str(day_start_hour)+'Z Start Day']).values
    print(inds)
    print(df_sit209.iloc[inds])
    print(fire_daily)
    
    #day = np.datetime64(str(utc_dt_start[0])[0:10]+'T'+str(day_start_hour).zfill(2)+':00',
    #                   str(utc_dt_start[len(utc_dt_start)-1])[0:10]+'T'+str(day_start_hour).zfill(2)+':00')
    #print(str(utc_dt_start[0])[0:10]+'T'+str(day_start_hour).zfill(2)+':00')
    #print(str(utc_dt_start[len(utc_dt_start)-1])[0:10]+'T'+str(day_start_hour).zfill(2)+':00')
    #df_sit209 = pd.DataFrame({'day':day,'personnel':personnel,'percent_contained':percent_contained})
    #print(df_sit209)
    
    """
    sit209_data_fire = sit209_data_fire.set_index(['Report Start UTC'])     
    #print(sit209_data_fire)
    
    #grab the resources and percent contained
    
    """

/data2/lthapa/ML_daily/fire_polygons/lake_VIIRS_daily_12Z_day_start.geojson
11773470.0 34.6786111 -118.4519444
[ True  True  True  True  True  True  True  True  True  True  True False
 False False False False False  True False False False False False False
 False False False False False False False False False False False False
 False False False False False False False False]
           day  percent_contained  personnel
0   2020-08-12           0.000000       1576
1   2020-08-13           5.930233       3377
2   2020-08-14          12.000000       5444
3   2020-08-15          12.000000       3060
4   2020-08-16          21.630137       3784
5   2020-08-17          38.000000       4051
6   2020-08-18          38.000000       3808
7   2020-08-19          41.929412       4015
8   2020-08-20          52.000000       3686
9   2020-08-21          52.000000       3534
10  2020-08-22          52.000000       3666
17  2020-08-29          90.000000        370
   12Z Start Day  Incident Number F

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [1]:
from datetime import datetime, timedelta


In [97]:
print(type(pd.to_datetime(utc_dt_start)))


<class 'pandas.core.indexes.datetimes.DatetimeIndex'>
