In [1]:
import pandas as pd
pd.set_option('display.max_rows', None)
import geopandas as gpd
import matplotlib.pyplot as plt
from matplotlib import path
import os
import cartopy.crs as ccrs
import cartopy.feature as cfeature
import cartopy
import numpy as np
import netCDF4 as nc
np.set_printoptions(threshold=100000)
from shapely.geometry import Polygon, Point, MultiPoint, LineString, LinearRing
from shapely.ops import cascaded_union, unary_union, transform
import datetime
import math
from scipy.ndimage.interpolation import shift
import scipy.interpolate as si
import shapely.wkt
from shapely.validation import explain_validity
import xarray as xr
import seaborn as sns
from my_functions import sat_vap_press, vap_press, hot_dry_windy, haines
from joblib import Parallel, delayed
import multiprocessing
from os.path import exists
import rasterio
from rasterio.windows import get_data_window,Window, from_bounds
from rasterio.plot import show
from itertools import product

from timezonefinder import TimezoneFinder
import pytz

In [106]:
#process features using my polygons
sit209=pd.read_csv('../Query2.txt')
sit209.columns = ['INC209R_IDENTIFIER','INCIDENT_NAME','REPORT_FROM_DATE','REPORT_TO_DATE',
              'RESTYP_IDENTIFIER', 'RESOURCE_QUANTITY','RESOURCE_PERSONNEL','CODE_NAME',
              'INC_IDENTIFIER','PCT_CONTAINED_COMPLETED'] 
#2020 fires
#fire_incidents = ['AUGUST COMPLEX','BOBCAT', 'DOLAN', 'HOLIDAY FARM','CREEK', 'LAKE', 'CAMERON PEAK', 'PINE GULCH','EAST TROUBLESOME']
#fire_incidents = ['EAST TROUBLESOME','HOLIDAY FARM', 'CAMERON PEAK', 'PINE GULCH']
fire_incidents=['HOLIDAY FARM']
path_poly = '/data2/lthapa/ML_daily/fire_polygons/'
suffix_poly = 'Z_day_start.geojson'
start_time=12
for jj in range(len(fire_incidents)):
    fire_name = fire_incidents[jj].lower().replace(' ','_')
    print(path_poly+fire_name+'_VIIRS_daily_'+str(start_time)+suffix_poly)
    fire_daily = gpd.read_file(path_poly+fire_name+'_VIIRS_daily_'+str(start_time)+suffix_poly)
    print(fire_daily.crs)
    fire_daily=fire_daily.drop(columns=['Current Overpass'])
    fire_daily = fire_daily.drop(np.where(fire_daily['geometry']==None)[0])
    fire_daily['fire area (ha)'] = fire_daily['geometry'].area/10000 #hectares. from m2
    fire_daily.set_geometry(col='geometry', inplace=True) #designate the geometry column
    fire_daily = fire_daily.rename(columns={'Current Day':'UTC Day', 'Local Day': str(start_time)+ 'Z Start Day'})
    
    fire_daily = fire_daily.iloc[np.array(fire_daily['UTC Day'].values,dtype='datetime64')<=np.datetime64('2020-10-31'),:]
    
    #hrrr with average wind
    #hrrr = hrrr_timeseries_ws(fire_daily,start_time)
    #print(hrrr)
    #hrrr.to_csv('./fire_features/'+fire_name+'_Daily_HRRR_ws_'+str(start_time)+'Z_day_start.csv') #daily averages
    
    #rave
    #rave=rave_timeseries(fire_daily,start_time,24)
    #print(rave)
    #rave.to_csv('./fire_features/'+fire_name+'_Daily_RAVE_'+str(start_time)+'Z_day_start.csv') #daily sum
    
    #sit 209 data
    resources = resources_timeseries(fire_daily,start_time,sit209)
    print(resources)
    #resources.to_csv('./fire_features/'+fire_name+'_Daily_Resources_'+str(start_time)+'Z_day_start.csv')
    
    #fuel loading (takes 20 mins to process AC on the 1km grid)
    #fuel_loading = fuel_loading_timeseries(fire_daily,start_time)
    #print(fuel_loading)
    #fuel_loading.to_csv('./fire_features/'+fire_name+'_Daily_FUEL_LOADING_'+str(start_time)+'Z_day_start.csv') #daily averages


    #Fuel Loadings fwi
    #fuel_loading_fwi = fuel_loading_fwi_timeseries(fire_daily,12)
    #print(fuel_loading_fwi)
    #fuel_loading_fwi.to_csv('./fire_features/'+fire_name+'_Daily_FUEL_FWI_'+str(start_time)+'Z_day_start.csv') #daily averages

    
    #low res fuel loadings FWI
    #fuel_loading_fwi_lores = fuel_loading_fwi_lowres_timeseries(fire_daily,12)
    """tic = datetime.datetime.now()
    fuel_fwi_intersections = Parallel(n_jobs=8)(delayed(calculate_intersection)
                                 (fire_daily.iloc[ii:ii+1],'FUEL_FWI_GRID_990M',0.05) 
                                 for ii in range(len(fire_daily)))
    toc = datetime.datetime.now()
    print(toc-tic)
    print([fuel_fwi_intersections[jj]['weights'].sum() for jj in range(len(fuel_fwi_intersections))])
    
    fire_fuel_fwi_intersection=gpd.GeoDataFrame(pd.concat(fuel_fwi_intersections, ignore_index=True))
    fire_fuel_fwi_intersection = fire_fuel_fwi_intersection.drop(columns='geometry')
    fire_fuel_fwi_intersection = fire_fuel_fwi_intersection.set_index(['12Z Start Day','row', 'col'])
    fire_fuel_fwi_intersection_xr = fire_fuel_fwi_intersection.to_xarray()

    #path_fuel_fwi = '/data2/lthapa/ML_daily/bobcat_fuel_fwi.nc'
    #path_fuel_fwi = '/data2/lthapa/ML_daily/bobcat_fuel_fwi_360m.nc'
    path_fuel_fwi = '/data2/lthapa/ML_daily/fuel_fwi_990m.nc'
    dat_fuel_fwi = xr.open_dataset(path_fuel_fwi) #map is fixed in time
    dat_fuel_fwi_daily = dat_fuel_fwi.expand_dims({'time': pd.to_datetime(fire_fuel_fwi_intersection_xr['12Z Start Day'].values)}) #the PWS expanded over all the days

    dat_fuel_fwi_sub_daily = dat_fuel_fwi_daily.sel(row = fire_fuel_fwi_intersection_xr['row'].values, 
                                        col = fire_fuel_fwi_intersection_xr['col'].values, method='nearest')
    #preallocate space for the output
    varis = ['day','Extreme_N', 'VeryHigh_N','High_N', 'Moderate_N', 'Low_N']
    df_loading = generate_df(varis, len(fire_daily))

    df_loading['day'] = fire_daily['12Z Start Day'].values
    for var in varis[1:len(varis)]:
        df_loading[var] = np.nansum(fire_fuel_fwi_intersection_xr['weights'].values*dat_fuel_fwi_sub_daily[var].values, axis=(1,2))

    print(df_loading)
    df_loading.to_csv('./fire_features/'+fire_name+'_Daily_COARSE990_FUEL_FWI_12Z_day_start.csv') #daily averages
"""

/data2/lthapa/ML_daily/fire_polygons/holiday_farm_VIIRS_daily_12Z_day_start.geojson
epsg:3347
        INC209R_IDENTIFIER INCIDENT_NAME       REPORT_FROM_DATE  \
117023            11926607  Holiday Farm   9/8/2020 12:17:00 AM   
117024            11926607  Holiday Farm   9/8/2020 12:17:00 AM   
117025            11926607  Holiday Farm   9/8/2020 12:17:00 AM   
117026            11926607  Holiday Farm   9/8/2020 12:17:00 AM   
117027            11926627  Holiday Farm    9/8/2020 8:45:00 PM   
117028            11926627  Holiday Farm    9/8/2020 8:45:00 PM   
117029            11926627  Holiday Farm    9/8/2020 8:45:00 PM   
117030            11926627  Holiday Farm    9/8/2020 8:45:00 PM   
117031            11926627  Holiday Farm    9/8/2020 8:45:00 PM   
117032            11926627  Holiday Farm    9/8/2020 8:45:00 PM   
117033            11926627  Holiday Farm    9/8/2020 8:45:00 PM   
117034            11926627  Holiday Farm    9/8/2020 8:45:00 PM   
117035            11926627  Holiday

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


# Dataset Dependent Helper Functions

## HRRR

In [17]:
def hrrr_timeseries_ws(df,day_start_hour):  #with the wind speed
    varis_derived = ['day','hd0w0','hd1w0','hd2w0','hd3w0','hd4w0','hd5w0']
    df_hrrr_derived = generate_df(varis_derived, len(df))
    
    varis_raw = ['temp_2m', 'q_2m','wind_speed','veggie', 'dewpt','weasd','soilm', 'esat_2m','e_2m',
             'vpd_2m', 'hwp','veggie', 'dewpt','veg_term','gust_max_term', 'dd_term',
            'mois_term','temp_2m', 'q_2m','snowc_term']
    df_hrrr_raw = generate_df(varis_raw, len(df))
    
   
    
    #do the intersection, in parallel
    tic=datetime.datetime.now()
    hrrr_intersections = Parallel(n_jobs=8)(delayed(calculate_intersection)
                                 (df.iloc[ii:ii+1],'HRRR_GRID',0.05) 
                                 for ii in range(len(df)))
    fire_hrrr_intersection=gpd.GeoDataFrame(pd.concat(hrrr_intersections, ignore_index=True))
    fire_hrrr_intersection.set_geometry(col='geometry')
    toc = datetime.datetime.now()
    print(toc-tic)
    
    
    #loop over all of the days we have intersections
    times_intersect = np.unique(fire_hrrr_intersection[str(day_start_hour)+ 'Z Start Day'].values)
    times_utc = np.unique(fire_hrrr_intersection['UTC Day'].values)
    
    count = 0
    for today in times_intersect:
        print(today)
        #get the time
        df_sub = fire_hrrr_intersection.iloc[np.where(fire_hrrr_intersection[str(day_start_hour)+ 'Z Start Day'].values==today)]
        df_sub = df_sub.set_index([str(day_start_hour)+ 'Z Start Day', 'row', 'col'])
        df_sub=df_sub[~df_sub.index.duplicated()]
        intersection_sub = df_sub.to_xarray() #polygon and weights for today
        
        times_back = pd.date_range(start=np.datetime64(today)-np.timedelta64(5,'D'), end=np.datetime64(today)+
                                   np.timedelta64(1,'D'),freq='H')
        files_back,times_back_used = make_file_namelist(times_back,'/data2/lthapa/ML_daily/pygraf/Processed_HRRR_YYYYMMDDHH.nc')

        #load in all the merra files associated with this lookback window
        dat_hrrr = xr.open_mfdataset(files_back,concat_dim='time',combine='nested',compat='override', coords='all')
        dat_hrrr = dat_hrrr.assign_coords({'time': times_back_used})

        #add the derived data (svp, vp, vpd)
        dat_hrrr=dat_hrrr.assign(esat_2m=sat_vap_press(dat_hrrr.temp_2m))
        dat_hrrr=dat_hrrr.assign(e_2m=vap_press(dat_hrrr.q_2m, dat_hrrr.temp_2m))
        dat_hrrr=dat_hrrr.assign(VPD=dat_hrrr.esat_2m-dat_hrrr.e_2m)
        
        hrrr_daily_mean = dat_hrrr.resample(time='24H',base=day_start_hour, label='left').mean(dim='time') #take the daily mean        
        
        hrrr_daily_mean_region = hrrr_daily_mean.sel(grid_yt = np.unique(intersection_sub['row'].values),
                                                    grid_xt = np.unique(intersection_sub['col'].values)) #get the location of the overlaps
        #print(hrrr_daily_mean_region['time'].values)
        hd0 = np.nansum((hrrr_daily_mean_region['vpd_2m'].sel(time=np.datetime64(today+ ' '+str(day_start_hour)+':00:00')).values)*(intersection_sub['weights'].values))
        hd1 = np.nansum((hrrr_daily_mean_region['vpd_2m'].sel(time=np.datetime64(today+ ' '+str(day_start_hour)+':00:00')-np.timedelta64(1,'D')).values)*
                     (intersection_sub['weights'].values))
        hd2 = np.nansum((hrrr_daily_mean_region['vpd_2m'].sel(time=np.datetime64(today+ ' '+str(day_start_hour)+':00:00')-np.timedelta64(2,'D')).values)*
                     (intersection_sub['weights'].values))
        hd3 = np.nansum((hrrr_daily_mean_region['vpd_2m'].sel(time=np.datetime64(today+ ' '+str(day_start_hour)+':00:00')-np.timedelta64(3,'D')).values)*
                     (intersection_sub['weights'].values))
        hd4 = np.nansum((hrrr_daily_mean_region['vpd_2m'].sel(time=np.datetime64(today+ ' '+str(day_start_hour)+':00:00')-np.timedelta64(4,'D')).values)*
                     (intersection_sub['weights'].values))
        hd5 = np.nansum((hrrr_daily_mean_region['vpd_2m'].sel(time=np.datetime64(today+ ' '+str(day_start_hour)+':00:00')-np.timedelta64(5,'D')).values)*
                     (intersection_sub['weights'].values))
        w = np.nansum((hrrr_daily_mean_region['wind_speed'].sel(time=np.datetime64(today+ ' '+str(day_start_hour)+':00:00')).values)*(intersection_sub['weights'].values))
        t = np.nansum((hrrr_daily_mean_region['temp_2m'].sel(time=np.datetime64(today+ ' '+str(day_start_hour)+':00:00')).values)*(intersection_sub['weights'].values))
        
        df_hrrr_derived.iloc[count,:] = [today+ ' '+str(day_start_hour)+':00:00',hd0*w,hd1*w,hd2*w,hd3*w,hd4*w,hd5*w]
        
        for var in varis_raw:
            df_hrrr_raw[var].iloc[count] = np.nansum(intersection_sub['weights'].values*hrrr_daily_mean_region[var].sel(time=np.datetime64(today+ ' '+str(day_start_hour)+':00:00')).values, axis=(1,2))
        
        df_hrrr = pd.concat([df_hrrr_derived,df_hrrr_raw],axis=1)
        dat_hrrr.close()
        count =count+1
        #print(df_hrrr)
    return df_hrrr

## RAVE

In [88]:
def rave_timeseries(df, day_start_hour, sum_interval):
    varis = ['day','Mean_FRP', 'FRE', 'CO2', 'CO', 'SO2', 'OC', 'BC', 'PM2.5', 'NOx', 'NH3'] #don't need 'area', it's the area of each cell
    df_rave = generate_df(varis, len(df))
    
    #do the intersection, in parallel
    rave_intersections = Parallel(n_jobs=8)(delayed(calculate_intersection)
                                 (df.iloc[ii:ii+1],'RAVE_GRID',0.1) 
                                 for ii in range(len(df)))
    fire_rave_intersection=gpd.GeoDataFrame(pd.concat(rave_intersections, ignore_index=True))
    fire_rave_intersection.set_geometry(col='geometry')    
    fire_rave_intersection = fire_rave_intersection.set_index([str(day_start_hour)+ 'Z Start Day', 'row', 'col'])
    fire_rave_intersection=fire_rave_intersection[~fire_rave_intersection.index.duplicated()]

    fire_rave_intersection_xr = fire_rave_intersection.to_xarray()
    
    #load in rave data associated with the fire
    times = pd.date_range(np.datetime64(df[str(day_start_hour)+ 'Z Start Day'].iloc[0]),
                        np.datetime64(df[str(day_start_hour)+ 'Z Start Day'].iloc[len(df)-1])+
                        np.timedelta64(1,'D'))
    rave_filenames,times_back_used = make_file_namelist(times,'/data2/lthapa/YYYY/AprYYYY_to_OctYYYY/Hourly_Emissions_FV3_13km_YYYYMMDD0000_YYYYMMDD2300xr.nc')
    dat_rave = xr.open_mfdataset(rave_filenames,concat_dim='Time',combine='nested',compat='override', coords='all')
    dat_rave = dat_rave.assign_coords({'Time': dat_rave.time}) #assign coords so we can resample along time
    dat_rave = dat_rave.resample(Time=str(sum_interval)+'H',base=day_start_hour).sum(dim='Time') #take the daily sum
    print(dat_rave.Time)

    times_sel = [np.datetime64(fire_rave_intersection_xr[str(day_start_hour)+ 'Z Start Day'].values[kk])+np.timedelta64(12,'h')
                for kk in range(len(fire_rave_intersection_xr[str(day_start_hour)+ 'Z Start Day'].values))]
    
    #select the locations and times we want
    dat_rave_sub = dat_rave.isel(yFRP = fire_rave_intersection_xr['row'].values.astype(int), 
                    xFRP = fire_rave_intersection_xr['col'].values.astype(int)).sel(
                    Time = pd.to_datetime(times_sel))#these should be lined up correctly
    
    ndays = len(fire_rave_intersection_xr[str(day_start_hour)+ 'Z Start Day'])
    


    df_rave['day'].iloc[:] = pd.to_datetime(fire_rave_intersection_xr[str(day_start_hour)+ 'Z Start Day'].values)
    for var in varis[1:]:
        df_rave[var] = np.nansum(fire_rave_intersection_xr['weights'].values*dat_rave_sub[var].values, axis=(1,2))
    
    return df_rave

## Resources

In [105]:
def resources_timeseries(df, day_start_hour,sit209_data):

    #get the fire incident number, lat, and lon
    incident_number = df['Incident Number'].iloc[0]
    fire_lat = df['Lat Fire'].iloc[0]
    fire_lon = df['Lon Fire'].iloc[0]
    #print(incident_number, fire_lat, fire_lon)
    
    sit209_data_fire = sit209_data[sit209_data['INC_IDENTIFIER']==incident_number]
    #print(sit209_data_fire)
    #do the time zone conversion
    obj=TimezoneFinder() #initialize the timezone finder
    tz = obj.timezone_at(lng=fire_lon, lat=fire_lat) #get the timezone
    local = pytz.timezone(tz)
    utc = pytz.utc
    
    #put the start and end times in local time
    loc_dt_start = [local.localize(datetime.datetime.strptime(date, '%m/%d/%Y %H:%M:%S %p')) for date in sit209_data_fire['REPORT_FROM_DATE'].values]
    loc_dt_end = [local.localize(datetime.datetime.strptime(date, '%m/%d/%Y %H:%M:%S %p')) for date in sit209_data_fire['REPORT_TO_DATE'].values]
    
    #put them in UTC time
    utc_dt_start = [time_start.astimezone(utc) for time_start in loc_dt_start]
    utc_dt_end = [time_end.astimezone(utc) for time_end in loc_dt_end]
    
    start_day = pd.to_datetime(utc_dt_start[0]).strftime('%Y-%m-%d')+' '+str(day_start_hour)+':00'
    
    
    #reassign to UTC time, this DOES keep track of daylight savings (eg +7 is used for PDT, +8 is used for PST)
    sit209_data_fire['Report Start UTC'] = pd.to_datetime(utc_dt_start)
    sit209_data_fire['Report End UTC'] = pd.to_datetime(utc_dt_end)
    sit209_data_fire['Timezone']= tz
    print(sit209_data_fire)
    
    #localise the index
    sit209_data_fire = sit209_data_fire.set_index(['Report Start UTC']).tz_localize(None)
    #print(sit209_data_fire.iloc[0:4])
    
    
    ## do the 12z-12z day grouping, based on the UTC times
    #start_day_utc = str(utc_dt_start[0])
    start_day_utc=str(df[str(day_start_hour)+'Z Start Day'][0])
    start_datetime_utc = np.datetime64(start_day_utc[0:10]+'T'+str(day_start_hour).zfill(2)+':00')
    print(start_datetime_utc)
    #sit209_data_fire = sit209_data_fire.resample('24H',origin=start_datetime_utc)

    personnel = sit209_data_fire['RESOURCE_PERSONNEL'].resample('24H',origin=start_datetime_utc).sum().reset_index()
    percent_contained = sit209_data_fire['PCT_CONTAINED_COMPLETED'].resample('24H',origin=start_datetime_utc).mean().reset_index()
    df_sit209 = pd.concat([percent_contained,personnel.drop(columns='Report Start UTC')],axis=1)
    print(df_sit209)
    df_sit209.columns=['day', 'percent_contained', 'personnel']
    df_sit209['day'] = pd.to_datetime(df_sit209['day'].values).strftime('%Y-%m-%d')
    inds = df_sit209['day'].isin(df[str(day_start_hour)+'Z Start Day']).values
    
    return df_sit209[inds]
    

# Dataset independent helper functions

In [2]:
#makes and saves a geodataframe of a grid given the center and corner points for that grid as 2D matrices
def build_one_gridcell(LAT_COR, LON_COR, LAT_CTR, LON_CTR, loc):
    ii=loc[0]
    jj=loc[1]

    #print(LAT_CTR[ii,jj], LON_CTR[ii,jj]) #ctr
    sw = (LON_COR[ii, jj],LAT_COR[ii, jj]) #SW
    se =(LON_COR[ii, jj+1],LAT_COR[ii, jj+1]) #SE
    nw = (LON_COR[ii+1, jj],LAT_COR[ii+1, jj]) #NW
    ne = (LON_COR[ii+1, jj+1],LAT_COR[ii+1, jj+1]) #NE
            
    poly_cell = Polygon([sw,nw,ne,se])

    return LAT_CTR[ii,jj], LON_CTR[ii,jj],ii,jj,poly_cell
    

In [3]:
#poly is the polygon for one timestep (in lcc)
#grid is an xarray of a model grid from the nc file
#grid_names is a string array [0:'lat_center_name',1:'lon_center_name',2:'lat_corner_name',3:'lon_corner_name']

def calculate_intersection(poly,dataset_name,bf):
    print(poly['12Z Start Day'])
    #load in the merra grid
    grid = xr.open_dataset(dataset_name+'.nc')

    #get the bounds of the buffered polygons
    poly_latlon =poly.to_crs(epsg=4326)
    bounds = poly_latlon.buffer(bf).bounds
    
    #first check for rows and cols, filtering near the polygon
    [rows,cols] = np.where((grid.LAT_CTR.values>bounds['miny'].values)&
                    (grid.LAT_CTR.values<bounds['maxy'].values)&
                    (grid.LON_CTR.values>bounds['minx'].values)&
                    (grid.LON_CTR.values<bounds['maxx'].values))
    #print(rows,cols)
    
    locs = zip(rows,cols)
    #print([loc for loc in locs])
    
    """
    if (rows.size==0)|(cols.size==0):
        lat_middle = (bounds['maxy'].values+bounds['miny'].values)/2
        lon_middle = (bounds['maxx'].values+bounds['minx'].values)/2
        
        distance = np.sqrt((grid.LAT_CTR.values-lat_middle)**2+(grid.LON_CTR.values-lon_middle)**2)
        row_minloc,col_minloc = np.where(distance==np.min(distance))
        print(row_minloc,col_minloc)
        
        rows = np.arange(row_minloc-1,row_minloc+2,1)
        cols = np.arange(col_minloc-1,col_minloc+2,1)
        
        locs = [(row,col) for row in rows for col in cols]
        
    #print(rows,cols)
    #print(locs)
    """
    
    #make a geodataframe (in paralell of the rows and cols)
    results = Parallel(n_jobs=8)(delayed(build_one_gridcell)
                                 (grid['LAT_COR'].values, grid['LON_COR'].values,
                                  grid['LAT_CTR'].values, grid['LON_CTR'].values,loc) 
                                 for loc in locs)
    
    #format the grid subset into a dataframs
    df_grid=gpd.GeoDataFrame(results)
    df_grid.columns = ['lat', 'lon', 'row', 'col', 'geometry']
    df_grid.set_geometry(col='geometry',inplace=True,crs='EPSG:4326') #need to say it's in lat/lon before transform to LCC
    df_grid=df_grid.to_crs(epsg=3347)

    
    #intersect the polygon with the grid subset
    intersection = gpd.overlay(df_grid, poly, how='intersection',keep_geom_type=False).drop_duplicates()
    intersection['grid intersection area (ha)'] =intersection['geometry'].area/10000
    intersection['weights'] = intersection['grid intersection area (ha)']/intersection['fire area (ha)'] 
    
    return intersection


In [4]:
#LAT and LON are 2d arrays
def calculate_grid_cell_corners(LAT, LON):
    #we will assume the very edges of the polygons don't touch the boundary of the domain
    lat_corners = (LAT[0:(LAT.shape[0]-1),  0:(LAT.shape[1])-1] + LAT[1:(LAT.shape[0]), 1:(LAT.shape[1])])/2
    lon_corners = (LON[0:(LAT.shape[0]-1),  0:(LAT.shape[1])-1] + LON[1:(LAT.shape[0]), 1:(LAT.shape[1])])/2
    return lat_corners, lon_corners


In [5]:
def make_file_namelist(time,base_filename):
    filename_list = np.array([])
    times_back_used = np.array([])
    for jj in range(len(time)):
        fname = base_filename.replace('YYYY',time[jj].strftime('%Y')).\
                                replace('MM',time[jj].strftime('%m')).\
                                replace('DD',time[jj].strftime('%d')).\
                                replace('HH',time[jj].strftime('%H')).\
                                replace('JJJ',time[jj].strftime('%j'))
        if exists(fname):
            filename_list = np.append(filename_list,fname)
            times_back_used = np.append(times_back_used,time[jj])
    return filename_list, times_back_used

In [6]:
def generate_df(variables, length):
    df = pd.DataFrame()
    for vv in variables:
        df[vv] = np.zeros(length)
    return df