In [1]:
import pandas as pd
#pd.set_option('display.max_rows', None)
import geopandas as gpd
import matplotlib.pyplot as plt
import os
import cartopy.crs as ccrs
import cartopy
import numpy as np
import netCDF4 as nc
np.set_printoptions(threshold=100000)
from shapely.geometry import Polygon, Point, MultiPoint
from shapely.ops import cascaded_union
from datetime import datetime, timedelta
import time

from my_functions import sat_vap_press, vap_press, hot_dry_windy, haines

## Load in and concatenate the feature vectors
(labels will be selected from the old feature vector this go-round. Will be same day forecast)

In [2]:
feat_fuels = pd.read_csv('fire_features_fuels.csv') #fccs
feat_merra = pd.read_csv('fire_features_merra.csv') #merra 
feat_precip = pd.read_csv('fire_features_precip.csv') #precip

labels_pm25 = pd.read_csv('fire_labels_1dayout.csv') #labels (12Z PM2.5 rates from QFED)

In [3]:
#combine the two precip names into one
precip = feat_precip[['A_PCP_GDS5_SFC_acc24h', 'APCP_P8_L1_GST0_acc']].values
precip = np.nanmax(precip, axis=1)


In [4]:
# pull fccs, slp and aspect out of the fuels csv
fccs = feat_fuels['fccs'].values
slp=feat_fuels['slp'].values
asp = feat_fuels['asp'].values

In [5]:
#put all the extras into a dataframe
features_extra = pd.DataFrame({'precip': precip, 'fccs': fccs, 'slp': slp, 'asp': asp})
#add it all to the MERRA vector
fire_features = pd.concat((feat_merra, features_extra), axis=1)
fire_features

Unnamed: 0,Incident Number,Fire Name,Current Day,Lat Fire,Lon Fire,Number of VIIRS points,TLML_12Z,QLML_12Z,SPEEDLML_12Z,PS_12Z,T_12Z_700mb,T_12Z_500mb,QV_12Z_700mb,PBLH_12Z,TCZPBL_12Z,precip,fccs,slp,asp
0,10662684.0,PAINTED WAGON,2019-04-08,33.786944,-112.753333,3.0,290.748108,0.004608,10.770707,91644.742188,281.218140,261.982361,0.001890,64.297165,104.286018,0.0,307.0,2.0,-3.0
1,10663171.0,LONE MOUNTAIN,2019-06-09,33.808056,-105.738611,2.0,290.003723,0.003563,8.092171,81498.414062,286.447601,266.682251,0.002318,321.130920,170.300583,0.0,25.0,10.0,98.0
2,10663171.0,LONE MOUNTAIN,2019-06-10,33.808056,-105.738611,0.0,281.681793,0.006098,10.386342,82444.875000,281.240356,262.869751,0.007128,456.560364,585.127686,0.0,25.0,10.0,98.0
3,10663171.0,LONE MOUNTAIN,2019-06-11,33.808056,-105.738611,0.0,283.622253,0.009279,6.614388,81877.890625,283.966187,263.090515,0.005877,113.951096,175.440399,0.0,25.0,10.0,98.0
4,10663171.0,LONE MOUNTAIN,2019-06-12,33.808056,-105.738611,0.0,284.536926,0.006591,12.682461,81796.609375,284.776917,267.068909,0.004644,314.892853,670.379883,0.0,25.0,10.0,98.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8156,11979237.0,Trail,2020-11-02,34.147222,-112.117778,2.0,293.930328,0.003627,7.339819,91107.664062,282.576782,262.727051,0.004921,64.977539,219.045731,0.0,601.0,260.0,2591.0
8157,11980186.0,SANDERSON,2020-12-13,33.889444,-117.070833,11.0,279.254852,0.001604,6.979411,90997.539062,276.615021,257.887451,0.001588,61.692558,355.103851,0.0,44.0,42.0,819.0
8158,11980587.0,CREEK 5,2020-12-24,33.402646,-117.266795,2.0,288.125366,0.001511,10.678336,99387.382812,275.902649,258.410187,0.000153,148.465149,612.890869,0.0,602.0,27.0,642.0
8159,11980587.0,CREEK 5,2020-12-25,33.402646,-117.266795,16.0,287.946075,0.002882,5.147175,99128.710938,272.194824,258.612366,0.002691,328.705231,123.513199,0.0,602.0,75.0,881.0


## Drop the zero features

In [6]:
inds = np.where(fire_features['QV_12Z_700mb']==0) #here's where we drop the zero features
features = fire_features.drop(labels=inds[0], axis=0)
features = features.reset_index(drop=True) #reset the indices beecause we dropped some


labels = labels_pm25.drop(labels=inds[0], axis=0)
labels = labels.reset_index(drop=True)
labels

Unnamed: 0,Incident Number,Fire Name,Current Day,Lat Fire,Lon Fire,Number of VIIRS points,biomass_12Z_today,biomass_12Z_tomorrow
0,10662684.0,PAINTED WAGON,2019-04-08,33.786944,-112.753333,3.0,7.627477e-10,9.852367e-13
1,10663171.0,LONE MOUNTAIN,2019-06-09,33.808056,-105.738611,2.0,0.000000e+00,0.000000e+00
2,10663171.0,LONE MOUNTAIN,2019-06-10,33.808056,-105.738611,0.0,0.000000e+00,0.000000e+00
3,10663171.0,LONE MOUNTAIN,2019-06-11,33.808056,-105.738611,0.0,0.000000e+00,0.000000e+00
4,10663171.0,LONE MOUNTAIN,2019-06-12,33.808056,-105.738611,0.0,0.000000e+00,0.000000e+00
...,...,...,...,...,...,...,...,...
8090,11979237.0,Trail,2020-11-02,34.147222,-112.117778,2.0,0.000000e+00,0.000000e+00
8091,11980186.0,SANDERSON,2020-12-13,33.889444,-117.070833,11.0,1.553367e-09,1.298944e-10
8092,11980587.0,CREEK 5,2020-12-24,33.402646,-117.266795,2.0,0.000000e+00,7.784754e-10
8093,11980587.0,CREEK 5,2020-12-25,33.402646,-117.266795,16.0,7.784754e-10,0.000000e+00


## Hot-Dry-Windy

In [7]:
# calculate the hot-dry-windy and related variables
print(len(features['TLML_12Z'].values))
esat = sat_vap_press(features['TLML_12Z'].values)
e = vap_press(features['QLML_12Z'].values, features['PS_12Z'].values)
hdw = hot_dry_windy(e, esat, features['SPEEDLML_12Z'].values)
rh = e/esat
#append them to the dataframe
df_hdw = pd.DataFrame({'ESATLML_12Z':esat, 'ELML_12Z':e, 'HDWLML':hdw, 'RHLML_12Z': rh})
features = pd.concat([features, df_hdw], axis=1)
features

8095


Unnamed: 0,Incident Number,Fire Name,Current Day,Lat Fire,Lon Fire,Number of VIIRS points,TLML_12Z,QLML_12Z,SPEEDLML_12Z,PS_12Z,...,PBLH_12Z,TCZPBL_12Z,precip,fccs,slp,asp,ESATLML_12Z,ELML_12Z,HDWLML,RHLML_12Z
0,10662684.0,PAINTED WAGON,2019-04-08,33.786944,-112.753333,3.0,290.748108,0.004608,10.770707,91644.742188,...,64.297165,104.286018,0.0,307.0,2.0,-3.0,20.131931,2.638775,188.413662,0.131074
1,10663171.0,LONE MOUNTAIN,2019-06-09,33.808056,-105.738611,2.0,290.003723,0.003563,8.092171,81498.414062,...,321.130920,170.300583,0.0,25.0,10.0,98.0,19.205433,1.812535,140.746304,0.094376
2,10663171.0,LONE MOUNTAIN,2019-06-10,33.808056,-105.738611,0.0,281.681793,0.006098,10.386342,82444.875000,...,456.560364,585.127686,0.0,25.0,10.0,98.0,11.125224,3.146602,82.868697,0.282835
3,10663171.0,LONE MOUNTAIN,2019-06-11,33.808056,-105.738611,0.0,283.622253,0.009279,6.614388,81877.890625,...,113.951096,175.440399,0.0,25.0,10.0,98.0,12.676732,4.770291,52.296270,0.376303
4,10663171.0,LONE MOUNTAIN,2019-06-12,33.808056,-105.738611,0.0,284.536926,0.006591,12.682461,81796.609375,...,314.892853,670.379883,0.0,25.0,10.0,98.0,13.471937,3.375874,128.042935,0.250586
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8090,11979237.0,Trail,2020-11-02,34.147222,-112.117778,2.0,293.930328,0.003627,7.339819,91107.664062,...,64.977539,219.045731,0.0,601.0,260.0,2591.0,24.548924,2.063010,165.042546,0.084037
8091,11980186.0,SANDERSON,2020-12-13,33.889444,-117.070833,11.0,279.254852,0.001604,6.979411,90997.539062,...,61.692558,355.103851,0.0,44.0,42.0,819.0,9.421946,0.909643,59.410857,0.096545
8092,11980587.0,CREEK 5,2020-12-24,33.402646,-117.266795,2.0,288.125366,0.001511,10.678336,99387.382812,...,148.465149,612.890869,0.0,602.0,27.0,642.0,17.031751,0.935601,171.880108,0.054933
8093,11980587.0,CREEK 5,2020-12-25,33.402646,-117.266795,16.0,287.946075,0.002882,5.147175,99128.710938,...,328.705231,123.513199,0.0,602.0,75.0,881.0,16.836034,1.782489,77.483227,0.105873


## Haines Index

In [8]:
# calculate the Haines and related variables
import metpy.calc as mc
from metpy.units import units

#vapor pressure at 700mb
e_hPa_700 = vap_press(features['QV_12Z_700mb'].values, 700*np.ones(features['QV_12Z_700mb'].values.shape)) # hPa
e_hPa_700 = units.Quantity(e_hPa_700, "hPa")

#dewpoint at 700mb
td_700 = mc.dewpoint(e_hPa_700)
td_700 = np.array(td_700) # degrees C
print(td_700.shape)

haines_index = haines(features['T_12Z_700mb'].values, features['T_12Z_500mb'], td_700)

df_haines = pd.DataFrame({'Td_12Z_700mb':td_700, 'E_700mb':e_hPa_700, 'HAINES': haines_index})

features = pd.concat([features, df_haines], axis=1)
features

(8095,)


Unnamed: 0,Incident Number,Fire Name,Current Day,Lat Fire,Lon Fire,Number of VIIRS points,TLML_12Z,QLML_12Z,SPEEDLML_12Z,PS_12Z,...,fccs,slp,asp,ESATLML_12Z,ELML_12Z,HDWLML,RHLML_12Z,Td_12Z_700mb,E_700mb,HAINES
0,10662684.0,PAINTED WAGON,2019-04-08,33.786944,-112.753333,3.0,290.748108,0.004608,10.770707,91644.742188,...,307.0,2.0,-3.0,20.131931,2.638775,188.413662,0.131074,-66.278872,0.008245,5.0
1,10663171.0,LONE MOUNTAIN,2019-06-09,33.808056,-105.738611,2.0,290.003723,0.003563,8.092171,81498.414062,...,25.0,10.0,98.0,19.205433,1.812535,140.746304,0.094376,-64.771856,0.010118,5.0
2,10663171.0,LONE MOUNTAIN,2019-06-10,33.808056,-105.738611,0.0,281.681793,0.006098,10.386342,82444.875000,...,25.0,10.0,98.0,11.125224,3.146602,82.868697,0.282835,-55.985997,0.031258,5.0
3,10663171.0,LONE MOUNTAIN,2019-06-11,33.808056,-105.738611,0.0,283.622253,0.009279,6.614388,81877.890625,...,25.0,10.0,98.0,12.676732,4.770291,52.296270,0.376303,-57.560401,0.025739,5.0
4,10663171.0,LONE MOUNTAIN,2019-06-12,33.808056,-105.738611,0.0,284.536926,0.006591,12.682461,81796.609375,...,25.0,10.0,98.0,13.471937,3.375874,128.042935,0.250586,-59.441913,0.020317,5.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8090,11979237.0,Trail,2020-11-02,34.147222,-112.117778,2.0,293.930328,0.003627,7.339819,91107.664062,...,601.0,260.0,2591.0,24.548924,2.063010,165.042546,0.084037,-58.983160,0.021533,5.0
8091,11980186.0,SANDERSON,2020-12-13,33.889444,-117.070833,11.0,279.254852,0.001604,6.979411,90997.539062,...,44.0,42.0,819.0,9.421946,0.909643,59.410857,0.096545,-67.541377,0.006927,5.0
8092,11980587.0,CREEK 5,2020-12-24,33.402646,-117.266795,2.0,288.125366,0.001511,10.678336,99387.382812,...,602.0,27.0,642.0,17.031751,0.935601,171.880108,0.054933,-82.935717,0.000664,5.0
8093,11980587.0,CREEK 5,2020-12-25,33.402646,-117.266795,16.0,287.946075,0.002882,5.147175,99128.710938,...,602.0,75.0,881.0,16.836034,1.782489,77.483227,0.105873,-63.654890,0.011750,4.0


## Concatenate features and labels

In [11]:
dataset = pd.concat([features, labels[['biomass_12Z_today', 'biomass_12Z_tomorrow']]], axis=1)
dataset

#dataset.to_csv('training_data_1day_forecast.csv', index=False)

Unnamed: 0,Incident Number,Fire Name,Current Day,Lat Fire,Lon Fire,Number of VIIRS points,TLML_12Z,QLML_12Z,SPEEDLML_12Z,PS_12Z,...,asp,ESATLML_12Z,ELML_12Z,HDWLML,RHLML_12Z,Td_12Z_700mb,E_700mb,HAINES,biomass_12Z_today,biomass_12Z_tomorrow
0,10662684.0,PAINTED WAGON,2019-04-08,33.786944,-112.753333,3.0,290.748108,0.004608,10.770707,91644.742188,...,-3.0,20.131931,2.638775,188.413662,0.131074,-66.278872,0.008245,5.0,7.627477e-10,9.852367e-13
1,10663171.0,LONE MOUNTAIN,2019-06-09,33.808056,-105.738611,2.0,290.003723,0.003563,8.092171,81498.414062,...,98.0,19.205433,1.812535,140.746304,0.094376,-64.771856,0.010118,5.0,0.000000e+00,0.000000e+00
2,10663171.0,LONE MOUNTAIN,2019-06-10,33.808056,-105.738611,0.0,281.681793,0.006098,10.386342,82444.875000,...,98.0,11.125224,3.146602,82.868697,0.282835,-55.985997,0.031258,5.0,0.000000e+00,0.000000e+00
3,10663171.0,LONE MOUNTAIN,2019-06-11,33.808056,-105.738611,0.0,283.622253,0.009279,6.614388,81877.890625,...,98.0,12.676732,4.770291,52.296270,0.376303,-57.560401,0.025739,5.0,0.000000e+00,0.000000e+00
4,10663171.0,LONE MOUNTAIN,2019-06-12,33.808056,-105.738611,0.0,284.536926,0.006591,12.682461,81796.609375,...,98.0,13.471937,3.375874,128.042935,0.250586,-59.441913,0.020317,5.0,0.000000e+00,0.000000e+00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8090,11979237.0,Trail,2020-11-02,34.147222,-112.117778,2.0,293.930328,0.003627,7.339819,91107.664062,...,2591.0,24.548924,2.063010,165.042546,0.084037,-58.983160,0.021533,5.0,0.000000e+00,0.000000e+00
8091,11980186.0,SANDERSON,2020-12-13,33.889444,-117.070833,11.0,279.254852,0.001604,6.979411,90997.539062,...,819.0,9.421946,0.909643,59.410857,0.096545,-67.541377,0.006927,5.0,1.553367e-09,1.298944e-10
8092,11980587.0,CREEK 5,2020-12-24,33.402646,-117.266795,2.0,288.125366,0.001511,10.678336,99387.382812,...,642.0,17.031751,0.935601,171.880108,0.054933,-82.935717,0.000664,5.0,0.000000e+00,7.784754e-10
8093,11980587.0,CREEK 5,2020-12-25,33.402646,-117.266795,16.0,287.946075,0.002882,5.147175,99128.710938,...,881.0,16.836034,1.782489,77.483227,0.105873,-63.654890,0.011750,4.0,7.784754e-10,0.000000e+00


## FCCS Model Features

## KBDI (not finished yet)

In [None]:
days=features['Current Day'].values
print(days[0][0:4])
days_reformatted= [days[jj][0:4]+days[jj][5:7]+days[jj][8:10] for jj in range(len(days))]
                

In [None]:
df_in = pd.DataFrame({'date': days_reformatted, 
        'precip': features['precip'].values,
         'temp': features['TLML_12Z'].values,
         'rh': features['RHLML_12Z'].values,
        'wind': features['SPEEDLML_12Z'].values})
df_in.to_csv('KBDI_IN.csv')

In [None]:
command = 'kbdi-ffdi-run -i /data2/lthapa/ML_daily/KBDI_IN.csv'+' -o /data2/lthapa/ML_daily/KBDI_OUT.csv'
print(command)
os.system(command)

In [None]:
df_kbdi = pd.read_csv('KBDI_OUT.csv')
kbdi = df_kbdi['KBDI']
print(kbdi.values)

features = pd.concat([features, kbdi], axis=1)
features

In [None]:
kbdi = np.zeros(len(features))
for ii in range(1):#len(features)):
    df_in = pd.DataFrame({'date': days_reformatted[ii], 
        'precip': features.loc[ii, 'precip'],
         'temp': features.loc[ii, 'TLML_12Z'],
         'rh': features.loc[ii, 'RHLML_12Z'],
        'wind': features.loc[ii, 'SPEEDLML_12Z']}, index=range(1))
    print(df_in)
    df_in.to_csv('/data2/lthapa/ML_daily/kbdi_ffdi_io/KBDI_IN_'+str(ii)+'.csv')
    command = 'kbdi-ffdi-run -i /data2/lthapa/ML_daily/kbdi_ffdi_io/KBDI_IN_'+str(ii)+'.csv'+' -o /data2/lthapa/ML_daily/kbdi_ffdi_io/KBDI_OUT_'+str(ii)+'.csv'
    print(command)
    os.system(command)
    df_kbdi = pd.read_csv('/data2/lthapa/ML_daily/kbdi_ffdi_io/KBDI_OUT_'+str(ii)+'.csv')
    print(df_kbdi)

In [None]:
features.loc[0, 'precip']

## Old code to concatenate precip vector

In [None]:
feat_precip_2019 = pd.read_csv('fire_features_precip_2019.csv') #2019 precip
feat_precip_2020 = pd.read_csv('fire_features_precip_2020.csv') #2020 precip
precip_2019 = feat_precip_2019['A_PCP_GDS5_SFC_acc24h'].values.reshape(-1, 1)
precip_2020 = feat_precip_2020[['A_PCP_GDS5_SFC_acc24h','APCP_P8_L1_GST0_acc']].values
precip = np.concatenate((precip_2019, precip_2020), axis=1)
precip = np.nansum(precip, axis=1)