In [1]:
import pandas as pd
#pd.set_option('display.max_rows', None)
import geopandas as gpd
import matplotlib.pyplot as plt
import os
import cartopy.crs as ccrs
import cartopy
import numpy as np
import netCDF4 as nc
np.set_printoptions(threshold=100000)
from shapely.geometry import Polygon, Point, MultiPoint
from shapely.ops import cascaded_union
from datetime import datetime, timedelta
import time
import metpy.calc as mc
from metpy.units import units

from my_functions import sat_vap_press, vap_press, hot_dry_windy, haines

## Load in and concatenate the feature vectors
This concatenates the met/precip features from today with the labels for today and tomorrow.

In [5]:
# Number of per-day column suffixes ('_0', '_1', ...) that the loops below
# iterate over.
ndays = 2

# Labels: 12Z PM2.5 rates from QFED.
labels_pm25 = pd.read_csv('fire_labels_1dayout_day0poly.csv')

# Feature tables, one CSV per source.
feat_precip = pd.read_csv('fire_features_precip_1dayout_day0poly.csv')  # precip
feat_merra = pd.read_csv('fire_features_merra_1dayout.csv')  # merra
feat_fuels = pd.read_csv('fire_features_fuels.csv')  # fccs

In [6]:
# The fuels CSV is the static feature vector: only fccs, slp and asp are taken
# from it and appended column-wise to the MERRA features.
static_cols = ['fccs', 'slp', 'asp']
feat_add = feat_fuels[static_cols]
fire_features = pd.concat([feat_merra, feat_add], axis=1)
fire_features.columns

Index(['Incident Number', 'Fire Name', 'Current Day', 'Lat Fire', 'Lon Fire',
       'Number of VIIRS points', 'TLML_12Z_0', 'QLML_12Z_0', 'SPEEDLML_12Z_0',
       'PS_12Z_0', 'T_12Z_700mb_0', 'T_12Z_500mb_0', 'QV_12Z_700mb_0',
       'PBLH_12Z_0', 'TCZPBL_12Z_0', 'TLML_12Z_1', 'QLML_12Z_1',
       'SPEEDLML_12Z_1', 'PS_12Z_1', 'T_12Z_700mb_1', 'T_12Z_500mb_1',
       'QV_12Z_700mb_1', 'PBLH_12Z_1', 'TCZPBL_12Z_1', 'fccs', 'slp', 'asp'],
      dtype='object')

In [7]:
# For every day, the precip CSV carries two differently named precip columns.
# Collapse them into one 'precip_<day>' column by taking the row-wise maximum
# (NaNs are ignored by nanmax) and append it to the feature table.
for dy in range(ndays):
    source_cols = [f'A_PCP_GDS5_SFC_acc24h_{dy}', f'APCP_P8_L1_GST0_acc_{dy}']
    precip = np.nanmax(feat_precip[source_cols].values, axis=1)
    feat_add = pd.DataFrame({f'precip_{dy}': precip})
    fire_features = pd.concat([fire_features, feat_add], axis=1)
fire_features.columns

Index(['Incident Number', 'Fire Name', 'Current Day', 'Lat Fire', 'Lon Fire',
       'Number of VIIRS points', 'TLML_12Z_0', 'QLML_12Z_0', 'SPEEDLML_12Z_0',
       'PS_12Z_0', 'T_12Z_700mb_0', 'T_12Z_500mb_0', 'QV_12Z_700mb_0',
       'PBLH_12Z_0', 'TCZPBL_12Z_0', 'TLML_12Z_1', 'QLML_12Z_1',
       'SPEEDLML_12Z_1', 'PS_12Z_1', 'T_12Z_700mb_1', 'T_12Z_500mb_1',
       'QV_12Z_700mb_1', 'PBLH_12Z_1', 'TCZPBL_12Z_1', 'fccs', 'slp', 'asp',
       'precip_0', 'precip_1'],
      dtype='object')

## Drop rows with zero-valued features (TODO: decide on the best way to do this)

In [None]:
# Drop every row whose 700 mb QV value is exactly zero, and drop the same rows
# from the labels so features and labels stay aligned row-for-row.
#
# The lookup uses the per-day column names ('QV_12Z_700mb_0', 'QV_12Z_700mb_1',
# ...) because fire_features has no unsuffixed 'QV_12Z_700mb' column; indexing
# that name raised a KeyError.
# NOTE(review): a row is dropped when ANY of the `ndays` columns is zero --
# confirm that this (rather than day 0 only) is the intended rule.
qv_cols = [f'QV_12Z_700mb_{dy}' for dy in range(ndays)]
has_zero = (fire_features[qv_cols] == 0).any(axis=1).to_numpy()
inds = np.where(has_zero)  # tuple of row-position arrays, same form as before

# A positional boolean mask does not depend on index labels matching row numbers.
features = fire_features.loc[~has_zero].reset_index(drop=True)  # renumber after dropping
labels = labels_pm25.loc[~has_zero].reset_index(drop=True)
labels

## Hot-Dry-Windy

In [8]:
# For each day, compute the hot-dry-windy value and the quantities it is built
# from using the my_functions helpers, then append them as new columns.
for dy in range(ndays):
    tag = f'12Z_{dy}'
    tlml = fire_features[f'TLML_{tag}'].values
    qlml = fire_features[f'QLML_{tag}'].values
    ps = fire_features[f'PS_{tag}'].values
    speed = fire_features[f'SPEEDLML_{tag}'].values

    esat = sat_vap_press(tlml)
    e = vap_press(qlml, ps)
    hdw = hot_dry_windy(e, esat, speed)
    rh = e / esat  # stored below as RHLML

    # Column order matches the order the values are listed here.
    df_hdw = pd.DataFrame({
        f'ESATLML_{tag}': esat,
        f'ELML_{tag}': e,
        f'HDWLML_{dy}': hdw,
        f'RHLML_{tag}': rh,
    })
    fire_features = pd.concat([fire_features, df_hdw], axis=1)
fire_features.columns

  *(53.878-1331.22/T-9.44523*np.log(T)+0.014025*T))/101325*1013.25 # hPa
  *(53.878-1331.22/T-9.44523*np.log(T)+0.014025*T))/101325*1013.25 # hPa
  *(53.878-1331.22/T-9.44523*np.log(T)+0.014025*T))/101325*1013.25 # hPa


Index(['Incident Number', 'Fire Name', 'Current Day', 'Lat Fire', 'Lon Fire',
       'Number of VIIRS points', 'TLML_12Z_0', 'QLML_12Z_0', 'SPEEDLML_12Z_0',
       'PS_12Z_0', 'T_12Z_700mb_0', 'T_12Z_500mb_0', 'QV_12Z_700mb_0',
       'PBLH_12Z_0', 'TCZPBL_12Z_0', 'TLML_12Z_1', 'QLML_12Z_1',
       'SPEEDLML_12Z_1', 'PS_12Z_1', 'T_12Z_700mb_1', 'T_12Z_500mb_1',
       'QV_12Z_700mb_1', 'PBLH_12Z_1', 'TCZPBL_12Z_1', 'fccs', 'slp', 'asp',
       'precip_0', 'precip_1', 'ESATLML_12Z_0', 'ELML_12Z_0', 'HDWLML_0',
       'RHLML_12Z_0', 'ESATLML_12Z_1', 'ELML_12Z_1', 'HDWLML_1',
       'RHLML_12Z_1'],
      dtype='object')

## Haines Index

In [9]:
# Compute the Haines index and the 700 mb quantities it needs for each day,
# then append them to the feature table.
for dy in range(ndays):
    qv_700 = fire_features[f'QV_12Z_700mb_{dy}'].values

    # Vapor pressure at 700mb: the pressure argument is a constant-700 array
    # with the same shape as the QV values.
    e_hPa_700 = vap_press(qv_700, np.full(qv_700.shape, 700.0))  # hPa
    e_hPa_700 = units.Quantity(e_hPa_700, "hPa")

    # Dewpoint at 700mb, converted to a plain array.
    td_700 = np.array(mc.dewpoint(e_hPa_700))  # degrees C
    print(td_700.shape)

    # NOTE(review): T_12Z_500mb is passed as a pandas Series while the other
    # two arguments are numpy arrays -- confirm haines() is meant to take both.
    haines_index = haines(
        fire_features[f'T_12Z_700mb_{dy}'].values,
        fire_features[f'T_12Z_500mb_{dy}'],
        td_700,
    )

    df_haines = pd.DataFrame({
        f'Td_12Z_700mb_{dy}': td_700,
        f'E_700mb_{dy}': e_hPa_700,
        f'HAINES_{dy}': haines_index,
    })

    fire_features = pd.concat([fire_features, df_haines], axis=1)
fire_features.columns

  result_magnitude = func(*stripped_args, **stripped_kwargs)
  magnitude = magnitude_op(new_self._magnitude, other._magnitude)
  B_cat[(B<=14)] = 1 # B_cat = 1 when B<=14
  B_cat[(B>14) & (B<21)] = 2 # B_cat = 2 when B>14 and B<21
  B_cat[(B>14) & (B<21)] = 2 # B_cat = 2 when B>14 and B<21
  B_cat[(B>=21)] = 3 # B_cat = 3 when B>=21


(8161,)


  result_magnitude = func(*stripped_args, **stripped_kwargs)
  magnitude = magnitude_op(new_self._magnitude, other._magnitude)
  B_cat[(B<=14)] = 1 # B_cat = 1 when B<=14
  B_cat[(B>14) & (B<21)] = 2 # B_cat = 2 when B>14 and B<21
  B_cat[(B>14) & (B<21)] = 2 # B_cat = 2 when B>14 and B<21
  B_cat[(B>=21)] = 3 # B_cat = 3 when B>=21


(8161,)


Index(['Incident Number', 'Fire Name', 'Current Day', 'Lat Fire', 'Lon Fire',
       'Number of VIIRS points', 'TLML_12Z_0', 'QLML_12Z_0', 'SPEEDLML_12Z_0',
       'PS_12Z_0', 'T_12Z_700mb_0', 'T_12Z_500mb_0', 'QV_12Z_700mb_0',
       'PBLH_12Z_0', 'TCZPBL_12Z_0', 'TLML_12Z_1', 'QLML_12Z_1',
       'SPEEDLML_12Z_1', 'PS_12Z_1', 'T_12Z_700mb_1', 'T_12Z_500mb_1',
       'QV_12Z_700mb_1', 'PBLH_12Z_1', 'TCZPBL_12Z_1', 'fccs', 'slp', 'asp',
       'precip_0', 'precip_1', 'ESATLML_12Z_0', 'ELML_12Z_0', 'HDWLML_0',
       'RHLML_12Z_0', 'ESATLML_12Z_1', 'ELML_12Z_1', 'HDWLML_1', 'RHLML_12Z_1',
       'Td_12Z_700mb_0', 'E_700mb_0', 'HAINES_0', 'Td_12Z_700mb_1',
       'E_700mb_1', 'HAINES_1'],
      dtype='object')

## Concatenate features and labels

In [10]:
# Append the two 12Z biomass label columns (today and tomorrow) to the right
# of the feature columns.
label_cols = ['biomass_12Z_today', 'biomass_12Z_tomorrow']
dataset = pd.concat([fire_features, labels_pm25[label_cols]], axis=1)
dataset.columns


Index(['Incident Number', 'Fire Name', 'Current Day', 'Lat Fire', 'Lon Fire',
       'Number of VIIRS points', 'TLML_12Z_0', 'QLML_12Z_0', 'SPEEDLML_12Z_0',
       'PS_12Z_0', 'T_12Z_700mb_0', 'T_12Z_500mb_0', 'QV_12Z_700mb_0',
       'PBLH_12Z_0', 'TCZPBL_12Z_0', 'TLML_12Z_1', 'QLML_12Z_1',
       'SPEEDLML_12Z_1', 'PS_12Z_1', 'T_12Z_700mb_1', 'T_12Z_500mb_1',
       'QV_12Z_700mb_1', 'PBLH_12Z_1', 'TCZPBL_12Z_1', 'fccs', 'slp', 'asp',
       'precip_0', 'precip_1', 'ESATLML_12Z_0', 'ELML_12Z_0', 'HDWLML_0',
       'RHLML_12Z_0', 'ESATLML_12Z_1', 'ELML_12Z_1', 'HDWLML_1', 'RHLML_12Z_1',
       'Td_12Z_700mb_0', 'E_700mb_0', 'HAINES_0', 'Td_12Z_700mb_1',
       'E_700mb_1', 'HAINES_1', 'biomass_12Z_today', 'biomass_12Z_tomorrow'],
      dtype='object')

## Drop rows that contain nans and save

In [11]:
# Find every row holding at least one missing value, report the row numbers,
# then remove those rows and renumber the index because some were dropped.
is_NaN = dataset.isnull()
row_has_NaN = np.where(is_NaN.any(axis=1))
print(row_has_NaN)
dataset = dataset.drop(index=row_has_NaN[0]).reset_index(drop=True)
print(dataset.columns)

# Save the cleaned table.
dataset.to_csv('training_data_1day_forecast_day0poly.csv')

(array([ 485,  539,  540,  541,  542,  688, 1131, 1665, 1666, 1667, 1668,
       1842, 1843, 1844, 1845, 1846, 1847, 1848, 1849, 2262, 2497, 2514,
       2626, 2627, 2733, 2734, 2921, 2924, 2948, 3458, 4168, 4169, 4777,
       4804, 5152, 5276, 5277, 5278, 5279, 5280, 5281, 5282, 5283, 5284,
       5285, 5286, 5287, 5288, 5289, 5290, 5291, 5292, 5293, 5294, 6984,
       6985, 6986, 6987, 6988, 7761, 7785, 7788, 7789, 7790, 7791, 8143]),)
Index(['Incident Number', 'Fire Name', 'Current Day', 'Lat Fire', 'Lon Fire',
       'Number of VIIRS points', 'TLML_12Z_0', 'QLML_12Z_0', 'SPEEDLML_12Z_0',
       'PS_12Z_0', 'T_12Z_700mb_0', 'T_12Z_500mb_0', 'QV_12Z_700mb_0',
       'PBLH_12Z_0', 'TCZPBL_12Z_0', 'TLML_12Z_1', 'QLML_12Z_1',
       'SPEEDLML_12Z_1', 'PS_12Z_1', 'T_12Z_700mb_1', 'T_12Z_500mb_1',
       'QV_12Z_700mb_1', 'PBLH_12Z_1', 'TCZPBL_12Z_1', 'fccs', 'slp', 'asp',
       'precip_0', 'precip_1', 'ESATLML_12Z_0', 'ELML_12Z_0', 'HDWLML_0',
       'RHLML_12Z_0', 'ESATLML_12Z_1', 'ELM

## KBDI (not finished yet)

In [None]:
# Build a compact date string per row by keeping characters 0-3, 5-6 and 8-9
# of 'Current Day' (assumes a 'YYYY-MM-DD...' layout -- TODO confirm).
days = features['Current Day'].values
print(days[0][0:4])
days_reformatted = [stamp[0:4] + stamp[5:7] + stamp[8:10] for stamp in days]
                

In [None]:
# Assemble the KBDI input table (one row per entry of `features`) and write it
# to 'KBDI_IN.csv'.
# NOTE(review): the column listings printed earlier in this notebook only show
# day-suffixed names (e.g. 'precip_0', 'TLML_12Z_0'); confirm that `features`
# really has the unsuffixed columns read here.
kbdi_inputs = {
    'date': days_reformatted,
    'precip': features['precip'].values,
    'temp': features['TLML_12Z'].values,
    'rh': features['RHLML_12Z'].values,
    'wind': features['SPEEDLML_12Z'].values,
}
df_in = pd.DataFrame(kbdi_inputs)
df_in.to_csv('KBDI_IN.csv')

In [None]:
# Run the external kbdi-ffdi-run command on the KBDI input CSV; the last
# expression displays the value returned by os.system.
# NOTE(review): the previous cell writes 'KBDI_IN.csv' relative to the working
# directory while this reads an absolute path -- confirm they are the same file.
command = ' '.join([
    'kbdi-ffdi-run',
    '-i', '/data2/lthapa/ML_daily/KBDI_IN.csv',
    '-o', '/data2/lthapa/ML_daily/KBDI_OUT.csv',
])
print(command)
os.system(command)

In [None]:
# Read the KBDI output CSV, show the raw 'KBDI' values, and append that column
# to the feature table.
df_kbdi = pd.read_csv('KBDI_OUT.csv')
kbdi = df_kbdi.loc[:, 'KBDI']
print(kbdi.values)

features = pd.concat([features, kbdi], axis=1)
features

In [None]:
# Per-row variant of the KBDI run: write a one-row input CSV, call
# kbdi-ffdi-run on it, and read the result back.
# `kbdi` is preallocated here but nothing in this cell fills it.
kbdi = np.zeros(len(features))
io_dir = '/data2/lthapa/ML_daily/kbdi_ffdi_io/'
for ii in range(1):#len(features)):  # only the first row for now
    in_csv = f'{io_dir}KBDI_IN_{ii}.csv'
    out_csv = f'{io_dir}KBDI_OUT_{ii}.csv'

    # All values are scalars, so an explicit one-element index is supplied.
    row = {
        'date': days_reformatted[ii],
        'precip': features.loc[ii, 'precip'],
        'temp': features.loc[ii, 'TLML_12Z'],
        'rh': features.loc[ii, 'RHLML_12Z'],
        'wind': features.loc[ii, 'SPEEDLML_12Z'],
    }
    df_in = pd.DataFrame(row, index=range(1))
    print(df_in)
    df_in.to_csv(in_csv)

    command = f'kbdi-ffdi-run -i {in_csv} -o {out_csv}'
    print(command)
    os.system(command)

    df_kbdi = pd.read_csv(out_csv)
    print(df_kbdi)

In [None]:
# Peek at the 'precip' value of the row labelled 0.
features.at[0, 'precip']

## Old code to concatenate precip vector

In [None]:
# Legacy precip assembly from the separate 2019 and 2020 CSVs.
feat_precip_2019 = pd.read_csv('fire_features_precip_2019.csv')  # 2019 precip
feat_precip_2020 = pd.read_csv('fire_features_precip_2020.csv')  # 2020 precip

# One column from the 2019 file (kept 2-D) and two columns from the 2020 file.
precip_2019 = feat_precip_2019[['A_PCP_GDS5_SFC_acc24h']].values
precip_2020 = feat_precip_2020[['A_PCP_GDS5_SFC_acc24h', 'APCP_P8_L1_GST0_acc']].values

# Put the three columns side by side and sum across them per row; nansum
# treats NaN entries as zero.
precip = np.nansum(np.concatenate((precip_2019, precip_2020), axis=1), axis=1)