# Combine WRF and Landsat predictors at fuel-specific LFM observation sites <br> for the entire temporal domain
Last updated: Kevin Varga, 11/27/2024

**Inputs:**
* Predictor variable netcdf files

**Outputs:**
* Fuel-specific CSV files indexed by observation site and the full date range (pandas MultiIndex)

In [1]:
import numpy as np
import pandas as pd
import xarray as xr

In [2]:
# Directory locations used throughout the notebook
# LFM observation csv lives here
obs_path = "/home/sbarc/students/varga/nasa/ch1/data/lfm_obs/"
# Predictor netcdf files (stored on SMS temporal resolution)
predictor_path = "/home/sbarc/students/varga/nasa/ch1/data/predictors/"
# Destination for the fuel specific site predictor csv files
output_path = "/home/sbarc/students/varga/nasa/ch1/data/bias_correction/site_predictors/"

In [3]:
# Load the LFM observations, discarding the columns that are not needed here
unused_columns = ['slope', 'aspect', 'elevation', 'gacc', 'category']
obs_df = pd.read_csv(obs_path + 'lfm_crop.csv').drop(columns=unused_columns)
# Parse the observation dates into pandas datetimes
obs_df['date'] = pd.to_datetime(obs_df['date'])

In [4]:
# Predictor names, kept in the same order that the model was created with
predictors = [
    'cwd90sum',
    'daylength',
    'nirv',
    'precip30sum',
    'precip90sum',
    'rad150mean',
    'rh150mean',
    'somo7mean',
    'temp90mean',
]

# Column labels of the combined predictor dataframe:
# site coordinates first, followed by every predictor
col_labels = ['latitude', 'longitude'] + predictors

# Keep only the fuel types that have at least 500 observations
fuels = (
    obs_df['fuel']
    .value_counts()
    .loc[lambda counts: counts >= 500]
    .index
    .tolist()
)

In [5]:
# Semi-month-start ('SMS') dates spanning the same period as the predictors
start_date, end_date = (pd.to_datetime(date_str) for date_str in ('12/01/1987', '06/30/2019'))
all_dates = pd.date_range(start_date, end_date, freq='SMS')

In [27]:
%%time
# For every fuel type, build a dataframe indexed by (site, date) that holds the
# site coordinates plus every predictor interpolated to the site location,
# then write it to '<output_path><fuel_type>.csv'.
for fuel_type in fuels:
    # Subset observations from fuel type
    fuel_obs_df = obs_df.loc[obs_df['fuel'] == fuel_type]

    # One row per site, in order of first appearance; the coordinates of the
    # first observation at a site are used as the site location
    site_coords = (fuel_obs_df.drop_duplicates(subset='site')
                   .set_index('site')[['latitude', 'longitude']])
    sites = site_coords.index

    # Create fuel specific dataframe with all sites and dates as multi index
    all_dates_index = pd.MultiIndex.from_product([sites, all_dates], names=['site', 'date'])
    all_dates_df = pd.DataFrame(index=all_dates_index, columns=col_labels)

    # Input observation site latitude and longitude.
    # from_product orders the rows site-major, so each site's coordinate is
    # repeated once per date. Assigning the whole column in one .loc call
    # avoids chained assignment, which does not write through to the
    # dataframe when pandas Copy-on-Write is active.
    for coord_name in ['latitude', 'longitude']:
        all_dates_df.loc[:, coord_name] = np.repeat(site_coords[coord_name].to_numpy(),
                                                    len(all_dates))

    for predictor_name in predictors:
        # Open predictor variable data array covering entire spatiotemporal domain;
        # the context manager closes the file once the values have been read
        with xr.open_dataarray(predictor_path + predictor_name + '.nc') as full_da:
            # Subset predictor variable to date range
            pred_da = full_da.sel(time=slice(start_date, end_date))

            # Interpolate predictor values to each observation site location
            # (one time series per site, in the same order as `sites`)
            site_pred_values = [
                pred_da.interp(latitude=site_lat, longitude=site_lon).to_numpy()
                for site_lat, site_lon in zip(site_coords['latitude'],
                                              site_coords['longitude'])
            ]

        # Combine predictor variable values of all sites with a single
        # concatenation and save them to the combined dataframe
        all_dates_df.loc[:, predictor_name] = np.concatenate(site_pred_values)

    # Save fuel specific dataframe with all predictor variables for each site
    all_dates_df.to_csv(output_path + fuel_type + '.csv', index_label=['site', 'date'])
    # Verify that there are not any NaN values, which will mess the model up
    nan_test = int(all_dates_df.isna().any(axis=1).sum())
    print(f'{fuel_type} has {nan_test} nan values')

chamise has 0 nan values
chamise_old_growth has 0 nan values
sage_black has 0 nan values
ceanothus_bigpod has 0 nan values
CPU times: user 9.65 s, sys: 16.3 s, total: 25.9 s
Wall time: 1min 16s
