# Create Test Dictionary

The purpose of this notebook is to create a dictionary of fuel moisture scenarios for model testing.

[USGS](https://water.usgs.gov/edu/activity-howmuchrain-metric.html#:~:text=Slight%20rain%3A%20Less%20than%200.5,than%208%20mm%20per%20hour.) classifies rainfall as:
* Slight: $\text{rain}<.5$ mm/hr
* Moderate: $.5<\text{rain}<4$ mm/hr
* Heavy: $4<\text{rain}<8$ mm/hr
* Very Heavy: $8<\text{rain}$ mm/hr

## Setup

In [None]:
# Change path for module imports
import os
os.chdir('..')

import numpy as np, random
from numpy.random import rand
import matplotlib.pyplot as plt
from data_funcs import synthetic_data, plot_data, to_json, from_json, format_raws, mse_data

import moisture_models as mod
from moisture_models import model_decay, model_moisture
from datetime import datetime, timedelta
import json
from utils import hash2

from MesoPy import Meso
# Prefer an API token supplied through the MESO_TOKEN environment variable so the
# credential does not have to live in the notebook.
# NOTE(review): the literal fallback keeps existing runs working, but a token that is
# committed with the notebook should be treated as exposed -- rotate it and drop the fallback.
meso_token = os.environ.get("MESO_TOKEN", "4192c18707b848299783d59a9317c6e1")
m = Meso(meso_token)

import reproducibility

In [None]:
# Change directory to data
# NOTE: the path is relative to the current working directory (set by os.chdir('..') in
# the setup cell), so this cell is not idempotent -- run it once per kernel session.
os.chdir('data')

---

**Scenarios 1-6**: simulated moisture with default equilibrium parameters. Vary rain from none up to very heavy as described above.

In [None]:
# Dictionary to be saved for testing
# Keys are 'case1', 'case2', ...; each value is one scenario dictionary.
test_dict = {}

In [None]:
## Case 1: no rain
synt_dat = synthetic_data(max_rain=0)
# Attach the scenario metadata in a single call
synt_dat.update({
    'id': 1,
    'title': 'Synthetic Data (no rain)',
    'descr': 'Max rain: 0 mm/hr; Emin: 5; Emax: 30; 20 days',
})

test_dict['case1'] = synt_dat  # save to test dictionary

In [None]:
## Case 2: light rain (below the 0.5 mm/hr "slight" threshold)
synt_dat = synthetic_data(max_rain=0.4)
# Attach the scenario metadata in a single call
synt_dat.update({
    'id': 2,
    'title': 'Synthetic Data (light rain)',
    'descr': 'Max rain: .4 mm/hr; Emin: 5; Emax: 30; 20 days',
})

test_dict['case2'] = synt_dat  # save to test dictionary

In [None]:
## Case 3: moderate rain (between 0.5 and 4 mm/hr)
synt_dat = synthetic_data(max_rain=3)
# Attach the scenario metadata in a single call
synt_dat.update({
    'id': 3,
    'title': 'Synthetic Data (med. rain)',
    'descr': 'Max rain: 3 mm/hr; Emin: 5; Emax: 30; 20 days',
})

test_dict['case3'] = synt_dat  # save to test dictionary

In [None]:
## Case 4: heavy rain (between 4 and 8 mm/hr)
synt_dat = synthetic_data(max_rain=6)
# Attach the scenario metadata in a single call
synt_dat.update({
    'id': 4,
    'title': 'Synthetic Data (heavy rain)',
    'descr': 'Max rain: 6 mm/hr; Emin: 5; Emax: 30; 20 days',
})

test_dict['case4'] = synt_dat  # save to test dictionary

In [None]:
## Case 5: very heavy rain 1 (above 8 mm/hr)
synt_dat = synthetic_data(max_rain=10)
# Attach the scenario metadata in a single call
synt_dat.update({
    'id': 5,
    'title': 'Synthetic Data (very heavy rain 1)',
    'descr': 'Max rain: 10 mm/hr; Emin: 5; Emax: 30; 20 days',
})

test_dict['case5'] = synt_dat  # save to test dictionary

In [None]:
## Case 6: very heavy rain 2 (above 8 mm/hr)
synt_dat = synthetic_data(max_rain=15)
# Attach the scenario metadata in a single call
synt_dat.update({
    'id': 6,
    'title': 'Synthetic Data (very heavy rain 2)',
    'descr': 'Max rain: 15 mm/hr; Emin: 5; Emax: 30; 20 days',
})

test_dict['case6'] = synt_dat  # save to test dictionary

In [None]:
# Plot the last synthetic scenario (max_rain = 15) as a visual check
plot_data(test_dict['case6'])

---

**Scenarios 7-8:** RAWS Data, multiple time slices

In [None]:
## Read RAWS data with MesoPy

time_start = "201806010800"  # June 1 2018 08:00 in format yyyymmddHHMM
time_end   = "201907200900"  # July 20 2019 09:00 in format yyyymmddHHMM

# Comma-separated variables to request (note: this name shadows the builtin `vars`)
vars='air_temp,relative_humidity,precip_accum,fuel_moisture'

In [None]:
# Query station CPTC2 for the time window and variables defined in the previous cell
meso_ts = m.timeseries(time_start, time_end, stid="CPTC2", showemptystations = '0', vars=vars)   # ask the object for data

In [None]:

def format_raws(stn, fixnames = True):
    """Convert one MesoPy station record into a dictionary of NumPy arrays.

    Note: this definition shadows the ``format_raws`` imported from
    ``data_funcs`` in the setup cell.

    Parameters
    ----------
    stn : dict
        Station record with the keys 'OBSERVATIONS' (mapping of variable name
        to a non-empty sequence of values), 'STID', 'LATITUDE' and 'LONGITUDE'.
        The observations must contain 'air_temp_set_1' and
        'relative_humidity_set_1'; 'precip_accum_set_1' is optional.
    fixnames : bool, default True
        If True, strip the '_set_1' suffix from the keys and map them to the
        short names used by the models (e.g. 'air_temp' -> 'temp',
        'precip_accum' -> 'rain', 'date_time' -> 'time').

    Returns
    -------
    dict
        The observation arrays, the derived 'Ed' and 'Ew' arrays, and the
        station id, latitude and longitude.

    Notes
    -----
    The input record is not modified, so calling the function repeatedly on
    the same record gives the same result (the temperature offset is applied
    to a copy rather than to ``stn['OBSERVATIONS']`` itself).
    """
    # Shallow copy: every entry is replaced by a new array below, so the
    # caller's stn['OBSERVATIONS'] is left untouched.
    raws_dat = dict(stn['OBSERVATIONS'])

    # Convert to Numpy arrays, check data type for floats
    for key, values in stn['OBSERVATIONS'].items():
        if type(values[0]) is float:
            raws_dat[key] = np.array(values, dtype = 'float64')
        else:
            raws_dat[key] = np.array(values)

    # Transform Data
    raws_dat['air_temp_set_1'] = raws_dat['air_temp_set_1'] + 273.15 ## convert C to K
    if 'precip_accum_set_1' in raws_dat:
        raws_dat['precip_accum_set_1'] = format_precip(raws_dat['precip_accum_set_1']) ## format precip data, accumulated to hourly

    # Calculate equilibria Ed and Ew from relative humidity and temperature
    # (presumably the drying / wetting equilibrium moisture -- TODO confirm)
    rh = raws_dat['relative_humidity_set_1']
    temp_term = 0.18*(21.1 + 273.15 - raws_dat['air_temp_set_1'])*(1 - np.exp(-0.115*rh))
    raws_dat['Ed'] = 0.924*rh**0.679 + 0.000499*np.exp(0.1*rh) + temp_term
    raws_dat['Ew'] = 0.618*rh**0.753 + 0.000454*np.exp(0.1*rh) + temp_term

    # Fix nan values
    # NOTE(review): every entry is a NumPy array at this point, so the first
    # element is a NumPy scalar and `type(...) is float` does not appear to
    # match; `fixnan` is also not defined or imported in the cells visible
    # here. Confirm whether NaN filling is still intended.
    for key in [*raws_dat.keys()]:
        if type(raws_dat[key][0]) is float:
            raws_dat[key] = fixnan(raws_dat[key], 2)

    # Add station id
    raws_dat['STID'] = stn['STID']

    # Add lat/lon
    raws_dat['LATITUDE'] = stn['LATITUDE']
    raws_dat['LONGITUDE'] = stn['LONGITUDE']

    if not fixnames:
        return raws_dat

    # Simplify names
    var_mapping = {
        'date_time': 'time', 'precip_accum': 'rain',
        'fuel_moisture': 'fm', 'relative_humidity': 'rh',
        'air_temp': 'temp', 'Ed': 'Ed', 'Ew': 'Ew', 'STID': 'STID',
        'LONGITUDE': 'lon', 'LATITUDE': 'lat'
        }
    renamed = {}
    for key, values in raws_dat.items():
        short = key.replace("_set_1", "")
        renamed[var_mapping.get(short, short)] = values
    return renamed

def format_precip(precipa):
    """Convert accumulated precipitation into per-step amounts.

    Parameters
    ----------
    precipa : array-like of float
        Accumulated precipitation values in time order.

    Returns
    -------
    numpy.ndarray
        float64 array of first differences. The first entry is NaN (there is
        no previous value to difference against), and differences that are
        greater than 1000 or negative are replaced by NaN.
    """
    rain = np.array(precipa, dtype = 'float64')
    rain = np.diff(rain) # first difference to convert accumulated to hourly
    # np.nan is used instead of the np.NaN alias, which was removed in NumPy 2.0
    rain = np.insert(rain, 0, [np.nan]) # add NaN entry to account for diff
    rain[rain > 1000] = np.nan # filter out erroneously high
    rain[rain < 0] = np.nan # filter out negative, results from diff function after precipa goes to zero
    return rain

In [None]:
# Format the first station in the MesoPy response. This calls the format_raws defined
# in the cell above, which shadows the one imported from data_funcs.
raws1 = format_raws(meso_ts['STATION'][0])

In [None]:
# Scenario 7: time 0-1200 for station
## Heavy rain at end of time period
dict1 = {
    'id': 7,
    # All time-indexed series share the same 0:1200 window
    **{name: raws1[name][0:1200]
       for name in ('time', 'rain', 'fm', 'rh', 'temp', 'Ed', 'Ew')},
    'STID': raws1['STID'],
    'title': 'RAWS Station CPTC2 #1',
    'descr': 'Real surface level data, very heavy rain at end',
    'hours': 1200,
    'h2': 300,
    'other': {'lon': raws1['lon'], 'lat': raws1['lat']},
}

test_dict['case7'] = dict1  # save to test dictionary

In [None]:
# Plot scenario 7 (dict1 is the case7 dictionary built in the previous cell)
plot_data(dict1)

In [None]:
# Scenario 8: time 800-2000 for station
## Heavy rain at beginning of time period
dict1 = {
    'id': 8,
    # All time-indexed series share the same 800:2000 window
    **{name: raws1[name][800:2000]
       for name in ('time', 'rain', 'fm', 'rh', 'temp', 'Ed', 'Ew')},
    'STID': raws1['STID'],
    'title': 'RAWS Station CPTC2 #2',
    'descr': 'Real surface level data, very heavy rain at beginning',
    'hours': 1200,
    'h2': 300,
    'other': {'lon': raws1['lon'], 'lat': raws1['lat']},
}

test_dict['case8'] = dict1  # save to test dictionary

In [None]:
# Plot scenario 8 as a visual check
plot_data(test_dict['case8'])

---

**Scenarios 9-10:** RTMA Data, multiple time slices at station BKCU1

In [None]:
# NOTE(review): the next cell immediately reassigns `rtma` from 'rtma.json', so the data
# loaded here is not used afterwards -- confirm whether this load is still needed.
rtma = from_json('kf_orig.json')

In [None]:
# Load the RTMA data used to build scenarios 9 and 10
rtma = from_json('rtma.json')

In [None]:
# List the fields available in the RTMA data
rtma.keys()
# print(rtma['time_str'][3000])

In [None]:
# Show the RTMA observation location (latitude, then longitude), one value per line
print(rtma['obs_lat'], rtma['obs_lon'], sep='\n')

In [None]:
## Read RAWS data with MesoPy
# Only fuel moisture is requested from station BKCU1; the weather fields for
# scenarios 9-10 are taken from the RTMA data instead.

time_start = "201807041600"  # '2018-07-04 16:00'
time_end   = "201810040900"  # '2018-10-04 08:00', 1 hr buffer

vars='fuel_moisture'

meso_ts = m.timeseries(time_start, time_end, stid="BKCU1", showemptystations = '0', vars=vars)   # ask the object for data

In [None]:
# Show the station location (latitude, then longitude) to compare with the RTMA location
station = meso_ts['STATION'][0]
print(station['LATITUDE'], station['LONGITUDE'], sep='\n')

In [None]:
def format_rtma(rtma):
    """Build the model input fields from an RTMA data dictionary.

    Parameters
    ----------
    rtma : dict
        Must provide 'td', 'temp', 'precipa', 'time_str', 'obs_lat' and
        'obs_lon'. 'td' and 'temp' are presumably dew point and air
        temperature in Kelvin (the formulas subtract 273.15) -- TODO confirm.

    Returns
    -------
    dict
        Keys 'time', 'rain', 'rh', 'temp', 'Ed', 'Ew', 'lat', 'lon'. 'rain' is
        the accumulated precipitation passed through format_precip; 'rh',
        'Ed' and 'Ew' are computed from 'td' and 'temp'.
    """
    td = np.array(rtma['td'])
    t2 = np.array(rtma['temp'])
    # compute relative humidity
    # NOTE(review): the two denominators use 243.04 and 243.0 respectively. This looks
    # like a typo, but the value is kept unchanged because altering it would change
    # the generated test data -- confirm before editing.
    rh = 100*np.exp(17.625*243.04*(td - t2) / (243.04 + t2 - 273.15) / (243.0 + td - 273.15))
    Ed = 0.924*rh**0.679 + 0.000499*np.exp(0.1*rh) + 0.18*(21.1 + 273.15 - t2)*(1 - np.exp(-0.115*rh))
    Ew = 0.618*rh**0.753 + 0.000454*np.exp(0.1*rh) + 0.18*(21.1 + 273.15 - t2)*(1 - np.exp(-0.115*rh))

    rtma_dict = {
        'time': rtma['time_str'],
        'rain': format_precip(rtma['precipa']),
        # 'fm' : np.array(fm),
        'rh' : rh,
        'temp' : t2,
        'Ed' : Ed,
        'Ew' : Ew,
        'lat' : rtma['obs_lat'],
        'lon' : rtma['obs_lon']
    }

    return rtma_dict

In [None]:
# Derived RTMA weather fields, plus the fuel moisture observations returned by the
# BKCU1 station query above
rtma1 = format_rtma(rtma)
fm = np.array(meso_ts['STATION'][0]['OBSERVATIONS']['fuel_moisture_set_1'])

In [None]:
# Scenario 9: time 800:2000 for rtma location
## moderate rain towards end of time period
# The RTMA series use indices 800:2000 while the station fuel moisture uses 0:1200
dict1 = {
    'id': 9,
    **{name: rtma1[name][800:2000] for name in ('time', 'rain')},
    'fm': fm[0:1200],
    **{name: rtma1[name][800:2000] for name in ('rh', 'temp', 'Ed', 'Ew')},
    'title': 'RTMA Data, Fuel from RAWS Station BKCU1 #1',
    'descr': 'rtma weather, surface level fuel, moderate rain at end',
    'hours': 1200,
    'h2': 300,
    'other': {'lon': rtma1['lon'], 'lat': rtma1['lat']},
}

test_dict['case9'] = dict1  # save to test dictionary

In [None]:
# Plot scenario 9 as a visual check
plot_data(test_dict['case9'])

In [None]:
# Scenario 10: time 1000-2200 for same location
## moderate rain towards end of time period
# The RTMA series use indices 1800:3000 while the station fuel moisture uses 1000:2200
dict1 = {
    'id': 10,
    **{name: rtma1[name][1800:3000] for name in ('time', 'rain')},
    'fm': fm[1000:2200],
    **{name: rtma1[name][1800:3000] for name in ('rh', 'temp', 'Ed', 'Ew')},
    'title': 'RTMA Data, Fuel from RAWS Station BKCU1 #2',
    'descr': 'rtma weather, surface level fuel, heavy rain at end',
    'hours': 1200,
    'h2': 300,
    'other': {'lon': rtma1['lon'], 'lat': rtma1['lat']},
}

test_dict['case10'] = dict1  # save to test dictionary

In [None]:
# Plot scenario 10 as a visual check
plot_data(test_dict['case10'])

### Scenario 11: Original RNN

RNN case that was generated in a notebook and saved to a JSON file; it has been used for reproducibility checks since the beginning. The RTMA data come from the same station.

In [None]:
# Load the saved original RNN case
rnn = from_json('rnn_orig.json')

In [None]:
# List the fields available in the RNN case
rnn.keys()

In [None]:
# Scenario 11:
# Use the full length of the saved series; this case has no time axis ('time' is None)
# and takes its lon/lat from the RTMA data formatted above.
N = rnn['Ed'].shape[0]

dict1={
    'id': 11,
    'time': None,
    'rain': rnn['rain'][0:N],
    'fm' : rnn['fm'][0:N],
    'Ed' : rnn['Ed'][0:N],
    'Ew' : rnn['Ew'][0:N],
    'title' : 'RNN Orig',
    'descr' : 'rtma weather, surface level fuel, moderate rain at end',
    'hours':N,
    'h2':300,
    'other': {'lon': rtma1['lon'], 'lat': rtma1['lat']}
}

test_dict['case11'] = dict1 # save to test dictionary

In [None]:
# Plot scenario 11 as a visual check
plot_data(test_dict['case11'])

## Save Output

In [None]:
import pickle
# Serialize all scenarios into one pickle file in the current working directory
with open('testing_dict.pickle', 'wb') as handle:
    pickle.dump(test_dict, handle, protocol=pickle.HIGHEST_PROTOCOL)

In [None]:
# Confirm which cases were stored
test_dict.keys()

In [None]:
# Inspect the fields of one synthetic case
test_dict['case3'].keys()

## Sample KF Loop on Dict

In [None]:
# Reload the scenarios from disk (relies on `pickle` imported in the Save Output cell).
# NOTE: pickle.load can execute arbitrary code; only load files produced by this notebook.
with open('testing_dict.pickle', 'rb') as handle:
    test_dict = pickle.load(handle)

In [None]:
# Filled by the loop below, one entry per case name
err_dict = {} # dictionary to save validation error for cases

In [None]:
def mse(a, b):
    """Return the mean squared error between two equally shaped arrays."""
    residual = a - b
    return (residual * residual).mean()

def mse_data(dat, hours = None, h2 = None):
    """Print and return the training and prediction MSE for one scenario.

    Note: this definition shadows the ``mse_data`` imported from ``data_funcs``
    in the setup cell.

    Parameters
    ----------
    dat : dict
        Scenario dictionary with the model output under 'm' and the observed
        values under 'fm'; 'hours' and 'h2' are read from it when the
        corresponding argument is None.
    hours : int, optional
        End index of the prediction period.
    h2 : int, optional
        Index separating the training period ``[:h2]`` from the prediction
        period ``[h2:hours]``.

    Returns
    -------
    tuple
        ``(train, test)`` mean squared errors.
    """
    hours = dat['hours'] if hours is None else hours
    h2 = dat['h2'] if h2 is None else h2

    fitted, observed = dat['m'], dat['fm']

    train = mse(fitted[:h2], observed[:h2])
    test = mse(fitted[h2:hours], observed[h2:hours])

    for label, value in (('Training MSE:   ', train), ('Prediction MSE: ', test)):
        print(label + str(np.round(value, 4)))

    return train, test

In [None]:
# Run the augmented Kalman filter on every case and record its errors
for key, dict1 in test_dict.items():
    print(key, ':', dict1['title'])

    # Named m_kf rather than m so that the MesoPy client bound to `m` in the
    # setup cell is not overwritten (the query cells would otherwise fail on re-run)
    m_kf, Ec = mod.run_augmented_kf(dict1)
    dict1['m'] = m_kf  # mse_data reads the model output from the 'm' entry

    errs = mse_data(dict1)

    print('-'*25)

    err_dict[key] = {
        'title' : dict1['title'],
        'train' : errs[0],
        'test' : errs[1],
        'm_hash' : int(hash2(m_kf)),
        # 'm' : m,
        'Ec' : Ec # equil. correction learned by training
    }

In [None]:
# Write the per-case error summary to errors.json
to_json(err_dict, 'errors.json')

In [None]:
# Display the error summary
err_dict