In [None]:
# Environment
import numpy as np
import pandas as pd
from MesoPy import Meso
import matplotlib.pyplot as plt
from datetime import datetime, timedelta
# Local modules for handling data and running moisture models
import data_funcs as datf
from data_funcs import format_precip, fixnan
import moisture_models as mod

# MesoWest / Synoptic API client used by retrieve_raws().
# The token is read from the MESO_TOKEN environment variable when it is set so
# that credentials do not have to live in the notebook; the literal below is
# kept only as a backward-compatible fallback.
# NOTE(review): the fallback token is committed in plain text - consider
# rotating it and dropping the default once MESO_TOKEN is configured.
import os

meso_token = os.environ.get("MESO_TOKEN", "4192c18707b848299783d59a9317c6e1")
m = Meso(meso_token)

In [None]:
def mse(a, b):
    """Return the mean squared error between `a` and `b`.

    The difference `a - b` must support `** 2` and expose a `.mean()` method
    (e.g. NumPy arrays of matching shape).
    """
    squared_residual = (a - b) ** 2
    return squared_residual.mean()
def mape(a, b):
    """Return the mean absolute error between `a` and `b`.

    Despite the name, no percentage is computed: the result is the mean of
    |a - b|. The difference must expose a `.mean()` method after `abs()`.
    """
    absolute_residual = abs(a - b)
    return absolute_residual.mean()

In [None]:
def vprint(*args):
    """Print `args` separated by single spaces, but only when `verbose` is truthy.

    `verbose` is the notebook-level flag, looked up at call time, so it must be
    defined before the first call. Calling with no arguments prints an empty
    line (the previous implementation raised IndexError in that case).
    """
    if verbose:
        print(*args)

## Validation Setup

In [None]:
# Simulation window and train / forecast split.
TIME_FMT = "%Y%m%d%H%M"              # compact timestamp format passed to retrieve_raws()
TIME_DISPLAY_FMT = "%Y/%m/%d %H:%M"  # human-readable; %m is the month (%M would be minutes)

time_start = "201806010800"
hours = 1200 # total simulation time
start_dt = datetime.strptime(time_start, TIME_FMT)
end_dt = start_dt + timedelta(hours = hours+1) # end time, plus a buffer to control for time shift
time_end = end_dt.strftime(TIME_FMT)
h2 = 300 # training period
train_hrs = np.arange(0, h2) # training time
test_hrs = np.arange(h2, hours) # forecast time

print('Time Parameters:')
print('-'*50)
print('Time Start:', start_dt.strftime(TIME_DISPLAY_FMT))
print('Time End:', end_dt.strftime(TIME_DISPLAY_FMT))
print('Total Runtime:', hours, 'hours')
print('Training Time:', h2, 'hours')
print('-'*50)

## Retrieve RAWS Data

In [None]:
def format_raws(stn, fixnames = True):
    """Convert one station record into a dict of NumPy arrays for the models.

    Parameters
    ----------
    stn : dict
        Station record whose 'OBSERVATIONS' entry maps variable names to
        sequences. 'air_temp_set_1' and 'relative_humidity_set_1' are required;
        'precip_accum_set_1' is optional.
    fixnames : bool
        When True, strip the '_set_1' suffix and shorten the known names
        (date_time->time, precip_accum->rain, fuel_moisture->fm,
        relative_humidity->rh, air_temp->temp).

    Returns
    -------
    dict
        New dict of arrays, including the derived 'Ed' and 'Ew' equilibria.

    Notes
    -----
    * The input record is not modified. Previously the observations dict was
      edited in place, so calling this twice on the same record added 273.15
      to the temperature twice.
    * The NaN pass now selects arrays by floating dtype. The earlier test
      `type(x[0]) is float` is never true for NumPy scalars, so `fixnan` was
      never invoked.
    """
    # Build a fresh dict; float series become float64 arrays, everything else
    # is converted with NumPy's default dtype inference.
    raws_dat = {}
    for key, values in stn['OBSERVATIONS'].items():
        if len(values) > 0 and isinstance(values[0], float):
            raws_dat[key] = np.array(values, dtype = 'float64')
        else:
            raws_dat[key] = np.array(values)

    # Transform data
    raws_dat['air_temp_set_1'] = raws_dat['air_temp_set_1'] + 273.15 ## convert C to K
    if 'precip_accum_set_1' in raws_dat:
        raws_dat['precip_accum_set_1'] = format_precip(raws_dat['precip_accum_set_1']) ## format precip data, accumulated to hourly

    # Drying (Ed) and wetting (Ew) equilibria from relative humidity and
    # temperature; the two share the temperature correction term.
    rh = raws_dat['relative_humidity_set_1']
    temp_k = raws_dat['air_temp_set_1']
    temp_term = 0.18*(21.1 + 273.15 - temp_k)*(1 - np.exp(-0.115*rh))
    raws_dat['Ed'] = 0.924*rh**0.679 + 0.000499*np.exp(0.1*rh) + temp_term
    raws_dat['Ew'] = 0.618*rh**0.753 + 0.000454*np.exp(0.1*rh) + temp_term

    # Fix nan values in every floating-point series
    for key in list(raws_dat):
        if np.issubdtype(raws_dat[key].dtype, np.floating):
            raws_dat[key] = fixnan(raws_dat[key], 2)

    if not fixnames:
        return raws_dat

    # Simplify names
    var_mapping = {
        'date_time': 'time', 'precip_accum': 'rain',
        'fuel_moisture': 'fm', 'relative_humidity': 'rh',
        'air_temp': 'temp', 'Ed': 'Ed', 'Ew': 'Ew'
        }
    renamed = {}
    for key, values in raws_dat.items():
        short = key.replace("_set_1", "")
        renamed[var_mapping.get(short, short)] = values
    return renamed

In [None]:
def retrieve_raws(stid, raws_vars, time1, time2):
    """Download a station time series and return it raw and formatted.

    Uses the notebook-level Meso client `m` to request `raws_vars` for station
    `stid` between `time1` and `time2`, takes the first entry of the
    response's 'STATION' list, and passes it through format_raws().

    Returns
    -------
    (station, raws_dat) : the first station record and the formatted data.
    """
    response = m.timeseries(time1, time2, stid=stid, vars=raws_vars)
    first_station = response['STATION'][0]
    formatted = format_raws(first_station)
    return first_station, formatted

In [None]:
# Variables requested from the API, joined into the comma-separated string
# that retrieve_raws() forwards as the `vars` argument.
raws_vars = ','.join(['air_temp', 'relative_humidity', 'precip_accum', 'fuel_moisture'])

In [None]:
# Download station BKCU1 for the window time_start..time_end; `station` is the
# raw station record and `raws_dat` the formatted dict of arrays.
station, raws_dat = retrieve_raws("BKCU1", raws_vars, time_start, time_end)

In [None]:
def plot_dat(stn, dat, val):
    """Plot the series `dat[val]` for station `stn` as a black line.

    Parameters
    ----------
    stn : dict with an 'STID' entry, used in the title.
    dat : mapping from variable name to the series to plot.
    val : key of the series; also used as the y-axis label (previously the
          axis was labelled with the literal text 'val').
    """
    plt.figure(figsize=(16,4))
    plt.plot(dat[val],linestyle='-',c='k')
    plt.title(stn['STID']+' '+ val)
    plt.xlabel('Time (hours)')
    plt.ylabel(val)

In [None]:
%matplotlib inline
# Quick look at the 'fm' series of the retrieved station.
plot_dat(station, raws_dat, 'fm')

In [None]:
# Summary of the station record that was just retrieved.
divider = '-' * 50
print('Data Read:')
print(divider)
print('Station ID:', station['STID'])
print('Lat / Lon:', station['LATITUDE'], ', ', station['LONGITUDE'])
if station['QC_FLAGGED']:
    print('WARNING: station flagged for QC')
print(divider)

## Retrieve RTMA Function

<mark>Not needed?</mark>

## Fit Augmented KF

In [None]:
# Run the augmented Kalman filter from the local moisture_models module,
# passing h2 (training period) and hours (total run length).
# NOTE(review): this rebinds `m`, which until this cell held the Meso API
# client that retrieve_raws() calls; re-running retrieve_raws() afterwards will
# most likely fail until the client cell is run again.
m,Ec = mod.run_augmented_kf(raws_dat['fm'],raws_dat['Ed'],raws_dat['Ew'],raws_dat['rain'],h2,hours)  # extract from state

In [None]:
def plot_moisture(hmin,hmax):
    """Plot equilibria, observations and the model output for hours hmin..hmax.

    Reads the notebook-level variables `raws_dat`, `Ec`, `m`, `h2` and
    `station`. The model output `m` is drawn in black up to the end of the
    training period `h2` ('Filtered') and in red afterwards ('Forecast').

    Changes from the earlier version:
    * the 'Filtered' segment was guarded by `hmin>=h2`, which only ever plotted
      an empty slice and added a duplicate legend entry; it is now drawn only
      when the window overlaps the training period;
    * rain used the same solid blue line as the fuel moisture data and was
      labelled RTMA although it is taken from `raws_dat`;
    * the title no longer ends with a stray literal 'hmax'.
    """
    print('training from 0 to',h2,'plot from',hmin,'to',hmax)
    hrs = range(hmin,hmax)
    plt.figure(figsize=(16,4))
    plt.plot(hrs,raws_dat['Ed'][hmin:hmax],linestyle='--',c='r',label='Drying Equilibrium (%)')
    plt.plot(hrs,raws_dat['Ew'][hmin:hmax],linestyle='--',c='b',label='Wetting Equilibrium (%)')
    plt.plot(hrs,Ec[hmin:hmax],linestyle='--',c='g',label='Equilibrium Correction (%)')
    plt.plot(hrs,raws_dat['fm'][hmin:hmax],linestyle='-',c='b',label='RAWS data (%)')
    plt.plot(hrs,raws_dat['rain'][hmin:hmax],linestyle='-',c='c',label='RAWS rain (mm/h)')

    # First plotted hour that belongs to the forecast, clamped to the window.
    split = min(max(hmin,h2),hmax)
    if split > hmin:
        # Extend by one point so the black and red segments join without a gap.
        stop = min(split+1,hmax)
        plt.plot(range(hmin,stop),m[hmin:stop],linestyle='-',c='k',label='Filtered (%)')
    if hmax > split:
        plt.plot(range(split,hmax),m[split:hmax],linestyle='-',c='r',label='Forecast (%)')

    plt.title(station['STID'] +' Kalman filtering and forecast with augmented state, real data. Training 0:%i' % h2)
    plt.xlabel('Time (hours)')
    plt.ylabel('Fuel moisture content (%)')
    plt.legend()

In [None]:
# Plot the whole run, from hour 0 to `hours`.
plot_moisture(0, hours)

In [None]:
# Overall error (currently disabled)
# print(mse(m, raws_dat['fm'][0:hours]))

# Forecast error: MSE of the model output against the observed fuel moisture
# over hours h2..hours, rounded to 4 decimals.
print('Forecast MSE: %s' % np.round(mse(m[h2:hours], raws_dat['fm'][h2:hours]), 4))

## Fit RNN Model

In [None]:
import tensorflow as tf
from keras.models import Sequential
from keras.layers import Dense, SimpleRNN
from keras.utils.vis_utils import plot_model
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
import math
import matplotlib.pyplot as plt
import tensorflow as tf
import keras.backend as K
from keras.utils.vis_utils import plot_model
from scipy.interpolate import LinearNDInterpolator, interpn
from scipy.optimize import root

## Local Modules
from moisture_models import create_RNN, create_RNN_2, staircase, seq2batches

In [None]:
# Set seed for reproducibility.
# This seeds TensorFlow's global random generator only; NumPy and Python's
# `random` module are not seeded in this cell.
tf.random.set_seed(123)

In [None]:
def staircase(x,y,timesteps,trainsteps,return_sequences=False, verbose = False):
    """Split a series into overlapping training samples shifted by one step.

    Parameters
    ----------
    x : array [trainsteps+forecaststeps, features], all inputs
    y : array [trainsteps, outputs], targets
    timesteps : length of each sample
    trainsteps : number of leading steps used for training, no more than
        y.shape[0]
    return_sequences : if True, targets hold every timestep of each sample;
        otherwise only the last one
    verbose : accepted for interface compatibility; diagnostic printing goes
        through vprint(), which follows the notebook-level `verbose` flag

    Returns
    -------
    x_train : [samples, timesteps, features], with x_train[i] = x[i:i+timesteps]
    y_train : [samples, timesteps, outputs] when return_sequences is True,
        else [samples, outputs] with y_train[i] = y[i+timesteps-1]
    where samples = trainsteps - timesteps + 1.

    The last-timestep branch used to sit outside its `else:`, so it ran
    unconditionally and replaced the sequence targets even when
    return_sequences=True.
    """
    vprint('shape x = ',x.shape)
    vprint('shape y = ',y.shape)
    vprint('timesteps=',timesteps)
    vprint('trainsteps=',trainsteps)
    outputs = y.shape[1]
    features = x.shape[1]
    samples = trainsteps-timesteps+1
    vprint('staircase: samples=',samples,'timesteps=',timesteps,'features=',features)
    x_train = np.empty([samples, timesteps, features])
    vprint('return_sequences=',return_sequences)
    if return_sequences:
        vprint('returning all timesteps in a sample')
        y_train = np.empty([samples, timesteps, outputs])  # all
    else:
        vprint('returning only the last timestep in a sample')
        y_train = np.empty([samples, outputs])

    for i in range(samples):
        x_train[i] = x[i:i+timesteps]
        if return_sequences:
            y_train[i] = y[i:i+timesteps]
        else:
            y_train[i] = y[i+timesteps-1]

    return x_train, y_train

In [None]:
def create_rnn_data(dat, scale = False, verbose = False, timesteps = 5, trainsteps = None):
    """Assemble the training inputs for the RNN from formatted station data.

    Parameters
    ----------
    dat : dict with array entries 'Ed', 'Ew' and 'fm'
    scale : if True, min-max scale the input and target series. This flag was
        previously overwritten with False inside the function and therefore
        ignored.
    verbose : forwarded to staircase()
    timesteps : sample length passed to staircase() (default 5, as before)
    trainsteps : number of training steps; defaults to the notebook-level `h2`

    Returns
    -------
    dict with 'x_train', 'y_train', 'Et' (the 2D input series, scaled if
    requested), 'samples', 'timesteps', 'features', 'h0' (float32 tensor of the
    first `samples` targets) and 'scalerx' / 'scalery' (the fitted scalers, or
    None when scale is False) so predictions can be transformed back.
    """
    if trainsteps is None:
        trainsteps = h2

    # Average equilibrium is the single input feature
    E = (dat['Ed'] + dat['Ew'])/2
    fm = dat['fm']

    # transform as 2D, (timesteps, features) and (timesteps, outputs)
    Et = np.reshape(E,[E.shape[0],1])
    datat = np.reshape(fm,[fm.shape[0],1])

    # Scale data if required
    scalerx = None
    scalery = None
    if scale:
        scalerx = MinMaxScaler()
        scalerx.fit(Et)
        Et = scalerx.transform(Et)
        scalery = MinMaxScaler()
        scalery.fit(datat)
        datat = scalery.transform(datat)

    # split data
    x_train, y_train = staircase(Et,datat,timesteps=timesteps,trainsteps=trainsteps,
                                 return_sequences=False, verbose = verbose)
    vprint('x_train shape=',x_train.shape)
    samples, timesteps, features = x_train.shape
    vprint('y_train shape=',y_train.shape)

    h0 = tf.convert_to_tensor(datat[:samples],dtype=tf.float32)

    return {
        'x_train': x_train,
        'y_train': y_train,
        'Et': Et,
        'samples': samples,
        'timesteps': timesteps,
        'features': features,
        'h0': h0,
        'scalerx': scalerx,
        'scalery': scalery
    }

In [None]:
# `verbose` is the notebook-level flag that vprint() consults; `scale` is
# passed straight through to create_rnn_data().
verbose = True
scale = False
rnn_dat = create_rnn_data(raws_dat, scale)

In [None]:
def create_RNN_2(hidden_units, dense_units, activation, stateful=False, 
                 batch_shape=None, input_shape=None, dense_layers=1,
                 rnn_layers=1,return_sequences=False,
                 initial_state=None, verbose = True):
    """Build and compile a SimpleRNN -> Dense Keras model.

    Parameters
    ----------
    hidden_units : units in each SimpleRNN layer
    dense_units : units in each Dense layer
    activation : pair (rnn_activation, dense_activation)
    stateful : if True the input is declared with `batch_shape`, otherwise
        with `input_shape`
    dense_layers, rnn_layers : number of stacked layers of each kind
    return_sequences : whether the last recurrent layer returns the whole
        sequence. Intermediate recurrent layers always return sequences,
        because a following SimpleRNN needs 3D input; before this fix
        rnn_layers > 1 with return_sequences=False could not be built.
    initial_state, verbose : accepted but currently unused

    Returns
    -------
    A model compiled with mean-squared-error loss and the Adam optimizer.
    """
    if stateful:
        inputs = tf.keras.Input(batch_shape=batch_shape)
    else:
        inputs = tf.keras.Input(shape=input_shape)
    # https://stackoverflow.com/questions/43448029/how-can-i-print-the-values-of-keras-tensors
    # inputs2 = K.print_tensor(inputs, message='inputs = ')  # change also inputs to inputs2 below, must be used
    x = inputs
    for i in range(rnn_layers):
        is_last = (i == rnn_layers - 1)
        x = tf.keras.layers.SimpleRNN(hidden_units,activation=activation[0],
              stateful=stateful,
              return_sequences=return_sequences if is_last else True)(x
              # ,initial_state=initial_state
              )
    for _ in range(dense_layers):
        x = tf.keras.layers.Dense(dense_units, activation=activation[1])(x)
    model = tf.keras.Model(inputs=inputs, outputs=x)
    model.compile(loss='mean_squared_error', optimizer='adam')
    return model

In [None]:
def train_rnn(rnn_dat, activation, hidden_units, dense_units, dense_layers, verbose = False):
    """Fit a stateful RNN on the training samples and return a prediction model.

    Two models with the same layer sizes are built through create_RNN_2():
    `model_fit` (stateful, batch shape (samples, timesteps, features), last
    timestep only) is trained, and `model_predict` (stateless, input shape
    (hours, features), full sequences) receives the fitted weights and is
    returned.

    Parameters
    ----------
    rnn_dat : dict providing 'samples', 'features', 'timesteps', 'Et',
        'x_train' and 'y_train'
    activation : pair of activations forwarded to create_RNN_2()
    hidden_units, dense_units, dense_layers : forwarded to create_RNN_2()
    verbose : when truthy, the summary of `model_predict` is printed

    Notes
    -----
    * `hours` is read from the notebook-level namespace.
    * vprint() output follows the notebook-level `verbose` flag, not the
      `verbose` argument.
    * `w_initial` has five entries and is indexed once per weight array of
      `model_fit`, so more than five weight arrays raises IndexError.
      NOTE(review): presumably this restricts the function to one recurrent
      and one dense layer - confirm.
    """
    
    samples = rnn_dat['samples']
    features = rnn_dat['features']
    timesteps = rnn_dat['timesteps']
    
    # Model used for fitting: the batch size is fixed to the number of samples.
    model_fit=create_RNN_2(hidden_units=hidden_units, 
                        dense_units=dense_units, 
                        batch_shape=(samples,timesteps,features),
                        stateful=True,
                        return_sequences=False,
                        # initial_state=h0,
                        activation=activation,
                        dense_layers=dense_layers)
    
    Et = rnn_dat['Et']
    # Model used for prediction: takes a sequence of `hours` steps and returns
    # an output for every step.
    model_predict=create_RNN_2(hidden_units=hidden_units, dense_units=dense_units,  
                            input_shape=(hours,features),stateful = False,
                            return_sequences=True,
                            activation=activation,dense_layers=dense_layers)

    # model_predict(Et) is evaluated as an argument, so the forward pass runs
    # even when vprint() prints nothing.
    vprint('model_predict input shape',Et.shape,'output shape',model_predict(Et).shape)
    if verbose: print(model_predict.summary())
    
    x_train = rnn_dat['x_train']
    y_train = rnn_dat['y_train']

    # fitting
    DeltaE = 0
    # w_exact is only reported next to the fitted weights below; w_initial
    # supplies the starting values.
    w_exact=  [np.array([[1.-np.exp(-0.1)]]), np.array([[np.exp(-0.1)]]), np.array([0.]),np.array([[1.0]]),np.array([-1.*DeltaE])]
    w_initial=[np.array([[1.-np.exp(-0.1)]]), np.array([[np.exp(-0.1)]]), np.array([0.]),np.array([[1.0]]),np.array([-1.0])]
    w=model_fit.get_weights()
    # Every element of a 2D weight array is set to the scalar initial value
    # divided by the array's first dimension; every element of a 1D array is
    # set to the scalar initial value itself.
    for i in range(len(w)):
        vprint('weight',i,'shape',w[i].shape,'ndim',w[i].ndim,'given',w_initial[i].shape)
        for j in range(w[i].shape[0]):
            if w[i].ndim==2:
                for k in range(w[i].shape[1]):
                    w[i][j][k]=w_initial[i][0][0]/w[i].shape[0]
            else:
                w[i][j]=w_initial[i][0]
    model_fit.set_weights(w)
    # One batch holding all samples per epoch; Keras progress output is off.
    model_fit.fit(x_train, y_train, epochs=5000,batch_size=samples, verbose=0)
    w_fitted=model_fit.get_weights()
    for i in range(len(w)):
        vprint('weight',i,' exact:',w_exact[i],':  initial:',w_initial[i],' fitted:',w_fitted[i])
    
    # Transfer the fitted weights to the stateless sequence model.
    model_predict.set_weights(w_fitted)
    
    return model_predict

In [None]:
# Train with linear activations, 3 hidden units and one dense layer of 1 unit.
# verbose = 1 turns on vprint() output and, passed to train_rnn(), the model
# summary.
verbose = 1
model_predict = train_rnn(
    rnn_dat,
    activation=['linear','linear'],
    hidden_units=3,
    dense_units=1,
    dense_layers=1,
    verbose = verbose
)

In [None]:
def rnn_predict(rnn_dat, scale = False, verbose = False):
    """Run the trained prediction model over the whole input series.

    Uses the notebook-level `model_predict` and `hours`.

    Parameters
    ----------
    rnn_dat : dict with 'Et', the 2D input series with at least `hours` rows,
        and optionally 'scalery', the fitted target scaler
    scale : if True and rnn_dat carries a 'scalery' scaler, the prediction is
        transformed back to the original units. Without a scaler the data was
        not scaled, so the prediction is returned unchanged. Previously this
        flag was overwritten with False and the inverse transform referred to
        an undefined global.
    verbose : forwarded to model_predict.predict()

    Returns
    -------
    1D array of length `hours`.
    """
    # Trim to `hours` rows so a longer series still matches the model input.
    x_input=np.reshape(rnn_dat['Et'][:hours],(1, hours, 1))
    y_output = model_predict.predict(x_input, verbose = verbose)

    vprint('x_input.shape=',x_input.shape,'y_output.shape=',y_output.shape)

    m = np.reshape(y_output,hours)
    scaler = rnn_dat.get('scalery') if scale else None
    if scaler is not None:
        vprint('scaling')
        # The scaler works on 2D (samples, features) arrays
        m = np.reshape(scaler.inverse_transform(np.reshape(m,(hours,1))),hours)

    return m

In [None]:
# Silence vprint() output, then predict over the full series.
# NOTE(review): `m` is rebound here; before this cell it held the Kalman
# filter output.
verbose = 0
m = rnn_predict(rnn_dat)

In [None]:
# Plot the RNN output against its equilibrium input and the observed fuel
# moisture. `hour` was previously defined only inside commented-out code, so
# this cell raised NameError on a fresh kernel.
hour = np.arange(hours)
title="RNN forecast"
plt.figure(figsize=(16,4))
# Series are trimmed to `hours` points so they always match the x axis.
plt.plot(hour,rnn_dat['Et'][:hours,0],linestyle='--',c='r',label='E=Equilibrium data')
plt.scatter(hour,raws_dat['fm'][:hours],c='b',label='data=10-h fuel data')
if m is not None:
    plt.plot(hour[:h2],m[:h2],linestyle='-',c='k',label='m=filtered')
    plt.plot(hour[h2:hours],m[h2:hours],linestyle='-',c='r',label='m=forecast')
plt.title(title)
plt.xlabel('Time (hours)')
plt.ylabel('Fuel moisture content (%)')
plt.legend()

In [None]:
# Overall error: MSE of the RNN output against the observations for the whole run
print(mse(m, raws_dat['fm'][:hours]))

# Forecast error: same measure restricted to the forecast window h2..hours
print(mse(m[h2:hours], raws_dat['fm'][h2:hours]))