## Imports

In [None]:
import pandas as pd 
import numpy as np 
import matplotlib.pyplot as plt
from math import ceil, sqrt
from sklearn.metrics import mean_squared_error
from statsmodels.tsa.arima.model import ARIMA
import pickle
import seaborn as sns
from scalecast.Forecaster import Forecaster
import tensorflow as tf

## DF Import and Prep

In [None]:
# Location of the raw data, the column to forecast, and how the date column
# is named and formatted in the CSV.
data_file_path = 'data/drought_forecasting_data.csv'
target = 'QV2M'
date = dict(column='date', format='%Y-%m-%d')

# Load the CSV and index the rows by the parsed date column. The original
# date column is kept in the frame as well.
df = pd.read_csv(data_file_path)
parsed_dates = pd.to_datetime(df[date['column']], format=date['format'])
df.index = parsed_dates
df

### test/train split

In [None]:
# Chronological split: the first 80% of rows (rounded up) are used for
# training, the remaining rows for testing.
train_size = ceil(0.8 * len(df))
train = df.iloc[:train_size]
test = df.iloc[train_size:]

In [None]:
# Draw the train and test portions of the target with identical plot settings.
split_plot_options = {'figsize': (15, 8), 'fontsize': 14}
for split_frame in (train, test):
  split_frame[target].plot(**split_plot_options)
plt.show()

## Simple Average

In [None]:
# Simple-average baseline: every test row is predicted with the mean of the
# training target.
pred = train[target].mean()
SA_test = test.copy()
SA_test['pred'] = pred

plt.figure(figsize=(12, 8))
for curve, curve_label in (
  (train[target], 'Train'),
  (test[target], 'Test'),
  (SA_test['pred'], 'Simple Average Forecast'),
):
  plt.plot(curve, label=curve_label)
plt.legend(loc='best')
plt.show()

In [None]:
# Root-mean-squared error of the simple-average forecast on the test set.
sa_squared_error = mean_squared_error(test[target], SA_test['pred'])
rmse = sqrt(sa_squared_error)
print(rmse)


## Persistence Model

In [None]:
# Persistence (naive) baseline: the prediction for each test row is the
# previous observed value of the target.
PM_test = test.copy()
# The first test row has no predecessor inside the test set, so the shift is
# seeded with the last training observation. `fill_value` together with
# `.iloc[-1]` replaces the chained assignment `PM_test['pred'][0] = ...`,
# which raises SettingWithCopyWarning and has no effect under pandas
# copy-on-write, and the deprecated positional `[-1]` lookup on a
# date-indexed Series.
PM_test['pred'] = PM_test[target].shift(1, fill_value=train[target].iloc[-1])
plt.figure(figsize=(12,8))
plt.plot(train[target], label='Train')
plt.plot(test[target], label='Test')
plt.plot(PM_test['pred'], label='Persistence Model Forecast')
plt.legend(loc='best')
plt.show()

In [None]:
# Root-mean-squared error of the persistence forecast on the test set.
pm_squared_error = mean_squared_error(test[target], PM_test['pred'])
rmse = sqrt(pm_squared_error)
print(rmse)

## Rolling Mean

In [None]:
# Rolling-mean baseline: each test row is predicted with the mean of the `p`
# observations that immediately precede it in the full series.
p = 2
RM_test = test.copy()
test_size = len(RM_test)

# `window_end` is the (negative) position of the test row being predicted,
# counted from the end of `df`; the window covers the `p` rows before it.
target_series = df[target]
RM_pred = [
  target_series.iloc[window_end - p:window_end].mean()
  for window_end in range(-test_size, 0)
]
RM_test['pred'] = RM_pred

plt.figure(figsize=(12, 8))
for curve, curve_label in (
  (train[target], 'Train'),
  (test[target], 'Test'),
  (RM_test["pred"], 'Rolling Mean Forecast'),
):
  plt.plot(curve, label=curve_label)
plt.legend(loc='best')
plt.show()

In [None]:
# Root-mean-squared error of the rolling-mean forecast on the test set.
rm_squared_error = mean_squared_error(test[target], RM_test['pred'])
rmse = sqrt(rm_squared_error)
print(rmse)

## ARIMA

In [None]:
def arima(order):
  """Fit an ARIMA model on the training target and forecast the test horizon.

  `order` is forwarded unchanged as `ARIMA(..., order=order)`. The function
  reads the module-level `train`, `test` and `target`, and returns the
  output of `forecast` for `len(test)` steps.
  """
  horizon = len(test)
  # `warn_convergence=False` is forwarded to the fitting routine.
  fitted = ARIMA(train[target], order=order).fit(
    method_kwargs={'warn_convergence': False}
  )
  return fitted.forecast(steps=horizon, signal_only=False)

In [None]:
from tqdm import tqdm

# Candidate differencing orders (d) and AR/MA orders (p and q share one grid).
d_opts = (0, 1, 2)
pq_opts = (1, 2, 3, 4, 5)

# Exhaustive search over every (p, d, q) combination. Each entry of
# `rmse_values` is [test RMSE, order]. The progress bar wraps the innermost
# loop only, so a new bar is drawn for each (d, p) pair.
rmse_values = []
for d in d_opts:
  for p in pq_opts:
    for q in tqdm(pq_opts):
      order = (p, d, q)
      ARIMA_pred = arima(order)
      squared_error = mean_squared_error(test[target], ARIMA_pred)
      rmse = sqrt(squared_error)
      rmse_values.append([rmse, order])

In [None]:
# Sort the grid-search results in place (ascending RMSE), refit the
# best-scoring order, and plot its forecast against the train/test series.
rmse_values.sort()
best_order = rmse_values[0][1]
ARIMA_pred = arima(best_order)
# Re-label the forecast with the test dates so it lines up with the test curve.
ARIMA_pred.index = test.index

plt.figure(figsize=(16, 8))
for curve, curve_label in (
  (train[target], 'Train'),
  (test[target], 'Test'),
  (ARIMA_pred, 'ARIMA'),
):
  plt.plot(curve, label=curve_label)
plt.legend(loc='best')
plt.show()

In [None]:
# Display the ARIMA grid-search results as [rmse, order] pairs. The original
# cell referenced `rmse_opts`, which is never defined and raised NameError;
# the results are stored in `rmse_values`.
rmse_values

In [None]:
# Root-mean-squared error of the selected ARIMA forecast on the test set.
arima_squared_error = mean_squared_error(test[target], ARIMA_pred)
rmse = sqrt(arima_squared_error)
print(rmse)

## LSTM

In [None]:
# Pass a 16x8 default figure size to seaborn and record the number of test
# rows for the LSTM section.
figure_rc = {'figure.figsize': (16, 8)}
sns.set(rc=figure_rc)
test_size = len(test)

In [None]:
# Build the scalecast Forecaster from the full target series and its dates,
# using the same number of rows for the test length and the future horizon,
# then select the LSTM estimator.
observed_values = df[target]
observed_dates = df[date["column"]]
f = Forecaster(y=observed_values, current_dates=observed_dates)
f.set_test_length(test_size)
f.generate_future_dates(test_size)
f.set_estimator('lstm')

In [None]:
def lstm(lr, hs, epochs=25, patience=5):
  """Train one LSTM configuration on the module-level Forecaster `f`.

  Args:
    lr: Learning rate, passed as `learning_rate`.
    hs: Number of units used for each of the three LSTM layers.
    epochs: Maximum number of training epochs. Defaults to 25, the value
      that was previously hard-coded.
    patience: `patience` of the EarlyStopping callback. Defaults to 5, the
      value that was previously hard-coded.

  Returns:
    The `call_me` nickname the model was registered under
    (`lstm-lr=<lr>-hidden_size=<hs>`), so callers can look up its results
    without rebuilding the string.
  """
  model_name = f'lstm-lr={lr}-hidden_size={hs}'
  # NOTE(review): EarlyStopping monitors 'mse'. Confirm that the training
  # logs actually contain a metric with that name; if only 'loss' is
  # reported, Keras warns and early stopping never triggers.
  early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='mse',
    patience=patience,
  )
  f.manual_forecast(
    call_me=model_name,
    epochs=epochs,
    callbacks=early_stopping,
    lstm_layer_sizes=(hs, hs, hs),
    dropout=(0, 0, 0),
    learning_rate=lr,
  )
  return model_name

In [None]:
# Candidate learning rates and hidden-layer sizes for the LSTM grid search.
lr_opts = (0.001, 0.05, 0.01)
hs_opts = (8, 16, 32)
rmse_values = []

# Train every (learning rate, hidden size) pair and record its test RMSE as
# [rmse, (lr, hs)]. Predictions are looked up under the nickname that
# `lstm` registers the model with.
for lr in lr_opts:
  for hs in hs_opts:
    model_key = f"lstm-lr={str(lr)}-hidden_size={str(hs)}"
    lstm(lr, hs)
    LSTM_pred = f.export()["test_set_predictions"]
    squared_error = mean_squared_error(test[target], LSTM_pred[model_key])
    rmse = sqrt(squared_error)
    rmse_values.append([rmse, (lr, hs)])

In [None]:
# Display the LSTM grid-search results as [rmse, (lr, hs)] pairs.
rmse_values

In [None]:
# Plot the test-set predictions of one specific LSTM configuration. The name
# is passed as a plain string; the parentheses in the earlier form did not
# make it a tuple.
lstm_model_to_plot = "lstm-lr=0.01-hidden_size=32"
f.plot_test_set(lstm_model_to_plot)