In [2]:
import numpy as np 
from tqdm import tqdm 
from mySSA import mySSA
import matplotlib.pyplot as plt 
import pandas as pd 
import warnings
from tqdm import tqdm
from sklearn.preprocessing import StandardScaler

import logging
# Quieten the per-fit INFO chatter from Prophet and its Stan backend.
# The package is imported as `prophet` further down (its logger is no longer
# `fbprophet`), and the "Chain [1] start/done processing" lines are emitted by
# the `cmdstanpy` logger, so all three names are configured; `fbprophet` is
# kept for legacy installs.
# NOTE(review): cmdstanpy appears to install its own console handler and reset
# the level when its logger has no handlers yet; attaching a NullHandler first
# is the usual workaround -- confirm against the installed cmdstanpy version.
for _noisy_name in ("fbprophet", "prophet", "cmdstanpy"):
    _noisy_logger = logging.getLogger(_noisy_name)
    _noisy_logger.addHandler(logging.NullHandler())
    _noisy_logger.setLevel(logging.ERROR)


In [3]:
# Read the hourly dataset, index it by its "time" column and keep only the
# two columns used below: the inflow series and the precipitation series.
df = (
    pd.read_parquet("../data/sym_data/sym_kf_1h_with_inflow_and_rain_forecast.parquet")
    .set_index("time")[["inflow_kf", "precip"]]
)


In [4]:
# Overwrite the index with a synthetic hourly timeline: one timestamp per row,
# starting at 2023-05-01 00:00 UTC. The original "time" values are discarded.
start_date = pd.Timestamp('2023-05-01 00:00:00+00:00')
date_range = pd.date_range(start_date, periods=df.shape[0], freq='H')
df.index = date_range

In [5]:
from scipy import stats
from prophet import Prophet

# Box-Cox transform the inflow series in place. The fitted lambda is kept
# (rather than discarded into `_`, which IPython also uses for the last cell
# output) so forecasts can later be mapped back to original units, e.g. with
# scipy.special.inv_boxcox(values, boxcox_lambda).
# NOTE: this cell is not idempotent -- re-running it transforms the already
# transformed column again; restart from the load cell instead.
df["inflow_kf"], boxcox_lambda = stats.boxcox(df["inflow_kf"].values)

# Move the timestamp index into a column and use the column names the model
# cells below rely on: 'ds' for the timestamp and 'y' for the target.
df = df.reset_index()
df = df.rename(columns={'index':'ds', 'inflow_kf':'y'})
# Drop the UTC timezone so 'ds' holds naive timestamps.
df['ds'] = df['ds'].dt.tz_localize(None)

# One "rainy_day" holiday row per timestamp with positive precipitation.
# `.copy()` makes this an independent frame, so the column assignments below
# do not write into a view of `df` (avoids SettingWithCopyWarning).
holiday_df = df.loc[df['precip'] > 0, ['ds']].copy()
holiday_df['holiday'] = 'rainy_day'
# Window columns passed to Prophet's holiday handling; see the Prophet
# documentation for the units of lower_window / upper_window.
holiday_df['lower_window'] = 0
holiday_df['upper_window'] = 1

In [6]:
# Discard every row that has a missing value in any column. The surviving
# rows keep their original index labels, so the index may contain gaps.
df = df.dropna(axis="index", how="any")

In [7]:
# One-off fit: train on the first 240 rows and forecast the 24 hourly steps
# that follow the last training timestamp.
m = Prophet(daily_seasonality=True).fit(df.iloc[:240])
future = m.make_future_dataframe(24, freq='H', include_history=False)
forecast = m.predict(future)

13:33:38 - cmdstanpy - INFO - Chain [1] start processing
13:33:38 - cmdstanpy - INFO - Chain [1] done processing


In [8]:
# Rolling-origin backtest: fit Prophet on `window_size` consecutive rows,
# forecast the `horizon` rows that follow, then slide the window by `steps`.
window_size = 500   # training rows per fit
horizon = 24        # rows forecast (and scored) after each training window
steps = 24          # rows the window advances between fits
max_windows = 2     # cap on evaluated windows (the previous debug `break` at
                    # i == 24 also stopped after two); set to None for all

# Not used by the loop below; retained because cells outside this one may
# still read them.
ytrue = []
L = 200
K = window_size - L + 1
streams10 = [i for i in range(3)]

predictions = []

# Stop early enough that every window still has `horizon` rows of actuals
# after it. Without the `- horizon`, the final window(s) yield fewer than
# `horizon` rows and the column assignments below fail on a length mismatch.
window_starts = range(0, len(df) - window_size - horizon + 1, steps)
if max_windows is not None:
    window_starts = window_starts[:max_windows]

for i in tqdm(window_starts):
    ts_slice = df.iloc[i:i + window_size]
    horizon_rows = df.iloc[i + window_size:i + window_size + horizon]

    # NOTE(review): `holiday_df` is defined earlier in the notebook from all
    # rows with precip > 0, so it also lists dates after this training window.
    m = Prophet(daily_seasonality=True,
                seasonality_mode='multiplicative',
                holidays=holiday_df,
                changepoint_prior_scale=0.001)
    m.add_regressor('precip', prior_scale=2)
    # NOTE(review): named 'hourly', but Prophet periods are in days, so
    # 5/24 is a 5-hour cycle -- confirm this is intended.
    m.add_seasonality(name='hourly', period=5/24, fourier_order=5)
    m.fit(ts_slice)

    # Build the prediction frame from the actual rows that follow the window
    # (selected by column name, not position) so 'ds', the 'precip' regressor
    # and the 'ytrue' actuals are guaranteed to refer to the same rows even if
    # the series has gaps. The never-registered 'rain' column is dropped.
    future = horizon_rows[["ds", "precip"]].reset_index(drop=True)
    forecast = m.predict(future)
    forecast["ytrue"] = horizon_rows["y"].values
    predictions.append(forecast)
    

  0%|          | 0/156 [00:00<?, ?it/s]13:33:38 - cmdstanpy - INFO - Chain [1] start processing
13:33:38 - cmdstanpy - INFO - Chain [1] done processing
  1%|          | 1/156 [00:00<01:34,  1.64it/s]13:33:38 - cmdstanpy - INFO - Chain [1] start processing
13:33:39 - cmdstanpy - INFO - Chain [1] done processing
  1%|          | 1/156 [00:01<03:07,  1.21s/it]


In [9]:
# Stack the per-window forecast frames row-wise into one frame. Each frame's
# own index labels are kept, so labels repeat across windows.
results_df = pd.concat(predictions, axis=0, ignore_index=False)

In [13]:
# Display the combined forecast frame: Prophet's component columns plus the
# 'ytrue' actuals attached in the backtest loop.
# NOTE(review): this cell's execution count (13) is out of order relative to
# its neighbours; the index shown depends on whether the reset_index cell had
# already been run.
results_df

Unnamed: 0,ds,trend,yhat_lower,yhat_upper,trend_lower,trend_upper,daily,daily_lower,daily_upper,extra_regressors_multiplicative,...,rainy_day_lower,rainy_day_upper,weekly,weekly_lower,weekly_upper,additive_terms,additive_terms_lower,additive_terms_upper,yhat,ytrue
0,2023-05-21 20:00:00,2.077971,2.081122,2.102141,2.077971,2.077971,0.002978,0.002978,0.002978,-1.8e-05,...,0.0,0.0,-0.003081,-0.003081,-0.003081,0.0,0.0,0.0,2.092138,2.089467
1,2023-05-21 21:00:00,2.077894,2.079996,2.100643,2.077894,2.077894,0.001868,0.001868,0.001868,-1.8e-05,...,0.0,0.0,-0.002941,-0.002941,-0.002941,0.0,0.0,0.0,2.090179,2.085871
2,2023-05-21 22:00:00,2.077816,2.077498,2.097683,2.077816,2.077816,0.000557,0.000557,0.000557,-1.8e-05,...,0.0,0.0,-0.0028,-0.0028,-0.0028,0.0,0.0,0.0,2.08772,2.082624
3,2023-05-21 23:00:00,2.077738,2.074416,2.095474,2.077738,2.077738,-0.00108,-0.00108,-0.00108,-1.8e-05,...,0.0,0.0,-0.002658,-0.002658,-0.002658,0.0,0.0,0.0,2.084723,2.077346
4,2023-05-22 00:00:00,2.077661,2.069807,2.089682,2.077661,2.077662,-0.003214,-0.003214,-0.003214,-1.8e-05,...,0.0,0.0,-0.002516,-0.002516,-0.002516,0.0,0.0,0.0,2.080305,2.072804
5,2023-05-22 01:00:00,2.077583,2.065057,2.085091,2.077582,2.077586,-0.005774,-0.005774,-0.005774,-1.8e-05,...,0.0,0.0,-0.002372,-0.002372,-0.002372,0.0,0.0,0.0,2.075035,2.067468
6,2023-05-22 02:00:00,2.077506,2.05947,2.080445,2.077502,2.077511,-0.008261,-0.008261,-0.008261,-1.8e-05,...,0.0,0.0,-0.002229,-0.002229,-0.002229,0.0,0.0,0.0,2.070226,2.063186
7,2023-05-22 03:00:00,2.077428,2.057073,2.077112,2.077421,2.077435,-0.009865,-0.009865,-0.009865,-1.8e-05,...,0.0,0.0,-0.002086,-0.002086,-0.002086,0.0,0.0,0.0,2.067163,2.058816
8,2023-05-22 04:00:00,2.077351,2.057112,2.077548,2.077341,2.077361,-0.009899,-0.009899,-0.009899,-1.8e-05,...,0.0,0.0,-0.001942,-0.001942,-0.001942,0.0,0.0,0.0,2.067502,2.057697
9,2023-05-22 05:00:00,2.077273,2.06034,2.081407,2.07726,2.077286,-0.008226,-0.008226,-0.008226,-1.8e-05,...,0.0,0.0,-0.001798,-0.001798,-0.001798,0.0,0.0,0.0,2.070995,2.060907


In [10]:
# Replace the repeated per-window index labels with one continuous 0..n-1
# range; the old labels are discarded rather than kept as a column.
results_df = results_df.set_axis(pd.RangeIndex(len(results_df)), axis=0)

In [11]:
import plotly.graph_objects as go
from plotly_resampler import FigureResampler, FigureWidgetResampler
 
 
# Interactive figure with two lines over the forecast timestamps: the model's
# prediction ('yhat') and the observed value ('ytrue').
fig = FigureWidgetResampler(go.Figure())
for trace_name, value_column in (("Forecast", "yhat"), ("True", "ytrue")):
    fig.add_trace(
        go.Scattergl(name=trace_name, showlegend=True),
        hf_x=results_df["ds"],
        hf_y=results_df[value_column],
    )
fig.update_layout(
    margin=dict(l=10, r=10, t=10, b=10),
    height=400,
    template="plotly_dark",
)
display(fig)

FigureWidgetResampler({
    'data': [{'name': 'Forecast',
              'showlegend': True,
              'type': 'scattergl',
              'uid': '0c1592f5-e83c-4036-98a1-9db409487c00',
              'x': array([datetime.datetime(2023, 5, 21, 20, 0),
                          datetime.datetime(2023, 5, 21, 21, 0),
                          datetime.datetime(2023, 5, 21, 22, 0),
                          datetime.datetime(2023, 5, 21, 23, 0),
                          datetime.datetime(2023, 5, 22, 0, 0),
                          datetime.datetime(2023, 5, 22, 1, 0),
                          datetime.datetime(2023, 5, 22, 2, 0),
                          datetime.datetime(2023, 5, 22, 3, 0),
                          datetime.datetime(2023, 5, 22, 4, 0),
                          datetime.datetime(2023, 5, 22, 5, 0),
                          datetime.datetime(2023, 5, 22, 6, 0),
                          datetime.datetime(2023, 5, 22, 7, 0),
                          datetime.datet

In [12]:
# Display the holiday table handed to Prophet: one 'rainy_day' row per
# timestamp where precip > 0, with lower_window=0 and upper_window=1.
holiday_df

Unnamed: 0,ds,holiday,lower_window,upper_window
84,2023-05-04 12:00:00,rainy_day,0,1
85,2023-05-04 13:00:00,rainy_day,0,1
86,2023-05-04 14:00:00,rainy_day,0,1
87,2023-05-04 15:00:00,rainy_day,0,1
88,2023-05-04 16:00:00,rainy_day,0,1
...,...,...,...,...
4218,2023-10-23 18:00:00,rainy_day,0,1
4220,2023-10-23 20:00:00,rainy_day,0,1
4221,2023-10-23 21:00:00,rainy_day,0,1
4223,2023-10-23 23:00:00,rainy_day,0,1
