In [12]:
import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import xgboost as xgb
from prophet import Prophet

from sklearn.metrics import mean_squared_error
from sklearn import preprocessing

import os
import gc
import sys

from utils import utils, models

In [4]:
os.chdir('..')

In [8]:
daily_df = pd.read_csv('data/daily_data.csv')

In [9]:
daily_df.head()

Unnamed: 0,station,date,temp_max,temp_mean,temp_min,rainfall,snow
0,72202,2013-11-22,26.7,25.957143,24.4,True,False
1,72202,2013-11-23,27.2,25.033333,23.9,True,False
2,72202,2013-11-24,28.3,24.620833,21.7,False,False
3,72202,2013-11-25,25.6,23.179167,21.7,True,False
4,72202,2013-11-26,27.2,25.016667,22.8,True,False


In [15]:
test_date = "2023/9/19"
daily = True

In [None]:
daily_df["date"] = pd.to_datetime(daily_df["date"])


In [43]:
train_data_pd, test_data_pd = models.train_test_split(daily_df, test_date, daily)

In [44]:
train_data_pd.tail()

Unnamed: 0,station,date,temp_max,temp_mean,temp_min,rainfall,snow
76602,PALH0,2023-09-14,11.7,8.820833,7.2,True,False
76603,PALH0,2023-09-15,10.6,8.4125,7.2,True,False
76604,PALH0,2023-09-16,10.6,8.2,6.7,True,False
76605,PALH0,2023-09-17,11.1,9.154167,7.8,True,False
76606,PALH0,2023-09-18,10.6,8.575,6.1,False,False


In [45]:
test_data_pd

Unnamed: 0,station,date,temp_max,temp_mean,temp_min,rainfall,snow
3588,72202,2023-09-19,32.8,28.254167,25.0,True,False
3589,72202,2023-09-20,32.8,27.900000,23.3,True,False
3590,72202,2023-09-21,32.8,27.816667,23.9,True,False
3591,72202,2023-09-22,31.7,27.629167,24.4,True,False
3592,72202,2023-09-23,29.4,27.287500,25.0,True,False
...,...,...,...,...,...,...,...
76667,PALH0,2023-11-18,-6.7,-10.191667,-13.3,False,False
76668,PALH0,2023-11-19,-10.6,-13.812500,-16.1,False,False
76669,PALH0,2023-11-20,-10.6,-16.162500,-18.9,False,False
76670,PALH0,2023-11-21,-4.4,-9.512500,-12.8,False,False


# Prophet training

### First for one station 

In [67]:
train_1 = train_data_pd.loc[train_data_pd.station == '72202']
test_1 = test_data_pd.loc[test_data_pd.station == '72202']

In [71]:
train_1 = pd.DataFrame(train_1.rename(columns={'date': 'ds', 'temp_min': 'y'}))
test_1 = pd.DataFrame(test_1.rename(columns={'date': 'ds', 'temp_min': 'y'}))

In [72]:
train_1.head()

Unnamed: 0,station,ds,temp_max,temp_mean,y,rainfall,snow
0,72202,2013-11-22,26.7,25.957143,24.4,True,False
1,72202,2013-11-23,27.2,25.033333,23.9,True,False
2,72202,2013-11-24,28.3,24.620833,21.7,False,False
3,72202,2013-11-25,25.6,23.179167,21.7,True,False
4,72202,2013-11-26,27.2,25.016667,22.8,True,False


In [73]:
test_1.tail()

Unnamed: 0,station,ds,temp_max,temp_mean,y,rainfall,snow
3648,72202,2023-11-18,26.1,23.8125,22.0,True,False
3649,72202,2023-11-19,29.0,24.333333,21.0,False,False
3650,72202,2023-11-20,29.4,25.304167,20.6,False,False
3651,72202,2023-11-21,28.3,26.345833,24.4,False,False
3652,72202,2023-11-22,28.9,25.5,23.3,False,False


In [76]:
model_p = Prophet()

In [77]:
model_p.fit(train_1[['ds', 'y']])

22:52:40 - cmdstanpy - INFO - Chain [1] start processing
22:52:40 - cmdstanpy - INFO - Chain [1] done processing


<prophet.forecaster.Prophet at 0x7faf723dbf40>

In [78]:
future = model_p.make_future_dataframe(periods=65)
future.head()

Unnamed: 0,ds
0,2013-11-22
1,2013-11-23
2,2013-11-24
3,2013-11-25
4,2013-11-26


In [55]:
forecast_1 = model_p.predict(future)

In [98]:
eval_1 = (pd.DataFrame(test_1[['ds', 'y']])
              .merge(forecast_1[['ds', 'yhat']], on='ds')
              )

In [99]:
np.sqrt(mean_squared_error(eval_1.y, eval_1.yhat))

0.6872328244561169

### Adding regressors

In [88]:
model = Prophet()

In [83]:
train_1.columns

Index(['station', 'ds', 'temp_max', 'temp_mean', 'y', 'rainfall', 'snow'], dtype='object')

In [89]:
model.add_regressor(name='temp_max')
model.add_regressor(name='temp_mean')
model.add_regressor(name='rainfall')
model.add_regressor(name='snow')

<prophet.forecaster.Prophet at 0x7faf72c73250>

In [90]:
model.fit(train_1[['ds', 'y', 'temp_max', 'temp_mean', 'rainfall', 'snow']])

22:56:51 - cmdstanpy - INFO - Chain [1] start processing
22:56:51 - cmdstanpy - INFO - Chain [1] done processing


<prophet.forecaster.Prophet at 0x7faf72c73250>

In [92]:
future = future.merge(test_1[['ds', 'temp_max', 'temp_mean', 'rainfall', 'snow']], on='ds') 

In [94]:
forecast_1 = model.predict(future)

In [100]:
eval_2 = (pd.DataFrame(test_1[['ds', 'y']])
              .merge(forecast_1[['ds', 'yhat']], on='ds')
              )

In [104]:
eval_2.head()

Unnamed: 0,ds,y,yhat
0,2023-09-19,25.0,24.905584
1,2023-09-20,23.3,24.341046
2,2023-09-21,23.9,24.209192
3,2023-09-22,24.4,24.422194
4,2023-09-23,25.0,25.039247


In [102]:
np.sqrt(mean_squared_error(eval_2.y, eval_2.yhat))

0.6872328244561169

#### Avg temp

In [105]:
train_1 = pd.DataFrame(train_1.rename(columns={'date': 'ds', 'y': 'temp_min'})).rename(columns={'temp_mean': 'y'})
test_1 = pd.DataFrame(test_1.rename(columns={'date': 'ds', 'y': 'temp_min'})).rename(columns={'temp_mean': 'y'})

In [106]:
model = Prophet()
model.add_regressor(name='temp_max')
model.add_regressor(name='temp_min')
model.add_regressor(name='rainfall')
model.add_regressor(name='snow')

model.fit(train_1[['ds', 'y', 'temp_max', 'temp_min', 'rainfall', 'snow']])

future = model.make_future_dataframe(periods=65)
future = future.merge(test_1[['ds', 'temp_max', 'temp_min', 'rainfall', 'snow']], on='ds')


23:07:34 - cmdstanpy - INFO - Chain [1] start processing
23:07:34 - cmdstanpy - INFO - Chain [1] done processing


In [107]:
eval = (pd.DataFrame(test_1[['ds', 'y']])
              .merge(forecast_1[['ds', 'yhat']], on='ds')
              )

eval.head()
np.sqrt(mean_squared_error(eval.y, eval.yhat))

2.8903725230407114