In [35]:
import pandas as pd
from statsmodels.tsa.statespace.varmax import VARMAX
from sklearn.preprocessing import StandardScaler

In [36]:
# Load the two CSV inputs. For each file the first column becomes the index
# and pandas is asked to parse that index as dates.
weather_csv = 'data/region/vietnam/interpolated_weather.csv'
air_csv = 'data/region/vietnam/interpolated_air.csv'
read_options = {'index_col': 0, 'parse_dates': True}

interpolated_weather_df = pd.read_csv(weather_csv, **read_options)
interpolated_air_df = pd.read_csv(air_csv, **read_options)

In [37]:
# Move the current index back into the columns, then key every row by the
# ('time', 'province') pair. Both frames must expose columns with exactly
# these names after the reset, otherwise set_index raises KeyError.
panel_keys = ['time', 'province']

interpolated_air_df = interpolated_air_df.reset_index().set_index(panel_keys)
interpolated_weather_df = interpolated_weather_df.reset_index().set_index(panel_keys)

In [38]:
def _widen_to_hourly(long_frame):
    """Pivot the 'province' index level into columns and conform to hourly frequency.

    The remaining (time) index is passed through ``asfreq('h')``, so the
    returned frame has one row per hourly step of that index.
    """
    wide_frame = long_frame.unstack(level='province')
    return wide_frame.asfreq('h')


air_unstacked = _widen_to_hourly(interpolated_air_df)
weather_unstacked = _widen_to_hourly(interpolated_weather_df)

In [39]:
# Single split point shared by both datasets: rows strictly before it are
# training data, rows at or after it are held out for testing.
TEST_START = '2024-01-01 00:00:00'


def _split_at(frame, test_start=TEST_START):
    """Partition ``frame`` into ``(train, test)`` around one cutoff timestamp.

    Parameters
    ----------
    frame : pandas.DataFrame
        Frame indexed by a DatetimeIndex.
    test_start : str
        First timestamp that belongs to the test set. It is interpreted in
        the timezone of ``frame.index`` when that index is timezone-aware.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(train, test)``. Every row of ``frame`` ends up in exactly one of
        the two, so ``len(train) + len(test) == len(frame)``.
    """
    # Using one cutoff (instead of separate "train end" / "test start" labels)
    # guarantees that no row can fall into a gap between the two bounds.
    cutoff = pd.Timestamp(test_start, tz=frame.index.tz)
    is_train = frame.index < cutoff
    return frame.loc[is_train], frame.loc[~is_train]


air_train, air_test = _split_at(air_unstacked)
weather_train, weather_test = _split_at(weather_unstacked)

In [40]:
# One independent scaler per dataset, so each keeps its own fitted statistics.
air_scaler, weather_scaler = StandardScaler(), StandardScaler()

In [41]:
# Fit each scaler on its training split only and scale that same split.
# The scalers receive plain NumPy arrays, so the results carry no index or
# column labels at this point.
air_train_values = air_train.to_numpy()
weather_train_values = weather_train.to_numpy()

air_normalized = air_scaler.fit_transform(air_train_values)
weather_normalized = weather_scaler.fit_transform(weather_train_values)

In [43]:
def _with_labels_of(values, template):
    """Wrap ``values`` in a DataFrame carrying ``template``'s index and columns."""
    return pd.DataFrame(values, columns=template.columns, index=template.index)


# Re-attach the training frames' labels to the scaled values.
air_normalized = _with_labels_of(air_normalized, air_train)
weather_normalized = _with_labels_of(weather_normalized, weather_train)

In [10]:
# Model specification: the scaled air frame is the endogenous block and the
# scaled weather frame is the exogenous block.
# order=(0, 2) -> no autoregressive lags, two moving-average lags.
# trend='ct'   -> constant plus linear time trend.
varmax_spec = {'order': (0, 2), 'trend': 'ct'}
model = VARMAX(endog=air_normalized, exog=weather_normalized, **varmax_spec)

In [None]:
# NOTE(review): the optimizer is capped at a single iteration, so the
# estimates are unlikely to have converged -- presumably a smoke-run
# setting; confirm before relying on the fitted parameters.
fit_options = {'maxiter': 1}
results = model.fit(**fit_options)

In [None]:
# Persist the fit results to disk, relative to the current working directory.
results_path = 'varmax.pkl'
results.save(results_path)