In [18]:
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.metrics import mean_absolute_error, mean_absolute_percentage_error
from sklearn.model_selection import TimeSeriesSplit
from statsmodels.tsa.arima.model import ARIMA, ARIMAResults

Load AQI data

In [19]:
# Read the cleaned air-quality table, parse the 'time' column as datetimes and
# use it as the index so every remaining column is a time series.
aqi_df = (
    pd.read_csv('data/processed/cleaned/cleaned_air.csv')
    .assign(time=lambda frame: pd.to_datetime(frame['time']))
    .set_index('time')
)
# Declare the index as hourly; pandas raises here if the timestamps do not
# conform to that frequency.
aqi_df.index.freq = "h"

ARIMA order parameters (p, d, q), chosen during the EDA in data_exploration.ipynb

In [20]:
# ARIMA order passed as order=(p, d, q) to every model in this notebook.
p, d, q = 2, 0, 0

Model cross-validation

In [44]:
# One model is evaluated per column of the AQI table.
aqi_comps = aqi_df.columns

# Cross-validation layout: `splits` folds, each validating on `val_len` rows
# (the index is hourly, so 24 rows is one day).
splits, val_len = 5, 24
tscv = TimeSeriesSplit(test_size=val_len, n_splits=splits)

In [None]:
# Time-series cross-validation of one ARIMA(p, d, q) model per component.
#
# For every fold the model is fitted on the training window and used to
# forecast the whole validation window. Two scores are collected per fold:
#   * MAPE - scikit-learn returns it as a fraction (1.0 == 100 %).
#   * MASE - MAE of the forecast divided by the MAE of a naive forecast that
#            repeats the last len(val) training observations, both measured
#            on the same validation window. Values below 1 beat the baseline.

# Folds whose MAPE exceeds this fraction (5 == 500 %) are treated as outliers
# and left out of both averages.
MAX_FOLD_MAPE = 5

mape_list = {}      # component -> mean MAPE over the retained folds
mase_list = {}      # component -> mean MASE over the retained folds
skipped_folds = {}  # component -> number of folds dropped by the MAPE filter

for comp in aqi_comps:
    series = aqi_df[comp]
    mape_scores = []
    mase_scores = []
    n_skipped = 0

    for train_index, val_index in tscv.split(aqi_df):
        train, val = series.iloc[train_index], series.iloc[val_index]
        horizon = len(val)

        model = ARIMA(train, order=(p, d, q))
        fit = model.fit()
        forecast = fit.forecast(steps=horizon)

        mape = mean_absolute_percentage_error(val, forecast)
        if mape > MAX_FOLD_MAPE:
            # Keep a count so dropped folds are visible instead of silent.
            n_skipped += 1
            continue

        mae = mean_absolute_error(val, forecast)
        # Naive prediction: use value at t to guess value at t + len(val).
        # .iloc makes the "last `horizon` rows" slice explicitly positional.
        naive_mae = np.mean(np.abs(train.iloc[-horizon:].values - val.values))
        # NOTE(review): a naive_mae of exactly 0 would make this ratio inf/nan;
        # not guarded here - confirm whether that can occur in this data.
        mase = mae / naive_mae

        mape_scores.append(mape)
        mase_scores.append(mase)

    skipped_folds[comp] = n_skipped
    # If every fold was filtered out, report NaN explicitly rather than letting
    # np.mean([]) emit a "Mean of empty slice" RuntimeWarning.
    mape_list[comp] = np.mean(mape_scores) if mape_scores else np.nan
    mase_list[comp] = np.mean(mase_scores) if mase_scores else np.nan

In [53]:
# Summary table: one row per metric, one column per component.
metric_by_component = {'mape': mape_list, 'mase': mase_list}
results = pd.DataFrame(metric_by_component).transpose()
results

Unnamed: 0,carbon_monoxide,pm10,pm2_5,nitrogen_dioxide,ozone,sulphur_dioxide
mape,0.386711,0.767774,0.702005,0.803365,0.564204,0.389988
mase,1.460536,0.804525,0.876092,1.146034,1.669699,0.9926


Fit model on full training data

In [50]:
# Refit one ARIMA(p, d, q) per component on the complete series and pickle the
# fitted results so they can be loaded later without refitting.
model_dir = Path("models/arima")
# Create the output directory up front: on a fresh checkout it may not exist,
# and saving into a missing directory would fail.
model_dir.mkdir(parents=True, exist_ok=True)

for comp in aqi_comps:
    model = ARIMA(aqi_df[comp], order=(p, d, q))
    fit = model.fit()
    fit.save(str(model_dir / f"{comp}.pickle"))