In [5]:
import sys
# Add the parent directory to the import path *before* importing `models_utils`
# (presumably that is where the module lives — confirm against the repo layout).
sys.path.append('..')
import models_utils
import warnings
# NOTE(review): this silences every warning for the rest of the session, not
# only known-noisy ones; consider narrowing it to specific categories.
warnings.filterwarnings("ignore")


In [6]:
from statsmodels.tsa.arima.model import ARIMA
from sklearn.metrics import mean_absolute_error, mean_squared_error
import pandas as pd

def train_ARIMA_n_ahead(data, target, date_col, year_test_start, n_ahead=1, arima_order=(1, 1, 1), min_predictions=104):
    """Walk-forward ARIMA evaluation that forecasts `target` `n_ahead` weeks out.

    The input is re-indexed onto a weekly (`W-MON`) grid, gaps are forward-filled
    and the series is split at `year_test_start`. For every evaluated test date a
    fresh ARIMA model is fitted on all observations up to `n_ahead` weeks before
    that date, and the last step of its `n_ahead`-step forecast is the prediction.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame containing at least `date_col` and `target`. It is not modified.
    target : str
        Name of the column to forecast.
    date_col : str
        Name of the column holding dates (anything `pd.to_datetime` accepts).
    year_test_start : datetime-like
        Rows dated before this value form the training part, the rest the test part.
    n_ahead : int, default 1
        Forecast horizon in weekly steps. The first `n_ahead - 1` test dates are
        not evaluated.
    arima_order : tuple, default (1, 1, 1)
        `order` argument passed to `ARIMA`.
    min_predictions : int, default 104
        Number of predictions that must be produced; exactly this many are returned.

    Returns
    -------
    tuple
        `(predictions, MAE, MSE)` where `predictions` is a list of rounded
        forecasts of length `min_predictions` and MAE/MSE compare them with the
        rounded actual values. `([], None, None)` is returned (after printing
        the reason) when the data cannot support the evaluation.
    """
    # Work on a private two-column copy so the caller's DataFrame is never mutated.
    frame = data[[date_col, target]].copy()
    frame[date_col] = pd.to_datetime(frame[date_col])
    frame = frame.sort_values(by=date_col).set_index(date_col)

    # Re-index onto the weekly grid; weeks absent from the input are forward-filled.
    frame = frame.asfreq('W-MON')
    frame[target] = frame[target].ffill()

    # ffill cannot fill leading gaps, so NaNs may still be present here.
    if frame[target].isna().sum() > 0:
        print("Warning: Missing data remains after resampling.")
        return [], None, None

    series = frame[[target]]

    # Split into train/test
    train = series[series.index < year_test_start]
    test = series[series.index >= year_test_start]

    if train.empty or test.empty:
        print("Train or test dataset is empty. Check your date range.")
        return [], None, None

    # Dates that will be predicted: the first n_ahead - 1 test dates are dropped.
    test_dates = test.index[n_ahead - 1:]

    if len(test_dates) < min_predictions:
        print(f"Not enough test dates to make {min_predictions} predictions. Only {len(test_dates)} available.")
        return [], None, None

    predictions = []
    actuals = []

    for current_date in test_dates:
        # Only the first `min_predictions` results are kept, so stop re-fitting
        # models as soon as that many have been collected.
        if len(predictions) >= min_predictions:
            break

        try:
            current_loc = series.index.get_loc(current_date)
            end_loc = current_loc - n_ahead
            if end_loc < 0:
                print(f"Skipping prediction at {current_date}: Not enough history.")
                continue

            # History ends n_ahead steps before the date being predicted.
            train_series = series.iloc[:end_loc + 1][target]

            model = ARIMA(train_series, order=arima_order)
            fitted_model = model.fit()
            forecast = fitted_model.forecast(steps=n_ahead)

            # The forecast is indexed by date, so take the last step by position;
            # a plain `forecast[-1]` relies on deprecated positional fallback.
            predicted_value = forecast.iloc[-1]
            actual_value = series.at[current_date, target]

            # Appended together so the two lists always stay aligned.
            predictions.append(round(predicted_value))
            actuals.append(round(actual_value))

        except Exception as e:
            # Best effort: a failed fit/forecast skips this date only.
            print(f"Error at {current_date}: {e}")

    # Some dates may have been skipped or failed above.
    if len(predictions) < min_predictions:
        print(f"Only {len(predictions)} valid predictions after filtering.")
        return [], None, None

    predictions = predictions[:min_predictions]
    actuals = actuals[:min_predictions]

    # Compute metrics
    MAE = mean_absolute_error(actuals, predictions)
    MSE = mean_squared_error(actuals, predictions)

    return predictions, MAE, MSE


In [7]:
# Read the municipality names, one per line. Blank lines (e.g. a trailing
# newline at the end of the file) are skipped so they cannot become empty
# names and, later on, bogus "_merged.csv" paths.
with open("../municipals.txt", "r") as f:
    municipals = [name for name in (line.strip() for line in f) if name]
municipals

['Ajuy',
 'Alimodian',
 'Anilao',
 'Badiangan',
 'Balasan',
 'Banate',
 'Barotac Nuevo',
 'Barotac Viejo',
 'Batad',
 'Bingawan',
 'Cabatuan',
 'Calinog',
 'Carles',
 'Passi City',
 'Concepcion',
 'Dingle',
 'Duenas',
 'Dumangas',
 'Estancia',
 'Guimbal',
 'Iloilo City',
 'Igbaras',
 'Janiuay',
 'Lambunao',
 'Leganes',
 'Lemery',
 'Leon',
 'Maasin',
 'Miagao',
 'Mina',
 'New Lucena',
 'Oton',
 'Pavia',
 'Pototan',
 'San Dionisio',
 'San Enrique',
 'San Joaquin',
 'San Miguel',
 'San Rafael',
 'Santa Barbara',
 'Sara',
 'Tigbauan',
 'Tubungan',
 'Zarraga']

In [8]:
# Forecast horizons in weeks: 1 through 10, then 12.
n_weeks_ahead = [*range(1, 11), 12]
for municipal in municipals:
    csv_path = f"../../data/Merged Data/{municipal}_merged.csv"
    municipal_df = pd.read_csv(csv_path)
    municipal_df["Year-Week"] = pd.to_datetime(municipal_df["Year-Week"])
    for n in n_weeks_ahead:
        # The test split starts n weeks before 2023-01-01.
        n_date = pd.to_datetime("2023-01-01") - pd.DateOffset(weeks=n)
        print(n_date)
        predicted, MAE, MSE = train_ARIMA_n_ahead(
            municipal_df,
            target="Cases",
            date_col="Year-Week",
            year_test_start=n_date,
            n_ahead=n,
        )
        # NOTE(review): on failure the call above yields ([], None, None) and
        # those values are passed to save_data unchanged — confirm it copes.
        models_utils.save_data(municipal, n, MSE, MAE, predicted, municipal_df, type="Municipal")

2022-12-25 00:00:00
2022-12-18 00:00:00
2022-12-11 00:00:00
2022-12-04 00:00:00
2022-11-27 00:00:00
2022-11-20 00:00:00
2022-11-13 00:00:00
2022-11-06 00:00:00
2022-10-30 00:00:00
2022-10-23 00:00:00
2022-10-09 00:00:00
2022-12-25 00:00:00
2022-12-18 00:00:00
2022-12-11 00:00:00
2022-12-04 00:00:00
2022-11-27 00:00:00
2022-11-20 00:00:00
2022-11-13 00:00:00
2022-11-06 00:00:00
2022-10-30 00:00:00
2022-10-23 00:00:00
2022-10-09 00:00:00
2022-12-25 00:00:00
2022-12-18 00:00:00
2022-12-11 00:00:00
2022-12-04 00:00:00
2022-11-27 00:00:00
2022-11-20 00:00:00
2022-11-13 00:00:00
2022-11-06 00:00:00
2022-10-30 00:00:00
2022-10-23 00:00:00
2022-10-09 00:00:00
2022-12-25 00:00:00
2022-12-18 00:00:00
2022-12-11 00:00:00
2022-12-04 00:00:00
2022-11-27 00:00:00
2022-11-20 00:00:00
2022-11-13 00:00:00
2022-11-06 00:00:00
2022-10-30 00:00:00
2022-10-23 00:00:00
2022-10-09 00:00:00
2022-12-25 00:00:00
2022-12-18 00:00:00
2022-12-11 00:00:00
2022-12-04 00:00:00
2022-11-27 00:00:00
2022-11-20 00:00:00


In [36]:
# Number of predictions currently held in `predicted`.
# NOTE(review): relies on `predicted` having been set by an earlier cell in this kernel session.
len(predicted)

105