In [42]:
import pandas as pd
from statsmodels.tsa.arima.model import ARIMA
from sklearn.metrics import mean_absolute_error, mean_squared_error

import pandas as pd
from statsmodels.tsa.arima.model import ARIMA
from sklearn.metrics import mean_absolute_error, mean_squared_error

from statsmodels.tsa.arima.model import ARIMA
from sklearn.metrics import mean_absolute_error, mean_squared_error
import pandas as pd

def train_ARIMA_n_ahead(data, target, date_col, year_test_start, year_test_end, n_ahead=1, arima_order=(1,1,1), date_limit='2024-12-31'):
    # Ensure that the date limit is in the correct format
    date_limit = pd.to_datetime(date_limit)

    # Prepare the data
    data[date_col] = pd.to_datetime(data[date_col])
    data = data.sort_values(date_col).reset_index(drop=True)
    data = data[['Year-Week', target]].copy()
    data.set_index(date_col, inplace=True)

    # Set frequency and handle missing data
    data = data.asfreq('W', method='ffill')
    print(data.head())

    if data.isnull().all().any():
        print("Data contains missing values after frequency setting. Check your data.")
        return [], None, None

    # Filter the data to make sure it's <= 2024-12-31
    data = data[data.index <= date_limit]

    # Filter for just date and target
    series = data[[target]].dropna()

    # Check if train/test splits are valid
    if series[series.index < year_test_start].empty or series[(series.index >= year_test_start) & (series.index < year_test_end)].empty:
        print("Train or test dataset is empty. Check your date range.")
        return [], None, None

    train = series[series.index < year_test_start]
    test = series[(series.index >= year_test_start) & (series.index < year_test_end)]

    predictions = []
    actuals = []

    # Adjust test_dates to ensure they are within bounds
    test_dates = test.index[n_ahead-1:]
    if len(test_dates) == 0:
        print("Test dataset is too small for forecasting. Check the test range.")
        return [], None, None

    for current_date in test_dates:
        # Define training window ending n_ahead steps before the current date
        end_index = series.index.get_loc(current_date) - n_ahead
        train_series = series.iloc[:end_index + 1][target]

        # Check if train_series has data
        if train_series.empty:
            print(f"Training data is empty for date {current_date}.")
            continue

        # Fit ARIMA on current train data
        model = ARIMA(train_series, order=arima_order)
        fitted = model.fit()

        # Forecast n_ahead steps forward
        forecast = fitted.forecast(steps=n_ahead)
        predictions.append(int(round(forecast[-1])))  # take the last step
        actuals.append(int(series.loc[current_date][target]))

    if not predictions:
        print("No predictions were made. Check your data and training process.")
        return [], None, None
    # Make sure predictions and actuals are of the length 104, remove the last n_ahead-1
    predictions = predictions[:104]
    actuals = actuals[:104]
    # Evaluate
    MAE = mean_absolute_error(actuals, predictions)
    MSE = mean_squared_error(actuals, predictions)

    return predictions, MAE, MSE




In [43]:
import sys
sys.path.append('..')
import models_utils

In [44]:
municipals = []
with (open("../municipals.txt", "r") as f):
    for line in f:
        municipals.append(line.strip())
municipals

['Ajuy',
 'Alimodian',
 'Anilao',
 'Badiangan',
 'Balasan',
 'Banate',
 'Barotac Nuevo',
 'Barotac Viejo',
 'Batad',
 'Bingawan',
 'Cabatuan',
 'Calinog',
 'Carles',
 'Passi City',
 'Concepcion',
 'Dingle',
 'Duenas',
 'Dumangas',
 'Estancia',
 'Guimbal',
 'Iloilo City',
 'Igbaras',
 'Janiuay',
 'Lambunao',
 'Leganes',
 'Lemery',
 'Leon',
 'Maasin',
 'Miagao',
 'Mina',
 'New Lucena',
 'Oton',
 'Pavia',
 'Pototan',
 'San Dionisio',
 'San Enrique',
 'San Joaquin',
 'San Rafael',
 'Santa Barbara',
 'Sara',
 'Tigbauan',
 'Tubungan',
 'Zarraga']

In [45]:
n_weeks_ahead = [1,2,3,4,8,12]
for municipal in municipals:
    for n in n_weeks_ahead:
        municipal_df = pd.read_csv(f"../../data/Merged Data/{municipal}_merged.csv")
        municipal_df["Year-Week"] = pd.to_datetime(municipal_df["Year-Week"])
        print(municipal_df.head())
        predicted, MAE, MSE = train_ARIMA_n_ahead(municipal_df, target="Cases", n_ahead = n, date_col="Year-Week", year_test_start="2023-01-01", year_test_end="2024-12-31")
        models_utils.save_data(municipal, n, MSE, MAE, predicted, municipal_df)

   Year-Week  Temperature   Humidity  Precipitation  Cases
0 2014-01-06    27.099500  84.220718      39.099998    0.0
1 2014-01-13    25.875213  86.147616      83.799999    0.0
2 2014-01-20    24.560928  82.840939     154.400004    0.0
3 2014-01-27    24.875214  75.478272      17.400001    0.0
4 2014-02-03    25.760928  76.548491      21.899999    0.0
            Cases
Year-Week        
2014-01-12    0.0
2014-01-19    0.0
2014-01-26    0.0
2014-02-02    0.0
2014-02-09    0.0


  predictions.append(int(round(forecast[-1])))  # take the last step
  predictions.append(int(round(forecast[-1])))  # take the last step
  predictions.append(int(round(forecast[-1])))  # take the last step
  predictions.append(int(round(forecast[-1])))  # take the last step
  predictions.append(int(round(forecast[-1])))  # take the last step
  predictions.append(int(round(forecast[-1])))  # take the last step
  predictions.append(int(round(forecast[-1])))  # take the last step
  predictions.append(int(round(forecast[-1])))  # take the last step
  predictions.append(int(round(forecast[-1])))  # take the last step
  predictions.append(int(round(forecast[-1])))  # take the last step
  predictions.append(int(round(forecast[-1])))  # take the last step
  predictions.append(int(round(forecast[-1])))  # take the last step
  predictions.append(int(round(forecast[-1])))  # take the last step
  predictions.append(int(round(forecast[-1])))  # take the last step
  predictions.append(int(round(for

   Year-Week  Temperature   Humidity  Precipitation  Cases
0 2014-01-06    27.099500  84.220718      39.099998    0.0
1 2014-01-13    25.875213  86.147616      83.799999    0.0
2 2014-01-20    24.560928  82.840939     154.400004    0.0
3 2014-01-27    24.875214  75.478272      17.400001    0.0
4 2014-02-03    25.760928  76.548491      21.899999    0.0
            Cases
Year-Week        
2014-01-12    0.0
2014-01-19    0.0
2014-01-26    0.0
2014-02-02    0.0
2014-02-09    0.0


  predictions.append(int(round(forecast[-1])))  # take the last step
  predictions.append(int(round(forecast[-1])))  # take the last step
  predictions.append(int(round(forecast[-1])))  # take the last step
  predictions.append(int(round(forecast[-1])))  # take the last step
  predictions.append(int(round(forecast[-1])))  # take the last step
  predictions.append(int(round(forecast[-1])))  # take the last step
  predictions.append(int(round(forecast[-1])))  # take the last step
  predictions.append(int(round(forecast[-1])))  # take the last step
  predictions.append(int(round(forecast[-1])))  # take the last step
  predictions.append(int(round(forecast[-1])))  # take the last step
  predictions.append(int(round(forecast[-1])))  # take the last step
  predictions.append(int(round(forecast[-1])))  # take the last step
  predictions.append(int(round(forecast[-1])))  # take the last step
  predictions.append(int(round(forecast[-1])))  # take the last step
  predictions.append(int(round(for

   Year-Week  Temperature   Humidity  Precipitation  Cases
0 2014-01-06    27.099500  84.220718      39.099998    0.0
1 2014-01-13    25.875213  86.147616      83.799999    0.0
2 2014-01-20    24.560928  82.840939     154.400004    0.0
3 2014-01-27    24.875214  75.478272      17.400001    0.0
4 2014-02-03    25.760928  76.548491      21.899999    0.0
            Cases
Year-Week        
2014-01-12    0.0
2014-01-19    0.0
2014-01-26    0.0
2014-02-02    0.0
2014-02-09    0.0


  predictions.append(int(round(forecast[-1])))  # take the last step
  predictions.append(int(round(forecast[-1])))  # take the last step
  predictions.append(int(round(forecast[-1])))  # take the last step
  predictions.append(int(round(forecast[-1])))  # take the last step
  predictions.append(int(round(forecast[-1])))  # take the last step
  predictions.append(int(round(forecast[-1])))  # take the last step
  predictions.append(int(round(forecast[-1])))  # take the last step
  predictions.append(int(round(forecast[-1])))  # take the last step
  predictions.append(int(round(forecast[-1])))  # take the last step
  predictions.append(int(round(forecast[-1])))  # take the last step
  predictions.append(int(round(forecast[-1])))  # take the last step
  predictions.append(int(round(forecast[-1])))  # take the last step
  predictions.append(int(round(forecast[-1])))  # take the last step
  predictions.append(int(round(forecast[-1])))  # take the last step
  predictions.append(int(round(for

ValueError: array length 103 does not match index length 104

In [36]:
len(predicted)

105