In [14]:
import torch
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader
from sklearn.metrics import mean_absolute_error, mean_squared_error
import pandas as pd

class RNNModel(nn.Module):
    """Recurrent regressor: an ``nn.RNN`` encoder followed by a one-unit linear head.

    Args:
        input_size: Number of features supplied at every time step.
        hidden_size: Width of the recurrent hidden state.
        num_layers: Number of stacked recurrent layers (defaults to 1).
    """

    def __init__(self, input_size, hidden_size, num_layers=1):
        super().__init__()
        # batch_first=True -> inputs are laid out as (batch, seq_len, input_size).
        self.rnn = nn.RNN(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
        )
        self.fc = nn.Linear(hidden_size, 1)

    def forward(self, x):
        """Return a ``(batch, 1)`` prediction computed from the final time step of ``x``."""
        sequence_output, _hidden = self.rnn(x)
        # Only the last time step of the top layer feeds the regression head.
        final_step = sequence_output[:, -1, :]
        return self.fc(final_step)

def train_RNN(data, features, target, date_col, n_ahead, year_test_start, year_test_end,
              hidden_size=64, epochs=100, batch_size=32, lr=0.001, seed=None):
    """Train an ``RNNModel`` on lagged features and score it on a held-out date window.

    The frame is reduced to ``features + [target, date_col]``, lagged copies of the
    feature columns (lags 1-4) and of the target (lags ``n_ahead`` .. 11) are added,
    rows containing NaN are dropped, and the result is split by date. Each row is
    fed to the network as a sequence of length 1.

    Args:
        data: DataFrame containing ``features``, ``target`` and ``date_col``.
            The caller's frame is not modified.
        features: List of feature column names.
        target: Name of the column to predict.
        date_col: Name of the date column (parsed with ``pd.to_datetime``).
        n_ahead: Smallest target lag used as an input.
        year_test_start: Inclusive lower bound of the test window; rows dated
            before it form the training set.
        year_test_end: Exclusive upper bound of the test window.
        hidden_size: Hidden-state width of the RNN.
        epochs: Number of passes over the training set.
        batch_size: Mini-batch size used by the training loader.
        lr: Adam learning rate.
        seed: Optional integer passed to ``torch.manual_seed`` before the model
            is created, for repeatable runs. ``None`` leaves the RNG untouched.

    Returns:
        Tuple ``(predict_data, MAE, MSE)``: the test predictions rounded to ints,
        and the mean absolute / mean squared error of those rounded predictions.

    Raises:
        ValueError: If the training or the test window contains no rows after
            lagging and NaN removal.
    """
    if seed is not None:
        torch.manual_seed(seed)

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # Take an explicit copy of the relevant columns: assigning new columns to a
    # frame derived by selection triggers SettingWithCopyWarning otherwise.
    frame = data[features + [target] + [date_col]].copy()
    frame[date_col] = pd.to_datetime(frame[date_col])

    # Create lag features
    env_lags = [1, 2, 3, 4]
    # NOTE(review): this range is empty when n_ahead >= 12, so no target lags are
    # added in that case - confirm that is intended.
    cases_lag = range(n_ahead, 12)

    for lag in env_lags:
        for feature in features:
            frame[f'{feature}_lag_{lag}'] = frame[feature].shift(lag)

    for lag in cases_lag:
        frame[f'{target}_lag_{lag}'] = frame[target].shift(lag)

    frame = frame.dropna()

    # Split data by date: everything before the test window is training data.
    train_data = frame[frame[date_col] < year_test_start]
    test_data = frame[(frame[date_col] >= year_test_start) & (frame[date_col] < year_test_end)]

    if train_data.empty:
        raise ValueError(f"No training rows before {year_test_start!r} after lagging and dropping NaN rows")
    if test_data.empty:
        raise ValueError(f"No test rows in [{year_test_start!r}, {year_test_end!r}) after lagging and dropping NaN rows")

    # NOTE(review): the unlagged feature columns stay in X alongside their lags;
    # verify they are available at prediction time for the chosen horizon.
    X_train = train_data.drop(columns=[target, date_col])
    y_train = train_data[target]
    X_test = test_data.drop(columns=[target, date_col])
    y_test = test_data[target]

    # Reshape inputs to [batch_size, sequence_length, input_size];
    # each row is treated as a "sequence" of length 1.
    X_train_tensor = torch.tensor(X_train.values, dtype=torch.float32).unsqueeze(1).to(device)
    y_train_tensor = torch.tensor(y_train.values, dtype=torch.float32).to(device)
    X_test_tensor = torch.tensor(X_test.values, dtype=torch.float32).unsqueeze(1).to(device)
    y_test_tensor = torch.tensor(y_test.values, dtype=torch.float32).to(device)

    # Dataset and loader (order is preserved: shuffle=False)
    train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=False)

    # Model
    input_size = X_train_tensor.shape[2]
    model = RNNModel(input_size=input_size, hidden_size=hidden_size).to(device)

    # Optimizer and loss
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    loss_fn = nn.MSELoss()

    # Training loop
    model.train()
    for _epoch in range(epochs):
        for batch_X, batch_y in train_loader:
            optimizer.zero_grad()
            # squeeze(-1) drops only the output dimension; a bare squeeze() would
            # also drop the batch dimension of a size-1 batch and mismatch batch_y.
            predictions = model(batch_X).squeeze(-1)
            loss = loss_fn(predictions, batch_y)
            loss.backward()
            optimizer.step()

    # Evaluation
    model.eval()
    with torch.no_grad():
        # Keep the result 1-D even for a single test row so it stays iterable.
        predictions = model(X_test_tensor).squeeze(-1).cpu().numpy()
        actual = y_test_tensor.cpu().numpy()

    # Round predictions to integers
    predict_data = [int(round(x)) for x in predictions]

    # Compute metrics on the rounded predictions
    MAE = mean_absolute_error(actual, predict_data)
    MSE = mean_squared_error(actual, predict_data)

    return predict_data, MAE, MSE


In [15]:
import sys
# Add the parent directory to the module search path so the import below can
# resolve; models_utils is presumably a project-local module there - TODO confirm.
sys.path.append('..')
import models_utils

In [16]:
# Load the municipality names, one per line. Surrounding whitespace is stripped
# and blank lines are skipped so they never become an empty municipality name.
with open("../municipals.txt", "r") as f:
    municipals = [name for name in (line.strip() for line in f) if name]
municipals

['Ajuy',
 'Alimodian',
 'Anilao',
 'Badiangan',
 'Balasan',
 'Banate',
 'Barotac Nuevo',
 'Barotac Viejo',
 'Batad',
 'Bingawan',
 'Cabatuan',
 'Calinog',
 'Carles',
 'Passi City',
 'Concepcion',
 'Dingle',
 'Duenas',
 'Dumangas',
 'Estancia',
 'Guimbal',
 'Iloilo City',
 'Igbaras',
 'Janiuay',
 'Lambunao',
 'Leganes',
 'Lemery',
 'Leon',
 'Maasin',
 'Miagao',
 'Mina',
 'New Lucena',
 'Oton',
 'Pavia',
 'Pototan',
 'San Dionisio',
 'San Enrique',
 'San Joaquin',
 'San Rafael',
 'Santa Barbara',
 'Sara',
 'Tigbauan',
 'Tubungan',
 'Zarraga']

In [17]:
# Forecast horizons passed to train_RNN as n_ahead; one model is trained per
# (municipality, horizon) pair.
n_weeks_ahead = [1,2,3,4,8,12]
for municipal in municipals:
    # The merged CSV does not depend on the horizon, so read and parse it once
    # per municipality instead of once per (municipality, horizon) pair.
    municipal_df = pd.read_csv(f"../../data/Merged Data/{municipal}_merged.csv")
    municipal_df["Year-Week"] = pd.to_datetime(municipal_df["Year-Week"])
    for n in n_weeks_ahead:
        predicted, MAE, MSE = train_RNN(
            municipal_df,
            features=["Temperature", "Precipitation", "Humidity"],
            target="Cases",
            date_col="Year-Week",
            n_ahead=n,
            year_test_start="2023-01-01",
            year_test_end="2024-12-31",
        )
        # save_data is not visible here; pass a fresh copy so every call receives
        # an unmodified frame, as it did when the CSV was re-read each iteration.
        models_utils.save_data(municipal, n, MSE, MAE, predicted, municipal_df.copy())