In [1]:
import pandas as pd


In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.metrics import mean_absolute_error, mean_squared_error
import warnings
warnings.filterwarnings("ignore")

class Model(nn.Module):
    """Fully connected feed-forward network.

    Each hidden layer is ``Linear -> ReLU -> Dropout``; a final ``Linear``
    maps the last hidden width to ``output_size`` with no activation.

    Args:
        input_size: Number of input features.
        hidden_sizes: Width of each hidden layer, in order. Falls back to
            ``[48, 32, 19]`` when ``None``.
        output_size: Number of output units.
        dropouts: Dropout probability for each hidden layer. Must supply at
            least one value per hidden layer; surplus values are ignored.
            ``None`` disables dropout (probability 0 everywhere).

    Raises:
        ValueError: If ``hidden_sizes`` is empty or ``dropouts`` has fewer
            entries than ``hidden_sizes``.
    """

    def __init__(self, input_size, hidden_sizes=None, output_size=1, dropouts=None):
        super().__init__()
        if hidden_sizes is None:
            hidden_sizes = [48, 32, 19]
        hidden_sizes = list(hidden_sizes)
        if not hidden_sizes:
            raise ValueError("hidden_sizes must contain at least one layer width")

        if dropouts is None:
            dropouts = [0.0] * len(hidden_sizes)
        dropouts = list(dropouts)
        if len(dropouts) < len(hidden_sizes):
            raise ValueError(
                f"expected at least {len(hidden_sizes)} dropout rates, got {len(dropouts)}"
            )

        # Consecutive pairs of widths give the (in, out) size of each hidden Linear.
        widths = [input_size] + hidden_sizes
        self.layers = nn.ModuleList(
            [nn.Linear(fan_in, fan_out) for fan_in, fan_out in zip(widths[:-1], widths[1:])]
        )
        self.output = nn.Linear(hidden_sizes[-1], output_size)
        self.dropouts = nn.ModuleList([nn.Dropout(dropout) for dropout in dropouts])
        self.relu = nn.ReLU()

    def forward(self, x):
        """Run ``x`` through every hidden layer, then the output layer."""
        # zip stops at the last hidden layer; __init__ guarantees there is a
        # dropout module for each of them, so no layer is skipped.
        for layer, dropout in zip(self.layers, self.dropouts):
            x = dropout(self.relu(layer(x)))
        return self.output(x)

In [3]:
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.metrics import mean_absolute_error, mean_squared_error

def train_ANN(data, features, target, date_col, n_ahead, year_test_start, year_test_end,
              num_epochs=100, batch_size=32, lr=0.001, seed=None):
    """Train the feed-forward ``Model`` on lagged features and score it on a held-out date range.

    Lagged copies of every feature (lags 1-4) and of the target (lags
    ``n_ahead`` .. 11) are added as extra input columns, rows made incomplete
    by the shifting are dropped, and the frame is split by date into a
    training part (before ``year_test_start``) and a test part
    (``year_test_start`` inclusive to ``year_test_end`` exclusive).

    Args:
        data: DataFrame containing ``features``, ``target`` and ``date_col``.
            It is not modified; the function works on a copy.
        features: Names of the input feature columns.
        target: Name of the column to predict.
        date_col: Name of the date column (anything ``pd.to_datetime`` accepts).
        n_ahead: Smallest lag of the target that is used as an input.
        year_test_start: First date (inclusive) of the test period.
        year_test_end: End date (exclusive) of the test period.
        num_epochs: Number of passes over the training set.
        batch_size: Mini-batch size; batches are taken in row order (no shuffling).
        lr: Adam learning rate.
        seed: If given, passed to ``torch.manual_seed`` before the model is
            built so weight initialisation and dropout are repeatable.

    Returns:
        Tuple ``(predict_data, MAE, MSE)``: the test-period predictions rounded
        to ``int``, and the mean absolute / mean squared error of those rounded
        predictions against the actual target values.

    Raises:
        ValueError: If the training or the test split has no rows after lagging.
    """
    if seed is not None:
        torch.manual_seed(seed)
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')  # use GPU when available

    # Work on an explicit copy: assigning new columns to a column-slice of the
    # caller's frame is chained assignment (SettingWithCopyWarning).
    rel_col = list(features) + [target, date_col]
    data = data[rel_col].copy()
    data[date_col] = pd.to_datetime(data[date_col])

    # NOTE(review): shift() lags by row position, so this presumably expects
    # rows in chronological order at a regular spacing - confirm against the data.
    env_lags = [1, 2, 3, 4]
    # Target lags n_ahead..11. For n_ahead >= 12 the original range(n_ahead, 12)
    # was empty, silently dropping every lagged-target input; keep at least the
    # n_ahead lag in that case.
    cases_lag = range(n_ahead, max(12, n_ahead + 1))

    # NOTE(review): the unlagged feature columns and feature lags 1-4 are model
    # inputs for every n_ahead; confirm that is intended when n_ahead > 1.
    for lag in env_lags:
        for feature in features:
            data[f'{feature}_lag_{lag}'] = data[feature].shift(lag)

    for lag in cases_lag:
        data[f'{target}_lag_{lag}'] = data[target].shift(lag)

    # Shifting leaves NaNs in the leading rows; drop every incomplete row.
    data = data.dropna()

    # Date-based split: train strictly before the test window.
    train_data = data[data[date_col] < year_test_start]
    test_data = data[(data[date_col] >= year_test_start) & (data[date_col] < year_test_end)]
    if train_data.empty:
        raise ValueError(f"no training rows before {year_test_start} after lagging")
    if test_data.empty:
        raise ValueError(f"no test rows in [{year_test_start}, {year_test_end}) after lagging")

    # Everything except the target and the date is a model input.
    X_train = torch.tensor(train_data.drop(columns=[target, date_col]).values, dtype=torch.float32).to(device)
    y_train = torch.tensor(train_data[target].values, dtype=torch.float32).to(device)
    X_test = torch.tensor(test_data.drop(columns=[target, date_col]).values, dtype=torch.float32).to(device)
    y_test = torch.tensor(test_data[target].values, dtype=torch.float32).to(device)

    train_loader = DataLoader(TensorDataset(X_train, y_train), batch_size=batch_size, shuffle=False)

    # Build the network
    input_size = X_train.shape[1]
    hidden_sizes = [48, 32, 18]
    dropouts = [0.3, 0.2, 0.1]
    output_size = 1
    model = Model(input_size, hidden_sizes, output_size, dropouts)
    model.to(device)

    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    loss_fn = nn.MSELoss()

    model.train()
    for epoch in range(num_epochs):
        for batch_X, batch_y in train_loader:
            optimizer.zero_grad()
            # squeeze(-1) removes only the output dimension, so a final batch of
            # one row stays shape (1,) and matches batch_y instead of becoming 0-d.
            predictions = model(batch_X).squeeze(-1)
            loss = loss_fn(predictions, batch_y)
            loss.backward()
            optimizer.step()

    # Evaluate on the held-out period
    model.eval()
    with torch.no_grad():
        # squeeze(-1) keeps a 1-D result even for a single test row, which the
        # list comprehension below needs in order to iterate.
        predictions = model(X_test).squeeze(-1).cpu().numpy()
    actual_data = y_test.cpu().numpy()

    # Predictions are reported as whole numbers; the metrics are computed on the rounded values.
    predict_data = [int(round(x)) for x in predictions]
    MAE = mean_absolute_error(actual_data, predict_data)
    MSE = mean_squared_error(actual_data, predict_data)
    return predict_data, MAE, MSE

In [4]:
import sys

# Put the parent directory on the import path so `models_utils` can be imported.
# Guarded so re-running this cell does not append a duplicate entry each time.
if '..' not in sys.path:
    sys.path.append('..')
import models_utils

# Disabled single-municipality example of calling train_ANN:
#Ajuy = pd.read_csv("../../data/Merged Data/Ajuy_merged.csv")
#Ajuy["Year-Week"] = pd.to_datetime(Ajuy["Year-Week"])
#predicted, MAE, MSE = train_ANN(Ajuy, features=["Temperature", "Precipitation", "Humidity"], target="Cases", date_col="Year-Week", n_ahead=1, year_test_start="2023-01-01", year_test_end="2024-12-31")

In [5]:
# Read the municipality names, one per line. Surrounding whitespace is stripped
# and blank lines are skipped, so a trailing newline in the file cannot produce
# an empty name (which would later resolve to the path "_merged.csv").
with open("../municipals.txt", "r", encoding="utf-8") as f:
    municipals = [name for line in f if (name := line.strip())]
municipals

['Ajuy',
 'Alimodian',
 'Anilao',
 'Badiangan',
 'Balasan',
 'Banate',
 'Barotac Nuevo',
 'Barotac Viejo',
 'Batad',
 'Bingawan',
 'Cabatuan',
 'Calinog',
 'Carles',
 'Passi City',
 'Concepcion',
 'Dingle',
 'Duenas',
 'Dumangas',
 'Estancia',
 'Guimbal',
 'Iloilo City',
 'Igbaras',
 'Janiuay',
 'Lambunao',
 'Leganes',
 'Lemery',
 'Leon',
 'Maasin',
 'Miagao',
 'Mina',
 'New Lucena',
 'Oton',
 'Pavia',
 'Pototan',
 'San Dionisio',
 'San Enrique',
 'San Joaquin',
 'San Rafael',
 'Santa Barbara',
 'Sara',
 'Tigbauan',
 'Tubungan',
 'Zarraga']

In [7]:
# Values passed as n_ahead to train_ANN; every municipality is trained once per value.
n_weeks_ahead = [1,2,3,4,8,12]
for municipal in municipals:
    # The merged CSV does not depend on n, so read and parse it once per
    # municipality instead of once per (municipality, n) pair.
    municipal_df = pd.read_csv(f"../../data/Merged Data/{municipal}_merged.csv")
    municipal_df["Year-Week"] = pd.to_datetime(municipal_df["Year-Week"])
    for n in n_weeks_ahead:
        # train_ANN selects its columns into a new frame before adding lag
        # columns, so the shared municipal_df is not modified by the call.
        predicted, MAE, MSE = train_ANN(municipal_df, features=["Temperature", "Precipitation", "Humidity"], target="Cases", date_col="Year-Week", n_ahead=n, year_test_start="2023-01-01", year_test_end="2024-12-31")
        # save_data is defined outside this notebook; pass it a copy so each call
        # still receives an untouched frame, as it did when the CSV was re-read
        # on every iteration.
        models_utils.save_data(municipal, n, MSE, MAE, predicted, municipal_df.copy())

NameError: name 'Ajuy' is not defined