In [9]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import numpy as np
import pandas as pd


In [10]:
# Load and preprocess the data
data = pd.read_csv('/content/AirQualityUCI.csv', sep=';', decimal=',')
data = data.dropna(subset=['CO(GT)'])  # Drop rows with missing target

In [11]:
# Select features and target
features = data.iloc[:, :-2].select_dtypes(include=[np.number]).fillna(0).values
target = data['CO(GT)'].values


In [12]:
# Normalize features
scaler = StandardScaler()
features = scaler.fit_transform(features)

In [13]:
# Train-test split
X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.2, random_state=42)

class AirQualityDataset(Dataset):
    def __init__(self, X, y):
        self.X = torch.tensor(X, dtype=torch.float32)
        self.y = torch.tensor(y, dtype=torch.float32)

    def __len__(self):
        return len(self.y)

    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]

train_dataset = AirQualityDataset(X_train, y_train)
test_dataset = AirQualityDataset(X_test, y_test)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

In [14]:
# Define RNN model
class RNNModel(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, pooling):
        super(RNNModel, self).__init__()
        self.rnn = nn.RNN(input_size, hidden_size, num_layers, batch_first=True)
        self.pooling = pooling
        self.fc = nn.Linear(hidden_size, 1)

    def forward(self, x):
        out, _ = self.rnn(x)
        if self.pooling == "max":
            out, _ = torch.max(out, dim=1)
        elif self.pooling == "avg":
            out = torch.mean(out, dim=1)
        out = self.fc(out)
        return out.squeeze()

In [15]:
# Training and evaluation function
def train_and_evaluate(model, optimizer, criterion, train_loader, test_loader, scheduler=None, early_stopper=None, epochs=100):
    epoch_list = []
    train_losses, val_losses = [], []

    for epoch in range(epochs):
        model.train()
        running_loss = 0.0
        for X_batch, y_batch in train_loader:
            optimizer.zero_grad()
            y_pred = model(X_batch.unsqueeze(1))
            loss = criterion(y_pred, y_batch)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()

        train_loss = running_loss / len(train_loader)
        train_losses.append(train_loss)

        model.eval()
        val_loss = 0.0
        with torch.no_grad():
            for X_batch, y_batch in test_loader:
                y_pred = model(X_batch.unsqueeze(1))
                loss = criterion(y_pred, y_batch)
                val_loss += loss.item()

        val_loss /= len(test_loader)
        val_losses.append(val_loss)

        epoch_list.append(epoch + 1)

        if scheduler:
            scheduler.step(val_loss)

        if early_stopper and early_stopper.step(val_loss):
            break

        print(f"Epoch {epoch+1}/{epochs}, Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}")

    return epoch_list, train_losses, val_losses

In [16]:
# Experiment configurations
hidden_sizes = [10, 50, 100]
pooling_methods = ["max", "avg"]
optimizers = {"SGD": optim.SGD, "RMSProp": optim.RMSprop, "Adam": optim.Adam}
epochs_list = [5, 50, 100, 250, 350]

In [17]:
# Hidden size experiment
hidden_size_results = []
for hidden_size in hidden_sizes:
    print(f"Testing hidden_size={hidden_size}")
    model = RNNModel(input_size=X_train.shape[1], hidden_size=hidden_size, num_layers=1, pooling="avg")
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=0.01)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', patience=5, verbose=True)

    epoch_list, train_losses, val_losses = train_and_evaluate(
        model, optimizer, criterion, train_loader, test_loader, scheduler=scheduler, epochs=50
    )
    hidden_size_results.append({
        "hidden_size": hidden_size,
        "epoch_list": epoch_list,
        "train_losses": train_losses,
        "val_losses": val_losses
    })

Testing hidden_size=10




Epoch 1/50, Train Loss: 6404.9417, Val Loss: 5354.5866
Epoch 2/50, Train Loss: 4838.5961, Val Loss: 4039.4716
Epoch 3/50, Train Loss: 3628.0711, Val Loss: 3002.8350
Epoch 4/50, Train Loss: 2676.6573, Val Loss: 2192.0042
Epoch 5/50, Train Loss: 1930.5843, Val Loss: 1558.5019
Epoch 6/50, Train Loss: 1358.3764, Val Loss: 1080.7900
Epoch 7/50, Train Loss: 928.8834, Val Loss: 728.3989
Epoch 8/50, Train Loss: 616.5292, Val Loss: 475.6216
Epoch 9/50, Train Loss: 397.8060, Val Loss: 303.1996
Epoch 10/50, Train Loss: 251.9330, Val Loss: 189.6484
Epoch 11/50, Train Loss: 157.9030, Val Loss: 120.3522
Epoch 12/50, Train Loss: 101.9843, Val Loss: 80.3320
Epoch 13/50, Train Loss: 70.7303, Val Loss: 58.1701
Epoch 14/50, Train Loss: 54.7793, Val Loss: 49.1788
Epoch 15/50, Train Loss: 46.7182, Val Loss: 42.4833
Epoch 16/50, Train Loss: 43.0471, Val Loss: 39.9090
Epoch 17/50, Train Loss: 41.1366, Val Loss: 38.5858
Epoch 18/50, Train Loss: 39.7697, Val Loss: 37.6949
Epoch 19/50, Train Loss: 38.8388, Val 

In [18]:
# Pooling method experiment
pooling_results = []
for pooling in pooling_methods:
    print(f"Testing pooling method={pooling}")
    model = RNNModel(input_size=X_train.shape[1], hidden_size=50, num_layers=1, pooling=pooling)
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=0.01)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', patience=5, verbose=True)

    epoch_list, train_losses, val_losses = train_and_evaluate(
        model, optimizer, criterion, train_loader, test_loader, scheduler=scheduler, epochs=50
    )
    pooling_results.append({
        "pooling": pooling,
        "epoch_list": epoch_list,
        "train_losses": train_losses,
        "val_losses": val_losses
    })

Testing pooling method=max
Epoch 1/50, Train Loss: 3934.9736, Val Loss: 1530.3412
Epoch 2/50, Train Loss: 704.3219, Val Loss: 197.4912
Epoch 3/50, Train Loss: 79.0444, Val Loss: 20.2773
Epoch 4/50, Train Loss: 12.5709, Val Loss: 10.1186
Epoch 5/50, Train Loss: 8.5653, Val Loss: 7.8905
Epoch 6/50, Train Loss: 8.1210, Val Loss: 7.2380
Epoch 7/50, Train Loss: 7.4897, Val Loss: 6.9480
Epoch 8/50, Train Loss: 7.0869, Val Loss: 6.6053
Epoch 9/50, Train Loss: 6.7362, Val Loss: 6.0403
Epoch 10/50, Train Loss: 6.0912, Val Loss: 5.5766
Epoch 11/50, Train Loss: 5.5995, Val Loss: 5.0378
Epoch 12/50, Train Loss: 5.3579, Val Loss: 4.5105
Epoch 13/50, Train Loss: 4.4828, Val Loss: 4.0741
Epoch 14/50, Train Loss: 3.7708, Val Loss: 3.1066
Epoch 15/50, Train Loss: 2.8079, Val Loss: 2.2705
Epoch 16/50, Train Loss: 2.1089, Val Loss: 1.7462
Epoch 17/50, Train Loss: 1.6032, Val Loss: 1.3149
Epoch 18/50, Train Loss: 1.1446, Val Loss: 0.9650
Epoch 19/50, Train Loss: 0.9059, Val Loss: 0.5335
Epoch 20/50, Train

In [19]:
# Optimizer experiment
optimizer_results = []
for opt_name, opt_class in optimizers.items():
    print(f"Testing optimizer={opt_name}")
    model = RNNModel(input_size=X_train.shape[1], hidden_size=50, num_layers=1, pooling="avg")
    criterion = nn.MSELoss()
    optimizer = opt_class(model.parameters(), lr=0.01)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', patience=5, verbose=True)

    epoch_list, train_losses, val_losses = train_and_evaluate(
        model, optimizer, criterion, train_loader, test_loader, scheduler=scheduler, epochs=50
    )
    optimizer_results.append({
        "optimizer": opt_name,
        "epoch_list": epoch_list,
        "train_losses": train_losses,
        "val_losses": val_losses
    })

Testing optimizer=SGD
Epoch 1/50, Train Loss: 382.1930, Val Loss: 102.3539
Epoch 2/50, Train Loss: 85.3499, Val Loss: 62.7287
Epoch 3/50, Train Loss: 59.1559, Val Loss: 45.5655
Epoch 4/50, Train Loss: 41.3499, Val Loss: 34.6714
Epoch 5/50, Train Loss: 65.0658, Val Loss: 59.3797
Epoch 6/50, Train Loss: 53.7081, Val Loss: 45.5061
Epoch 7/50, Train Loss: 43.2222, Val Loss: 36.7015
Epoch 8/50, Train Loss: 35.1952, Val Loss: 30.7316
Epoch 9/50, Train Loss: 28.4441, Val Loss: 24.7603
Epoch 10/50, Train Loss: 23.0567, Val Loss: 19.9314
Epoch 11/50, Train Loss: 66.3637, Val Loss: 66.8327
Epoch 12/50, Train Loss: 81.0056, Val Loss: 60.4122
Epoch 13/50, Train Loss: 77.0846, Val Loss: 55.3441
Epoch 14/50, Train Loss: 153.2407, Val Loss: 152.9265
Epoch 15/50, Train Loss: 161.5361, Val Loss: 146.2602
Epoch 16/50, Train Loss: 151.3548, Val Loss: 143.1047
Epoch 17/50, Train Loss: 147.7077, Val Loss: 142.5555
Epoch 18/50, Train Loss: 147.2846, Val Loss: 142.2111
Epoch 19/50, Train Loss: 147.1318, Val 

In [20]:
# Epoch experiment
epoch_results = []
for epochs in epochs_list:
    print(f"Testing epochs={epochs}")
    model = RNNModel(input_size=X_train.shape[1], hidden_size=50, num_layers=1, pooling="avg")
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=0.01)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', patience=5, verbose=True)

    epoch_list, train_losses, val_losses = train_and_evaluate(
        model, optimizer, criterion, train_loader, test_loader, scheduler=scheduler, epochs=epochs
    )
    epoch_results.append({
        "epochs": epochs,
        "epoch_list": epoch_list,
        "train_losses": train_losses,
        "val_losses": val_losses
    })

Testing epochs=5
Epoch 1/5, Train Loss: 4057.7416, Val Loss: 1553.4634
Epoch 2/5, Train Loss: 716.0662, Val Loss: 199.1928
Epoch 3/5, Train Loss: 77.5697, Val Loss: 17.6646
Epoch 4/5, Train Loss: 10.0251, Val Loss: 6.4084
Epoch 5/5, Train Loss: 6.3561, Val Loss: 5.6322
Testing epochs=50
Epoch 1/50, Train Loss: 3892.2973, Val Loss: 1455.6097
Epoch 2/50, Train Loss: 661.5795, Val Loss: 180.2694
Epoch 3/50, Train Loss: 73.0176, Val Loss: 18.7480
Epoch 4/50, Train Loss: 12.1894, Val Loss: 8.6804
Epoch 5/50, Train Loss: 8.6521, Val Loss: 8.1343
Epoch 6/50, Train Loss: 8.0799, Val Loss: 7.6184
Epoch 7/50, Train Loss: 7.6761, Val Loss: 7.2493
Epoch 8/50, Train Loss: 7.3088, Val Loss: 7.5940
Epoch 9/50, Train Loss: 7.0917, Val Loss: 6.3246
Epoch 10/50, Train Loss: 6.3759, Val Loss: 5.6895
Epoch 11/50, Train Loss: 5.8434, Val Loss: 5.5222
Epoch 12/50, Train Loss: 5.3682, Val Loss: 5.0049
Epoch 13/50, Train Loss: 4.9212, Val Loss: 4.4467
Epoch 14/50, Train Loss: 4.0939, Val Loss: 3.5560
Epoch 15

In [21]:
# Save results
hidden_size_df = pd.DataFrame(hidden_size_results)
hidden_size_df.to_csv("hidden_size_experiment.csv", index=False)

pooling_df = pd.DataFrame(pooling_results)
pooling_df.to_csv("pooling_experiment.csv", index=False)

optimizer_df = pd.DataFrame(optimizer_results)
optimizer_df.to_csv("optimizer_experiment.csv", index=False)

epoch_df = pd.DataFrame(epoch_results)
epoch_df.to_csv("epoch_experiment.csv", index=False)
