In [1]:
# Import necessary libraries
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt

In [2]:
# Load and clean the dataset
file_path = '/content/AirQualityUCI.csv'
data = pd.read_csv(file_path, delimiter=';')

In [3]:
# Replace commas with dots and convert to numeric
for col in data.columns:
    data[col] = data[col].replace({',': '.'}, regex=True).replace({'-200': np.nan})
    try:
        data[col] = pd.to_numeric(data[col])
    except ValueError:
        pass

In [4]:
# Drop unnecessary columns
data = data.drop(columns=[col for col in data.columns if 'Unnamed' in col])
data = data.dropna()


In [5]:
# Select features and target
features = data.iloc[:, 2:].values
target = data.iloc[:, 2].values

In [6]:
# Train-test split
X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.2, random_state=42)

In [7]:
# Normalize features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [8]:
# Custom Dataset class
class TimeSeriesDataset(Dataset):
    def __init__(self, data, targets, sequence_length):
        self.data = data
        self.targets = targets
        self.sequence_length = sequence_length

    def __len__(self):
        return len(self.data) - self.sequence_length

    def __getitem__(self, idx):
        x = self.data[idx:idx + self.sequence_length]
        y = self.targets[idx + self.sequence_length]
        return torch.tensor(x, dtype=torch.float32), torch.tensor(y, dtype=torch.float32)

In [9]:
# RNN Model Class
class RNNModel(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, num_layers=1):
        super(RNNModel, self).__init__()
        self.rnn = nn.RNN(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        out, _ = self.rnn(x)
        out = self.fc(out[:, -1, :])  # Use the last hidden state
        return out

In [10]:
# Training and evaluation logic
def train_model(model, dataloader, criterion, optimizer, num_epochs, scheduler=None):
    model.train()
    train_losses = []

    for epoch in range(num_epochs):
        epoch_loss = 0
        for inputs, targets in dataloader:
            inputs, targets = inputs.to(device), targets.to(device)

            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()

            epoch_loss += loss.item()

        epoch_loss /= len(dataloader)
        train_losses.append(epoch_loss)

        if scheduler:
            scheduler.step(epoch_loss)

        print(f"Epoch {epoch + 1}/{num_epochs}, Loss: {epoch_loss:.4f}")

    return train_losses

In [11]:
# Evaluation function
def evaluate_model(model, dataloader):
    model.eval()
    predictions, actuals = [], []

    with torch.no_grad():
        for inputs, targets in dataloader:
            inputs, targets = inputs.to(device), targets.to(device)

            outputs = model(inputs)
            predictions.extend(outputs.cpu().numpy())
            actuals.extend(targets.cpu().numpy())

    return np.array(predictions), np.array(actuals)

In [12]:
# Hyperparameters
sequence_length = 10
hidden_sizes = [32, 64, 128]
pooling_methods = ['max', 'avg']
num_epochs_list = [5, 50, 100, 250, 350]
optimizers = {'SGD': torch.optim.SGD, 'RMSProp': torch.optim.RMSprop, 'Adam': torch.optim.Adam}


In [13]:
# Prepare datasets
train_dataset = TimeSeriesDataset(X_train, y_train, sequence_length)
test_dataset = TimeSeriesDataset(X_test, y_test, sequence_length)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

In [14]:
# Device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [15]:
# Experiment by Hidden Size
hidden_size_results = []
input_size = X_train.shape[1]
output_size = 1

for hidden_size in hidden_sizes:
    print(f"\nTraining with Hidden Size: {hidden_size}")
    model = RNNModel(input_size, hidden_size, output_size).to(device)
    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

    train_losses = train_model(model, train_loader, criterion, optimizer, num_epochs=50)
    predictions, actuals = evaluate_model(model, test_loader)

    hidden_size_results.append({
        'hidden_size': hidden_size,
        'train_losses': train_losses,
        'predictions': predictions,
        'actuals': actuals
    })



Training with Hidden Size: 32


  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)


Epoch 1/50, Loss: 544.0543
Epoch 2/50, Loss: 543.9733
Epoch 3/50, Loss: 543.8724
Epoch 4/50, Loss: 543.8733
Epoch 5/50, Loss: 549.7223
Epoch 6/50, Loss: 549.7887
Epoch 7/50, Loss: 543.8348
Epoch 8/50, Loss: 549.7265
Epoch 9/50, Loss: 543.8219
Epoch 10/50, Loss: 543.8522
Epoch 11/50, Loss: 543.8349
Epoch 12/50, Loss: 543.7846
Epoch 13/50, Loss: 555.6043
Epoch 14/50, Loss: 543.8800
Epoch 15/50, Loss: 543.7971
Epoch 16/50, Loss: 543.8803
Epoch 17/50, Loss: 549.7101
Epoch 18/50, Loss: 549.6907
Epoch 19/50, Loss: 543.8462
Epoch 20/50, Loss: 543.8107
Epoch 21/50, Loss: 549.7107
Epoch 22/50, Loss: 543.7966
Epoch 23/50, Loss: 543.8137
Epoch 24/50, Loss: 543.8315
Epoch 25/50, Loss: 543.7949
Epoch 26/50, Loss: 543.7853
Epoch 27/50, Loss: 543.8059
Epoch 28/50, Loss: 543.8106
Epoch 29/50, Loss: 543.7909
Epoch 30/50, Loss: 543.7954
Epoch 31/50, Loss: 543.7781
Epoch 32/50, Loss: 549.7109
Epoch 33/50, Loss: 543.7928
Epoch 34/50, Loss: 543.8376
Epoch 35/50, Loss: 543.8074
Epoch 36/50, Loss: 543.8014
E

In [16]:
# Experiment by Pooling Method
pooling_results = []
for pooling in pooling_methods:
    print(f"\nTraining with Pooling Method: {pooling}")
    model = RNNModel(input_size, 64, output_size).to(device)
    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

    train_losses = train_model(model, train_loader, criterion, optimizer, num_epochs=50)
    predictions, actuals = evaluate_model(model, test_loader)

    pooling_results.append({
        'pooling': pooling,
        'train_losses': train_losses,
        'predictions': predictions,
        'actuals': actuals
    })



Training with Pooling Method: max
Epoch 1/50, Loss: 544.1532
Epoch 2/50, Loss: 543.9783
Epoch 3/50, Loss: 543.9233
Epoch 4/50, Loss: 543.8471
Epoch 5/50, Loss: 543.9037
Epoch 6/50, Loss: 543.8684
Epoch 7/50, Loss: 543.8751
Epoch 8/50, Loss: 543.8215
Epoch 9/50, Loss: 543.8635
Epoch 10/50, Loss: 543.8293
Epoch 11/50, Loss: 543.8523
Epoch 12/50, Loss: 543.8376
Epoch 13/50, Loss: 543.8639
Epoch 14/50, Loss: 543.7895
Epoch 15/50, Loss: 543.8590
Epoch 16/50, Loss: 543.7986
Epoch 17/50, Loss: 543.8316
Epoch 18/50, Loss: 549.6956
Epoch 19/50, Loss: 549.7334
Epoch 20/50, Loss: 543.8167
Epoch 21/50, Loss: 549.7029
Epoch 22/50, Loss: 543.8029
Epoch 23/50, Loss: 555.5713
Epoch 24/50, Loss: 543.8219
Epoch 25/50, Loss: 543.8374
Epoch 26/50, Loss: 543.8331
Epoch 27/50, Loss: 543.8178
Epoch 28/50, Loss: 543.8316
Epoch 29/50, Loss: 549.7506
Epoch 30/50, Loss: 543.8209
Epoch 31/50, Loss: 543.8150
Epoch 32/50, Loss: 543.8023
Epoch 33/50, Loss: 549.6919
Epoch 34/50, Loss: 543.8438
Epoch 35/50, Loss: 543

In [17]:
# Experiment by Epochs
epoch_results = []
for num_epochs in num_epochs_list:
    print(f"\nTraining with Epochs: {num_epochs}")
    model = RNNModel(input_size, 64, output_size).to(device)
    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

    train_losses = train_model(model, train_loader, criterion, optimizer, num_epochs=num_epochs)
    predictions, actuals = evaluate_model(model, test_loader)

    epoch_results.append({
        'num_epochs': num_epochs,
        'train_losses': train_losses,
        'predictions': predictions,
        'actuals': actuals
    })


Training with Epochs: 5
Epoch 1/5, Loss: 543.9992
Epoch 2/5, Loss: 543.9518
Epoch 3/5, Loss: 543.9458
Epoch 4/5, Loss: 543.8332
Epoch 5/5, Loss: 549.7256

Training with Epochs: 50
Epoch 1/50, Loss: 549.9702
Epoch 2/50, Loss: 549.8713
Epoch 3/50, Loss: 549.7652
Epoch 4/50, Loss: 543.8565
Epoch 5/50, Loss: 543.9096
Epoch 6/50, Loss: 543.8428
Epoch 7/50, Loss: 543.8900
Epoch 8/50, Loss: 543.8818
Epoch 9/50, Loss: 543.8413
Epoch 10/50, Loss: 543.8485
Epoch 11/50, Loss: 543.8119
Epoch 12/50, Loss: 543.8598
Epoch 13/50, Loss: 543.8039
Epoch 14/50, Loss: 543.8503
Epoch 15/50, Loss: 543.8571
Epoch 16/50, Loss: 543.8786
Epoch 17/50, Loss: 543.8119
Epoch 18/50, Loss: 543.8171
Epoch 19/50, Loss: 543.8400
Epoch 20/50, Loss: 543.8110
Epoch 21/50, Loss: 543.7967
Epoch 22/50, Loss: 543.8134
Epoch 23/50, Loss: 543.8034
Epoch 24/50, Loss: 555.6179
Epoch 25/50, Loss: 543.8386
Epoch 26/50, Loss: 549.6946
Epoch 27/50, Loss: 543.8584
Epoch 28/50, Loss: 543.8086
Epoch 29/50, Loss: 549.7146
Epoch 30/50, Los

In [18]:
# Experiment by Optimizer
optimizer_results = []
for opt_name, opt_fn in optimizers.items():
    print(f"\nTraining with Optimizer: {opt_name}")
    model = RNNModel(input_size, 64, output_size).to(device)
    criterion = nn.MSELoss()
    optimizer = opt_fn(model.parameters(), lr=0.001)

    train_losses = train_model(model, train_loader, criterion, optimizer, num_epochs=50)
    predictions, actuals = evaluate_model(model, test_loader)

    optimizer_results.append({
        'optimizer': opt_name,
        'train_losses': train_losses,
        'predictions': predictions,
        'actuals': actuals
    })


Training with Optimizer: SGD
Epoch 1/50, Loss: 543.9439
Epoch 2/50, Loss: 543.8221
Epoch 3/50, Loss: 543.8209
Epoch 4/50, Loss: 543.8326
Epoch 5/50, Loss: 543.8026
Epoch 6/50, Loss: 543.8191
Epoch 7/50, Loss: 543.8366
Epoch 8/50, Loss: 549.6849
Epoch 9/50, Loss: 543.7990
Epoch 10/50, Loss: 543.8082
Epoch 11/50, Loss: 543.7829
Epoch 12/50, Loss: 543.7906
Epoch 13/50, Loss: 543.7772
Epoch 14/50, Loss: 543.8159
Epoch 15/50, Loss: 543.8043
Epoch 16/50, Loss: 543.8044
Epoch 17/50, Loss: 543.7818
Epoch 18/50, Loss: 543.8019
Epoch 19/50, Loss: 543.8155
Epoch 20/50, Loss: 543.7926
Epoch 21/50, Loss: 549.6802
Epoch 22/50, Loss: 543.7952
Epoch 23/50, Loss: 543.8071
Epoch 24/50, Loss: 543.7895
Epoch 25/50, Loss: 555.5855
Epoch 26/50, Loss: 543.7961
Epoch 27/50, Loss: 549.6793
Epoch 28/50, Loss: 543.7903
Epoch 29/50, Loss: 543.7933
Epoch 30/50, Loss: 543.7774
Epoch 31/50, Loss: 549.6931
Epoch 32/50, Loss: 543.7963
Epoch 33/50, Loss: 543.8018
Epoch 34/50, Loss: 543.7962
Epoch 35/50, Loss: 543.7861

In [20]:
# Save results
hidden_size_df = pd.DataFrame(hidden_size_results)
hidden_size_df.to_csv("hidden_size_experiment.csv", index=False)

pooling_df = pd.DataFrame(pooling_results)
pooling_df.to_csv("pooling_experiment.csv", index=False)

optimizer_df = pd.DataFrame(optimizer_results)
optimizer_df.to_csv("optimizer_experiment.csv", index=False)

epoch_df = pd.DataFrame(epoch_results)
epoch_df.to_csv("epoch_experiment.csv", index=False)

print("Separate experiments complete. Results saved.")


Separate experiments complete. Results saved.
