In [1]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt

In [2]:
# Read the Air Quality CSV: fields are separated by ';' and decimals are written with ','.
# NOTE(review): the UCI description of this dataset reportedly marks missing readings
# with the value -200; nothing here converts that sentinel to NaN — confirm whether it should.
csv_path = '/content/AirQualityUCI.csv'
data = pd.read_csv(csv_path, sep=';', decimal=',')

In [3]:
# Keep only numeric columns (this removes the date/time text columns), then drop
# every column that is entirely null.
# The previous positional slice (`iloc[:, :-2]`) removed "the last two columns"
# unconditionally, so re-running this cell silently stripped two more real columns
# each time. Dropping all-null columns by content is idempotent.
# NOTE(review): this assumes the two trailing columns are completely empty (and no
# other column is); verify against the CSV.
data = data.select_dtypes(include=[np.number]).dropna(axis=1, how='all')

In [4]:
# Discard every row that still contains at least one missing value
data = data.dropna(axis=0, how='any')

In [5]:
# Inputs are all columns except the last one; the last column is the regression
# target (adjust the slices to choose a different target)
features, target = data.iloc[:, :-1], data.iloc[:, -1]

In [6]:
# Hold out 20% of the rows as the test set; the fixed random_state keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    features,
    target,
    test_size=0.2,
    random_state=42,
)

In [7]:
# Standardize features: the scaler is fitted on the training split only and the
# same fitted transform is then applied to both splits
scaler = StandardScaler()
scaler.fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)


In [8]:
# Reshape data for RNN input: (samples, features) -> (samples, 1, features),
# i.e. every sample becomes a sequence of length 1.
# The last axis is inferred with -1 so the cell is safe to re-run: an array that is
# already (samples, 1, features) keeps its shape instead of raising on
# reshape(samples, 1, 1).
X_train = X_train.reshape(X_train.shape[0], 1, -1)
X_test = X_test.reshape(X_test.shape[0], 1, -1)

In [9]:
# Convert each split to a float32 PyTorch tensor (targets come from the Series' values)
X_train_tensor, y_train_tensor, X_test_tensor, y_test_tensor = (
    torch.tensor(array, dtype=torch.float32)
    for array in (X_train, y_train.values, X_test, y_test.values)
)

In [10]:
# Create a custom dataset class
class AirQualityDataset(Dataset):
    """Dataset that pairs each row of ``X`` with the matching entry of ``y``.

    Args:
        X: Indexable container of inputs (one entry per sample).
        y: Indexable container of targets (one entry per sample).

    Raises:
        ValueError: If ``X`` and ``y`` do not hold the same number of samples.
    """

    def __init__(self, X, y):
        # Fail early: a mismatch would otherwise either raise an IndexError deep
        # inside a DataLoader or silently ignore trailing rows of X, because
        # __len__ is driven by y alone.
        if len(X) != len(y):
            raise ValueError(
                f"X and y must have the same number of samples, got {len(X)} and {len(y)}"
            )
        self.X = X
        self.y = y

    def __len__(self):
        """Return the number of samples."""
        return len(self.y)

    def __getitem__(self, idx):
        """Return the ``(input, target)`` pair stored at position ``idx``."""
        return self.X[idx], self.y[idx]

In [11]:
# Wrap each split in a dataset, then batch it in groups of 32;
# only the training loader shuffles its samples
train_dataset, test_dataset = (
    AirQualityDataset(X_train_tensor, y_train_tensor),
    AirQualityDataset(X_test_tensor, y_test_tensor),
)

train_loader = DataLoader(dataset=train_dataset, shuffle=True, batch_size=32)
test_loader = DataLoader(dataset=test_dataset, shuffle=False, batch_size=32)

In [12]:
# Define the Bidirectional RNN model
class BiRNN(nn.Module):
    """Bidirectional RNN followed by pooling over the sequence axis and a linear head.

    Args:
        input_size: Number of features per time step.
        hidden_size: Hidden units per direction of the RNN.
        output_size: Number of outputs produced by the final linear layer.
        pooling: ``'max'`` or ``'avg'``; how the per-step RNN outputs are reduced
            along the sequence axis before the linear layer.

    Raises:
        ValueError: If ``pooling`` is not one of the supported modes.
    """

    _POOLING_MODES = ('max', 'avg')

    def __init__(self, input_size, hidden_size, output_size, pooling):
        super().__init__()
        # Reject unknown modes up front; previously an unrecognised value skipped
        # pooling altogether and silently changed the rank of the model output.
        if pooling not in self._POOLING_MODES:
            raise ValueError(
                f"Unsupported pooling {pooling!r}; expected one of {self._POOLING_MODES}"
            )
        self.hidden_size = hidden_size
        self.rnn = nn.RNN(input_size, hidden_size, batch_first=True, bidirectional=True)
        self.pooling = pooling
        self.fc = nn.Linear(hidden_size * 2, output_size)  # Multiply by 2 for bidirectionality

    def forward(self, x):
        """Map ``x`` of shape (batch, seq_len, input_size) to (batch, output_size)."""
        out, _ = self.rnn(x)  # (batch, seq_len, 2 * hidden_size)
        if self.pooling == 'max':
            out, _ = torch.max(out, dim=1)
        elif self.pooling == 'avg':
            out = torch.mean(out, dim=1)
        else:
            # Only reachable if the attribute was changed after construction.
            raise ValueError(f"Unsupported pooling {self.pooling!r}")
        return self.fc(out)

In [13]:
# Training and evaluation function
def train_and_evaluate(hidden_size=64, pooling='avg', epochs=50, optimizer_name='Adam', seed=None):
    """Train a fresh BiRNN and record the per-epoch train/test MSE.

    Relies on the notebook-level ``X_train_tensor``, ``train_loader`` and
    ``test_loader`` objects.

    Args:
        hidden_size: Hidden units per RNN direction, forwarded to ``BiRNN``.
        pooling: Pooling mode forwarded to ``BiRNN``.
        epochs: Maximum number of epochs; early stopping may end the run sooner.
        optimizer_name: One of ``'SGD'``, ``'RMSProp'`` or ``'Adam'``.
        seed: Optional integer passed to ``torch.manual_seed`` before the model is
            built so a run can be repeated. ``None`` (the default) leaves the
            random state untouched.

    Returns:
        A list of ``(epoch, train_loss, test_loss)`` tuples, one per completed
        epoch. ``epoch`` is 1-based; each loss is the MSE averaged over all
        samples of the respective loader.

    Raises:
        ValueError: If ``optimizer_name`` is not a supported optimizer.
    """
    optimizer_factories = {
        'SGD': torch.optim.SGD,
        'RMSProp': torch.optim.RMSprop,
        'Adam': torch.optim.Adam,
    }
    # Validate first: an unknown name used to leave `optimizer` unassigned and
    # surface later as an UnboundLocalError.
    if optimizer_name not in optimizer_factories:
        raise ValueError(
            f"Unknown optimizer_name {optimizer_name!r}; "
            f"expected one of {sorted(optimizer_factories)}"
        )

    if seed is not None:
        torch.manual_seed(seed)

    input_size = X_train_tensor.shape[2]
    output_size = 1  # Assuming a regression task
    model = BiRNN(input_size, hidden_size, output_size, pooling)

    # Define loss function and optimizer
    criterion = nn.MSELoss()
    optimizer = optimizer_factories[optimizer_name](model.parameters(), lr=0.01)

    # Learning rate scheduler: halve the LR after 5 epochs without improvement.
    # `verbose` is intentionally not passed; the PyTorch docs mark it as deprecated
    # since 2.2. The current LR can be read from `optimizer.param_groups`.
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=5, factor=0.5)

    # Early stopping
    best_loss = float('inf')
    patience = 10
    counter = 0

    results = []

    for epoch in range(epochs):
        model.train()
        train_loss = 0.0
        for X_batch, y_batch in train_loader:
            optimizer.zero_grad()
            outputs = model(X_batch)
            # Reshape to the target's shape instead of squeeze(): squeeze() turns a
            # batch of one sample into a 0-d tensor, which then broadcasts against
            # the 1-d target.
            loss = criterion(outputs.reshape(y_batch.shape), y_batch)
            loss.backward()
            optimizer.step()
            # Weight by batch size so a smaller final batch does not skew the mean.
            train_loss += loss.item() * X_batch.size(0)
        train_loss /= len(train_loader.dataset)

        # Evaluate on test data
        model.eval()
        test_loss = 0.0
        with torch.no_grad():
            for X_batch, y_batch in test_loader:
                outputs = model(X_batch)
                loss = criterion(outputs.reshape(y_batch.shape), y_batch)
                test_loss += loss.item() * X_batch.size(0)
        test_loss /= len(test_loader.dataset)

        # NOTE(review): the test loss drives both the LR schedule and early
        # stopping, so the reported test loss is not an unbiased hold-out
        # estimate; consider a separate validation split.
        scheduler.step(test_loss)

        results.append((epoch + 1, train_loss, test_loss))

        if test_loss < best_loss:
            best_loss = test_loss
            counter = 0
        else:
            counter += 1
            if counter >= patience:
                print("Early stopping triggered")
                break

    return results

In [14]:
# Results store: experiment label -> that run's per-epoch history
all_results = dict()

In [15]:
# Sweep the hidden size while every other setting stays at its default
hidden_sizes = [32, 64, 128]
for hidden_size in hidden_sizes:
    run_label = f"Hidden Size {hidden_size}"
    print(f"\nExperiment with Hidden Size: {hidden_size}")
    run_history = train_and_evaluate(hidden_size=hidden_size)
    all_results[run_label] = run_history


Experiment with Hidden Size: 32





Experiment with Hidden Size: 64

Experiment with Hidden Size: 128


In [16]:
# Sweep the pooling mode while every other setting stays at its default
pooling_methods = ['max', 'avg']
for pooling in pooling_methods:
    run_label = f"Pooling {pooling}"
    print(f"\nExperiment with Pooling Method: {pooling}")
    run_history = train_and_evaluate(pooling=pooling)
    all_results[run_label] = run_history


Experiment with Pooling Method: max

Experiment with Pooling Method: avg


In [17]:
# Sweep the epoch budget while every other setting stays at its default
epochs_list = [5, 50, 100, 250, 350]
for epochs in epochs_list:
    run_label = f"Epochs {epochs}"
    print(f"\nExperiment with Epochs: {epochs}")
    run_history = train_and_evaluate(epochs=epochs)
    all_results[run_label] = run_history


Experiment with Epochs: 5

Experiment with Epochs: 50

Experiment with Epochs: 100

Experiment with Epochs: 250
Early stopping triggered

Experiment with Epochs: 350
Early stopping triggered


In [18]:
# Sweep the optimizer while every other setting stays at its default
optimizers = ['SGD', 'RMSProp', 'Adam']
for optimizer_name in optimizers:
    run_label = f"Optimizer {optimizer_name}"
    print(f"\nExperiment with Optimizer: {optimizer_name}")
    run_history = train_and_evaluate(optimizer_name=optimizer_name)
    all_results[run_label] = run_history


Experiment with Optimizer: SGD

Experiment with Optimizer: RMSProp
Early stopping triggered

Experiment with Optimizer: Adam


In [21]:
# Print each experiment's per-epoch losses under a header naming the experiment
for key, result in all_results.items():
    print(f"\nResults for {key}")
    epoch_lines = [
        f"Epoch {epoch}: Train Loss = {train_loss:.4f}, Test Loss = {test_loss:.4f}"
        for epoch, train_loss, test_loss in result
    ]
    if epoch_lines:
        print("\n".join(epoch_lines))


Results for Hidden Size 32
Epoch 1: Train Loss = 863.3726, Test Loss = 354.9443
Epoch 2: Train Loss = 165.3560, Test Loss = 47.8526
Epoch 3: Train Loss = 18.4349, Test Loss = 3.5783
Epoch 4: Train Loss = 1.3611, Test Loss = 0.5251
Epoch 5: Train Loss = 0.4617, Test Loss = 0.0984
Epoch 6: Train Loss = 0.1341, Test Loss = 0.1811
Epoch 7: Train Loss = 0.1170, Test Loss = 0.0793
Epoch 8: Train Loss = 0.0998, Test Loss = 0.1389
Epoch 9: Train Loss = 0.1630, Test Loss = 0.1161
Epoch 10: Train Loss = 0.1205, Test Loss = 0.1926
Epoch 11: Train Loss = 0.2877, Test Loss = 0.1789
Epoch 12: Train Loss = 0.1360, Test Loss = 0.0720
Epoch 13: Train Loss = 0.0785, Test Loss = 0.0930
Epoch 14: Train Loss = 0.0603, Test Loss = 0.0910
Epoch 15: Train Loss = 0.1092, Test Loss = 0.1031
Epoch 16: Train Loss = 0.1450, Test Loss = 0.4995
Epoch 17: Train Loss = 0.2076, Test Loss = 0.0942
Epoch 18: Train Loss = 0.0968, Test Loss = 0.1277
Epoch 19: Train Loss = 0.0281, Test Loss = 0.0307
Epoch 20: Train Loss = 