## Deep ensembles based training implementation

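This notebook implements the deep-ensembles methodology described in https://arxiv.org/pdf/1612.01474, with the exception of the adversarial-training step. Note that the paper builds its adversarial examples with the fast gradient sign method; adding Gaussian noise to the training samples is only a simple random-perturbation stand-in for that step, not the paper's exact procedure. Full credit for the method goes to the paper's authors and to the other parties acknowledged in the paper.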

### Defining model and loss function

In [91]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
from torch.utils.data import DataLoader, TensorDataset
from functools import reduce


def _deep_ens_loss_(x, y, eps=1e-6):
    """Gaussian negative log-likelihood used to train one ensemble member.

    Args:
        x: Network output indexed as ``x[:, 0]`` (predicted mean) and
            ``x[:, 1]`` (predicted standard deviation). Only the square of
            the second column is used, so its sign does not matter.
        y: Regression targets, one per row of ``x``. Flattened to 1-D before
            use so that a ``(batch, 1)`` target is not broadcast against the
            ``(batch,)`` mean into a ``(batch, batch)`` matrix.
        eps: Small positive constant added to the predicted variance. It
            keeps ``log(var)`` and the division finite when the network
            predicts a standard deviation of exactly zero.

    Returns:
        Scalar tensor: the batch mean of
        ``log(var) / 2 + (y - mean)**2 / (2 * var)`` (the constant term of
        the Gaussian log-likelihood is omitted).
    """
    mean = x[:, 0]
    # Floor the variance away from zero; without this a zero std yields
    # log(0) = -inf and a division by zero, i.e. a NaN/inf loss.
    var = x[:, 1] ** 2 + eps
    target = y.reshape(-1)
    return torch.mean(torch.log(var) / 2 + (target - mean) ** 2 / (2 * var))

# Define the neural network model
class SimpleNN(nn.Module):
    """Fully connected network with a single ReLU hidden layer.

    The output layer is purely linear; any interpretation of the outputs
    (e.g. as a mean and a standard deviation) is left to the caller.
    """

    def __init__(self, input_size, hidden_size, output_size):
        """Create the two linear layers.

        Args:
            input_size: Number of features in each input sample.
            hidden_size: Width of the hidden layer.
            output_size: Number of values produced per sample.
        """
        super().__init__()
        self.hidden = nn.Linear(in_features=input_size, out_features=hidden_size)
        self.output = nn.Linear(in_features=hidden_size, out_features=output_size)

    def forward(self, x):
        """Return ``output(relu(hidden(x)))`` with no final activation."""
        activated = self.hidden(x).relu()
        return self.output(activated)


### Training

In [None]:
# Hyperparameters
input_size = 10         # Number of input features
hidden_size = 5         # Number of neurons in the hidden layer
output_size = 2         # Two outputs per sample: the loss reads them as (mean, std)
num_epochs = 20         # Number of training epochs per ensemble member
batch_size = 128        # Batch size for training
learning_rate = 0.0001  # Learning rate for the Adam optimizer
num_ens = 10            # Number of ensemble members

# Loss function shared by every ensemble member
criterion = _deep_ens_loss_

# Synthetic regression data wrapped in a TensorDataset and DataLoader
num_samples = 10000
X = torch.randn(num_samples, input_size)  # Randomly generated input features
y = torch.randn(num_samples,)  # Randomly generated real-valued targets
dataset = TensorDataset(X, y)
data_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

models = []
for i in range(num_ens):
    # Each member is a freshly (randomly) initialised network
    model = SimpleNN(input_size, hidden_size, output_size)
    # The optimizer must be created per member, after the model exists:
    # an optimizer built once outside this loop would hold the parameters
    # of some other model and never update the member being trained here.
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    for epoch in range(num_epochs):
        model.train()  # Set the model to training mode
        running_loss = 0.0
        for inputs, labels in data_loader:
            optimizer.zero_grad()  # Zero the parameter gradients
            outputs = model(inputs)  # Forward pass
            loss = criterion(outputs, labels)  # Compute loss
            loss.backward()  # Backward pass
            optimizer.step()  # Update weights
            running_loss += loss.item()
        avg_loss = running_loss / len(data_loader)
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {avg_loss:.4f}')
    models.append(model)

### Inference

In [121]:
# Synthetic test data wrapped in a TensorDataset and DataLoader
test_num_samples = 3000
X_test = torch.randn(test_num_samples, input_size)  # Randomly generated input features
y_test = torch.randn(test_num_samples,)   # Randomly generated real-valued targets (unused below)
test_dataset = TensorDataset(X_test, y_test)
test_data_loader = DataLoader(test_dataset, batch_size=len(test_dataset), shuffle=False)

# The loader is configured with batch_size == len(test_dataset), so it yields
# exactly one batch; take it directly instead of looping and silently keeping
# only the last iteration's outputs.
test_inputs, _ = next(iter(test_data_loader))

for member in models:
    member.eval()  # Switch every ensemble member to evaluation mode
with torch.no_grad():  # Disable gradient calculation for evaluation
    test_outputs = [member(test_inputs) for member in models]

# Stack per-member predictions along a new leading "member" dimension.
member_means = torch.stack([output[:, 0] for output in test_outputs])
member_vars = torch.stack([torch.pow(output[:, 1], 2) for output in test_outputs])

# Moments of the uniformly weighted mixture of the members' Gaussians:
#   mean = average of the member means
#   var  = average(sigma^2 + mu^2) - mean^2
#        = average(sigma^2) + population variance of the member means
# The second form is algebraically identical but cannot go negative through
# floating-point cancellation. Averages are taken over the models actually
# present rather than over the num_ens hyperparameter.
test_means = member_means.mean(dim=0)
test_var = member_vars.mean(dim=0) + member_means.var(dim=0, unbiased=False)

`test_means` holds the predictive mean and `test_var` the predictive variance of the Gaussian that approximates the ensemble's prediction for each test sample.