This notebook demonstrates experiments on a basic MLP and its Input Output Convex (IOC) counterpart on both the CIFAR-10 and MNIST datasets. Here's a summary of the key components and experiments covered:

Exp 1: **NN and IOC-NN On CIFAR10**:
   - Implementing a simple MLP architecture using PyTorch.
   - Constructing an IOC-NN architecture, including weight exponentiation and other modifications to enforce convexity constraints.
   - Evaluating training and validation performance, and testing accuracy.

Exp 2: **Training on Duplicate Free data (ciFAIR10)**:
   - We use the same implementations of NN and IOC-NN on the ciFAIR-10 dataset, whose test set contains no duplicates of training images.

Exp 3: **Ensembles of Binary Experts**:

- **Models:** BinaryExpert (binary classifier) and GatingNetwork (weight predictor).
- **Ensemble:** Combination of BinaryExpert models weighted by GatingNetwork predictions.

Exp 4: **BoostedEnsemble**:

- **Initialization:** ExpertEnsemble setup with base model and expert count.
- **Training:** Experts trained separately with bootstrapped data, updating weights based on performance.
      
Exp 5. **MNIST**:
  - We implement the same architectures on the MNIST dataset.
   

# Exp.1 - NN and IOC-NN On CIFAR10

In [None]:
import torch
from torch.utils.data import random_split,DataLoader
import torchvision.transforms as transforms
from torchvision.datasets import CIFAR10
from torch import nn
import numpy as np
import torch.optim as optim

## NN Model

### Data Preparation

In [None]:
import torch
from torch.utils.data import random_split, DataLoader
import torchvision.transforms as transforms
from torchvision.datasets import CIFAR10

# Preprocessing: convert each PIL image to a tensor
transform = transforms.Compose([
    transforms.ToTensor()
])

# Load the CIFAR-10 train and test datasets (downloaded into ./data on first use)
train_data = CIFAR10('data', train=True, download=True, transform=transform)
test_data = CIFAR10('data', train=False, download=True, transform=transform)

# Batch size shared by all data loaders
batch_size = 64

# Split the official training set into train and validation subsets
total_samples = len(train_data)
train_ratio = 0.8
validation_ratio = 0.2

train_size = int(train_ratio * total_samples)
validation_size = total_samples - train_size

# Seed the split so the train/validation partition is the same on every run
split_generator = torch.Generator().manual_seed(42)
train_data, val_data = random_split(
    train_data, [train_size, validation_size], generator=split_generator
)

# Only the training loader is shuffled; evaluation order does not affect accuracy
train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_data, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_data, batch_size=batch_size, shuffle=False)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to data/cifar-10-python.tar.gz


100%|██████████| 170498071/170498071 [00:02<00:00, 72241876.00it/s]


Extracting data/cifar-10-python.tar.gz to data
Files already downloaded and verified


### Model Construction

In [None]:
class NN(nn.Module):
    """Baseline MLP classifier for 32x32 RGB images.

    Architecture: Flatten -> 3 x [Linear(.., 800) -> BatchNorm1d -> ReLU]
    -> Linear(800, 10).

    ``forward`` returns raw class scores (logits) of shape ``(batch, 10)``.
    No softmax is applied inside the model because ``nn.CrossEntropyLoss``
    already applies log-softmax to its input; feeding it probabilities would
    apply softmax twice and flatten the gradients. Use
    ``torch.softmax(logits, dim=1)`` when probabilities are needed; argmax
    predictions are identical either way.
    """

    def __init__(self):
        super().__init__()

        self.layers = nn.Sequential(
            # Flatten (batch, 3, 32, 32) images into (batch, 3072) vectors
            nn.Flatten(),

            # Hidden layer 1
            nn.Linear(32*32*3, 800),
            nn.BatchNorm1d(800),
            nn.ReLU(inplace=True),

            # Hidden layer 2
            nn.Linear(800, 800),
            nn.BatchNorm1d(800),
            nn.ReLU(inplace=True),

            # Hidden layer 3
            nn.Linear(800, 800),
            nn.BatchNorm1d(800),
            nn.ReLU(inplace=True),

            # Output layer: one logit per class
            nn.Linear(800, 10),
        )

    def forward(self, x):
        """Return the class logits for a batch of images ``x``."""
        return self.layers(x)

### Model Training

In [None]:
mlp = NN()  # Create an instance of the neural network
loss_fun = nn.CrossEntropyLoss()  # Define the loss function
optimizer = torch.optim.Adam(mlp.parameters(), lr=1e-4)  # Define the optimizer
epoch = 0
prev_val_acc = 0.0
tol_epochs = 0      # Consecutive epochs without a meaningful validation gain
max_epochs = 50
patience = 2        # Stop after this many consecutive stalled epochs
tolerance = 1e-4    # Minimum validation-accuracy gain that counts as progress

# Main training loop
while epoch < max_epochs and tol_epochs < patience:
    epoch += 1
    print(f'Epoch {epoch}')

    current_loss = []  # Per-batch training losses of this epoch
    train_correct, train_seen = 0, 0
    val_correct, val_seen = 0, 0

    # Training (BatchNorm uses batch statistics and updates its running stats)
    mlp.train()
    for inputs, targets in train_loader:
        optimizer.zero_grad()  # Zero the parameter gradients
        outputs = mlp(inputs)  # Forward pass

        loss = loss_fun(outputs, targets)  # Calculate the loss
        loss.backward()  # Backward pass
        optimizer.step()  # Optimize

        current_loss.append(loss.item())

        pred = outputs.argmax(dim=1)
        train_correct += (targets == pred).sum().item()
        train_seen += targets.shape[0]

    # Validation: eval mode so BatchNorm uses its running statistics and is
    # not updated by validation data; no_grad avoids building autograd graphs
    mlp.eval()
    with torch.no_grad():
        for inputs, targets in val_loader:
            pred = mlp(inputs).argmax(dim=1)
            val_correct += (targets == pred).sum().item()
            val_seen += targets.shape[0]

    # Performance Evaluation (accuracies are per-sample, not mean of batch means)
    avg_train_loss = sum(current_loss) / len(current_loss)
    avg_train_acc = train_correct / train_seen
    avg_val_acc = val_correct / val_seen

    print('Loss : %.3f' % avg_train_loss)
    print('Training Accuracy : %.3f' % avg_train_acc)
    print('Validation Accuracy : %.3f' % avg_val_acc)

    # Early Stopping Criteria
    update = avg_val_acc - prev_val_acc
    print('Update : %.4f' % update)
    print("--------------------------------------------------------------------------------------------------")

    # A stalled OR worse validation accuracy counts against the patience budget
    if update < tolerance:
        tol_epochs += 1
    else:
        tol_epochs = 0

    prev_val_acc = avg_val_acc


### Model Evaluation/Testing

In [None]:
# Evaluate in inference mode: BatchNorm uses its running statistics (and is not
# updated by test data) and no autograd graph is built.
mlp.eval()
test_correct = 0
test_seen = 0

with torch.no_grad():
    # Iterate over the test dataset
    for inputs, targets in test_loader:
        pred = mlp(inputs).argmax(dim=1)
        test_correct += (targets == pred).sum().item()
        test_seen += targets.shape[0]

# Per-sample test accuracy
avg_test_acc = test_correct / test_seen
print('Test_Acc: %.3f' % avg_test_acc)

Test_Acc  : 0.543


## IOC - NN Model

### Data Preparation

In [None]:
class WhiteningTransform:
    """PCA-whitening transform usable as a step of a ``transforms.Compose`` pipeline.

    Call :meth:`fit` once on a ``(num_samples, ...)`` tensor of training data.
    Afterwards, calling the object centres its input with the training mean and
    projects it onto the principal axes, each scaled to unit variance.
    Until :meth:`fit` has been called the object returns its input unchanged.

    Args:
        eps: Small constant added to the eigenvalues before taking the square
            root, to avoid division by zero.
    """

    def __init__(self, eps=1e-10):
        self.eps = eps
        self.mean = None       # Per-feature training mean, set by fit()
        self.whitening = None  # (features, features) whitening matrix, set by fit()

    def fit(self, data):
        """Estimate the mean and the whitening matrix from ``data``.

        ``data`` has one sample per row; any trailing dimensions are flattened.
        """
        flat = data.reshape(data.size(0), -1)
        self.mean = flat.mean(dim=0)
        centered_data = flat - self.mean

        # Covariance of the centred features and its decomposition
        cov_matrix = centered_data.t() @ centered_data / flat.size(0)
        U, S, _ = torch.linalg.svd(cov_matrix)

        # Store the projection matrix itself (not the projected training data):
        # column j of U is scaled by 1 / sqrt(S[j] + eps)
        self.whitening = U / torch.sqrt(S + self.eps)

    def __call__(self, x):
        """Whiten a single sample or a batch; the input shape is preserved."""
        if self.whitening is None:
            return x
        num_features = self.whitening.size(0)
        # Works for one sample (C, H, W) as well as for a batch (N, C, H, W)
        flat = x.reshape(-1, num_features)
        whitened = (flat - self.mean) @ self.whitening
        return whitened.reshape(x.shape)

# Preprocessing pipeline: PIL image / ndarray -> tensor -> whitening step.
# NOTE(review): the WhiteningTransform instance created here is never fitted in
# the visible part of this notebook, so it returns its input unchanged.
transform = transforms.Compose([transforms.ToTensor(), WhiteningTransform()])

In [None]:
# Load the CIFAR-10 dataset using the pipeline that includes the whitening step
train_data = CIFAR10('data', train=True, download=True, transform=transform)
test_data = CIFAR10('data', train=False, download=True, transform=transform)

batch_size = 64

# Split training data into train and validation sets
total_samples = len(train_data)
train_ratio = 0.8
validation_ratio = 0.2

train_size = int(train_ratio * total_samples)
validation_size = total_samples - train_size

# Seed the split so the train/validation partition is the same on every run
split_generator = torch.Generator().manual_seed(42)
train_data, val_data = random_split(
    train_data, [train_size, validation_size], generator=split_generator
)

# Only the training loader is shuffled; evaluation order does not affect accuracy
train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_data, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_data, batch_size=batch_size, shuffle=False)

Files already downloaded and verified
Files already downloaded and verified


### Model Construction

In [None]:
class IOC_NN(nn.Module):
    """Input Output Convex MLP for 32x32 RGB images.

    Architecture: Flatten -> Linear(3072, 1000) -> BatchNorm1d -> ELU
    -> 2 x [Linear(.., 800) -> BatchNorm1d -> ELU] -> Linear(800, 10).

    The first linear layer is additionally exposed as
    ``self.first_hidden_layer`` so that it can be told apart from the layers
    whose weights are constrained during training.

    ``forward`` returns raw class scores (logits) of shape ``(batch, 10)``.
    No softmax is applied inside the model because ``nn.CrossEntropyLoss``
    already applies log-softmax to its input; feeding it probabilities would
    apply softmax twice. Use ``torch.softmax(logits, dim=1)`` when
    probabilities are needed; argmax predictions are identical either way.
    """

    def __init__(self):
        super().__init__()

        # First hidden layer, with more units than the later hidden layers
        self.first_hidden_layer = nn.Linear(32*32*3, 1000)

        self.layers = nn.Sequential(
            # Flatten (batch, 3, 32, 32) images into (batch, 3072) vectors
            nn.Flatten(),

            # Hidden layer 1
            self.first_hidden_layer,
            nn.BatchNorm1d(1000),
            nn.ELU(inplace=True),

            # Hidden layer 2
            nn.Linear(1000, 800),
            nn.BatchNorm1d(800),
            nn.ELU(inplace=True),

            # Hidden layer 3
            nn.Linear(800, 800),
            nn.BatchNorm1d(800),
            nn.ELU(inplace=True),

            # Output layer: one logit per class
            nn.Linear(800, 10),
        )

    def forward(self, x):
        """Return the class logits for a batch of images ``x``."""
        return self.layers(x)


In [None]:
class WeightExponentiation(object):
    """Callable (e.g. for ``nn.Module.apply``) that removes negative weights.

    Every negative entry ``w`` of ``module.weight`` is replaced in place by
    ``exp(w - epsilon)``, a small positive number; non-negative entries are
    left untouched. Modules without a ``weight`` (or with ``weight is None``)
    are ignored.

    Args:
        epsilon: Shift subtracted from a negative weight before
            exponentiation; larger values give smaller replacement weights.
        exclude: Optional iterable of module objects that must never be
            modified (compared by identity), e.g. ``[model.first_hidden_layer]``.
    """

    def __init__(self, epsilon=5, exclude=()):
        self.epsilon = epsilon
        self.exclude = tuple(exclude)

    def __call__(self, module):
        # Explicitly excluded modules are left as they are
        if any(module is skipped for skipped in self.exclude):
            return

        # Skip modules without weights, including those whose weight is None
        # (e.g. BatchNorm created with affine=False)
        weight = getattr(module, 'weight', None)
        if weight is None:
            return

        # Legacy name-based exclusion, kept for backward compatibility.
        # NOTE: the repr of a plain nn.Linear does not contain the attribute
        # name it is registered under, so this check does not skip a layer
        # stored as `model.first_hidden_layer`; pass it via `exclude` instead.
        if "first_hidden_layer" in module.__str__():
            return

        # Update in place without recording the operation in autograd
        with torch.no_grad():
            negative = weight < 0
            weight[negative] = torch.exp(weight[negative] - self.epsilon)

### Model Training

In [None]:
ioc_nn = IOC_NN()  # Instantiate the IOC_NN model
loss_fun = nn.CrossEntropyLoss()  # Define the loss function
# Adam with beta2 lowered from its default 0.999 to 0.9
optimizer = torch.optim.Adam(ioc_nn.parameters(), lr=1e-4, betas=(0.9, 0.9))
weight_exponentiation = WeightExponentiation()  # Reused for every batch
unconstrained_layer = ioc_nn.first_hidden_layer  # Its weights may stay negative
epoch = 0
prev_val_acc = 0.0
tol_epochs = 0      # Consecutive epochs without a meaningful validation gain
max_epochs = 50
patience = 2        # Stop after this many consecutive stalled epochs
tolerance = 1e-4    # Minimum validation-accuracy gain that counts as progress

# Main training loop
while epoch < max_epochs and tol_epochs < patience:
    epoch += 1
    print(f'Epoch {epoch}')

    current_loss = []  # Per-batch training losses of this epoch
    train_correct, train_seen = 0, 0
    val_correct, val_seen = 0, 0

    # Training (BatchNorm uses batch statistics and updates its running stats)
    ioc_nn.train()
    for inputs, targets in train_loader:
        optimizer.zero_grad()  # Zero the parameter gradients
        outputs = ioc_nn(inputs)  # Forward pass

        loss = loss_fun(outputs, targets)  # Calculate the loss
        loss.backward()  # Backward pass
        optimizer.step()  # Optimize

        # Re-impose non-negative weights after the update. The modules are
        # visited explicitly and the first hidden layer is skipped by identity,
        # because its weights are the ones meant to be excluded.
        for module in ioc_nn.modules():
            if module is not unconstrained_layer:
                weight_exponentiation(module)

        current_loss.append(loss.item())

        pred = outputs.argmax(dim=1)
        train_correct += (targets == pred).sum().item()
        train_seen += targets.shape[0]

    # Validation: eval mode so BatchNorm uses its running statistics and is
    # not updated by validation data; no_grad avoids building autograd graphs
    ioc_nn.eval()
    with torch.no_grad():
        for inputs, targets in val_loader:
            pred = ioc_nn(inputs).argmax(dim=1)
            val_correct += (targets == pred).sum().item()
            val_seen += targets.shape[0]

    # Performance Evaluation (accuracies are per-sample, not mean of batch means)
    avg_train_loss = sum(current_loss) / len(current_loss)
    avg_train_acc = train_correct / train_seen
    avg_val_acc = val_correct / val_seen

    print('Loss : %.3f' % avg_train_loss)
    print('Training Accuracy : %.3f' % avg_train_acc)
    print('Validation Accuracy : %.3f' % avg_val_acc)

    # Early Stopping Criteria
    update = avg_val_acc - prev_val_acc
    print('Update : %.4f' % update)
    print("-------------------------------------------------------------------------------------------")

    # A stalled OR worse validation accuracy counts against the patience budget
    if update < tolerance:
        tol_epochs += 1
    else:
        tol_epochs = 0

    prev_val_acc = avg_val_acc

Epoch 1
Loss : 2.261
Training_Acc  : 0.165
Validation_Acc  : 0.186
update: 0.1864
-------------------------------------------------------------------------------------------
Epoch 2
Loss : 2.232
Training_Acc  : 0.209
Validation_Acc  : 0.211
update: 0.0246
-------------------------------------------------------------------------------------------
Epoch 3
Loss : 2.220
Training_Acc  : 0.223
Validation_Acc  : 0.222
update: 0.0106
-------------------------------------------------------------------------------------------
Epoch 4
Loss : 2.215
Training_Acc  : 0.229
Validation_Acc  : 0.225
update: 0.0035
-------------------------------------------------------------------------------------------
Epoch 5
Loss : 2.209
Training_Acc  : 0.237
Validation_Acc  : 0.235
update: 0.0101
-------------------------------------------------------------------------------------------
Epoch 6
Loss : 2.202
Training_Acc  : 0.246
Validation_Acc  : 0.245
update: 0.0096
------------------------------------------------

### Model Evaluation/Testing

In [None]:
# Evaluate in inference mode: BatchNorm uses its running statistics (and is not
# updated by test data) and no autograd graph is built.
ioc_nn.eval()
test_correct = 0
test_seen = 0

with torch.no_grad():
    # Iterate over the test dataset
    for inputs, targets in test_loader:
        pred = ioc_nn(inputs).argmax(dim=1)
        test_correct += (targets == pred).sum().item()
        test_seen += targets.shape[0]

# Per-sample test accuracy
avg_test_acc = test_correct / test_seen
print('Test Accuracy: %.3f' % avg_test_acc)

# Exp.2 - Training on Duplicate Free data (ciFAIR10)

In [None]:
import torchvision.datasets

class ciFAIR10(torchvision.datasets.CIFAR10):
    """CIFAR10 dataset class re-pointed at the ciFAIR-10 archive.

    Only class attributes are overridden; all loading logic is inherited
    from ``torchvision.datasets.CIFAR10``.
    """

    # Base folder for the dataset
    base_folder = 'ciFAIR-10'
    # URL to download the dataset from
    url = 'https://github.com/cvjena/cifair/releases/download/v1.0/ciFAIR-10.zip'
    # Name of the downloaded zip file
    filename = 'ciFAIR-10.zip'
    # MD5 checksum of the zip file
    tgz_md5 = 'ca08fd390f0839693d3fc45c4e49585f'
    # Test files along with their MD5 checksums
    test_list = [['test_batch', '01290e6b622a1977a000eff13650aca2']]

## NN Model

### Data Preparation

In [None]:
# Preprocessing: convert each PIL image to a tensor
transform = transforms.Compose([transforms.ToTensor()])

# Load the ciFAIR-10 dataset for training and testing
train_data = ciFAIR10('data', train=True, download=True, transform=transform)
test_data = ciFAIR10('data', train=False, download=True, transform=transform)

batch_size = 64  # Batch size shared by all data loaders

# Split training data into train and validation sets
total_samples = len(train_data)
train_ratio = 0.8  # Ratio of training data
validation_ratio = 0.2  # Ratio of validation data

train_size = int(train_ratio * total_samples)
validation_size = total_samples - train_size

# Seed the split so the train/validation partition is the same on every run
split_generator = torch.Generator().manual_seed(42)
train_data, val_data = random_split(
    train_data, [train_size, validation_size], generator=split_generator
)

# Only the training loader is shuffled; evaluation order does not affect accuracy
train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_data, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_data, batch_size=batch_size, shuffle=False)

Downloading https://objects.githubusercontent.com/github-production-release-asset-2e65be/168584397/527c7d80-2645-11e9-8008-a9ca4d2226ec?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=AKIAIWNJYAX4CSVEH53A%2F20231108%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20231108T134006Z&X-Amz-Expires=300&X-Amz-Signature=5c751b7172c5eb2f471ffd9f151c36078e6f6263662d923363148e2302e1317d&X-Amz-SignedHeaders=host&actor_id=0&key_id=0&repo_id=168584397&response-content-disposition=attachment%3B%20filename%3DciFAIR-10.zip&response-content-type=application%2Foctet-stream to data/ciFAIR-10.zip


100%|██████████| 168614301/168614301 [00:00<00:00, 257535541.12it/s]


Extracting data/ciFAIR-10.zip to data
Files already downloaded and verified


### Model Training

In [None]:
mlp = NN()  # Instantiate the MLP model
loss_fun = nn.CrossEntropyLoss()  # Cross-entropy loss
optimizer = torch.optim.Adam(mlp.parameters(), lr=1e-4)  # Adam with a learning rate of 1e-4
epoch = 0  # Epoch counter
prev_val_acc = 0.0  # Validation accuracy of the previous epoch
tol_epochs = 0  # Consecutive epochs without a meaningful validation gain
max_epochs = 50  # Maximum number of epochs to train
patience = 2  # Stop after this many consecutive stalled epochs
tolerance = 1e-4  # Minimum validation-accuracy gain that counts as progress

while epoch < max_epochs and tol_epochs < patience:
    epoch += 1
    print(f'Epoch {epoch}')

    current_loss = []  # Per-batch training losses of this epoch
    train_correct, train_seen = 0, 0
    val_correct, val_seen = 0, 0

    # Training loop (BatchNorm uses batch statistics and updates its running stats)
    mlp.train()
    for inputs, targets in train_loader:
        optimizer.zero_grad()  # Zero the gradients
        outputs = mlp(inputs)  # Forward pass

        loss = loss_fun(outputs, targets)  # Calculate the loss
        loss.backward()  # Backward pass
        optimizer.step()  # Update weights

        current_loss.append(loss.item())

        pred = outputs.argmax(dim=1)
        train_correct += (targets == pred).sum().item()
        train_seen += targets.shape[0]

    # Validation loop: eval mode so BatchNorm uses its running statistics and
    # is not updated by validation data; no_grad avoids building autograd graphs
    mlp.eval()
    with torch.no_grad():
        for inputs, targets in val_loader:
            pred = mlp(inputs).argmax(dim=1)
            val_correct += (targets == pred).sum().item()
            val_seen += targets.shape[0]

    # Per-sample accuracies (not a mean of per-batch means)
    avg_train_acc = train_correct / train_seen
    avg_val_acc = val_correct / val_seen

    # Print performance metrics
    print('Loss: %.3f' % (sum(current_loss) / len(current_loss)))
    print('Training Accuracy: %.3f' % avg_train_acc)
    print('Validation Accuracy: %.3f' % avg_val_acc)

    # Early stopping criteria
    update = avg_val_acc - prev_val_acc
    print('Update: %.4f' % update)
    print("--------------------------------------------------------------------------------------------------")

    # A stalled OR worse validation accuracy counts against the patience budget
    if update < tolerance:
        tol_epochs += 1
    else:
        tol_epochs = 0

    prev_val_acc = avg_val_acc  # Update previous validation accuracy

Epoch 1
Loss : 2.067
Training_Acc  : 0.400
Validation_Acc  : 0.444
update: 0.4439
--------------------------------------------------------------------------------------------------
Epoch 2
Loss : 1.991
Training_Acc  : 0.474
Validation_Acc  : 0.466
update: 0.0221
--------------------------------------------------------------------------------------------------
Epoch 3
Loss : 1.954
Training_Acc  : 0.511
Validation_Acc  : 0.477
update: 0.0113
--------------------------------------------------------------------------------------------------
Epoch 4
Loss : 1.930
Training_Acc  : 0.535
Validation_Acc  : 0.494
update: 0.0163
--------------------------------------------------------------------------------------------------
Epoch 5
Loss : 1.907
Training_Acc  : 0.558
Validation_Acc  : 0.505
update: 0.0113
--------------------------------------------------------------------------------------------------
Epoch 6
Loss : 1.887
Training_Acc  : 0.578
Validation_Acc  : 0.503
update: -0.0023
------------

### Model Evaluation/Testing

In [None]:
# Evaluate in inference mode: BatchNorm uses its running statistics (and is not
# updated by test data) and no autograd graph is built.
mlp.eval()
test_correct = 0
test_seen = 0
with torch.no_grad():
    for inputs, targets in test_loader:
        pred = mlp(inputs).argmax(dim=1)
        test_correct += (targets == pred).sum().item()
        test_seen += targets.shape[0]
# Per-sample test accuracy
print('Test_Acc  : %.3f' % (test_correct / test_seen))

Test_Acc  : 0.525


## IOC - NN Model

### Data Preparation

In [None]:
class WhiteningTransform:
    """PCA-whitening transform usable as a step of a ``transforms.Compose`` pipeline.

    Call :meth:`fit` once on a ``(num_samples, ...)`` tensor of training data.
    Afterwards, calling the object centres its input with the training mean and
    projects it onto the principal axes, each scaled to unit variance.
    Until :meth:`fit` has been called the object returns its input unchanged.

    Args:
        eps: Small constant added to the eigenvalues before taking the square
            root, to avoid division by zero.
    """

    def __init__(self, eps=1e-10):
        self.eps = eps
        self.mean = None       # Per-feature training mean, set by fit()
        self.whitening = None  # (features, features) whitening matrix, set by fit()

    def fit(self, data):
        """Estimate the mean and the whitening matrix from ``data``.

        ``data`` has one sample per row; any trailing dimensions are flattened.
        """
        flat = data.reshape(data.size(0), -1)
        self.mean = flat.mean(dim=0)
        centered_data = flat - self.mean

        # Covariance of the centred features and its decomposition
        cov_matrix = centered_data.t() @ centered_data / flat.size(0)
        U, S, _ = torch.linalg.svd(cov_matrix)

        # Store the projection matrix itself (not the projected training data):
        # column j of U is scaled by 1 / sqrt(S[j] + eps)
        self.whitening = U / torch.sqrt(S + self.eps)

    def __call__(self, x):
        """Whiten a single sample or a batch; the input shape is preserved."""
        if self.whitening is None:
            return x
        num_features = self.whitening.size(0)
        # Works for one sample (C, H, W) as well as for a batch (N, C, H, W)
        flat = x.reshape(-1, num_features)
        whitened = (flat - self.mean) @ self.whitening
        return whitened.reshape(x.shape)

# Preprocessing pipeline: PIL image / ndarray -> tensor -> whitening step.
# NOTE(review): the WhiteningTransform instance created here is never fitted in
# the visible part of this notebook, so it returns its input unchanged.
transform = transforms.Compose([transforms.ToTensor(), WhiteningTransform()])


In [None]:

# Load the ciFAIR-10 dataset using the pipeline that includes the whitening step
train_data = ciFAIR10('data', train=True, download=True, transform=transform)
test_data = ciFAIR10('data', train=False, download=True, transform=transform)

batch_size = 64

# Split training data into train and validation sets
total_samples = len(train_data)
train_ratio = 0.8
validation_ratio = 0.2

train_size = int(train_ratio * total_samples)
validation_size = total_samples - train_size

# Seed the split so the train/validation partition is the same on every run
split_generator = torch.Generator().manual_seed(42)
train_data, val_data = random_split(
    train_data, [train_size, validation_size], generator=split_generator
)

# Only the training loader is shuffled; evaluation order does not affect accuracy
train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_data, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_data, batch_size=batch_size, shuffle=False)


Files already downloaded and verified
Files already downloaded and verified


### Model Training

In [None]:
ioc_nn = IOC_NN()
loss_fun = nn.CrossEntropyLoss()
# Adam with beta2 lowered from its default 0.999 to 0.9
optimizer = torch.optim.Adam(ioc_nn.parameters(), lr=1e-4, betas=(0.9, 0.9))
weight_exponentiation = WeightExponentiation()  # Reused for every batch
unconstrained_layer = ioc_nn.first_hidden_layer  # Its weights may stay negative
epoch = 0
prev_val_acc = 0.0
tol_epochs = 0      # Consecutive epochs without a meaningful validation gain
max_epochs = 50
patience = 2        # Stop after this many consecutive stalled epochs
tolerance = 1e-4    # Minimum validation-accuracy gain that counts as progress


while epoch < max_epochs and tol_epochs < patience:
    epoch += 1
    print(f'Epoch {epoch}')
    current_loss = []  # Per-batch training losses of this epoch
    train_correct, train_seen = 0, 0
    val_correct, val_seen = 0, 0

    # Training (BatchNorm uses batch statistics and updates its running stats)
    ioc_nn.train()
    for inputs, targets in train_loader:
        optimizer.zero_grad()
        outputs = ioc_nn(inputs)

        loss = loss_fun(outputs, targets)
        loss.backward()
        optimizer.step()

        # Re-impose non-negative weights after the update. The modules are
        # visited explicitly and the first hidden layer is skipped by identity,
        # because its weights are the ones meant to be excluded.
        for module in ioc_nn.modules():
            if module is not unconstrained_layer:
                weight_exponentiation(module)

        current_loss.append(loss.item())

        pred = outputs.argmax(dim=1)
        train_correct += (targets == pred).sum().item()
        train_seen += targets.shape[0]

    # Validation: eval mode so BatchNorm uses its running statistics and is
    # not updated by validation data; no_grad avoids building autograd graphs
    ioc_nn.eval()
    with torch.no_grad():
        for inputs, targets in val_loader:
            pred = ioc_nn(inputs).argmax(dim=1)
            val_correct += (targets == pred).sum().item()
            val_seen += targets.shape[0]

    # Performance Evaluation (accuracies are per-sample, not mean of batch means)
    avg_train_acc = train_correct / train_seen
    avg_val_acc = val_correct / val_seen

    print('Loss : %.3f' % (sum(current_loss) / len(current_loss)))
    print('Training_Acc  : %.3f' % avg_train_acc)
    print('Validation_Acc  : %.3f' % avg_val_acc)

    # Early Stopping Criteria
    update = avg_val_acc - prev_val_acc
    print('update: %.4f' % update)
    print("-------------------------------------------------------------------------------------------")

    # A stalled OR worse validation accuracy counts against the patience budget
    if update < tolerance:
        tol_epochs += 1
    else:
        tol_epochs = 0

    prev_val_acc = avg_val_acc

Epoch 1
Loss : 2.262
Training_Acc  : 0.162
Validation_Acc  : 0.185
update: 0.1853
-------------------------------------------------------------------------------------------
Epoch 2
Loss : 2.234
Training_Acc  : 0.206
Validation_Acc  : 0.216
update: 0.0308
-------------------------------------------------------------------------------------------
Epoch 3
Loss : 2.222
Training_Acc  : 0.221
Validation_Acc  : 0.230
update: 0.0142
-------------------------------------------------------------------------------------------
Epoch 4
Loss : 2.214
Training_Acc  : 0.230
Validation_Acc  : 0.236
update: 0.0056
-------------------------------------------------------------------------------------------
Epoch 5
Loss : 2.207
Training_Acc  : 0.239
Validation_Acc  : 0.247
update: 0.0108
-------------------------------------------------------------------------------------------
Epoch 6
Loss : 2.199
Training_Acc  : 0.250
Validation_Acc  : 0.250
update: 0.0031
------------------------------------------------

### Model Evaluation/Testing

In [None]:
# Evaluate in inference mode: BatchNorm uses its running statistics (and is not
# updated by test data) and no autograd graph is built.
ioc_nn.eval()
test_correct = 0
test_seen = 0
with torch.no_grad():
    for inputs, targets in test_loader:
        pred = ioc_nn(inputs).argmax(dim=1)
        test_correct += (targets == pred).sum().item()
        test_seen += targets.shape[0]
# Per-sample test accuracy
print('Test_Acc  : %.3f' % (test_correct / test_seen))

Test_Acc  : 0.315


# Exp.3 - Ensembles of Binary Experts

1. **BinaryExpert Model:** This model represents a single binary classification expert. It consists of several fully connected layers followed by batch normalization and ELU activation functions. The output layer consists of a single unit followed by a sigmoid activation function, which outputs the probability of the positive class.

2. **GatingNetwork Model:** This model is responsible for predicting the weights for each expert in the ensemble. It takes the flattened input tensor and passes it through a fully connected layer. The output of this layer is then passed through a softmax activation function to obtain the weights for each expert.

3. **Ensemble of Binary Experts:** To create an ensemble of binary experts, multiple instances of the BinaryExpert model are instantiated. Additionally, a single instance of the GatingNetwork model is instantiated to compute the weights for each expert. During training, the input data is passed through the gating network to obtain the weights for each expert. Then, each expert model is individually fed the input data, and their predictions are weighted by the corresponding weights obtained from the gating network. Finally, the weighted predictions are combined to obtain the ensemble prediction.

## IOC-NN Model

In [None]:
class WhiteningTransform:
    """PCA-whitening transform usable as a step of a ``transforms.Compose`` pipeline.

    Call :meth:`fit` once on a ``(num_samples, ...)`` tensor of training data.
    Afterwards, calling the object centres its input with the training mean and
    projects it onto the principal axes, each scaled to unit variance.
    Until :meth:`fit` has been called the object returns its input unchanged.

    Args:
        eps: Small constant added to the eigenvalues before taking the square
            root, to avoid division by zero.
    """

    def __init__(self, eps=1e-10):
        self.eps = eps
        self.mean = None       # Per-feature training mean, set by fit()
        self.whitening = None  # (features, features) whitening matrix, set by fit()

    def fit(self, data):
        """Estimate the mean and the whitening matrix from ``data``.

        ``data`` has one sample per row; any trailing dimensions are flattened.
        """
        flat = data.reshape(data.size(0), -1)
        self.mean = flat.mean(dim=0)
        centered_data = flat - self.mean

        # Covariance of the centred features and its decomposition
        cov_matrix = centered_data.t() @ centered_data / flat.size(0)
        U, S, _ = torch.linalg.svd(cov_matrix)

        # Store the projection matrix itself (not the projected training data):
        # column j of U is scaled by 1 / sqrt(S[j] + eps)
        self.whitening = U / torch.sqrt(S + self.eps)

    def __call__(self, x):
        """Whiten a single sample or a batch; the input shape is preserved."""
        if self.whitening is None:
            return x
        num_features = self.whitening.size(0)
        # Works for one sample (C, H, W) as well as for a batch (N, C, H, W)
        flat = x.reshape(-1, num_features)
        whitened = (flat - self.mean) @ self.whitening
        return whitened.reshape(x.shape)

# Preprocessing pipeline: PIL image / ndarray -> tensor -> whitening step.
# NOTE(review): the WhiteningTransform instance created here is never fitted in
# the visible part of this notebook, so it returns its input unchanged.
transform = transforms.Compose([transforms.ToTensor(), WhiteningTransform()])

In [None]:
train_data = CIFAR10('data', train=True, download=True, transform=transform)
test_data = CIFAR10('data', train=False, download=True, transform=transform)

# Relabel to a binary task: class indices 2..7 -> 1 ("Animal"), all others -> 0 ("Not an Animal")
train_data.targets = torch.FloatTensor([1 if 2 <= i <= 7 else 0 for i in train_data.targets])
test_data.targets = torch.FloatTensor([1 if 2 <= i <= 7 else 0 for i in test_data.targets])

batch_size = 64
# Splitting train data into train and validation sets
total_samples = len(train_data)
train_ratio = 0.8
validation_ratio = 0.2

train_size = int(train_ratio * total_samples)
validation_size = total_samples - train_size

# Seed the split so the train/validation partition is the same on every run
split_generator = torch.Generator().manual_seed(42)
train_data, val_data = random_split(
    train_data, [train_size, validation_size], generator=split_generator
)

# Only the training loader is shuffled; evaluation order does not affect accuracy
train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_data, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_data, batch_size=batch_size, shuffle=False)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to data/cifar-10-python.tar.gz


100%|██████████| 170498071/170498071 [00:05<00:00, 28526723.48it/s]


Extracting data/cifar-10-python.tar.gz to data
Files already downloaded and verified


In [None]:
class BinaryExpert(nn.Module):
    """Binary classifier for 32x32 RGB images, used as one expert of an ensemble.

    Architecture: Flatten -> Linear(3072, 1000) -> BatchNorm1d -> ELU
    -> 2 x [Linear(.., 800) -> BatchNorm1d -> ELU] -> Linear(800, 1) -> Sigmoid.

    The first linear layer is additionally exposed as
    ``self.first_hidden_layer``. ``forward`` returns a ``(batch, 1)`` tensor
    with the probability of the positive class.
    """

    def __init__(self):
        super().__init__()
        self.first_hidden_layer = nn.Linear(32*32*3, 1000)  # Input layer to first hidden layer
        # NOTE: nn.ELU's first positional parameter is `alpha`, so `inplace`
        # has to be passed by keyword; nn.ELU(True) would set alpha=True and
        # leave inplace=False.
        self.layers = nn.Sequential(
            nn.Flatten(),              # Flatten the input
            self.first_hidden_layer,   # First hidden layer
            nn.BatchNorm1d(1000),
            nn.ELU(inplace=True),

            nn.Linear(1000, 800),      # Second hidden layer
            nn.BatchNorm1d(800),
            nn.ELU(inplace=True),

            nn.Linear(800, 800),       # Third hidden layer
            nn.BatchNorm1d(800),
            nn.ELU(inplace=True),

            nn.Linear(800, 1),         # Output layer (single unit for binary classification)
        )
        self.sigmoid = nn.Sigmoid()    # Maps the output unit to a probability

    def forward(self, x):
        """Return the positive-class probability for each sample in ``x``."""
        x = self.layers(x)
        x = self.sigmoid(x)
        return x

In [None]:
class GatingNetwork(nn.Module):
    """Linear gate that assigns a softmax weight to every expert.

    Args:
        input_channels: Number of channels of an input sample.
        input_height: Height of an input sample.
        input_width: Width of an input sample.
        num_experts: Number of experts to produce weights for.
    """

    def __init__(self, input_channels, input_height, input_width, num_experts):
        super().__init__()
        # Number of features once a sample has been flattened
        flat_features = input_channels * input_height * input_width
        # One score per expert, normalised across the expert dimension
        self.fc = nn.Linear(flat_features, num_experts)
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        """Return a ``(batch, num_experts)`` tensor of weights that sum to 1 per row."""
        batch = x.size(0)
        flattened = x.reshape(batch, -1)
        scores = self.fc(flattened)
        return self.softmax(scores)


In [None]:
class WeightExponentiation(object):
    """Callable (e.g. for ``nn.Module.apply``) that removes negative weights.

    Every negative entry ``w`` of ``module.weight`` is replaced in place by
    ``exp(w - epsilon)``, a small positive number; non-negative entries are
    left untouched. Modules without a ``weight`` (or with ``weight is None``)
    are ignored.

    Args:
        epsilon: Shift subtracted from a negative weight before
            exponentiation; larger values give smaller replacement weights.
        exclude: Optional iterable of module objects that must never be
            modified (compared by identity), e.g. ``[model.first_hidden_layer]``.
    """

    def __init__(self, epsilon=5, exclude=()):
        self.epsilon = epsilon
        self.exclude = tuple(exclude)

    def __call__(self, module):
        # Explicitly excluded modules are left as they are
        if any(module is skipped for skipped in self.exclude):
            return

        # Skip modules without weights, including those whose weight is None
        # (e.g. BatchNorm created with affine=False)
        weight = getattr(module, 'weight', None)
        if weight is None:
            return

        # Legacy name-based exclusion, kept for backward compatibility.
        # NOTE: the repr of a plain nn.Linear does not contain the attribute
        # name it is registered under, so this check does not skip a layer
        # stored as `model.first_hidden_layer`; pass it via `exclude` instead.
        if "first_hidden_layer" in module.__str__():
            return

        # Update in place without recording the operation in autograd
        with torch.no_grad():
            negative = weight < 0
            weight[negative] = torch.exp(weight[negative] - self.epsilon)

In [None]:
# Image geometry fed to the gate, and the size of the ensemble
input_channels, input_height, input_width = 3, 32, 32
num_experts = 4

# Gate: one softmax coefficient per expert
gating_net = GatingNetwork(input_channels, input_height, input_width, num_experts)

# Experts are held in a ModuleList so they are registered as sub-modules
expert_nets = nn.ModuleList([BinaryExpert() for _ in range(num_experts)])

# One flat list for the optimizer: gate parameters first, then each expert in order
parameters = [
    param
    for net in (gating_net, *expert_nets)
    for param in net.parameters()
]


In [None]:
num_epochs = 10
optimizer = optim.Adam(parameters, lr=0.0001, betas=(0.9, 0.9))
criterion = nn.BCELoss()  # takes probabilities in [0, 1] and float targets
# NOTE(review): labels are assumed to be 0/1 targets - confirm against the cell
# that builds train_loader / val_loader / test_loader.


def ensemble_probability(data):
    """Gate-weighted average of the experts' probabilities, one value per sample."""
    gating_coefficients = gating_net(data)                                       # (batch, num_experts)
    expert_probabilities = torch.cat([net(data) for net in expert_nets], dim=1)  # (batch, num_experts)
    mixed = (gating_coefficients * expert_probabilities).sum(dim=1)
    # Rounding can push the weighted average marginally outside [0, 1],
    # which BCELoss rejects.
    return mixed.clamp(0.0, 1.0)


for epoch in range(num_epochs):
    # Training: gate and experts are optimised jointly through the mixture output
    gating_net.train()
    expert_nets.train()
    running_loss = 0.0
    for data, labels in train_loader:
        optimizer.zero_grad()
        loss = criterion(ensemble_probability(data), labels.float())
        loss.backward()
        optimizer.step()

        # Apply weight exponentiation to expert networks
        for expert_net in expert_nets:
            expert_net.apply(WeightExponentiation())

        running_loss += loss.item()
    train_loss = running_loss / len(train_loader)  # mean over the epoch, not just the last batch

    # Validation: BatchNorm must use its running statistics here
    gating_net.eval()
    expert_nets.eval()
    with torch.no_grad():
        val_loss = 0.0
        for data, labels in val_loader:
            val_loss += criterion(ensemble_probability(data), labels.float())
        val_loss /= len(val_loader)

    print(f"Epoch {epoch + 1}/{num_epochs}, Training Loss: {train_loss}, Validation Loss: {val_loss.item()}")

# Test the model on the test set
gating_net.eval()
expert_nets.eval()
with torch.no_grad():
    test_loss = 0.0
    correct = 0
    total = 0

    for data, labels in test_loader:
        probabilities = ensemble_probability(data)
        targets = labels.float()
        test_loss += criterion(probabilities, targets)

        predicted_labels = (probabilities > 0.5).float()  # Threshold the mixture probability
        total += labels.size(0)
        correct += (predicted_labels == targets).sum().item()

    test_loss /= len(test_loader)
    accuracy = correct / total

    print(f"Test Loss: {test_loss.item()}, Test Accuracy: {accuracy}")

Epoch 1/10, Training Loss: 0.40685030817985535, Validation Loss: 0.4584801197052002
Epoch 2/10, Training Loss: 0.4153115451335907, Validation Loss: 0.4627629220485687
Epoch 3/10, Training Loss: 0.44500815868377686, Validation Loss: 0.44609904289245605
Epoch 4/10, Training Loss: 0.5660567879676819, Validation Loss: 0.4430065155029297
Epoch 5/10, Training Loss: 0.45420771837234497, Validation Loss: 0.4399407207965851
Epoch 6/10, Training Loss: 0.4201667010784149, Validation Loss: 0.4388028681278229
Epoch 7/10, Training Loss: 0.3349223732948303, Validation Loss: 0.4423254728317261
Epoch 8/10, Training Loss: 0.3934375047683716, Validation Loss: 0.43683770298957825
Epoch 9/10, Training Loss: 0.4281715154647827, Validation Loss: 0.4289214611053467
Epoch 10/10, Training Loss: 0.4822363257408142, Validation Loss: 0.4294072985649109
Test Loss: 0.4208977520465851, Test Accuracy: 0.8183


# Exp.4 - BoostedEnsemble

1. **Initialization**:
  - The class `ExpertEnsemble` is initialized with parameters such as `base_learner` representing the base model architecture and `num_experts` indicating the number of experts in the ensemble.
  - `self.experts` is created as a list containing `num_experts` experts, each instantiated using the provided base learner function.
  - `self.criterion` is set to the cross-entropy loss function for training.

2. **Training Method**:
  - `train_experts` method trains each expert individually using bootstrapped samples and updated weights.
  - Sample weights for the entire training dataset are initialized, and for each expert:
    - The expert is trained for a fixed number of epochs using bootstrapped samples.
    - Sample weights are updated based on the expert's performance, and a new dataloader with bootstrapped samples is created for the next expert.

3. **Training Single Expert Method**:
  - `train_single_expert` method trains a single expert for a fixed number of epochs using the Adam optimizer.
  - Loss is calculated using the cross-entropy loss function, and model parameters are updated.
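  - Weight exponentiation is applied after each update: negative weights are replaced by `exp(w - epsilon)`, which keeps the constrained layers' weights non-negative as the IOC-NN convexity constraint requires.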

4. **Sample Weight Update Method**:
  - `update_sample_weights` method updates sample weights based on misclassifications of a weak learner (expert).
  - The weight of every misclassified sample is multiplied by `exp(learning_rate)`; the weights are then normalised so that they sum to 1.

## IOC-NN Model

In [None]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [None]:
# Per-image pipeline: convert to a tensor, then pass it through WhiteningTransform
transform = transforms.Compose([transforms.ToTensor(), WhiteningTransform()])

batch_size = 64

# CIFAR-10 train / test splits (downloaded into ./data when missing), both using the pipeline above
train_data = CIFAR10(root='data', train=True, transform=transform, download=True)
test_data = CIFAR10(root='data', train=False, transform=transform, download=True)

# Mini-batch iterators; both are reshuffled on every pass
train_loader = DataLoader(dataset=train_data, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(dataset=test_data, batch_size=batch_size, shuffle=True)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to data/cifar-10-python.tar.gz


100%|██████████| 170498071/170498071 [00:03<00:00, 43428993.45it/s]


Extracting data/cifar-10-python.tar.gz to data
Files already downloaded and verified


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader

class ExpertEnsemble(nn.Module):
    """A set of experts trained one after another on re-weighted bootstrap samples.

    Args:
        base_learner: zero-argument callable returning a fresh ``nn.Module``.
        num_experts: number of experts to build.
    """

    def __init__(self, base_learner, num_experts: int = 3):
        super().__init__()
        self.num_experts = num_experts
        # Initialize a list of experts with the provided base learner
        self.experts = nn.ModuleList([base_learner().to(device) for _ in range(self.num_experts)])
        self.criterion = nn.CrossEntropyLoss()

    def forward(self, x):
        """Return a list with each expert's output for ``x``."""
        return [expert(x) for expert in self.experts]

    def train_experts(self, train_loader):
        """Train the experts in order; each later expert sees a weighted bootstrap.

        Sample weights are indexed by position in ``train_loader.dataset`` and
        every bootstrap is drawn from that original dataset, so the weights
        keep referring to the same samples throughout.
        """
        dataset = train_loader.dataset
        weights = torch.ones(len(dataset))
        current_loader = train_loader
        for i in range(self.num_experts):
            print("Training Expert:", i)
            self.train_single_expert(self.experts[i], current_loader)
            # Update sample weights based on expert's performance
            weights = self.update_sample_weights(self.experts[i], dataset, weights)
            # The bootstrap is only needed if another expert follows
            if i + 1 < self.num_experts:
                current_loader = self.bootstrap_dataloader(train_loader, weights)

    def train_single_expert(self, model, train_loader):
        """Train ``model`` for a fixed two epochs with Adam, printing loss and accuracy."""
        model.train()
        optimizer = optim.Adam(model.parameters(), lr=1e-4, betas=[0.9, 0.9])
        max_epochs = 2

        for epoch in range(max_epochs):
            print(f'Epoch {epoch + 1}')
            current_loss = []
            current_acc = []

            # Training loop
            for inputs, targets in train_loader:
                inputs, targets = inputs.to(device), targets.to(device)
                optimizer.zero_grad()
                outputs = model(inputs)
                loss = self.criterion(outputs, targets)
                loss.backward()
                optimizer.step()

                # Apply weight exponentiation after each update
                model.apply(WeightExponentiation())

                current_loss.append(loss.item())
                pred = torch.max(outputs, 1).indices
                acc = (targets == pred).sum().item()
                current_acc.append(acc / targets.shape[0])

            # Performance evaluation
            print('Loss: %.3f' % (sum(current_loss) / len(current_loss)))
            print('Training Accuracy: %.3f' % (sum(current_acc) / len(current_acc)))

    def update_sample_weights(self, weak_learner, dataset, sample_weights, learning_rate=0.001):
        """Up-weight the samples ``weak_learner`` misclassifies, then normalise.

        Args:
            weak_learner: trained expert used to label the samples.
            dataset: dataset whose i-th item corresponds to ``sample_weights[i]``.
            sample_weights: 1-D tensor of current weights; updated in place.
            learning_rate: misclassified samples are scaled by ``exp(learning_rate)``.

        Returns:
            ``sample_weights`` after the update, summing to 1.
        """
        was_training = weak_learner.training
        weak_learner.eval()
        flags = []
        with torch.no_grad():
            # shuffle=False keeps prediction i aligned with sample_weights[i];
            # moderate batches avoid pushing the whole dataset through at once.
            for inputs, labels in DataLoader(dataset, batch_size=1024, shuffle=False):
                inputs, labels = inputs.to(device), labels.to(device)
                predictions = weak_learner(inputs).argmax(dim=1)
                flags.append((predictions != labels).float())
        weak_learner.train(was_training)

        if not flags:
            # empty dataset: nothing to re-weight
            return sample_weights

        misclassifications = torch.cat(flags).to(sample_weights.device)
        sample_weights *= torch.exp(learning_rate * misclassifications)
        # Normalize weights
        sample_weights /= sample_weights.sum()
        return sample_weights

    def bootstrap_dataloader(self, dataloader, sample_weights):
        """Return a loader over a weighted bootstrap of ``dataloader.dataset``.

        Indices are drawn with replacement in proportion to ``sample_weights``.
        The new loader keeps the original batch size and shuffles, so that
        batch-normalised experts never receive single-sample batches.
        """
        num_samples = len(dataloader.dataset)
        bootstrap_indices = torch.multinomial(sample_weights, num_samples, replacement=True).tolist()
        # Subset indexes lazily instead of materialising every sample in a list
        resampled = torch.utils.data.Subset(dataloader.dataset, bootstrap_indices)
        return DataLoader(resampled, batch_size=dataloader.batch_size or 1, shuffle=True)

In [None]:
class BoostingEnsemble:
    """Train boosted experts, then a network on their concatenated outputs, then test.

    Constructing an instance runs the whole pipeline: expert training,
    gating-network training and evaluation.

    Args:
        num_experts: number of experts in the ensemble.
        base_learner: zero-argument callable building one expert.
        gating_network: callable taking ``num_experts`` and returning the
            network that consumes the concatenated expert outputs.
        train_loader: loader used for both expert and gating-network training.
        test_loader: loader used by :meth:`testing`; when omitted, the
            notebook-level ``test_loader`` is used.
    """

    def __init__(self, num_experts, base_learner, gating_network, train_loader, test_loader=None):
        self.num_experts = num_experts
        self.base_learner = base_learner
        self.gating_network = gating_network(num_experts).to(device)
        self.train_loader = train_loader
        self.test_loader = test_loader

        self.expert_training()
        self.gn_training()
        self.testing()

    def expert_training(self):
        """Train the individual experts and keep them in ``self.trained_experts``."""
        model = ExpertEnsemble(self.base_learner, self.num_experts)
        model.train_experts(self.train_loader)
        print("Expert Training Completed")
        self.trained_experts = model.experts

        # Leave the experts' parameters flagged as trainable
        for expert in self.trained_experts:
            for param in expert.parameters():
                param.requires_grad = True

    def _expert_outputs(self, inputs):
        """Concatenate every expert's output for ``inputs`` along dim 1.

        The experts are not optimised here, so no autograd graph is built.
        """
        with torch.no_grad():
            return torch.cat([expert(inputs) for expert in self.trained_experts], dim=1)

    def gn_training(self):
        """Train the gating network for five epochs on the experts' outputs."""
        optimizer = torch.optim.Adam(self.gating_network.parameters(), lr=1e-4, betas=[0.9, 0.9])
        max_epochs = 5
        loss_fun = nn.CrossEntropyLoss()

        # Experts act as fixed feature extractors: eval mode gives the gating
        # network the same inputs during training and testing.
        self.trained_experts.eval()
        self.gating_network.train()

        for epoch in range(max_epochs):
            print(f'Epoch {epoch + 1}')
            current_loss = []
            current_acc = []

            # Training loop
            for inputs, targets in self.train_loader:
                inputs, targets = inputs.to(device), targets.to(device)
                optimizer.zero_grad()

                outputs = self.gating_network(self._expert_outputs(inputs))
                loss = loss_fun(outputs, targets)

                loss.backward()
                optimizer.step()

                # Apply weight exponentiation after each update
                self.gating_network.apply(WeightExponentiation())
                current_loss.append(loss.item())

                pred = torch.max(outputs, 1).indices
                acc = (targets == pred).sum().item()
                current_acc.append(acc / targets.shape[0])

            # Performance Evaluation
            print('Loss: %.3f' % (sum(current_loss) / len(current_loss)))
            print('Training Accuracy: %.3f' % (sum(current_acc) / len(current_acc)))

        print("Training of Gating Network Completed")

    def testing(self):
        """Print the mean per-batch accuracy of the full ensemble on the test loader."""
        loader = self.test_loader if self.test_loader is not None else test_loader

        # BatchNorm must use its running statistics during evaluation
        self.trained_experts.eval()
        self.gating_network.eval()

        test_acc = []
        with torch.no_grad():
            for inputs, targets in loader:
                inputs, targets = inputs.to(device), targets.to(device)

                test_outputs = self.gating_network(self._expert_outputs(inputs))

                pred = torch.max(test_outputs, 1).indices
                acc = (targets == pred).sum().item()
                test_acc.append(acc / targets.shape[0])

        print('Test Accuracy: %.3f' % (sum(test_acc) / len(test_acc)))

In [None]:
import torch
import torch.nn as nn

class NN_Gating(nn.Module):
    def __init__(self, n_experts: int = 3):
        """
        Initialize the Gating Network for the IOC-NN ensemble.

        Args:
            n_experts (int): Number of experts in the ensemble. Each expert is
                expected to contribute 10 values per sample.
        """
        super().__init__()
        self.n_experts = n_experts

        # Define the first hidden layer with an input size calculated based on the number of experts
        self.first_hidden_layer = nn.Linear(self.n_experts * 10, 1000)

        # Define the sequential layers for the gating network
        self.layers = nn.Sequential(
            nn.Flatten(),
            self.first_hidden_layer,
            nn.BatchNorm1d(1000),
            # nn.ELU's first positional argument is `alpha`, not `inplace`
            nn.ELU(alpha=1.0),
            nn.Linear(1000, 800),
            nn.BatchNorm1d(800),
            nn.ELU(alpha=1.0),
            # Output layer: raw class scores. No Softmax here, because
            # nn.CrossEntropyLoss applies log-softmax itself; a Softmax in the
            # model would be applied twice and flatten the gradients.
            nn.Linear(800, 10),
        )

    def forward(self, x):
        """
        Forward pass through the gating network.

        Args:
            x (torch.Tensor): Input tensor representing concatenated outputs from the ensemble.

        Returns:
            torch.Tensor: ``(batch, 10)`` tensor of unnormalised class scores
            (logits); apply ``softmax(dim=1)`` to obtain probabilities.
        """
        # Reshape the input tensor to have a suitable shape for the network
        x = x.reshape(x.size(0), -1)

        # Pass the input through the defined layers
        x = self.layers(x)
        return x

In [None]:
# The class defined in this notebook is `BoostingEnsemble`; constructing it runs
# expert training, gating-network training and testing.
boosted_ensemble = BoostingEnsemble(3, IOC_NN, NN_Gating, train_loader)

Expert: 0
Epoch 1
Loss: 2.258
Training Accuracy: 0.172
Epoch 2
Loss: 2.227
Training Accuracy: 0.215
Expert: 1
Epoch 1
Loss: 2.256
Training Accuracy: 0.174
Epoch 2
Loss: 2.226
Training Accuracy: 0.217
Expert: 2
Epoch 1
Loss: 2.255
Training Accuracy: 0.172
Epoch 2
Loss: 2.226
Training Accuracy: 0.216
Expert Training Completed
Epoch 1
Loss : 2.215
Training_Acc  : 0.231
Epoch 2
Loss : 2.204
Training_Acc  : 0.242
Epoch 3
Loss : 2.200
Training_Acc  : 0.247
Epoch 4
Loss : 2.198
Training_Acc  : 0.247
Epoch 5
Loss : 2.195
Training_Acc  : 0.253
Training of Gating Network Completed
Test_Acc  : 0.261


<__main__.Boosting_Ensemble at 0x7cb59ec45030>

# Exp.5 - MNIST

## NN Model

### Data Preparation

In [None]:
from torchvision.datasets import MNIST

In [None]:
# Load the MNIST train and test sets as tensors
transform = transforms.Compose([transforms.ToTensor()])
train_data = MNIST(root='data', train=True, transform=transform, download=True)
test_data = MNIST(root='data', train=False, transform=transform, download=True)

batch_size = 64

# Split the official training set 80 / 20 into training and validation parts
train_ratio = 0.8
validation_ratio = 0.2
total_samples = len(train_data)

train_size = int(train_ratio * total_samples)
validation_size = total_samples - train_size  # remainder, so the two sizes always add up

train_data, val_data = random_split(train_data, [train_size, validation_size])

# One shuffled mini-batch loader per split
train_loader, val_loader, test_loader = (
    DataLoader(split, batch_size=batch_size, shuffle=True)
    for split in (train_data, val_data, test_data)
)

### Model Construction

In [None]:
class NN(nn.Module):
  """Baseline MLP for 1x28x28 images: 784-800-800-800-10 with BatchNorm and ReLU.

  The network returns raw class scores (logits). nn.CrossEntropyLoss applies
  log-softmax itself, so a Softmax inside the model would be applied twice.
  Use ``softmax(dim=1)`` on the output when probabilities are needed;
  ``argmax`` predictions are the same either way.
  """
  def __init__(self):
    super().__init__()
    self.layers = nn.Sequential(
        nn.Flatten(),
        nn.Linear(28*28*1,800), # Hidden Layer 1
        nn.BatchNorm1d(800),
        nn.ReLU(True),

        nn.Linear(800,800), # Hidden Layer 2
        nn.BatchNorm1d(800),
        nn.ReLU(True),

        nn.Linear(800,800), # Hidden Layer 3
        nn.BatchNorm1d(800),
        nn.ReLU(True),

        nn.Linear(800,10), # Output Layer (logits)
    )
  def forward(self,x):
    """Return a ``(batch, 10)`` tensor of unnormalised class scores."""
    return self.layers(x)

### Model Training

In [None]:
mlp = NN()
loss_fun = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(mlp.parameters(), lr=1e-4)
epoch = 0
prev_val_acc = 0.0
tol_epochs = 0      # consecutive epochs without a meaningful validation gain
max_epochs = 50
tolerance = 1e-4    # smallest validation-accuracy gain that counts as progress

while epoch < max_epochs and tol_epochs < 2:
    epoch += 1
    print(f'Epoch {epoch}')
    current_loss = []
    current_acc = []
    val_acc = []

    # Training
    mlp.train()
    for inputs, targets in train_loader:
        optimizer.zero_grad()
        outputs = mlp(inputs)

        loss = loss_fun(outputs, targets)
        loss.backward()
        optimizer.step()
        current_loss.append(loss.item())

        pred = torch.max(outputs, 1).indices
        acc = (targets == pred).sum().item()
        current_acc.append(acc / targets.shape[0])

    # Validation: eval mode so BatchNorm uses (and does not update) its running
    # statistics, and no autograd graph is built
    mlp.eval()
    with torch.no_grad():
        for inputs, targets in val_loader:
            val_outputs = mlp(inputs)
            pred = torch.max(val_outputs, 1).indices
            acc = (targets == pred).sum().item()
            val_acc.append(acc / targets.shape[0])

    # Performance Evaluation
    mean_val_acc = sum(val_acc) / len(val_acc)

    print('Loss : %.3f' %(sum(current_loss) /len(current_loss)))
    print('Training_Acc  : %.3f'%(sum(current_acc)/len(current_acc)))
    print('Validation_Acc  : %.3f'% mean_val_acc)

    # Early Stopping Criteria
    update = mean_val_acc - prev_val_acc
    print('update: %.4f' % update)
    print("--------------------------------------------------------------------------------------------------")

    # A drop in validation accuracy must count against patience as well;
    # only a gain of at least `tolerance` resets the counter.
    if update < tolerance:
        tol_epochs += 1
    else:
        tol_epochs = 0

    prev_val_acc = mean_val_acc

Epoch 1
Loss : 1.564
Training_Acc  : 0.924
Validation_Acc  : 0.959
update: 0.9594
--------------------------------------------------------------------------------------------------
Epoch 2
Loss : 1.497
Training_Acc  : 0.971
Validation_Acc  : 0.968
update: 0.0081
--------------------------------------------------------------------------------------------------
Epoch 3
Loss : 1.486
Training_Acc  : 0.980
Validation_Acc  : 0.974
update: 0.0061
--------------------------------------------------------------------------------------------------
Epoch 4
Loss : 1.482
Training_Acc  : 0.983
Validation_Acc  : 0.973
update: -0.0006
--------------------------------------------------------------------------------------------------
Epoch 5
Loss : 1.477
Training_Acc  : 0.987
Validation_Acc  : 0.976
update: 0.0025
--------------------------------------------------------------------------------------------------
Epoch 6
Loss : 1.475
Training_Acc  : 0.989
Validation_Acc  : 0.973
update: -0.0030
-----------

### Model Evaluation/Testing

In [None]:
test_acc = []
# Evaluate in eval mode (BatchNorm uses its running statistics and is not
# updated by test batches) and without building an autograd graph.
mlp.eval()
with torch.no_grad():
    for inputs, targets in test_loader:
        test_outputs = mlp(inputs)
        pred = torch.max(test_outputs, 1).indices
        acc = (targets == pred).sum().item()
        test_acc.append(acc / targets.shape[0])
print('Test_Acc  : %.3f'%(sum(test_acc)/len(test_acc)))

Test_Acc  : 0.981


## IOC - NN Model

### Data Preparation

In [None]:
# Class to perform whitening Transform
class WhiteningTransform:
    """PCA-whitening transform that can be placed in a ``transforms.Compose``.

    Until :meth:`fit` has been called the transform returns its input unchanged.

    Args:
        eps: added to the covariance eigenvalues before taking the square
            root, to avoid division by zero.
    """

    def __init__(self, eps=1e-10):
        self.eps = eps
        self.mean = None       # per-feature mean, set by fit()
        self.whitening = None  # (features, features) whitening matrix, set by fit()

    def fit(self, data):
        """Estimate the mean and the whitening matrix.

        Args:
            data: 2-D tensor of shape ``(num_samples, num_features)``.

        Returns:
            ``self``, so the call can be chained.
        """
        mean = data.mean(dim=0)
        centered_data = data - mean

        cov_matrix = torch.mm(centered_data.t(), centered_data) / data.size(0)
        U, S, V = torch.svd(cov_matrix)
        self.mean = mean
        # Store the projection, not the projected data: column j of U is
        # scaled by 1/sqrt(S_j), so (x - mean) @ whitening has unit covariance.
        self.whitening = U / torch.sqrt(S + self.eps)
        return self

    def __call__(self, x):
        """Whiten ``x`` and return it in its original shape.

        ``x`` may be a single image or a batch; it is flattened into rows of
        ``num_features`` values, so its element count must be a multiple of
        the feature count seen by :meth:`fit`.
        """
        if self.whitening is None:
            return x
        flat = x.reshape(-1, self.whitening.size(0))
        whitened = (flat - self.mean) @ self.whitening
        return whitened.reshape(x.shape)

# Transform pipeline: tensor conversion followed by the whitening transform.
# The WhiteningTransform instance is not fitted here, so it passes images
# through unchanged until fit() is called on it.
transform = transforms.Compose([transforms.ToTensor(), WhiteningTransform()])


In [None]:

# MNIST train / test sets, each image passed through the `transform` pipeline
train_data = MNIST(root='data', train=True, transform=transform, download=True)
test_data = MNIST(root='data', train=False, transform=transform, download=True)

batch_size = 64

# Hold out 20% of the official training set for validation
train_ratio = 0.8
validation_ratio = 0.2
total_samples = len(train_data)

train_size = int(train_ratio * total_samples)
validation_size = total_samples - train_size  # remainder, so the two sizes always add up

train_data, val_data = random_split(train_data, [train_size, validation_size])

# One shuffled mini-batch loader per split
train_loader, val_loader, test_loader = (
    DataLoader(split, batch_size=batch_size, shuffle=True)
    for split in (train_data, val_data, test_data)
)


Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to data/MNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 9912422/9912422 [00:00<00:00, 148088546.15it/s]

Extracting data/MNIST/raw/train-images-idx3-ubyte.gz to data/MNIST/raw






Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to data/MNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 28881/28881 [00:00<00:00, 45318254.33it/s]


Extracting data/MNIST/raw/train-labels-idx1-ubyte.gz to data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to data/MNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 1648877/1648877 [00:00<00:00, 35157445.39it/s]

Extracting data/MNIST/raw/t10k-images-idx3-ubyte.gz to data/MNIST/raw






Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to data/MNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 4542/4542 [00:00<00:00, 14185054.93it/s]


Extracting data/MNIST/raw/t10k-labels-idx1-ubyte.gz to data/MNIST/raw



### Model Construction

In [None]:
class IOC_NN(nn.Module):
  """IOC-NN variant of the MNIST MLP: 784-1000-800-800-10 with BatchNorm and ELU.

  The first linear layer is exposed as ``self.first_hidden_layer`` because the
  weight-exponentiation step refers to that name.

  The network returns raw class scores (logits). nn.CrossEntropyLoss applies
  log-softmax itself, so a Softmax inside the model would be applied twice.
  ``argmax`` predictions are the same either way.
  """
  def __init__(self):
    super().__init__()
    # Increasing the no of nodes of the first hidden layer that allows negative weights
    self.first_hidden_layer = nn.Linear(28*28*1,1000) # Hidden Layer 1
    self.layers = nn.Sequential(
        nn.Flatten(),
        self.first_hidden_layer,
        nn.BatchNorm1d(1000),
        # nn.ELU's first positional argument is `alpha`, not `inplace`
        nn.ELU(alpha=1.0),

        nn.Linear(1000,800), # Hidden Layer 2
        nn.BatchNorm1d(800),
        nn.ELU(alpha=1.0),

        nn.Linear(800,800), # Hidden Layer 3
        nn.BatchNorm1d(800),
        nn.ELU(alpha=1.0),

        nn.Linear(800,10), # Output Layer (logits)
    )
  def forward(self,x):
    """Return a ``(batch, 10)`` tensor of unnormalised class scores."""
    return self.layers(x)

In [None]:
# Class for Weight Exponentiation
class WeightExponentiation(object):
    """Replace negative weights by ``exp(w - epsilon)``; meant for ``model.apply(...)``.

    Every visited module that has a ``weight`` tensor gets its negative entries
    overwritten in place. The layer that a parent module stores under
    ``exempt_name`` (``first_hidden_layer`` by default) is left unconstrained.

    Args:
        epsilon: offset subtracted before exponentiation; larger values map
            negative weights closer to zero.
        exempt_name: attribute name under which a parent module stores the
            layer that must keep its negative weights.
    """

    def __init__(self, epsilon = 5, exempt_name = "first_hidden_layer"):
        self.epsilon = epsilon # epsilon for constraining exponentiation of weights
        self.exempt_name = exempt_name
        # module -> (mask of negative entries, their values before overwriting)
        self._undo = {}

    def __call__(self, module):
        # A layer's own repr never contains the attribute name its parent uses
        # for it, so the exemption cannot be decided while visiting the layer.
        # nn.Module.apply visits children before their parent; the parent visit
        # therefore puts back the values recorded when the layer was visited.
        exempt = getattr(module, "_modules", {}).get(self.exempt_name)
        if exempt is not None:
            saved = self._undo.pop(exempt, None)
            if saved is not None:
                negative, values = saved
                exempt.weight.data[negative] = values

        weight = getattr(module, "weight", None)
        if weight is None:
            # containers, activations, BatchNorm with affine=False, ...
            return

        w = weight.data
        negative = w < 0
        # A shared layer may be visited twice in one traversal; on the second
        # visit nothing is negative and the first record must be kept.
        if negative.any():
            self._undo[module] = (negative, w[negative].clone())
            # Perform exponentiation operation on negative weights (in place)
            w[negative] = torch.exp(w[negative] - self.epsilon)


### Model Training

In [None]:
ioc_nn = IOC_NN()
loss_fun = nn.CrossEntropyLoss()
# betas=(0.9, 0.9): second-moment decay lowered from Adam's default of 0.999
optimizer = torch.optim.Adam(ioc_nn.parameters(), lr=1e-4,betas=(0.9,0.9))
epoch = 0
prev_val_acc = 0.0
tol_epochs = 0      # consecutive epochs without a meaningful validation gain
max_epochs = 50
tolerance = 1e-4    # smallest validation-accuracy gain that counts as progress


while epoch < max_epochs and tol_epochs < 2:
    epoch += 1
    print(f'Epoch {epoch}')
    current_loss = []
    current_acc = []
    val_acc = []

    # Training
    ioc_nn.train()
    for inputs, targets in train_loader:
        optimizer.zero_grad()
        outputs = ioc_nn(inputs)

        loss = loss_fun(outputs, targets)
        loss.backward()
        optimizer.step()

        # Re-impose the weight constraint after every optimizer step
        ioc_nn.apply(WeightExponentiation())

        current_loss.append(loss.item())

        pred = torch.max(outputs, 1).indices
        acc = (targets == pred).sum().item()
        current_acc.append(acc / targets.shape[0])

    # Validation: eval mode so BatchNorm uses (and does not update) its running
    # statistics, and no autograd graph is built
    ioc_nn.eval()
    with torch.no_grad():
        for inputs, targets in val_loader:
            val_outputs = ioc_nn(inputs)
            pred = torch.max(val_outputs, 1).indices
            acc = (targets == pred).sum().item()
            val_acc.append(acc / targets.shape[0])

    # Performance Evaluation
    mean_val_acc = sum(val_acc) / len(val_acc)

    print('Loss : %.3f' %(sum(current_loss) /len(current_loss)))
    print('Training_Acc  : %.3f'%(sum(current_acc)/len(current_acc)))
    print('Validation_Acc  : %.3f'% mean_val_acc)

    # Early Stopping Criteria
    update = mean_val_acc - prev_val_acc
    print('update: %.4f' % update)
    print("-------------------------------------------------------------------------------------------")

    # A drop in validation accuracy must count against patience as well;
    # only a gain of at least `tolerance` resets the counter.
    if update < tolerance:
        tol_epochs += 1
    else:
        tol_epochs = 0

    prev_val_acc = mean_val_acc

Epoch 1
Loss : 2.188
Training_Acc  : 0.242
Validation_Acc  : 0.327
update: 0.3268
-------------------------------------------------------------------------------------------
Epoch 2
Loss : 2.025
Training_Acc  : 0.448
Validation_Acc  : 0.539
update: 0.2126
-------------------------------------------------------------------------------------------
Epoch 3
Loss : 1.877
Training_Acc  : 0.609
Validation_Acc  : 0.662
update: 0.1226
-------------------------------------------------------------------------------------------
Epoch 4
Loss : 1.790
Training_Acc  : 0.696
Validation_Acc  : 0.730
update: 0.0684
-------------------------------------------------------------------------------------------
Epoch 5
Loss : 1.740
Training_Acc  : 0.745
Validation_Acc  : 0.766
update: 0.0359
-------------------------------------------------------------------------------------------
Epoch 6
Loss : 1.708
Training_Acc  : 0.777
Validation_Acc  : 0.792
update: 0.0256
------------------------------------------------

### Model Evaluation/Testing

In [None]:
test_acc = []
# Evaluate in eval mode (BatchNorm uses its running statistics and is not
# updated by test batches) and without building an autograd graph.
ioc_nn.eval()
with torch.no_grad():
    for inputs, targets in test_loader:
        test_outputs = ioc_nn(inputs)
        pred = torch.max(test_outputs, 1).indices
        acc = (targets == pred).sum().item()
        test_acc.append(acc / targets.shape[0])
print('Test_Acc  : %.3f'%(sum(test_acc)/len(test_acc)))

Test_Acc  : 0.913
