This notebook demonstrates experiments on the baseline AllConv network and its Input Output Convex (IOC) counterpart. Here's a summary of the key components and experiments covered:

Exp 1: **AllConv and IOC-AllConv FOR CIFAR-10**:
   - Implementing baseline architecture of AllConv and constructing an IOC-AllConv architecture enforcing convexity constraints similar to that of IOC-NN.

Exp 2: **Training on Duplicate Free data (ciFAIR10)**:
   - We use the same implementation of AllConv and IOC-AllConv on ciFAIR10 data set.

Exp 3: **Partially randomized labeling**:
   - We modify the original CIFAR10 dataset by randomly introducing noisy labels based on a specified noise percentage and report the performance of the models on the noise induced data sets.

Exp 4: **BoostedEnsemble**:

- **Initialization:** ExpertEnsemble setup with base model and expert count.
- **Training:** Experts trained separately with bootstrapped data, updating weights based on performance.   

In [None]:
import torch
from torch.utils.data import random_split,DataLoader
import torchvision.transforms as transforms
from torchvision.datasets import CIFAR10
from torch import nn

# Exp1. AllConv and IOC-AllConv FOR CIFAR-10

## AllConv

In [None]:
# Definition of the AllCNN model class
class AllCNN(nn.Module):
    """Baseline All-Convolutional network: 3 input channels, 10 output classes.

    Nine unpadded convolutions, each followed by a ReLU, are followed by
    global average pooling and a softmax over the class dimension.

    NOTE(review): the training cells in this notebook pass this softmax
    output to ``nn.CrossEntropyLoss``, which applies log-softmax itself.
    Confirm that the double normalisation is intended.
    """

    def __init__(self):
        super().__init__()

        # Convolutional stack; arguments are (in_channels, out_channels, kernel).
        # The order of the modules is kept so state_dict keys stay the same.
        self.layers = nn.Sequential(
            nn.Conv2d(3, 96, 3),
            nn.ReLU(),
            nn.Conv2d(96, 96, 3),
            nn.ReLU(),
            # Stride-2 convolution downsamples the feature map
            nn.Conv2d(96, 96, 3, stride=2),
            nn.ReLU(),
            nn.Conv2d(96, 192, 3),
            nn.ReLU(),
            nn.Conv2d(192, 192, 3),
            nn.ReLU(),
            # Stride-2 convolution downsamples the feature map
            nn.Conv2d(192, 192, 3, stride=2),
            nn.ReLU(),
            nn.Conv2d(192, 192, 3),
            nn.ReLU(),
            # 1x1 convolutions mix channels only
            nn.Conv2d(192, 192, 1),
            nn.ReLU(),
            # Final 1x1 convolution produces one channel per class
            nn.Conv2d(192, 10, 1),
            nn.ReLU()
        )

        # Global average pooling collapses each class map to a single value
        self.pooling = nn.AdaptiveAvgPool2d((1, 1))

        # Softmax over the class dimension
        self.softmax = nn.Softmax(1)

    def forward(self, x):
        """Return class probabilities of shape (batch, 10) for an image batch."""
        x = self.layers(x)
        x = self.pooling(x)

        # Drop only the trailing 1x1 spatial dimensions. A bare squeeze()
        # would also remove the batch dimension for a batch of one sample,
        # after which Softmax(1) fails on the resulting 1-D tensor.
        x = x.flatten(1)

        return self.softmax(x)

### Data Preparation

In [None]:
# Images are only converted to tensors; no augmentation is applied.
transform = transforms.Compose([transforms.ToTensor()])

# CIFAR-10 train and test splits, stored under the 'data' directory.
train_data = CIFAR10(root='data', train=True, transform=transform, download=True)
test_data = CIFAR10(root='data', train=False, transform=transform, download=True)

batch_size = 256

# Hold out 20% of the training images as a validation set.
train_ratio, validation_ratio = 0.8, 0.2
total_samples = len(train_data)
train_size = int(total_samples * train_ratio)
validation_size = total_samples - train_size

train_data, val_data = random_split(train_data, lengths=[train_size, validation_size])

# One shuffled loader per split.
train_loader = DataLoader(dataset=train_data, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(dataset=val_data, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(dataset=test_data, batch_size=batch_size, shuffle=True)

Files already downloaded and verified
Files already downloaded and verified


### Model Training

In [None]:
# Initialize model, loss function, and optimizer
mlp = AllCNN()
loss_fun = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(mlp.parameters(), lr=1e-4)

# Epoch counter and early-stopping state
epoch = 0
prev_val_acc = 0.0
tol_epochs = 0
max_epochs = 2
# Early-stopping threshold on the validation-accuracy gain. It is defined in
# this cell so the loop runs on a fresh kernel without relying on a later
# cell; the value matches the other training cells in this notebook.
tolerance = 1e-4

# Train until max_epochs is reached, or until two consecutive epochs each
# gain a positive amount smaller than `tolerance`.
while epoch < max_epochs and tol_epochs < 2:
    epoch += 1
    print(f'Epoch {epoch}')
    current_loss = []
    current_acc = []
    val_acc = []

    # Training pass
    mlp.train()
    for inputs, targets in train_loader:
        optimizer.zero_grad()
        outputs = mlp(inputs)

        # Loss, backpropagation and parameter update
        loss = loss_fun(outputs, targets)
        loss.backward()
        optimizer.step()
        current_loss.append(loss.item())

        # Fraction of correctly classified samples in this batch
        pred = torch.max(outputs, 1).indices
        acc = (targets == pred).sum().item()
        current_acc.append(acc / targets.shape[0])

    # Validation pass: nothing is updated, so gradient tracking is disabled
    mlp.eval()
    with torch.no_grad():
        for inputs, targets in val_loader:
            val_outputs = mlp(inputs)
            pred = torch.max(val_outputs, 1).indices
            acc = (targets == pred).sum().item()
            val_acc.append(acc / targets.shape[0])

    # Epoch summary (means over batches)
    epoch_val_acc = sum(val_acc) / len(val_acc)
    print('Loss : %.3f' % (sum(current_loss) / len(current_loss)))
    print('Training_Acc  : %.3f' % (sum(current_acc) / len(current_acc)))
    print('Validation_Acc  : %.3f' % epoch_val_acc)

    # Early stopping criterion
    update = epoch_val_acc - prev_val_acc
    print('update: %.4f' % update)
    print("--------------------------------------------------------------------------------------------------")

    if update > 0 and update < tolerance:
        tol_epochs += 1
    else:
        tol_epochs = 0

    prev_val_acc = epoch_val_acc

Epoch 1
Loss : 2.269
Training_Acc  : 0.170
Validation_Acc  : 0.216
update: 0.2158
--------------------------------------------------------------------------------------------------
Epoch 2
Loss : 2.212
Training_Acc  : 0.237
Validation_Acc  : 0.258
update: 0.0418
--------------------------------------------------------------------------------------------------


### Model Evaluation/Testing

In [None]:
# Evaluate the trained model on the test set.
mlp.eval()
test_acc = []            # per-batch accuracies, kept for inspection
n_correct = 0
n_seen = 0
# Nothing is optimised here, so gradient tracking is switched off.
with torch.no_grad():
    for inputs, targets in test_loader:
        test_outputs = mlp(inputs)
        pred = torch.max(test_outputs, 1).indices
        acc = (targets == pred).sum().item()
        test_acc.append(acc / targets.shape[0])
        n_correct += acc
        n_seen += targets.shape[0]
# Report correct / total so a smaller final batch is not over-weighted,
# which a plain mean of per-batch accuracies would do.
print('Test_Acc  : %.3f' % (n_correct / n_seen))

Test_Acc  : 0.257


## IOC - AllConv

### Data Preparation

In [None]:
# Class to perform whitening Transform
class WhiteningTransform:
    """PCA-whitening transform usable inside a transform pipeline.

    Until ``fit`` has been called the transform returns its input unchanged.
    After fitting, inputs are centred with the fitted mean and projected with
    a (features x features) whitening matrix.

    Parameters:
        eps (float): Value added to the singular values before the square
            root, to avoid division by zero.
    """

    def __init__(self, eps=1e-10):
        self.eps = eps
        self.mean = None        # per-feature mean of the fitted data
        self.whitening = None   # (features x features) whitening matrix

    def fit(self, data):
        """Estimate the mean and whitening matrix from ``data``.

        Parameters:
            data (torch.Tensor): 2-D tensor of shape (samples, features).

        Returns:
            WhiteningTransform: ``self``, so the call can be chained.
        """
        mean = data.mean(dim=0)
        centered_data = data - mean

        cov_matrix = torch.mm(centered_data.t(), centered_data) / data.size(0)
        U, S, _ = torch.linalg.svd(cov_matrix)

        # Store the projection itself, not the projected training data:
        # column j of U is scaled by 1 / sqrt(S[j] + eps).
        self.mean = mean
        self.whitening = U / torch.sqrt(S + self.eps)
        return self

    def __call__(self, x):
        """Whiten ``x`` and return a tensor with the same shape as ``x``.

        ``x`` may be a single sample or a batch, as long as each sample
        flattens to the number of features seen in ``fit``.
        """
        if self.whitening is None:
            return x

        whitening = self.whitening.to(dtype=x.dtype, device=x.device)
        mean = self.mean.to(dtype=x.dtype, device=x.device)

        # Flatten to (samples, features), whiten, then restore the input shape.
        flat = x.reshape(-1, whitening.size(0))
        return torch.mm(flat - mean, whitening).reshape(x.shape)

# Pipeline: tensor conversion followed by a WhiteningTransform instance.
# The instance is created without calling fit(), so its `whitening`
# attribute is None and it returns its input unchanged.
whitening_pipeline = [transforms.ToTensor(), WhiteningTransform()]
transform = transforms.Compose(whitening_pipeline)


In [None]:

# CIFAR-10 train and test splits, loaded through the current `transform`.
train_data = CIFAR10(root='data', train=True, transform=transform, download=True)
test_data = CIFAR10(root='data', train=False, transform=transform, download=True)

batch_size = 256

# Hold out 20% of the training images as a validation set.
train_ratio, validation_ratio = 0.8, 0.2
total_samples = len(train_data)
train_size = int(total_samples * train_ratio)
validation_size = total_samples - train_size

train_data, val_data = random_split(train_data, lengths=[train_size, validation_size])

# One shuffled loader per split.
train_loader = DataLoader(dataset=train_data, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(dataset=val_data, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(dataset=test_data, batch_size=batch_size, shuffle=True)


Files already downloaded and verified
Files already downloaded and verified


### Model Construction

In [None]:


class IOC_AllCNN(nn.Module):
    """AllConv variant used for the IOC experiments: 3 channels in, 10 classes out.

    Each convolution is followed by batch normalisation and an ELU. The first
    convolution is also exposed as ``self.first_conv_layer`` so it can be
    addressed separately from the remaining layers.
    """

    def __init__(self):
        super().__init__()
        # First convolution, wider (192 filters) than in the baseline AllCNN
        self.first_conv_layer = nn.Conv2d(3, 192, 3)

        # ELU is written with an explicit alpha=1.0. A positional `True`
        # would be bound to `alpha` (the first parameter of nn.ELU), not to
        # `inplace`; alpha=1.0 gives the same numerical behaviour.
        self.layers = nn.Sequential(
            self.first_conv_layer,
            nn.BatchNorm2d(192),
            nn.ELU(alpha=1.0),

            nn.Conv2d(192, 96, 3),
            nn.BatchNorm2d(96),
            nn.ELU(alpha=1.0),

            # Stride-2 convolution downsamples the feature map
            nn.Conv2d(96, 96, 3, stride=2),
            nn.BatchNorm2d(96),
            nn.ELU(alpha=1.0),

            nn.Conv2d(96, 192, 3),
            nn.BatchNorm2d(192),
            nn.ELU(alpha=1.0),

            nn.Conv2d(192, 192, 3),
            nn.BatchNorm2d(192),
            nn.ELU(alpha=1.0),

            # Stride-2 convolution downsamples the feature map
            nn.Conv2d(192, 192, 3, stride=2),
            nn.BatchNorm2d(192),
            nn.ELU(alpha=1.0),

            nn.Conv2d(192, 192, 3),
            nn.BatchNorm2d(192),
            nn.ELU(alpha=1.0),

            nn.Conv2d(192, 192, 1),
            nn.BatchNorm2d(192),
            nn.ELU(alpha=1.0),

            # Final 1x1 convolution produces one channel per class
            nn.Conv2d(192, 10, 1),
            nn.BatchNorm2d(10),
            nn.ELU(alpha=1.0)
        )
        # Global average pooling collapses each class map to a single value
        self.pooling = nn.AdaptiveAvgPool2d((1, 1))
        # Softmax over the class dimension
        self.softmax = nn.Softmax(1)

    def forward(self, x):
        """Return class probabilities of shape (batch, 10) for an image batch."""
        x = self.layers(x)
        x = self.pooling(x)
        # Drop only the trailing 1x1 spatial dimensions. A bare squeeze()
        # would also remove the batch dimension for a batch of one sample,
        # after which Softmax(1) fails on the resulting 1-D tensor.
        x = x.flatten(1)
        return self.softmax(x)

In [None]:


# Class for Weight Exponentiation
class WeightExponentiation(object):
    """Callable that replaces negative weights of a module by small positive ones.

    Every negative entry ``w`` of ``module.weight`` becomes
    ``exp(w - epsilon)``; non-negative entries are left untouched.
    """

    def __init__(self, epsilon=5, exclude=()):
        """
        Initialize WeightExponentiation object.

        Parameters:
            epsilon (float): Epsilon for constraining exponentiation of weights.
            exclude (iterable of torch.nn.Module): Modules whose weights must
                be left untouched (for example an unconstrained first layer).
                Modules are matched by identity. A layer's own repr does not
                contain the attribute name it is registered under, so it
                cannot be recognised by name from inside ``__call__``.
        """
        self.epsilon = epsilon
        self.exclude = tuple(exclude)

    def __call__(self, module):
        """
        Callable method to perform weight exponentiation operation on negative weights.

        Parameters:
            module (torch.nn.Module): Module to apply weight exponentiation operation.

        """
        # Skip modules without a weight tensor (activations, pooling,
        # containers, or normalisation layers whose weight is None).
        weight = getattr(module, 'weight', None)
        if weight is None:
            return
        if any(module is skipped for skipped in self.exclude):
            return

        # In-place update outside autograd; the mask is computed once.
        with torch.no_grad():
            negative = weight < 0
            weight[negative] = torch.exp(weight[negative] - self.epsilon)

### Model Training

In [None]:


# Initialize the IOC_AllCNN model, loss function, optimizer, and other variables
mlp = IOC_AllCNN()
loss_fun = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(mlp.parameters(), lr=1e-4)
epoch = 0
prev_val_acc = 0.0
tol_epochs = 0
max_epochs = 2
tolerance = 1e-4

# Weight constraint, applied after every optimizer step to every module
# except the first convolution. The first layer is selected by identity:
# a layer's repr does not contain the name "first_conv_layer", so a
# name-based test on the layer itself cannot single it out.
weight_constraint = WeightExponentiation()
constrained_modules = [m for m in mlp.modules() if m is not mlp.first_conv_layer]

# Train until max_epochs is reached, or until two consecutive epochs each
# gain a positive amount smaller than `tolerance`.
while epoch < max_epochs and tol_epochs < 2:
    epoch += 1
    print(f'Epoch {epoch}')
    current_loss = []
    current_acc = []
    val_acc = []

    # Training pass (BatchNorm uses batch statistics)
    mlp.train()
    for inputs, targets in train_loader:
        optimizer.zero_grad()
        outputs = mlp(inputs)

        # Calculate loss and perform backpropagation
        loss = loss_fun(outputs, targets)
        loss.backward()
        optimizer.step()

        # Re-impose the non-negativity constraint after the update
        for module in constrained_modules:
            weight_constraint(module)
        current_loss.append(loss.item())

        # Fraction of correctly classified samples in this batch
        pred = torch.max(outputs, 1).indices
        acc = (targets == pred).sum().item()
        current_acc.append(acc / targets.shape[0])

    # Validation pass: BatchNorm switches to its running statistics and no
    # gradients are tracked, so validation data cannot alter the model.
    mlp.eval()
    with torch.no_grad():
        for inputs, targets in val_loader:
            val_outputs = mlp(inputs)
            pred = torch.max(val_outputs, 1).indices
            acc = (targets == pred).sum().item()
            val_acc.append(acc / targets.shape[0])

    # Epoch summary (means over batches)
    epoch_val_acc = sum(val_acc) / len(val_acc)
    print('Loss : %.3f' % (sum(current_loss) / len(current_loss)))
    print('Training_Acc  : %.3f' % (sum(current_acc) / len(current_acc)))
    print('Validation_Acc  : %.3f' % epoch_val_acc)

    # Early Stopping Criteria
    update = epoch_val_acc - prev_val_acc
    print('update: %.4f' % update)
    print("--------------------------------------------------------------------------------------------------")

    if update > 0 and update < tolerance:
        tol_epochs += 1
    else:
        tol_epochs = 0

    prev_val_acc = epoch_val_acc

Epoch 1
Loss : 2.132
Training_Acc  : 0.444
Validation_Acc  : 0.533
update: 0.5330
--------------------------------------------------------------------------------------------------
Epoch 2
Loss : 2.047
Training_Acc  : 0.568
Validation_Acc  : 0.584
update: 0.0506
--------------------------------------------------------------------------------------------------


### Model Evaluation/Testing

In [None]:
# Evaluate the trained model on the test set.
# eval() makes BatchNorm use its running statistics, so predictions do not
# depend on batch composition and test data does not update the model.
mlp.eval()
test_acc = []            # per-batch accuracies, kept for inspection
n_correct = 0
n_seen = 0
# Nothing is optimised here, so gradient tracking is switched off.
with torch.no_grad():
    for inputs, targets in test_loader:
        test_outputs = mlp(inputs)
        pred = torch.max(test_outputs, 1).indices
        acc = (targets == pred).sum().item()
        test_acc.append(acc / targets.shape[0])
        n_correct += acc
        n_seen += targets.shape[0]
# Report correct / total so a smaller final batch is not over-weighted,
# which a plain mean of per-batch accuracies would do.
print('Test_Acc  : %.3f' % (n_correct / n_seen))

Test_Acc  : 0.583


# Exp2. Training on Duplicate Free data (ciFAIR10)

## AllConv

In [None]:
import torchvision.datasets

class ciFAIR10(torchvision.datasets.CIFAR10):
    """CIFAR10 subclass whose archive metadata points at the ciFAIR-10 release.

    Only class attributes are overridden here; everything else is inherited
    from ``torchvision.datasets.CIFAR10``.
    """

    # Archive location and the name it is stored under
    url = "https://github.com/cvjena/cifair/releases/download/v1.0/ciFAIR-10.zip"
    filename = "ciFAIR-10.zip"
    tgz_md5 = "ca08fd390f0839693d3fc45c4e49585f"

    # Folder name inside the dataset root
    base_folder = "ciFAIR-10"

    # Test batch file with its checksum
    test_list = [["test_batch", "01290e6b622a1977a000eff13650aca2"]]

### Data Preparation

In [None]:
# Images are only converted to tensors; no augmentation is applied.
transform = transforms.Compose([transforms.ToTensor()])

# ciFAIR-10 train and test splits, stored under the 'data' directory.
train_data = ciFAIR10(root='data', train=True, transform=transform, download=True)
test_data = ciFAIR10(root='data', train=False, transform=transform, download=True)

batch_size = 64

# Hold out 20% of the training images as a validation set.
train_ratio, validation_ratio = 0.8, 0.2
total_samples = len(train_data)
train_size = int(total_samples * train_ratio)
validation_size = total_samples - train_size

train_data, val_data = random_split(train_data, lengths=[train_size, validation_size])

# One shuffled loader per split.
train_loader = DataLoader(dataset=train_data, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(dataset=val_data, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(dataset=test_data, batch_size=batch_size, shuffle=True)

Downloading https://objects.githubusercontent.com/github-production-release-asset-2e65be/168584397/527c7d80-2645-11e9-8008-a9ca4d2226ec?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=AKIAIWNJYAX4CSVEH53A%2F20231117%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20231117T142952Z&X-Amz-Expires=300&X-Amz-Signature=6b5f6d46f27db1df1abb6217c75eae7813b88b0fc352788e02cceb9dd677e8bb&X-Amz-SignedHeaders=host&actor_id=0&key_id=0&repo_id=168584397&response-content-disposition=attachment%3B%20filename%3DciFAIR-10.zip&response-content-type=application%2Foctet-stream to data/ciFAIR-10.zip


100%|██████████| 168614301/168614301 [00:00<00:00, 177712263.68it/s]


Extracting data/ciFAIR-10.zip to data
Files already downloaded and verified


### Model Training

In [None]:


# Initialize the model, loss function, optimizer, and other variables
mlp = AllCNN()
loss_fun = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(mlp.parameters(), lr=1e-4)
epoch = 0
prev_val_acc = 0.0
tol_epochs = 0
max_epochs = 2
# Early-stopping threshold; loop-invariant, so it is set once up front
tolerance = 1e-4

# Train until max_epochs is reached, or until two consecutive epochs each
# gain a positive amount smaller than `tolerance`.
while epoch < max_epochs and tol_epochs < 2:
    epoch += 1
    print(f'Epoch {epoch}')
    current_loss = []
    current_acc = []
    val_acc = []

    # Training pass
    mlp.train()
    for inputs, targets in train_loader:
        optimizer.zero_grad()
        outputs = mlp(inputs)

        # Calculate loss and perform backpropagation
        loss = loss_fun(outputs, targets)
        loss.backward()
        optimizer.step()

        # Store current loss
        current_loss.append(loss.item())

        # Fraction of correctly classified samples in this batch
        pred = torch.max(outputs, 1).indices
        acc = (targets == pred).sum().item()
        current_acc.append(acc / targets.shape[0])

    # Validation pass: nothing is updated, so gradient tracking is disabled
    mlp.eval()
    with torch.no_grad():
        for inputs, targets in val_loader:
            val_outputs = mlp(inputs)
            pred = torch.max(val_outputs, 1).indices
            acc = (targets == pred).sum().item()
            val_acc.append(acc / targets.shape[0])

    # Epoch summary (means over batches)
    epoch_val_acc = sum(val_acc) / len(val_acc)
    print('Loss : %.3f' % (sum(current_loss) / len(current_loss)))
    print('Training_Acc  : %.3f' % (sum(current_acc) / len(current_acc)))
    print('Validation_Acc  : %.3f' % epoch_val_acc)

    # Early Stopping Criteria
    update = epoch_val_acc - prev_val_acc
    print('update: %.4f' % update)
    print("--------------------------------------------------------------------------------------------------")

    # Update tolerance epochs
    if update > 0 and update < tolerance:
        tol_epochs += 1
    else:
        tol_epochs = 0

    prev_val_acc = epoch_val_acc

Epoch 1
Loss : 2.217
Training_Acc  : 0.225
Validation_Acc  : 0.302
update: 0.3024
--------------------------------------------------------------------------------------------------
Epoch 2
Loss : 2.139
Training_Acc  : 0.313
Validation_Acc  : 0.335
update: 0.0329
--------------------------------------------------------------------------------------------------


### Model Evaluation/Testing

In [None]:
# Evaluate the trained model on the test set.
mlp.eval()
test_acc = []            # per-batch accuracies, kept for inspection
n_correct = 0
n_seen = 0
# Nothing is optimised here, so gradient tracking is switched off.
with torch.no_grad():
    for inputs, targets in test_loader:
        test_outputs = mlp(inputs)
        pred = torch.max(test_outputs, 1).indices
        acc = (targets == pred).sum().item()
        test_acc.append(acc / targets.shape[0])
        n_correct += acc
        n_seen += targets.shape[0]
# Report correct / total so a smaller final batch is not over-weighted,
# which a plain mean of per-batch accuracies would do.
print('Test_Acc  : %.3f' % (n_correct / n_seen))

Test_Acc  : 0.335


## IOC - ALLConv

### Data Preparation

In [None]:
# Class to perform whitening Transform
class WhiteningTransform:
    """PCA-whitening transform usable inside a transform pipeline.

    Until ``fit`` has been called the transform returns its input unchanged.
    After fitting, inputs are centred with the fitted mean and projected with
    a (features x features) whitening matrix.

    Parameters:
        eps (float): Value added to the singular values before the square
            root, to avoid division by zero.
    """

    def __init__(self, eps=1e-10):
        self.eps = eps
        self.mean = None        # per-feature mean of the fitted data
        self.whitening = None   # (features x features) whitening matrix

    def fit(self, data):
        """Estimate the mean and whitening matrix from ``data``.

        Parameters:
            data (torch.Tensor): 2-D tensor of shape (samples, features).

        Returns:
            WhiteningTransform: ``self``, so the call can be chained.
        """
        mean = data.mean(dim=0)
        centered_data = data - mean

        cov_matrix = torch.mm(centered_data.t(), centered_data) / data.size(0)
        U, S, _ = torch.linalg.svd(cov_matrix)

        # Store the projection itself, not the projected training data:
        # column j of U is scaled by 1 / sqrt(S[j] + eps).
        self.mean = mean
        self.whitening = U / torch.sqrt(S + self.eps)
        return self

    def __call__(self, x):
        """Whiten ``x`` and return a tensor with the same shape as ``x``.

        ``x`` may be a single sample or a batch, as long as each sample
        flattens to the number of features seen in ``fit``.
        """
        if self.whitening is None:
            return x

        whitening = self.whitening.to(dtype=x.dtype, device=x.device)
        mean = self.mean.to(dtype=x.dtype, device=x.device)

        # Flatten to (samples, features), whiten, then restore the input shape.
        flat = x.reshape(-1, whitening.size(0))
        return torch.mm(flat - mean, whitening).reshape(x.shape)

# Pipeline: tensor conversion followed by a WhiteningTransform instance.
# The instance is created without calling fit(), so its `whitening`
# attribute is None and it returns its input unchanged.
whitening_pipeline = [transforms.ToTensor(), WhiteningTransform()]
transform = transforms.Compose(whitening_pipeline)


In [None]:

# ciFAIR-10 train and test splits, loaded through the current `transform`.
train_data = ciFAIR10(root='data', train=True, transform=transform, download=True)
test_data = ciFAIR10(root='data', train=False, transform=transform, download=True)

batch_size = 64

# Hold out 20% of the training images as a validation set.
train_ratio, validation_ratio = 0.8, 0.2
total_samples = len(train_data)
train_size = int(total_samples * train_ratio)
validation_size = total_samples - train_size

train_data, val_data = random_split(train_data, lengths=[train_size, validation_size])

# One shuffled loader per split.
train_loader = DataLoader(dataset=train_data, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(dataset=val_data, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(dataset=test_data, batch_size=batch_size, shuffle=True)


Files already downloaded and verified
Files already downloaded and verified


In [None]:
# Class for Weight Exponentiation
class WeightExponentiation(object):
    """Callable that replaces negative weights of a module by small positive ones.

    Every negative entry ``w`` of ``module.weight`` becomes
    ``exp(w - epsilon)``; non-negative entries are left untouched.
    """

    def __init__(self, epsilon = 5, exclude = ()):
        """
        Parameters:
            epsilon (float): Epsilon for constraining exponentiation of weights.
            exclude (iterable of torch.nn.Module): Modules whose weights must
                be left untouched (for example an unconstrained first layer).
                Modules are matched by identity. A layer's own repr does not
                contain the attribute name it is registered under, so it
                cannot be recognised by name from inside ``__call__``.
        """
        self.epsilon = epsilon # epsilon for constraining exponentiation of weights
        self.exclude = tuple(exclude)

    def __call__(self, module):
        """Apply the exponentiation to ``module.weight`` in place, if it has one."""
        # Skip modules without a weight tensor (activations, pooling,
        # containers, or normalisation layers whose weight is None).
        weight = getattr(module, 'weight', None)
        if weight is None:
            return
        if any(module is skipped for skipped in self.exclude):
            return

        # In-place update outside autograd; the mask is computed once.
        with torch.no_grad():
            negative = weight < 0
            weight[negative] = torch.exp(weight[negative] - self.epsilon)


### Model Training

In [None]:

# Initialize the IOC_AllCNN model, loss function, optimizer, and other variables
mlp = IOC_AllCNN()
loss_fun = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(mlp.parameters(), lr=1e-4)
epoch = 0
prev_val_acc = 0.0
tol_epochs = 0
max_epochs = 2
tolerance = 1e-4

# Weight constraint, applied after every optimizer step to every module
# except the first convolution, as in the CIFAR-10 IOC training cell.
# Without this step the model is trained as an unconstrained network.
# The first layer is selected by identity because a layer's repr does not
# contain the attribute name it is registered under.
weight_constraint = WeightExponentiation()
constrained_modules = [m for m in mlp.modules() if m is not mlp.first_conv_layer]

# Train until max_epochs is reached, or until two consecutive epochs each
# gain a positive amount smaller than `tolerance`.
while epoch < max_epochs and tol_epochs < 2:
    epoch += 1
    print(f'Epoch {epoch}')
    current_loss = []
    current_acc = []
    val_acc = []

    # Training pass (BatchNorm uses batch statistics)
    mlp.train()
    for inputs, targets in train_loader:
        optimizer.zero_grad()
        outputs = mlp(inputs)

        # Calculate loss and perform backpropagation
        loss = loss_fun(outputs, targets)
        loss.backward()
        optimizer.step()

        # Re-impose the non-negativity constraint after the update
        for module in constrained_modules:
            weight_constraint(module)

        # Store current loss
        current_loss.append(loss.item())

        # Fraction of correctly classified samples in this batch
        pred = torch.max(outputs, 1).indices
        acc = (targets == pred).sum().item()
        current_acc.append(acc / targets.shape[0])

    # Validation pass: BatchNorm switches to its running statistics and no
    # gradients are tracked, so validation data cannot alter the model.
    mlp.eval()
    with torch.no_grad():
        for inputs, targets in val_loader:
            val_outputs = mlp(inputs)
            pred = torch.max(val_outputs, 1).indices
            acc = (targets == pred).sum().item()
            val_acc.append(acc / targets.shape[0])

    # Epoch summary (means over batches)
    epoch_val_acc = sum(val_acc) / len(val_acc)
    print('Loss : %.3f' % (sum(current_loss) / len(current_loss)))
    print('Training_Acc  : %.3f' % (sum(current_acc) / len(current_acc)))
    print('Validation_Acc  : %.3f' % epoch_val_acc)

    # Early Stopping Criteria
    update = epoch_val_acc - prev_val_acc
    print('update: %.4f' % update)
    print("--------------------------------------------------------------------------------------------------")

    # Update tolerance epochs
    if update > 0 and update < tolerance:
        tol_epochs += 1
    else:
        tol_epochs = 0

    prev_val_acc = epoch_val_acc

Epoch 1
Loss : 2.116
Training_Acc  : 0.457
Validation_Acc  : 0.521
update: 0.5210
--------------------------------------------------------------------------------------------------
Epoch 2
Loss : 2.032
Training_Acc  : 0.561
Validation_Acc  : 0.590
update: 0.0688
--------------------------------------------------------------------------------------------------


### Model Evaluation/Testing

In [None]:
# Evaluate the trained model on the test set.
# eval() makes BatchNorm use its running statistics, so predictions do not
# depend on batch composition and test data does not update the model.
mlp.eval()
test_acc = []            # per-batch accuracies, kept for inspection
n_correct = 0
n_seen = 0
# Nothing is optimised here, so gradient tracking is switched off.
with torch.no_grad():
    for inputs, targets in test_loader:
        test_outputs = mlp(inputs)
        pred = torch.max(test_outputs, 1).indices
        acc = (targets == pred).sum().item()
        test_acc.append(acc / targets.shape[0])
        n_correct += acc
        n_seen += targets.shape[0]
# Report correct / total so a smaller final batch is not over-weighted,
# which a plain mean of per-batch accuracies would do.
print('Test_Acc  : %.3f' % (n_correct / n_seen))

Test_Acc  : 0.580


# Exp 3. Partially randomized labeling

## AllConv

In [None]:
from torchvision.datasets import CIFAR10
from torchvision.transforms import transforms
from torch.utils.data import DataLoader, random_split, Dataset
import numpy as np

In [None]:
class NoisyCIFAR10(Dataset):
    """Dataset wrapper that replaces a percentage of the labels with random ones.

    Images come from ``base_dataset`` unchanged. Labels are served from a
    private copy in which ``noise_percentage`` percent of the entries were
    overwritten with labels drawn uniformly from ``range(num_classes)``.
    A drawn label may coincide with the original one. The labels stored in
    ``base_dataset`` are never modified.
    """

    def __init__(self, base_dataset, noise_percentage, num_classes=10):
        """
        Initialize the NoisyCIFAR10 dataset.

        Parameters:
            base_dataset (Dataset): Either a dataset exposing ``targets`` or
                a subset of one (as returned by ``random_split``) exposing
                ``dataset`` and ``indices``.
            noise_percentage (float): Percentage of noisy labels to introduce.
            num_classes (int): Number of classes random labels are drawn from.
        """
        self.base_dataset = base_dataset
        self.noise_percentage = noise_percentage

        if hasattr(base_dataset, 'indices') and hasattr(base_dataset, 'dataset'):
            # Subset: keep only the labels of the selected samples, in subset
            # order, so that position i here matches base_dataset[i].
            parent_targets = np.array(base_dataset.dataset.targets)
            self.labels = parent_targets[np.asarray(base_dataset.indices)]
        else:
            self.labels = np.array(base_dataset.targets)

        # Calculate the number of noisy labels to introduce
        num_noisy_labels = int(self.noise_percentage * len(self.labels) / 100)

        # Randomly choose distinct positions to overwrite
        noisy_indices = np.random.choice(len(self.labels), num_noisy_labels, replace=False)

        # Generate random noisy labels
        self.noisy_labels = np.random.randint(0, num_classes, num_noisy_labels)

        # Replace original labels with noisy labels at selected positions
        self.labels[noisy_indices] = self.noisy_labels

    def __len__(self):
        """
        Get the length of the dataset.

        Returns:
            int: Length of the base dataset.
        """
        return len(self.base_dataset)

    def __getitem__(self, index):
        """
        Get an item from the dataset.

        Parameters:
            index (int): Index of the item to retrieve.

        Returns:
            tuple: The image from the base dataset and its (possibly noisy) label.
        """
        # The image comes from the base dataset; its label is discarded in
        # favour of the copy held in self.labels, which carries the noise.
        img, _ = self.base_dataset[index]

        return img, int(self.labels[index])

- **Initialization**: The `NoisyCIFAR10` class takes in the original CIFAR10 dataset (`base_dataset`) and a noise percentage (`noise_percentage`). It calculates the number of noisy labels to introduce based on the percentage provided.

- **Introducing Noisy Labels**: It randomly selects indices from the original dataset to introduce noise. Then, it generates random noisy labels for those indices and replaces the original labels with these noisy labels. This step simulates the introduction of label noise into the dataset.

- **Data Access**: When accessing elements of the `NoisyCIFAR10` dataset, it retrieves images and their corresponding labels from the base dataset. Some of these labels may have been replaced with noisy labels, depending on the indices selected during initialization.

- **Usage**: This class can be used as a drop-in replacement for the original CIFAR10 dataset in PyTorch. It allows researchers and practitioners to experiment with machine learning models in scenarios where noisy labels are present, helping to evaluate model robustness and performance under such conditions.

In [None]:
# Images are only converted to tensors; no augmentation is applied.
transform = transforms.Compose([transforms.ToTensor()])

# CIFAR-10 train and test splits, stored under the 'data' directory.
base_train_data = CIFAR10(root='data', train=True, transform=transform, download=True)
test_data = CIFAR10(root='data', train=False, transform=transform, download=True)

# Hold out 20% of the training images as a validation set.
train_ratio, validation_ratio = 0.8, 0.2
total_samples = len(base_train_data)
train_size = int(total_samples * train_ratio)
validation_size = total_samples - train_size

base_train_data, val_data = random_split(base_train_data, lengths=[train_size, validation_size])

# Noise levels, as percentages of the training labels.
noise_percentages = [20, 40, 60, 80, 100]

# One NoisyCIFAR10 wrapper around the training split per noise level.
noisy_datasets = []
for percentage in noise_percentages:
    noisy_datasets.append(NoisyCIFAR10(base_train_data, percentage))

# One shuffled training loader per noisy dataset, plus validation/test loaders.
train_loaders = []
for dataset in noisy_datasets:
    train_loader = DataLoader(dataset=dataset, batch_size=256, shuffle=True)
    train_loaders += [train_loader]
val_loader = DataLoader(dataset=val_data, batch_size=256, shuffle=True)
test_loader = DataLoader(dataset=test_data, batch_size=256, shuffle=True)

Files already downloaded and verified
Files already downloaded and verified


In [None]:
# Early-stopping threshold on the validation-accuracy gain (same for all runs)
tolerance = 1e-4

# Train and test one freshly initialised AllCNN per noisy training loader
for index, train_loader in enumerate(train_loaders):
    # Initialize the model, loss function, optimizer, and other necessary variables
    mlp = AllCNN()
    loss_fun = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(mlp.parameters(), lr=1e-4)
    epoch = 0
    prev_val_acc = 0.0
    tol_epochs = 0
    max_epochs = 2

    # Train until max_epochs is reached, or until two consecutive epochs
    # each gain a positive amount smaller than `tolerance`.
    while epoch < max_epochs and tol_epochs < 2:
        epoch += 1
        print(f'Epoch {epoch}')
        current_loss = []
        current_acc = []
        val_acc = []

        # Training pass
        mlp.train()
        for inputs, targets in train_loader:
            optimizer.zero_grad()
            outputs = mlp(inputs)

            # Compute loss and perform backward pass
            loss = loss_fun(outputs, targets)
            loss.backward()
            optimizer.step()
            current_loss.append(loss.item())

            # Fraction of correctly classified samples in this batch
            pred = torch.max(outputs, 1).indices
            acc = (targets == pred).sum().item()
            current_acc.append(acc / targets.shape[0])

        # Validation pass: nothing is updated, so gradient tracking is disabled
        mlp.eval()
        with torch.no_grad():
            for inputs, targets in val_loader:
                val_outputs = mlp(inputs)
                pred = torch.max(val_outputs, 1).indices
                acc = (targets == pred).sum().item()
                val_acc.append(acc / targets.shape[0])

        # Epoch summary (means over batches)
        epoch_val_acc = sum(val_acc) / len(val_acc)
        print('Loss : %.3f' % (sum(current_loss) / len(current_loss)))
        print('Training_Acc  : %.3f' % (sum(current_acc) / len(current_acc)))
        print('Validation_Acc  : %.3f' % epoch_val_acc)

        # Early Stopping Criteria
        update = epoch_val_acc - prev_val_acc
        print('update: %.4f' % update)
        print("--------------------------------------------------------------------------------------------------")

        if 0 < update < tolerance:
            tol_epochs += 1
        else:
            tol_epochs = 0

        prev_val_acc = epoch_val_acc

    # Testing, also without gradient tracking
    test_acc = []            # per-batch accuracies, kept for inspection
    n_correct = 0
    n_seen = 0
    mlp.eval()
    with torch.no_grad():
        for inputs, targets in test_loader:
            test_outputs = mlp(inputs)
            pred = torch.max(test_outputs, 1).indices
            acc = (targets == pred).sum().item()
            test_acc.append(acc / targets.shape[0])
            n_correct += acc
            n_seen += targets.shape[0]

    # Print testing accuracy and noise percentage. The accuracy is
    # correct / total, so a smaller final batch is not over-weighted.
    print(f'Dataset with {noise_percentages[index]} % noise')
    print('Test_Acc  : %.3f' % (n_correct / n_seen))
    print('*********************************************************************************************************')

Epoch 1
Loss : 2.250
Training_Acc  : 0.186
Validation_Acc  : 0.236
update: 0.2360
--------------------------------------------------------------------------------------------------
Epoch 2
Loss : 2.187
Training_Acc  : 0.261
Validation_Acc  : 0.296
update: 0.0599
--------------------------------------------------------------------------------------------------
Dataset with 20 % noise
Test_Acc  : 0.284
*********************************************************************************************************
Epoch 1
Loss : 2.257
Training_Acc  : 0.179
Validation_Acc  : 0.239
update: 0.2393
--------------------------------------------------------------------------------------------------
Epoch 2
Loss : 2.203
Training_Acc  : 0.244
Validation_Acc  : 0.270
update: 0.0309
--------------------------------------------------------------------------------------------------
Dataset with 40 % noise
Test_Acc  : 0.276
**************************************************************************************

## IOC-AllConv

In [None]:
# Class to perform whitening transform
class WhiteningTransform:
    """PCA-whitening transform usable as a step in a transform pipeline.

    ``fit`` estimates a per-feature mean and a whitening matrix from a set of
    samples; ``__call__`` applies them to new data. Until ``fit`` has been
    called, ``__call__`` returns its input unchanged.
    """

    def __init__(self):
        self.mean = None       # per-feature mean estimated by fit(), shape (D,)
        self.whitening = None  # whitening matrix estimated by fit(), shape (D, D)

    def fit(self, data):
        """
        Estimate the mean and the whitening matrix.

        Parameters:
            data (torch.Tensor): Samples along the first dimension; all
                remaining dimensions are flattened into D features.
        """
        flat = data.reshape(data.size(0), -1)
        self.mean = flat.mean(dim=0)
        centered_data = flat - self.mean

        cov_matrix = torch.mm(centered_data.t(), centered_data) / flat.size(0)
        U, S, _ = torch.linalg.svd(cov_matrix)
        # Store the (D, D) projection itself, not the projected training data:
        # each principal direction (column of U) is scaled by 1 / sqrt(variance).
        self.whitening = U / torch.sqrt(S + 1e-10)

    def __call__(self, x):
        """
        Whiten ``x`` and return it in its original shape.

        Parameters:
            x (torch.Tensor): A single sample or a batch of samples whose
                flattened size per sample equals the D seen by ``fit``.

        Returns:
            torch.Tensor: Whitened tensor with the same shape as ``x``
            (``x`` itself when the transform has not been fitted).
        """
        if self.whitening is None:
            return x
        # reshape(-1, D) covers both one sample (C, H, W) and a batch (B, C, H, W)
        flat = x.reshape(-1, self.whitening.size(0))
        whitened = torch.mm(flat - self.mean, self.whitening)
        return whitened.reshape(x.shape)

# Preprocessing pipeline: convert each image to a tensor, then pass it through the whitening step
transform = transforms.Compose([transforms.ToTensor(), WhiteningTransform()])

In [None]:
# CIFAR-10 train and test splits, both read through the shared transform
base_train_data = CIFAR10(root='data', train=True, transform=transform, download=True)
test_data = CIFAR10(root='data', train=False, transform=transform, download=True)

# Carve a validation set out of the training split (train_ratio for training, the rest for validation)
total_samples = len(base_train_data)
train_ratio = 0.8
validation_ratio = 0.2

train_size = int(train_ratio * total_samples)
validation_size = total_samples - train_size
base_train_data, val_data = random_split(base_train_data, lengths=[train_size, validation_size])

# One noisy copy of the training subset per label-noise percentage
noise_percentages = [20, 40, 60, 80, 100]
noisy_datasets = [NoisyCIFAR10(base_train_data, percentage) for percentage in noise_percentages]

# One training loader per noise level, plus shared validation and test loaders
train_loaders = [DataLoader(noisy_set, batch_size=256, shuffle=True) for noisy_set in noisy_datasets]
val_loader = DataLoader(val_data, batch_size=256, shuffle=True)
test_loader = DataLoader(test_data, batch_size=256, shuffle=True)

Files already downloaded and verified
Files already downloaded and verified


In [None]:
import torch

# Train one IOC-AllConv model per noisy training set and report its test accuracy
for index, train_loader in enumerate(train_loaders):
    # Fresh model, loss function and optimizer for every noise level
    mlp = IOC_AllCNN()
    loss_fun = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(mlp.parameters(), lr=1e-4)
    epoch = 0
    prev_val_acc = 0.0
    tol_epochs = 0
    max_epochs = 2
    tolerance = 1e-4  # a positive improvement below this counts as a stalled epoch

    # Stop after max_epochs, or after two consecutive stalled epochs
    while epoch < max_epochs and tol_epochs < 2:
        epoch += 1
        print(f'Epoch {epoch}')
        current_loss = []
        current_acc = []
        val_acc = []

        # Training
        mlp.train()
        for inputs, targets in train_loader:
            optimizer.zero_grad()
            outputs = mlp(inputs)

            # Calculate loss and perform backpropagation
            loss = loss_fun(outputs, targets)
            loss.backward()
            optimizer.step()

            # Re-apply the weight exponentiation after every optimizer step
            mlp.apply(WeightExponentiation())
            current_loss.append(loss.item())

            # Per-batch training accuracy
            pred = torch.max(outputs, 1).indices
            acc = (targets == pred).sum().item()
            current_acc.append(acc / targets.shape[0])

        # Validation: inference mode and no autograd graph, so evaluation
        # neither behaves like a training pass nor accumulates gradient state
        mlp.eval()
        with torch.no_grad():
            for inputs, targets in val_loader:
                val_outputs = mlp(inputs)
                pred = torch.max(val_outputs, 1).indices
                acc = (targets == pred).sum().item()
                val_acc.append(acc / targets.shape[0])

        # Performance Evaluation (means of the per-batch values)
        mean_val_acc = sum(val_acc) / len(val_acc)
        print('Loss : %.3f' % (sum(current_loss) / len(current_loss)))
        print('Training_Acc  : %.3f' % (sum(current_acc) / len(current_acc)))
        print('Validation_Acc  : %.3f' % mean_val_acc)

        # Early Stopping Criteria
        update = mean_val_acc - prev_val_acc
        print('update: %.4f' % update)
        print("--------------------------------------------------------------------------------------------------")

        # NOTE(review): a drop in validation accuracy (update <= 0) resets the
        # counter instead of incrementing it - confirm this is intended.
        if 0 < update < tolerance:
            tol_epochs += 1
        else:
            tol_epochs = 0

        prev_val_acc = mean_val_acc

    # Testing loop, again in inference mode without autograd
    test_acc = []
    mlp.eval()
    with torch.no_grad():
        for inputs, targets in test_loader:
            test_outputs = mlp(inputs)
            pred = torch.max(test_outputs, 1).indices
            acc = (targets == pred).sum().item()
            test_acc.append(acc / targets.shape[0])

    # Print results for the current dataset
    print(f'Dataset with {noise_percentages[index]} % noise')
    print('Test_Acc  : %.3f' % (sum(test_acc) / len(test_acc)))
    print('*********************************************************************************************************')


Epoch 1
Loss : 2.128
Training_Acc  : 0.451
Validation_Acc  : 0.533
update: 0.5331
--------------------------------------------------------------------------------------------------
Epoch 2
Loss : 2.047
Training_Acc  : 0.564
Validation_Acc  : 0.582
update: 0.0493
--------------------------------------------------------------------------------------------------
Dataset with 20 % noise
Test_Acc  : 0.579
*********************************************************************************************************
Epoch 1
Loss : 2.130
Training_Acc  : 0.443
Validation_Acc  : 0.523
update: 0.5229
--------------------------------------------------------------------------------------------------
Epoch 2
Loss : 2.048
Training_Acc  : 0.561
Validation_Acc  : 0.588
update: 0.0651
--------------------------------------------------------------------------------------------------
Dataset with 40 % noise
Test_Acc  : 0.580
**************************************************************************************

# Exp 4. Boosted Ensemble

## IOC-AllConv

In [None]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [None]:
# Preprocessing pipeline: tensor conversion followed by the whitening step
transform = transforms.Compose([transforms.ToTensor(), WhiteningTransform()])

# CIFAR-10 train and test splits, both read through that pipeline
train_data = CIFAR10(root='data', train=True, transform=transform, download=True)
test_data = CIFAR10(root='data', train=False, transform=transform, download=True)

# Mini-batch size shared by the two loaders
batch_size = 64

train_loader = DataLoader(dataset=train_data, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(dataset=test_data, batch_size=batch_size, shuffle=True)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to data/cifar-10-python.tar.gz


100%|██████████| 170498071/170498071 [00:03<00:00, 43428993.45it/s]


Extracting data/cifar-10-python.tar.gz to data
Files already downloaded and verified


In [None]:
# Class for Weight Exponentiation
class WeightExponentiation(object):
    """Callable for ``nn.Module.apply`` that makes negative weights positive.

    Every negative entry ``w`` of a module's ``weight`` is replaced in place
    by ``exp(w - epsilon)``; non-negative entries are left untouched.
    """

    def __init__(self, epsilon = 5):
        self.epsilon = epsilon # epsilon for constraining exponentiation of weights

    def __call__(self, module):
        """
        Exponentiate the negative weights of ``module`` in place.

        Parameters:
            module (nn.Module): Module visited by ``apply``. Modules without a
                ``weight`` attribute, or whose ``weight`` is None, are skipped.
        """
        weight = getattr(module, 'weight', None)
        if weight is None:
            # e.g. containers, activations, or normalisation layers built without affine weights
            return

        # Skip modules whose string form mentions "first_hidden_layer" or "fc0".
        # NOTE(review): str(module) is the module's repr; for a built-in leaf layer
        # that repr does not contain the attribute name it is registered under,
        # so confirm that the intended first layer is really being skipped.
        description = str(module)
        if "first_hidden_layer" in description or "fc0" in description:
            return

        w = weight.data
        negative = w < 0
        # In-place masked write; w shares storage with module.weight
        w[negative] = torch.exp(w[negative] - self.epsilon)


In [None]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader

class ExpertEnsemble(nn.Module):
    """Ensemble of experts trained one after another on re-weighted bootstrap samples.

    The first expert is trained on the given loader. After each expert, the
    per-sample weights over the ORIGINAL dataset are increased for the samples
    it misclassifies, and the next expert is trained on a bootstrap sample
    drawn from the original dataset according to those weights.
    """

    def __init__(self, base_learner, num_experts: int = 3):
        """
        Initialize ExpertEnsemble.

        Parameters:
            base_learner (nn.Module): Base learner class (called with no arguments).
            num_experts (int): Number of experts in the ensemble.
        """
        super().__init__()
        self.num_experts = num_experts
        self.experts = nn.ModuleList([base_learner().to(device) for _ in range(self.num_experts)])
        self.criterion = nn.CrossEntropyLoss()

    def forward(self, x):
        """
        Forward pass through the ensemble.

        Parameters:
            x (torch.Tensor): Input tensor.

        Returns:
            list: List of outputs from individual experts.
        """
        return [expert(x) for expert in self.experts]

    def train_experts(self, train_loader):
        """
        Train individual experts in the ensemble.

        Parameters:
            train_loader (DataLoader): DataLoader for training data.
        """
        # Weights always index the original dataset, so scoring and resampling
        # must both use it (never the previously bootstrapped copy).
        base_dataset = train_loader.dataset
        weights = torch.ones(len(base_dataset))
        expert_loader = train_loader
        last_index = self.num_experts - 1
        for i in range(self.num_experts):
            print("Expert:", i)
            self.train_single_expert(self.experts[i], expert_loader)
            if i == last_index:
                # No further expert will consume new weights or a new sample
                break
            weights = self.update_sample_weights(self.experts[i], base_dataset, weights)
            expert_loader = self.bootstrap_dataloader(train_loader, weights)

    def train_single_expert(self, model, train_loader):
        """
        Train a single expert in the ensemble.

        Parameters:
            model (nn.Module): Expert model to train.
            train_loader (DataLoader): DataLoader for training data.
        """
        model.train()
        optimizer = torch.optim.Adam(model.parameters(), lr=1e-4, betas=(0.9, 0.9))
        max_epochs = 2
        for epoch in range(max_epochs):
            print(f'Epoch {epoch + 1}')
            current_loss = []
            current_acc = []
            for inputs, targets in train_loader:
                inputs, targets = inputs.to(device), targets.to(device)
                optimizer.zero_grad()
                outputs = model(inputs)
                loss = self.criterion(outputs, targets)
                loss.backward()
                optimizer.step()
                # Re-apply the weight exponentiation after every optimizer step
                model.apply(WeightExponentiation())
                current_loss.append(loss.item())
                pred = torch.max(outputs, 1).indices
                acc = (targets == pred).sum().item()
                current_acc.append(acc / targets.shape[0])
            print('Loss: %.3f' % (sum(current_loss) / len(current_loss)))
            print('Training Accuracy: %.3f' % (sum(current_acc) / len(current_acc)))

    def update_sample_weights(self, weak_learner, dataset, sample_weights, learning_rate=0.001, batch_size=512):
        """
        Update sample weights based on the performance of a weak learner.

        Parameters:
            weak_learner (nn.Module): Weak learner model.
            dataset (torch.utils.data.Dataset): Dataset the weights refer to;
                entry k of ``sample_weights`` belongs to ``dataset[k]``.
            sample_weights (torch.Tensor): Current sample weights.
            learning_rate (float): Learning rate for updating sample weights.
            batch_size (int): Batch size used while scoring the dataset.

        Returns:
            torch.Tensor: Updated sample weights, normalised to sum to 1.
        """
        # shuffle=False keeps prediction k aligned with weight k; batching keeps
        # memory bounded instead of pushing the whole dataset through at once.
        dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=False)
        weak_learner.to(device)
        was_training = weak_learner.training
        weak_learner.eval()
        per_batch_errors = []
        with torch.no_grad():
            for inputs, labels in dataloader:
                inputs, labels = inputs.to(device), labels.to(device)
                outputs = weak_learner(inputs)
                per_batch_errors.append((outputs.argmax(dim=1) != labels).float())
        weak_learner.train(was_training)  # restore the mode the caller left it in

        misclassifications = torch.cat(per_batch_errors)
        # Misclassified samples are scaled by exp(learning_rate); others by 1
        updated = sample_weights.to(device) * torch.exp(learning_rate * misclassifications)
        return updated / updated.sum()

    def bootstrap_dataloader(self, dataloader, sample_weights):
        """
        Create a new DataLoader by bootstrapping from the original DataLoader based on sample weights.

        Parameters:
            dataloader (DataLoader): Original DataLoader.
            sample_weights (torch.Tensor): Sample weights used for bootstrapping.

        Returns:
            DataLoader: Bootstrapped DataLoader with the same batch size as ``dataloader``.
        """
        dataset = dataloader.dataset
        num_samples = len(dataset)
        bootstrap_indices = torch.multinomial(sample_weights, num_samples, replacement=True).tolist()
        # Subset reads samples lazily by index instead of materialising them all in a list
        resampled = torch.utils.data.Subset(dataset, bootstrap_indices)
        return DataLoader(resampled, batch_size=dataloader.batch_size, shuffle=True)

In [None]:
class BoostingEnsemble:
    """Two-stage pipeline: train the experts, then train a gating network on their outputs.

    The gating network receives the experts' outputs concatenated along
    dimension 1 and produces the final class scores. Only the gating network
    is optimised in ``gn_training``; the experts are used as fixed predictors.
    """

    def __init__(self, num_experts, base_learner, gating_network, train_loader, test_loader):
        """
        Initialize the Boosting Ensemble.

        Parameters:
            num_experts (int): Number of experts in the ensemble.
            base_learner (nn.Module): Base learner model class.
            gating_network (nn.Module): Gating network class, called with ``num_experts``.
            train_loader (DataLoader): DataLoader for training data.
            test_loader (DataLoader): DataLoader for testing data.
        """
        self.num_experts = num_experts
        self.base_learner = base_learner
        self.gating_network = gating_network(num_experts).to(device)
        self.train_loader = train_loader
        self.test_loader = test_loader
        self.trained_experts = None  # set by expert_training()

    def _expert_outputs(self, inputs):
        """Return the experts' outputs for ``inputs`` concatenated along dim 1, without autograd."""
        if self.trained_experts is None:
            raise RuntimeError("expert_training() must be called before gn_training() or testing()")
        # The experts are not in the gating optimizer, so no graph is needed for them
        with torch.no_grad():
            return torch.cat([expert(inputs) for expert in self.trained_experts], dim=1)

    def expert_training(self):
        """
        Train individual experts in the ensemble.
        """
        model = ExpertEnsemble(self.base_learner, self.num_experts)
        model.train_experts(self.train_loader)
        print("Expert Training Completed")
        self.trained_experts = model.experts

        # Parameters stay flagged as trainable, as before; the gating optimizer
        # below never updates them.
        for expert in self.trained_experts:
            for param in expert.parameters():
                param.requires_grad = True

        # From here on the experts only predict, so keep them in inference mode
        self.trained_experts.eval()

    def gn_training(self):
        """
        Train the gating network.

        Raises:
            RuntimeError: If ``expert_training`` has not been run yet.
        """
        optimizer = torch.optim.Adam(self.gating_network.parameters(), lr=1e-4, betas=(0.9, 0.9))
        max_epochs = 5
        loss_fun = nn.CrossEntropyLoss()

        self.gating_network.train()
        for epoch in range(max_epochs):
            print(f'Epoch {epoch + 1}')
            current_loss = []
            current_acc = []

            # Training
            for inputs, targets in self.train_loader:
                inputs, targets = inputs.to(device), targets.to(device)
                optimizer.zero_grad()

                # Combine outputs of individual experts
                combined_outputs = self._expert_outputs(inputs)

                # Forward pass through the gating network
                outputs = self.gating_network(combined_outputs)
                loss = loss_fun(outputs, targets)

                loss.backward()
                optimizer.step()
                # Re-apply the weight exponentiation after every optimizer step
                self.gating_network.apply(WeightExponentiation())
                current_loss.append(loss.item())

                pred = torch.max(outputs, 1).indices
                acc = (targets == pred).sum().item()
                current_acc.append(acc / targets.shape[0])

            # Performance Evaluation
            print('Loss : %.3f' % (sum(current_loss) / len(current_loss)))
            print('Training_Acc  : %.3f' % (sum(current_acc) / len(current_acc)))

        print("Training of Gating Network Completed")

    def testing(self):
        """
        Test the ensemble model and print the mean per-batch accuracy.

        Raises:
            RuntimeError: If ``expert_training`` has not been run yet.
        """
        test_acc = []
        # Inference mode and no autograd graph while evaluating
        self.gating_network.eval()
        with torch.no_grad():
            for inputs, targets in self.test_loader:
                inputs, targets = inputs.to(device), targets.to(device)

                # Combine outputs of individual experts
                combined_outputs = self._expert_outputs(inputs)

                # Forward pass through the gating network
                test_outputs = self.gating_network(combined_outputs)

                pred = torch.max(test_outputs, 1).indices
                acc = (targets == pred).sum().item()
                test_acc.append(acc / targets.shape[0])

        print('Test_Acc  : %.3f' % (sum(test_acc) / len(test_acc)))

In [None]:
class CNN_Gating(nn.Module):
    """Gating network mapping the experts' concatenated outputs to class probabilities.

    The flat input of width ``n_experts * n_classes`` is projected to a
    3x32x32 map by a linear layer, passed through a stack of
    Conv2d / BatchNorm2d / ELU blocks, average-pooled to one value per class,
    and normalised with a softmax.
    """

    def __init__(self, n_experts: int = 3, n_classes: int = 10):
        """
        Initialize the CNN_Gating module.

        Parameters:
            n_experts (int): Number of experts whose outputs are concatenated.
            n_classes (int): Number of classes per expert output and in the final output.
        """
        super().__init__()

        # Add extra filters to the first convolutional layer
        self.first_conv_layer = nn.Conv2d(3, 192, 3)

        # Define layers
        self.layers = nn.Sequential(
            # Project the concatenated expert outputs to a flat 3*32*32 vector;
            # the input width follows n_experts instead of being fixed at 3 * 10
            nn.Linear(n_experts * n_classes, 3 * 32 * 32),
            nn.Unflatten(1, (3, 32, 32)),  # Reshape to (batch_size, 3, 32, 32)

            self.first_conv_layer,
            nn.BatchNorm2d(192),
            nn.ELU(True),

            nn.Conv2d(192, 96, 3),
            nn.BatchNorm2d(96),
            nn.ELU(True),

            nn.Conv2d(96, 96, 3, 2),
            nn.BatchNorm2d(96),
            nn.ELU(True),

            nn.Conv2d(96, 192, 3),
            nn.BatchNorm2d(192),
            nn.ELU(True),

            nn.Conv2d(192, 192, 1),
            nn.BatchNorm2d(192),
            nn.ELU(True),

            nn.Conv2d(192, n_classes, 1),
            nn.BatchNorm2d(n_classes),
            nn.ELU(True)
        )

        # Adaptive average pooling
        self.pooling = nn.AdaptiveAvgPool2d((1, 1))

        # Softmax layer
        self.softmax = nn.Softmax(1)

    def forward(self, x):
        """
        Forward pass of the CNN_Gating module.

        Parameters:
            x (torch.Tensor): Input tensor of shape (batch_size, n_experts * n_classes).

        Returns:
            torch.Tensor: Output tensor after softmax, shape (batch_size, n_classes).
        """
        x = self.layers(x)
        x = self.pooling(x)
        # flatten(1) drops only the two 1x1 spatial dims, so a batch of one
        # keeps its batch dimension (squeeze() would remove it as well)
        x = x.flatten(1)
        # NOTE(review): callers that feed this output to nn.CrossEntropyLoss apply
        # a second (log-)softmax on top of these probabilities - confirm intended.
        return self.softmax(x)

In [None]:
# Build the boosted ensemble (3 IOC-AllConv experts + CNN gating network) and run the full pipeline.
# The class defined above is `BoostingEnsemble`, and its constructor also requires the test loader.
boosted_ensemble = BoostingEnsemble(3, IOC_AllCNN, CNN_Gating, train_loader, test_loader)
boosted_ensemble.expert_training()  # stage 1: train the experts
boosted_ensemble.gn_training()      # stage 2: train the gating network on the experts' outputs
boosted_ensemble.testing()          # report test accuracy of the combined model