In [None]:
import re
from collections import OrderedDict
from functools import partial
from typing import Any, List, Optional, Tuple

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.checkpoint as cp
from torch import Tensor

import torchvision.transforms as transforms
from torchvision.datasets import CIFAR10
from torch.utils.data import DataLoader,random_split
import torch.optim as optim
from tqdm import tqdm

This notebook demonstrates experiments on a baseline DenseNet and its Input Output Convex (IOC) counterpart. Here's a summary of the key components and experiments covered:

Exp 1: **DenseNet and IOC-DenseNet FOR CIFAR-10**:
   - Implementing baseline architecture of DenseNet and constructing an IOC-DenseNet architecture enforcing convexity constraints similar to that of IOC-NN.

Exp 2: **Training on Duplicate Free data (ciFAIR10)**:
   - We use the same implementation of DenseNet and IOC-DenseNet on ciFAIR10 data set.

Exp 3: **BoostedEnsemble**:

- **Initialization:** ExpertEnsemble setup with base model and expert count.
- **Training:** Experts trained separately with bootstrapped data, updating weights based on performance.   

# Exp 1. DenseNet and IOC-DenseNet on CIFAR10

## DenseNet

In [None]:
'''This class has been taken from the original source code of the DenseNet'''
class _DenseLayer(nn.Module):
    def __init__(
        self, num_input_features: int, growth_rate: int, bn_size: int, drop_rate: float, memory_efficient: bool = False
    ) -> None:
        super().__init__()
        self.norm1 = nn.BatchNorm2d(num_input_features)
        self.relu1 = nn.ReLU(inplace=True)
        self.conv1 = nn.Conv2d(num_input_features, bn_size * growth_rate, kernel_size=1, stride=1, bias=False)

        self.norm2 = nn.BatchNorm2d(bn_size * growth_rate)
        self.relu2 = nn.ReLU(inplace=True)
        self.conv2 = nn.Conv2d(bn_size * growth_rate, growth_rate, kernel_size=3, stride=1, padding=1, bias=False)

        self.drop_rate = float(drop_rate)
        self.memory_efficient = memory_efficient

    def bn_function(self, inputs: List[Tensor]) -> Tensor:
        concated_features = torch.cat(inputs, 1)
        bottleneck_output = self.conv1(self.relu1(self.norm1(concated_features)))
        return bottleneck_output

    # todo: rewrite when torchscript supports any
    def any_requires_grad(self, input: List[Tensor]) -> bool:
        for tensor in input:
            if tensor.requires_grad:
                return True
        return False

    @torch.jit.unused
    def call_checkpoint_bottleneck(self, input: List[Tensor]) -> Tensor:
        def closure(*inputs):
            return self.bn_function(inputs)

        return cp.checkpoint(closure, *input)

    def forward(self, input: Tensor) -> Tensor:
        if isinstance(input, Tensor):
            prev_features = [input]
        else:
            prev_features = input

        if self.memory_efficient and self.any_requires_grad(prev_features):
            if torch.jit.is_scripting():
                raise Exception("Memory Efficient not supported in JIT")

            bottleneck_output = self.call_checkpoint_bottleneck(prev_features)
        else:
            bottleneck_output = self.bn_function(prev_features)

        new_features = self.conv2(self.relu2(self.norm2(bottleneck_output)))
        if self.drop_rate > 0:
            new_features = F.dropout(new_features, p=self.drop_rate, training=self.training)
        return new_features

In [None]:
'''This class has been taken from the original source code of the DenseNet'''
class _DenseBlock(nn.ModuleDict):
    """A stack of ``_DenseLayer`` modules stored under the keys ``denselayer1..N``.

    Layer ``i`` (0-based) is built for ``num_input_features + i * growth_rate``
    input channels, because it receives the block input plus the outputs of all
    earlier layers.
    """

    _version = 2

    def __init__(
        self,
        num_layers: int,
        num_input_features: int,
        bn_size: int,
        growth_rate: int,
        drop_rate: float,
        memory_efficient: bool = False,
    ) -> None:
        super().__init__()
        for index in range(num_layers):
            self.add_module(
                "denselayer%d" % (index + 1),
                _DenseLayer(
                    num_input_features + index * growth_rate,
                    growth_rate=growth_rate,
                    bn_size=bn_size,
                    drop_rate=drop_rate,
                    memory_efficient=memory_efficient,
                ),
            )

    def forward(self, init_features: Tensor) -> Tensor:
        """Feed the growing list of feature maps to each layer and return their channel-wise concatenation."""
        collected = [init_features]
        for layer in self.values():
            produced = layer(collected)
            collected.append(produced)
        return torch.cat(collected, 1)

In [None]:
'''This class has been taken from the original source code of the DenseNet'''
class _Transition(nn.Sequential):
    def __init__(self, num_input_features: int, num_output_features: int) -> None:
        super().__init__()
        self.norm = nn.BatchNorm2d(num_input_features)
        self.relu = nn.ReLU(inplace=True)
        self.conv = nn.Conv2d(num_input_features, num_output_features, kernel_size=1, stride=1, bias=False)
        self.pool = nn.AvgPool2d(kernel_size=2, stride=2)

In [None]:
'''This class has been taken from the original source code of the DenseNet'''
class DenseNet(nn.Module):
    """DenseNet classifier: convolutional stem, dense blocks joined by transitions, linear head.

    Args:
        growth_rate: channels added by every dense layer.
        block_config: number of dense layers in each dense block.
        num_init_features: output channels of the stem convolution.
        bn_size: bottleneck multiplier passed to every dense layer.
        drop_rate: dropout probability passed to every dense layer.
        num_classes: size of the classifier output.
        memory_efficient: passed to every dense layer (enables checkpointing there).
    """

    def __init__(
        self,
        growth_rate: int = 24,
        block_config: Tuple[int, int, int, int] = (6, 12, 24, 16),
        num_init_features: int = 64,
        bn_size: int = 4,
        drop_rate: float = 0,
        num_classes: int = 1000,
        memory_efficient: bool = False,
    ) -> None:
        super().__init__()

        # Stem: 7x7 stride-2 convolution followed by BN, ReLU and a stride-2 max pool
        stem = OrderedDict()
        stem["conv0"] = nn.Conv2d(3, num_init_features, kernel_size=7, stride=2, padding=3, bias=False)
        stem["norm0"] = nn.BatchNorm2d(num_init_features)
        stem["relu0"] = nn.ReLU(inplace=True)
        stem["pool0"] = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.features = nn.Sequential(stem)

        # Dense blocks; every block except the last is followed by a transition
        # that halves the channel count.
        last_block = len(block_config) - 1
        channels = num_init_features
        for idx, depth in enumerate(block_config):
            self.features.add_module(
                "denseblock%d" % (idx + 1),
                _DenseBlock(
                    num_layers=depth,
                    num_input_features=channels,
                    bn_size=bn_size,
                    growth_rate=growth_rate,
                    drop_rate=drop_rate,
                    memory_efficient=memory_efficient,
                ),
            )
            channels = channels + depth * growth_rate
            if idx == last_block:
                continue
            self.features.add_module(
                "transition%d" % (idx + 1),
                _Transition(num_input_features=channels, num_output_features=channels // 2),
            )
            channels = channels // 2

        # Final batch norm and linear classifier
        self.features.add_module("norm5", nn.BatchNorm2d(channels))
        self.classifier = nn.Linear(channels, num_classes)

        # Weight init: Kaiming-normal conv weights, BN weight=1 / bias=0, linear bias=0
        for module in self.modules():
            if isinstance(module, nn.Conv2d):
                nn.init.kaiming_normal_(module.weight)
            elif isinstance(module, nn.BatchNorm2d):
                nn.init.constant_(module.weight, 1)
                nn.init.constant_(module.bias, 0)
            elif isinstance(module, nn.Linear):
                nn.init.constant_(module.bias, 0)

    def forward(self, x: Tensor) -> Tensor:
        """Return class logits: features -> ReLU -> global average pool -> flatten -> classifier."""
        activated = F.relu(self.features(x), inplace=True)
        pooled = F.adaptive_avg_pool2d(activated, (1, 1))
        return self.classifier(torch.flatten(pooled, 1))

In [None]:
# Load the CIFAR-10 train and test sets from ./data as tensors
transform = transforms.Compose([transforms.ToTensor()])
train_data = CIFAR10('data', train=True, download=True, transform=transform)
test_data = CIFAR10('data', train=False, download=True, transform=transform)

batch_size = 256

# Split the training set 80/20 into train and validation subsets
train_ratio = 0.8
validation_ratio = 0.2
total_samples = len(train_data)
train_size = int(train_ratio * total_samples)
validation_size = total_samples - train_size
train_data, val_data = random_split(train_data, [train_size, validation_size])

# One shuffled DataLoader per split (train, validation, test)
train_loader, val_loader, test_loader = (
    DataLoader(split, batch_size=batch_size, shuffle=True)
    for split in (train_data, val_data, test_data)
)

Files already downloaded and verified
Files already downloaded and verified


In [None]:

# Baseline DenseNet configured for the 10 CIFAR-10 classes
mlp = DenseNet(
    growth_rate=12,
    block_config=(6, 12, 24, 16),
    num_init_features=64,
    bn_size=4,
    drop_rate=0,
    num_classes=10,
)

# Cross-entropy objective optimised with Adam
loss_fun = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(mlp.parameters(), lr=1e-4)

# Epoch counter, early-stopping counter, epoch budget and last validation accuracy
epoch = 0
tol_epochs = 0
max_epochs = 2
prev_val_acc = 0.0

# Train until the epoch budget is spent or two consecutive epochs improve
# validation accuracy by a positive amount smaller than 1e-4.
while epoch < max_epochs and tol_epochs < 2:
    epoch += 1
    print(f'Epoch {epoch}')

    current_loss, current_acc, val_acc = [], [], []

    # Optimisation pass over the training split
    mlp.train()
    for inputs, targets in train_loader:
        optimizer.zero_grad()
        outputs = mlp(inputs)
        loss = loss_fun(outputs, targets)
        loss.backward()
        optimizer.step()

        current_loss.append(loss.item())
        pred = torch.max(outputs, 1).indices
        current_acc.append((pred == targets).float().mean().item())

    # Evaluation pass over the validation split (no gradients needed)
    mlp.eval()
    with torch.no_grad():
        for inputs, targets in val_loader:
            val_outputs = mlp(inputs)
            pred = torch.max(val_outputs, 1).indices
            val_acc.append((pred == targets).float().mean().item())

    # Per-epoch metrics are unweighted means of the per-batch values
    epoch_val_acc = sum(val_acc) / len(val_acc)
    print('Loss: %.3f' % (sum(current_loss) / len(current_loss)))
    print('Training Accuracy: %.3f' % (sum(current_acc) / len(current_acc)))
    print('Validation Accuracy: %.3f' % epoch_val_acc)

    # Change in validation accuracy relative to the previous epoch
    update = epoch_val_acc - prev_val_acc
    print('Update: %.4f' % update)
    print("--------------------------------------------------------------------------------------------------")

    # Any other outcome (larger gain, no gain, or a drop) resets the counter
    tol_epochs = tol_epochs + 1 if 0 < update < 1e-4 else 0

    prev_val_acc = epoch_val_acc

Epoch 1
Loss : 1.983
Training_Acc  : 0.283
Validation_Acc  : 0.370
update: 0.3696
--------------------------------------------------------------------------------------------------
Epoch 2
Loss : 1.627
Training_Acc  : 0.408
Validation_Acc  : 0.422
update: 0.0526
--------------------------------------------------------------------------------------------------


In [None]:
# Evaluate the trained model on the test set.
# eval() is set explicitly so this cell does not depend on the mode left behind
# by an earlier cell, and no_grad() avoids building autograd graphs for inference.
test_acc = []
mlp.eval()
with torch.no_grad():
    for inputs, targets in test_loader:
        test_outputs = mlp(inputs)
        pred = torch.max(test_outputs, 1).indices
        acc = (targets == pred).sum().item()
        # per-batch accuracy; the reported figure is the mean over batches
        test_acc.append(acc / targets.shape[0])
print('Test_Acc  : %.3f'%(sum(test_acc)/len(test_acc)))

Test_Acc  : 0.432


## IOC-DenseNet

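The following modifications are made to the classes defined above. Note that the cells below reuse the names `_DenseLayer`, `_DenseBlock` and `_Transition`, so once they run, any `DenseNet` constructed later in the notebook is built from these ELU versions.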

1. **Customized Initial Convolutional Layer**:
   - Increased the number of output filters of the initial convolutional layer (`conv0`) from 64 to 128 in the `IOC_DenseNet` class (the layer still takes the 3-channel image as input), enhancing the model's capacity to capture complex patterns from input images.

2. **Activation Function Modification**:
   - Replaced the ReLU activation functions that follow batch normalization with ELU (Exponential Linear Unit) activation functions in the initial convolutional layer, the dense layers and the transition layers of the `IOC_DenseNet` class (the final activation in `forward` is still ReLU). ELU may offer advantages over ReLU, such as reduced vanishing gradient problems and improved learning dynamics.

3. **Adjustments in Transition Layers**:
   - Modified the transition layers in the `IOC_DenseNet` class to maintain the increased number of filters in the first convolutional block. This ensures seamless transition between dense blocks while preserving feature maps' dimensions and depth.

4. **Consistent Initialization**:
   - Ensured consistent weight initialization across different layers of the network using techniques such as Kaiming initialization for convolutional layers and constant initialization for batch normalization and linear layers. This helps in stabilizing the training process and improving convergence.

In [None]:
class _DenseLayer(nn.Module):
    def __init__(
        self, num_input_features: int, growth_rate: int, bn_size: int, drop_rate: float, memory_efficient: bool = False,
    ) -> None:
        super().__init__()

        # Batch normalization and ELU activation before the first convolutional layer
        self.norm1 = nn.BatchNorm2d(num_input_features)
        self.elu1 = nn.ELU(inplace=True)
        self.conv1 = nn.Conv2d(num_input_features, bn_size * growth_rate, kernel_size=1, stride=1, bias=False)

        # Batch normalization and ELU activation before the second convolutional layer
        self.norm2 = nn.BatchNorm2d(bn_size * growth_rate)
        self.elu2 = nn.ELU(inplace=True)
        self.conv2 = nn.Conv2d(bn_size * growth_rate, growth_rate, kernel_size=3, stride=1, padding=1, bias=False)

        # Dropout rate and memory_efficient flag
        self.drop_rate = float(drop_rate)
        self.memory_efficient = memory_efficient

    # Function to perform batch normalization, ELU activation, and convolution
    def bn_function(self, inputs: List[Tensor]) -> Tensor:
        concated_features = torch.cat(inputs, 1)
        bottleneck_output = self.conv1(self.elu1(self.norm1(concated_features)))
        return bottleneck_output

    # Method to check if any tensor in the input list requires gradients
    # Currently unused, may be used for future optimizations
    def any_requires_grad(self, input: List[Tensor]) -> bool:
        for tensor in input:
            if tensor.requires_grad:
                return True
        return False

    # Method for memory-efficient training using checkpointing
    @torch.jit.unused
    def call_checkpoint_bottleneck(self, input: List[Tensor]) -> Tensor:
        def closure(*inputs):
            return self.bn_function(inputs)

        return cp.checkpoint(closure, *input)

    # Forward pass through the DenseLayer
    def forward(self, input: Tensor) -> Tensor:
        if isinstance(input, Tensor):
            prev_features = [input]
        else:
            prev_features = input

        # Check if memory-efficient training is enabled and any tensor requires gradients
        if self.memory_efficient and self.any_requires_grad(prev_features):
            if torch.jit.is_scripting():
                raise Exception("Memory Efficient not supported in JIT")

            # Use checkpointing for memory-efficient training
            bottleneck_output = self.call_checkpoint_bottleneck(prev_features)
        else:
            # Compute bottleneck output directly
            bottleneck_output = self.bn_function(prev_features)

        # Compute new features using the second convolutional layer
        new_features = self.conv2(self.elu2(self.norm2(bottleneck_output)))

        # Apply dropout if applicable
        if self.drop_rate > 0:
            new_features = F.dropout(new_features, p=self.drop_rate, training=self.training)
        return new_features

In [None]:
class _DenseBlock(nn.ModuleDict):
    """A stack of ``_DenseLayer`` modules stored under the keys ``denselayer1..N``.

    Layer ``i`` (0-based) is built for ``num_input_features + i * growth_rate``
    input channels. This definition reuses the name ``_DenseBlock`` from the
    earlier cell and resolves ``_DenseLayer`` when the block is constructed.
    """

    _version = 2

    def __init__(
        self,
        num_layers: int,
        num_input_features: int,
        bn_size: int,
        growth_rate: int,
        drop_rate: float,
        memory_efficient: bool = False,

    ) -> None:
        super().__init__()

        for index in range(num_layers):
            in_channels = num_input_features + index * growth_rate
            self.add_module(
                "denselayer%d" % (index + 1),
                _DenseLayer(
                    in_channels,
                    growth_rate=growth_rate,
                    bn_size=bn_size,
                    drop_rate=drop_rate,
                    memory_efficient=memory_efficient,
                ),
            )

    def forward(self, init_features: Tensor) -> Tensor:
        """Feed the growing list of feature maps to each layer and return their channel-wise concatenation."""
        collected = [init_features]
        for layer in self.values():
            produced = layer(collected)
            collected.append(produced)
        return torch.cat(collected, 1)

In [None]:
class _Transition(nn.Sequential):
    def __init__(self, num_input_features: int, num_output_features: int) -> None:
        super().__init__()

        # Batch normalization layer
        self.norm = nn.BatchNorm2d(num_input_features)

        # ELU activation layer inplace
        self.elu = nn.ELU(inplace=True)

        # Convolutional layer with kernel size 1x1
        self.conv = nn.Conv2d(num_input_features, num_output_features, kernel_size=1, stride=1, bias=False)

        # Average pooling layer with kernel size 2x2 and stride 2
        self.pool = nn.AvgPool2d(kernel_size=2, stride=2)


In [None]:
class IOC_DenseNet(nn.Module):
    """DenseNet variant with a fixed 128-filter ELU stem, used as the IOC model.

    Args:
        growth_rate: channels added by every dense layer.
        block_config: number of dense layers in each dense block.
        num_init_features: overwritten by 128 as soon as the first dense block is
            built, so it only affects the final norm/classifier width when
            ``block_config`` is empty.
        bn_size: bottleneck multiplier passed to every dense layer.
        drop_rate: dropout probability passed to every dense layer.
        num_classes: size of the classifier output.
        memory_efficient: passed to every dense layer (enables checkpointing there).
    """

    def __init__(
        self,
        growth_rate: int = 24,
        block_config: Tuple[int, int, int, int] = (6, 12, 24, 16),
        num_init_features: int = 64,
        bn_size: int = 4,
        drop_rate: float = 0,
        num_classes: int = 10,
        memory_efficient: bool = False,
    ) -> None:
        super().__init__()

        # Stem with 128 filters; kept as a plain OrderedDict attribute and
        # registered as modules through self.features below.
        stem = OrderedDict()
        stem["conv0"] = nn.Conv2d(3, 128, kernel_size=7, stride=2, padding=3, bias=False)
        stem["norm0"] = nn.BatchNorm2d(128)
        stem["elu0"] = nn.ELU(inplace=True)
        stem["pool0"] = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.conv = stem

        self.features = nn.Sequential(self.conv)

        # Dense blocks; every block except the last is followed by a transition
        # that halves the channel count.
        last_block = len(block_config) - 1
        channels = num_init_features
        for idx, depth in enumerate(block_config):
            if idx == 0:
                channels = 128  # the first block consumes the 128-channel stem output
            self.features.add_module(
                "denseblock%d" % (idx + 1),
                _DenseBlock(
                    num_layers=depth,
                    num_input_features=channels,
                    bn_size=bn_size,
                    growth_rate=growth_rate,
                    drop_rate=drop_rate,
                    memory_efficient=memory_efficient,
                ),
            )
            channels = channels + depth * growth_rate
            if idx == last_block:
                continue
            self.features.add_module(
                "transition%d" % (idx + 1),
                _Transition(num_input_features=channels, num_output_features=channels // 2),
            )
            channels = channels // 2

        # Final batch norm and linear classifier
        self.features.add_module("norm5", nn.BatchNorm2d(channels))
        self.classifier = nn.Linear(channels, num_classes)

        # Weight init: Kaiming-normal conv weights, BN weight=1 / bias=0, linear bias=0
        for module in self.modules():
            if isinstance(module, nn.Conv2d):
                nn.init.kaiming_normal_(module.weight)
            elif isinstance(module, nn.BatchNorm2d):
                nn.init.constant_(module.weight, 1)
                nn.init.constant_(module.bias, 0)
            elif isinstance(module, nn.Linear):
                nn.init.constant_(module.bias, 0)

    def forward(self, x: Tensor) -> Tensor:
        """Return class logits: features -> ReLU -> global average pool -> flatten -> classifier."""
        activated = F.relu(self.features(x), inplace=True)
        pooled = F.adaptive_avg_pool2d(activated, (1, 1))
        return self.classifier(torch.flatten(pooled, 1))

In [None]:
# Class to perform whitening Transform
class WhiteningTransform:
    """PCA-whitening transform usable inside a ``transforms.Compose`` pipeline.

    ``fit`` estimates the per-feature mean and a whitening matrix from a data
    tensor; calling the object then centres and whitens a sample (or a batch)
    and returns it in its original shape. Until ``fit`` has been called the
    transform returns its input unchanged.

    Args:
        epsilon: value added to the eigenvalues before the square root, to avoid
            division by zero for (near-)degenerate directions.
    """

    def __init__(self, epsilon: float = 1e-10):
        self.epsilon = epsilon
        self.mean = None        # (D,) feature mean, set by fit()
        self.whitening = None   # (D, D) whitening matrix, set by fit()

    def fit(self, data):
        """Estimate mean and whitening matrix from ``data`` of shape (N, ...).

        All trailing dimensions are flattened into D features per sample.
        """
        flat = data.reshape(data.size(0), -1)
        if not flat.is_floating_point():
            flat = flat.float()

        self.mean = flat.mean(dim=0)
        centered = flat - self.mean

        cov_matrix = torch.mm(centered.t(), centered) / flat.size(0)
        U, S, _ = torch.svd(cov_matrix)
        # Column j of U is scaled by 1/sqrt(S_j), so that
        # (x - mean) @ whitening has (approximately) identity covariance.
        self.whitening = U / torch.sqrt(S + self.epsilon)

    def __call__(self, x):
        """Whiten ``x`` (one sample or a batch of samples) and keep its shape."""
        if self.whitening is None:
            return x
        # reshape(-1, D) handles a single sample (-> 1 row) and a batch (-> N rows)
        flat = x.reshape(-1, self.whitening.size(0)).to(self.whitening.dtype)
        whitened = (flat - self.mean) @ self.whitening
        return whitened.reshape(x.shape)

# Preprocessing pipeline: convert the image to a tensor, then pass it through WhiteningTransform.
# NOTE(review): fit() is not called on this WhiteningTransform instance here, and an
# unfitted instance returns its input unchanged - confirm whether whitening is meant to be active.
transform = transforms.Compose([transforms.ToTensor(), WhiteningTransform()])

In [None]:
# Load the CIFAR-10 train and test sets using the `transform` pipeline defined in the previous cell
train_data = CIFAR10('data', train=True, download=True, transform=transform)
test_data = CIFAR10('data', train=False, download=True, transform=transform)

batch_size = 256

# Split the training set 80/20 into train and validation subsets
train_ratio = 0.8
validation_ratio = 0.2
total_samples = len(train_data)
train_size = int(train_ratio * total_samples)
validation_size = total_samples - train_size
train_data, val_data = random_split(train_data, [train_size, validation_size])

# One shuffled DataLoader per split (train, validation, test)
train_loader, val_loader, test_loader = (
    DataLoader(split, batch_size=batch_size, shuffle=True)
    for split in (train_data, val_data, test_data)
)

Files already downloaded and verified
Files already downloaded and verified


In [None]:
# Class for Weight Exponentiation
class WeightExponentiation(object):
    """Callable for ``nn.Module.apply`` that replaces negative weights by small positive values.

    Every negative entry ``w`` of ``module.weight`` becomes ``exp(w - epsilon)``;
    non-negative entries are left untouched. Modules without a tensor ``weight``
    (no attribute, or ``weight`` is ``None``) are skipped.

    Args:
        epsilon: offset subtracted before exponentiation; larger values map
            negative weights closer to zero.
    """

    def __init__(self, epsilon = 5):
        self.epsilon = epsilon # epsilon for constraining exponentiation of weights


    def __call__(self, module):
        weight = getattr(module, 'weight', None)
        # e.g. BatchNorm2d(affine=False) exposes `weight` as None - nothing to constrain
        if not isinstance(weight, torch.Tensor):
            return
        # Skip modules whose printed representation contains "conv".
        # NOTE(review): PyTorch prints built-in layers by class name (e.g. "Conv2d(...)"),
        # so this lowercase, case-sensitive test does not match a plain nn.Conv2d -
        # confirm which layers were meant to be exempt from the constraint.
        if "conv" not in module.__str__():
            with torch.no_grad():
                negative = weight < 0
                # In-place update keeps the same Parameter object registered on the module
                weight[negative] = torch.exp(weight[negative] - self.epsilon)


In [None]:
# IOC-DenseNet configured for the 10 CIFAR-10 classes
mlp = IOC_DenseNet(growth_rate = 12,
        block_config = (6, 12, 24, 16),
        num_init_features = 64,
        bn_size = 4,
        drop_rate = 0,
        num_classes = 10,
        )


loss_fun = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(mlp.parameters(), lr=1e-4,betas=(0.9,0.9))
epoch = 0
prev_val_acc = 0.0
tol_epochs = 0
max_epochs = 2
# Early stopping: an epoch counts as "stalled" when validation accuracy improves
# by a positive amount smaller than this threshold.
tolerance = 1e-4

while(epoch <max_epochs and tol_epochs<2):
    epoch+=1
    print(f'Epoch {epoch}')
    current_loss = []
    current_acc = []
    val_acc = []

    # Training: BatchNorm must use batch statistics, so switch to train mode every
    # epoch (the validation pass below leaves the model in eval mode).
    mlp.train()
    for inputs, targets in train_loader:
        optimizer.zero_grad()
        outputs = mlp(inputs)

        loss = loss_fun(outputs, targets)
        loss.backward()
        optimizer.step()
        # Re-impose the weight constraint after every optimizer update
        mlp.apply(WeightExponentiation())

        current_loss.append(loss.item())

        pred = torch.max(outputs,1).indices
        acc= (targets == pred).sum().item()
        current_acc.append(acc/targets.shape[0])

    # Validation: eval mode keeps validation batches out of the BatchNorm running
    # statistics, and no_grad avoids building autograd graphs.
    mlp.eval()
    with torch.no_grad():
        for inputs, targets in val_loader:
            val_outputs = mlp(inputs)
            pred = torch.max(val_outputs,1).indices
            acc =(targets==pred).sum().item()
            val_acc.append(acc/targets.shape[0])

    # Performance Evaluation (unweighted means of the per-batch values)

    print('Loss : %.3f' %(sum(current_loss) /len(current_loss)))
    print('Training_Acc  : %.3f'%(sum(current_acc)/len(current_acc)))
    print('Validation_Acc  : %.3f'%(sum(val_acc)/len(val_acc)))

    # Early Stopping Criteria
    update = (sum(val_acc) / len(val_acc) - prev_val_acc)
    print('update: %.4f' % update)
    print("--------------------------------------------------------------------------------------------------")

    # Any other outcome (larger gain, no gain, or a drop) resets the counter
    if(update>0 and update<tolerance):
        tol_epochs+=1
    else:
        tol_epochs = 0

    prev_val_acc = sum(val_acc)/len(val_acc)

Epoch 1
Loss : 2.285
Training_Acc  : 0.135
Validation_Acc  : 0.142
update: 0.1421
--------------------------------------------------------------------------------------------------
Epoch 2
Loss : 2.272
Training_Acc  : 0.144
Validation_Acc  : 0.155
update: 0.0133
--------------------------------------------------------------------------------------------------


In [None]:
# Evaluate the trained model on the test set.
# eval() makes BatchNorm use its running statistics (and stops it from updating
# them on test data); no_grad() avoids building autograd graphs for inference.
test_acc = []
mlp.eval()
with torch.no_grad():
    for inputs, targets in test_loader:
        test_outputs = mlp(inputs)
        pred = torch.max(test_outputs, 1).indices
        acc = (targets == pred).sum().item()
        # per-batch accuracy; the reported figure is the mean over batches
        test_acc.append(acc / targets.shape[0])
print('Test_Acc  : %.3f'%(sum(test_acc)/len(test_acc)))

Test_Acc  : 0.155


# Exp 2. Training on Duplicate Free data (ciFAIR10)

## DenseNet

In [None]:
import torchvision.datasets

class ciFAIR10(torchvision.datasets.CIFAR10):
    """``CIFAR10`` subclass pointed at the ciFAIR-10 archive.

    Only the archive URL/filename/checksum, the extraction folder and the
    test-batch entry are overridden; everything else is inherited from
    ``torchvision.datasets.CIFAR10``.
    """

    # Archive location and checksum
    url = 'https://github.com/cvjena/cifair/releases/download/v1.0/ciFAIR-10.zip'
    filename = 'ciFAIR-10.zip'
    tgz_md5 = 'ca08fd390f0839693d3fc45c4e49585f'

    # Folder (under the dataset root) that holds the batches
    base_folder = 'ciFAIR-10'

    # Test batch file name with its checksum
    test_list = [['test_batch', '01290e6b622a1977a000eff13650aca2']]

In [None]:
# Load the ciFAIR-10 train and test sets from ./data as tensors
transform = transforms.Compose([transforms.ToTensor()])
train_data = ciFAIR10('data', train=True, download=True, transform=transform)
test_data = ciFAIR10('data', train=False, download=True, transform=transform)

batch_size = 256

# Split the training set 80/20 into train and validation subsets
train_ratio = 0.8
validation_ratio = 0.2
total_samples = len(train_data)
train_size = int(train_ratio * total_samples)
validation_size = total_samples - train_size
train_data, val_data = random_split(train_data, [train_size, validation_size])

# One shuffled DataLoader per split (train, validation, test)
train_loader, val_loader, test_loader = (
    DataLoader(split, batch_size=batch_size, shuffle=True)
    for split in (train_data, val_data, test_data)
)

Files already downloaded and verified
Files already downloaded and verified


In [None]:
# Import necessary libraries
import torch
import torch.nn as nn

# Initialize DenseNet model
# NOTE(review): by this point the notebook has redefined `_DenseBlock`, `_DenseLayer`
# and `_Transition` with ELU activations, and DenseNet looks those names up when it is
# constructed - confirm that this baseline is meant to use the ELU versions.
mlp = DenseNet(growth_rate=12,
               block_config=(6, 12, 24, 16),
               num_init_features=64,
               bn_size=4,
               drop_rate=0,
               num_classes=10)

# Define loss function and optimizer
loss_fun = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(mlp.parameters(), lr=1e-4)

# Initialize variables for training loop
epoch = 0
prev_val_acc = 0.0
tol_epochs = 0
max_epochs = 2
# Early-stopping threshold, defined here so the cell does not depend on a value
# left over from an earlier cell.
tolerance = 1e-4

# Main training loop
while epoch < max_epochs and tol_epochs < 2:
    epoch += 1
    print(f'Epoch {epoch}')

    # Lists to store loss and accuracy for training and validation
    current_loss = []
    current_acc = []
    val_acc = []

    # Training loop: train mode so BatchNorm uses batch statistics
    # (the validation pass below leaves the model in eval mode).
    mlp.train()
    for inputs, targets in train_loader:
        optimizer.zero_grad()
        outputs = mlp(inputs)
        loss = loss_fun(outputs, targets)
        loss.backward()
        optimizer.step()
        current_loss.append(loss.item())
        pred = torch.max(outputs, 1).indices
        acc = (targets == pred).sum().item()
        current_acc.append(acc / targets.shape[0])

    # Validation loop: eval mode keeps validation batches out of the BatchNorm
    # running statistics, and no_grad avoids building autograd graphs.
    mlp.eval()
    with torch.no_grad():
        for inputs, targets in val_loader:
            val_outputs = mlp(inputs)
            pred = torch.max(val_outputs, 1).indices
            acc = (targets == pred).sum().item()
            val_acc.append(acc / targets.shape[0])

    # Performance Evaluation (unweighted means of the per-batch values)
    print('Loss : %.3f' % (sum(current_loss) / len(current_loss)))
    print('Training_Acc  : %.3f' % (sum(current_acc) / len(current_acc)))
    print('Validation_Acc  : %.3f' % (sum(val_acc) / len(val_acc)))

    # Early Stopping Criteria
    update = (sum(val_acc) / len(val_acc) - prev_val_acc)
    print('update: %.4f' % update)
    print("--------------------------------------------------------------------------------------------------")

    # Count epochs whose gain is positive but below `tolerance`; anything else resets the counter
    if update > 0 and update < tolerance:
        tol_epochs += 1
    else:
        tol_epochs = 0

    prev_val_acc = sum(val_acc) / len(val_acc)

Epoch 1
Loss : 1.636
Training_Acc  : 0.425
Validation_Acc  : 0.503
update: 0.5025
--------------------------------------------------------------------------------------------------
Epoch 2
Loss : 1.267
Training_Acc  : 0.551
Validation_Acc  : 0.560
update: 0.0574
--------------------------------------------------------------------------------------------------


In [None]:
# Evaluate the trained model on the test set.
# eval() makes BatchNorm use its running statistics (and stops it from updating
# them on test data); no_grad() avoids building autograd graphs for inference.
test_acc = []
mlp.eval()
with torch.no_grad():
    for inputs, targets in test_loader:
        test_outputs = mlp(inputs)
        pred = torch.max(test_outputs, 1).indices
        acc = (targets == pred).sum().item()
        # per-batch accuracy; the reported figure is the mean over batches
        test_acc.append(acc / targets.shape[0])
print('Test_Acc  : %.3f'%(sum(test_acc)/len(test_acc)))

Test_Acc  : 0.552


## IOC_DenseNet

In [None]:
# Preprocessing pipeline: convert the image to a tensor, then pass it through WhiteningTransform.
# NOTE(review): fit() is not called on this WhiteningTransform instance here, and an
# unfitted instance returns its input unchanged - confirm whether whitening is meant to be active.
transform = transforms.Compose([transforms.ToTensor(), WhiteningTransform()])

# Load the ciFAIR-10 train and test sets from ./data
train_data = ciFAIR10('data', train=True, download=True, transform=transform)
test_data = ciFAIR10('data', train=False, download=True, transform=transform)

batch_size = 256

# Split the training set 80/20 into train and validation subsets
train_ratio = 0.8
validation_ratio = 0.2
total_samples = len(train_data)
train_size = int(train_ratio * total_samples)
validation_size = total_samples - train_size
train_data, val_data = random_split(train_data, [train_size, validation_size])

# One shuffled DataLoader per split (train, validation, test)
train_loader, val_loader, test_loader = (
    DataLoader(split, batch_size=batch_size, shuffle=True)
    for split in (train_data, val_data, test_data)
)


Files already downloaded and verified
Files already downloaded and verified


In [None]:
# Import necessary libraries
import torch
import torch.nn as nn

# Initialize IOC_DenseNet model
mlp = IOC_DenseNet(growth_rate=12,
                   block_config=(6, 12, 24, 16),
                   num_init_features=64,
                   bn_size=4,
                   drop_rate=0,
                   num_classes=10)

# Define loss function and optimizer
loss_fun = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(mlp.parameters(), lr=1e-4, betas=(0.9, 0.9))

# Initialize variables for training loop
epoch = 0
prev_val_acc = 0.0
tol_epochs = 0
max_epochs = 2
# Early-stopping threshold, defined here so the cell does not depend on a value
# left over from an earlier cell.
tolerance = 1e-4

# Main training loop
while epoch < max_epochs and tol_epochs < 2:
    epoch += 1
    print(f'Epoch {epoch}')

    # Lists to store loss and accuracy for training and validation
    current_loss = []
    current_acc = []
    val_acc = []

    # Training loop: train mode so BatchNorm uses batch statistics
    # (the validation pass below leaves the model in eval mode).
    mlp.train()
    for inputs, targets in train_loader:
        optimizer.zero_grad()
        outputs = mlp(inputs)
        loss = loss_fun(outputs, targets)
        loss.backward()
        optimizer.step()

        # Apply weight exponentiation after each update
        mlp.apply(WeightExponentiation())

        current_loss.append(loss.item())
        pred = torch.max(outputs, 1).indices
        acc = (targets == pred).sum().item()
        current_acc.append(acc / targets.shape[0])

    # Validation loop: eval mode keeps validation batches out of the BatchNorm
    # running statistics, and no_grad avoids building autograd graphs.
    mlp.eval()
    with torch.no_grad():
        for inputs, targets in val_loader:
            val_outputs = mlp(inputs)
            pred = torch.max(val_outputs, 1).indices
            acc = (targets == pred).sum().item()
            val_acc.append(acc / targets.shape[0])

    # Performance Evaluation (unweighted means of the per-batch values)
    print('Loss : %.3f' % (sum(current_loss) / len(current_loss)))
    print('Training_Acc  : %.3f' % (sum(current_acc) / len(current_acc)))
    print('Validation_Acc  : %.3f' % (sum(val_acc) / len(val_acc)))

    # Early Stopping Criteria
    update = (sum(val_acc) / len(val_acc) - prev_val_acc)
    print('update: %.4f' % update)
    print("--------------------------------------------------------------------------------------------------")

    # Count epochs whose gain is positive but below `tolerance`; anything else resets the counter
    if update > 0 and update < tolerance:
        tol_epochs += 1
    else:
        tol_epochs = 0

    prev_val_acc = sum(val_acc) / len(val_acc)

Epoch 1
Loss : 2.283
Training_Acc  : 0.139
Validation_Acc  : 0.144
update: 0.1438
--------------------------------------------------------------------------------------------------
Epoch 2
Loss : 2.270
Training_Acc  : 0.149
Validation_Acc  : 0.147
update: 0.0036
--------------------------------------------------------------------------------------------------


In [None]:
# Evaluate the trained model on the test set.
# eval() makes BatchNorm use its running statistics (and stops it from updating
# them on test data); no_grad() avoids building autograd graphs for inference.
test_acc = []
mlp.eval()
with torch.no_grad():
    for inputs, targets in test_loader:
        test_outputs = mlp(inputs)
        pred = torch.max(test_outputs, 1).indices
        acc = (targets == pred).sum().item()
        # per-batch accuracy; the reported figure is the mean over batches
        test_acc.append(acc / targets.shape[0])
print('Test_Acc  : %.3f'%(sum(test_acc)/len(test_acc)))

Test_Acc  : 0.153


# Exp 3. Boosted Ensemble

## IOC-DenseNet

In [None]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [None]:
# Preprocessing pipeline: convert the image to a tensor, then pass it through WhiteningTransform.
# NOTE(review): fit() is not called on this WhiteningTransform instance here, and an
# unfitted instance returns its input unchanged - confirm whether whitening is meant to be active.
transform = transforms.Compose([transforms.ToTensor(), WhiteningTransform()])

# Load the CIFAR-10 train and test sets with that pipeline
train_data = CIFAR10('data', train=True, download=True, transform=transform)
test_data = CIFAR10('data', train=False, download=True, transform=transform)

batch_size = 64

# Shuffled loaders for the full training set and the test set (no validation split here)
train_loader, test_loader = (
    DataLoader(split, batch_size=batch_size, shuffle=True)
    for split in (train_data, test_data)
)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to data/cifar-10-python.tar.gz


100%|██████████| 170498071/170498071 [00:03<00:00, 43428993.45it/s]


Extracting data/cifar-10-python.tar.gz to data
Files already downloaded and verified


In [None]:
# Class for Weight Exponentiation
class WeightExponentiation(object):
    """
    Callable for ``nn.Module.apply`` that makes negative weights positive.

    Every negative entry ``w`` of a module's ``weight`` is replaced in place by
    ``exp(w - epsilon)``; non-negative entries are left untouched.

    Args:
        epsilon: Offset subtracted before exponentiation. Larger values map
            negative weights to smaller positive numbers.
    """

    def __init__(self, epsilon = 5):
        self.epsilon = epsilon # epsilon for constraining exponentiation of weights


    def __call__(self, module):
        """
        Apply the exponentiation to ``module.weight`` if the module has one.

        Args:
            module: Module visited by ``apply``. Modules without a ``weight``
                attribute, or whose ``weight`` is ``None``, are skipped.
        """
        # getattr covers both "no weight attribute" and "weight registered as
        # None" (e.g. a norm layer built with affine=False).
        weight = getattr(module, 'weight', None)
        if weight is None:
            return

        # Skip modules whose printed representation mentions an excluded name.
        # NOTE(review): str(module) is the module's repr; for a leaf layer this
        # normally does not include the attribute name it was registered
        # under, so this filter may never match the intended layers - confirm.
        description = str(module)
        if "first_hidden_layer" in description or "fc0" in description:
            return

        # Work on .data so the update is not recorded by autograd.
        w = weight.data
        negative = w < 0  # computed once and reused for read and write
        w[negative] = torch.exp(w[negative] - self.epsilon)


In [None]:
import torch
import torch.nn as nn

class Reshape(nn.Module):
    """
    Layer that returns its input rearranged to a fixed target shape.

    Args:
        shape (tuple): Shape handed to ``torch.reshape`` on every forward call.
    """

    def __init__(self, shape):
        """Remember the target shape for later forward passes."""
        super().__init__()
        self.shape = shape

    def forward(self, x):
        """
        Reshape ``x`` to the stored shape.

        Args:
            x (torch.Tensor): Tensor to reshape.

        Returns:
            torch.Tensor: ``x`` with shape ``self.shape``.
        """
        target_shape = self.shape
        return torch.reshape(input=x, shape=target_shape)


In [None]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader

class ExpertEnsemble(nn.Module):
    """
    Ensemble of experts trained one after another on re-weighted bootstrap samples.

    Each expert is trained on a resample of the training set; after training,
    samples it misclassifies receive a larger weight so that the next expert's
    resample contains them more often.

    Relies on the notebook-level ``device`` and ``WeightExponentiation``.

    Args:
        base_learner: Zero-argument callable returning a fresh expert module.
        num_experts (int): Number of experts in the ensemble.
    """

    def __init__(self, base_learner, num_experts: int = 3):
        super().__init__()
        self.num_experts = num_experts
        self.experts = nn.ModuleList([base_learner().to(device) for _ in range(self.num_experts)])
        self.criterion = nn.CrossEntropyLoss()

    def forward(self, x):
        """
        Run every expert on the same input.

        Args:
            x (torch.Tensor): Input tensor.

        Returns:
            list: One output per expert, in expert order.
        """
        return [expert(x) for expert in self.experts]

    def train_experts(self, train_loader):
        """
        Train the experts sequentially with boosting-style resampling.

        Sample weights are always indexed against the dataset of the loader
        passed in here: every expert is scored on that dataset and every
        bootstrap is drawn from it, so weight ``k`` keeps referring to sample
        ``k`` across rounds.

        Args:
            train_loader (DataLoader): DataLoader for the training data.
        """
        source_dataset = train_loader.dataset
        weights = torch.ones(len(source_dataset))
        current_loader = train_loader
        last_index = self.num_experts - 1

        for i, expert in enumerate(self.experts):
            print("Expert:", i)
            self.train_single_expert(expert, current_loader)
            if i == last_index:
                # No further expert will consume a new resample.
                break
            weights = self.update_sample_weights(expert, source_dataset, weights)
            current_loader = self.bootstrap_dataloader(train_loader, weights)

    def train_single_expert(self, model, train_loader, max_epochs=2, batch_size=64, lr=1e-4):
        """
        Train one expert with Adam and cross-entropy loss.

        After each optimiser step ``WeightExponentiation`` is applied to the
        model.

        Args:
            model (nn.Module): Expert model to train.
            train_loader (DataLoader): Loader whose ``dataset`` is trained on.
                Only the dataset is used; batching/shuffling are set here.
            max_epochs (int): Number of passes over the dataset.
            batch_size (int): Mini-batch size.
            lr (float): Adam learning rate.
        """
        model.train()
        train_loader = DataLoader(train_loader.dataset, batch_size=batch_size, shuffle=True)
        optimizer = torch.optim.Adam(model.parameters(), lr=lr, betas=(0.9, 0.9))
        enforce_constraint = WeightExponentiation()  # reused for every step

        for epoch in range(max_epochs):
            print(f'Epoch {epoch + 1}')
            current_loss = []
            current_acc = []

            # Training
            for inputs, targets in train_loader:
                inputs, targets = inputs.to(device), targets.to(device)
                optimizer.zero_grad()
                outputs = model(inputs)
                loss = self.criterion(outputs, targets)
                loss.backward()
                optimizer.step()
                model.apply(enforce_constraint)
                current_loss.append(loss.item())

                pred = outputs.argmax(dim=1)
                acc = (targets == pred).sum().item()
                current_acc.append(acc / targets.shape[0])

            # Performance Evaluation (mean over batches; guarded against an empty loader)
            print('Loss: %.3f' % (sum(current_loss) / max(len(current_loss), 1)))
            print('Training Accuracy: %.3f' % (sum(current_acc) / max(len(current_acc), 1)))

    def update_sample_weights(self, weak_learner, dataset, sample_weights, learning_rate=0.001, batch_size=256):
        """
        Increase the weight of samples the weak learner misclassifies.

        Each weight is multiplied by ``exp(learning_rate)`` if its sample is
        misclassified (by 1 otherwise), then all weights are normalised to sum
        to one. The dataset is read in its stored order so that prediction
        ``k`` lines up with ``sample_weights[k]``.

        Args:
            weak_learner (nn.Module): Model used to score the samples.
            dataset (torch.utils.data.Dataset): Dataset of ``(input, label)`` pairs.
            sample_weights (torch.Tensor): Current weights, one per dataset item.
                The tensor passed in is not modified.
            learning_rate (float): Strength of the re-weighting.
            batch_size (int): Mini-batch size used while scoring.

        Returns:
            torch.Tensor: Updated, normalised weights on ``device``.
        """
        weak_learner.to(device)
        was_training = weak_learner.training
        weak_learner.eval()  # score with inference behaviour

        # shuffle=False keeps predictions aligned with the positional weights.
        scoring_loader = DataLoader(dataset, batch_size=batch_size, shuffle=False)
        per_batch_errors = []
        with torch.no_grad():  # scoring only, no graph needed
            for inputs, labels in scoring_loader:
                inputs, labels = inputs.to(device), labels.to(device)
                outputs = weak_learner(inputs)
                per_batch_errors.append((outputs.argmax(dim=1) != labels).float())

        weak_learner.train(was_training)  # restore the caller's mode

        current = sample_weights.to(device)
        if not per_batch_errors:
            # Empty dataset: nothing to re-weight.
            return current

        misclassifications = torch.cat(per_batch_errors)
        updated = current * torch.exp(learning_rate * misclassifications)
        return updated / updated.sum()

    def bootstrap_dataloader(self, dataloader, sample_weights):
        """
        Draw a weighted bootstrap sample of the loader's dataset.

        As many items as the dataset holds are drawn with replacement, each
        with probability proportional to its weight.

        Args:
            dataloader (DataLoader): Loader whose ``dataset`` is resampled.
            sample_weights (torch.Tensor): Non-negative weight per dataset item.

        Returns:
            DataLoader: Loader (default settings) over the resampled items.
        """
        dataset = dataloader.dataset
        num_samples = len(dataset)
        # Plain Python ints index any dataset, whatever device the weights are on.
        bootstrap_indices = torch.multinomial(sample_weights, num_samples, replacement=True).tolist()
        # Subset fetches items from the wrapped dataset on access instead of
        # building a list of every sample up front.
        return DataLoader(torch.utils.data.Subset(dataset, bootstrap_indices))


In [None]:
import torch
import torch.nn as nn

class Boosting_Ensemble:
    """
    Train a boosted ensemble of experts plus a gating network, then test it.

    Constructing an instance runs the whole pipeline: expert training, gating
    network training and evaluation on the test data.

    Relies on the notebook-level ``device``, ``ExpertEnsemble`` and
    ``WeightExponentiation``.

    Args:
        num_experts (int): Number of expert models in the ensemble.
        base_learner: Zero-argument callable returning a fresh expert module.
        gating_network: Callable building the gating network; it is invoked as
            ``gating_network(num_experts)``.
        train_loader (DataLoader): DataLoader for training data.
        test_loader (DataLoader, optional): DataLoader for test data. When
            omitted, the notebook-level ``test_loader`` is used.
    """

    def __init__(self, num_experts, base_learner, gating_network, train_loader, test_loader=None):
        self.num_experts = num_experts
        self.base_learner = base_learner
        self.gating_network = gating_network(num_experts).to(device)
        self.train_loader = train_loader
        self.test_loader = test_loader
        self.expert_training()
        self.gn_training()
        self.testing()

    def expert_training(self):
        """
        Train the experts and keep them in ``self.trained_experts``.
        """
        model = ExpertEnsemble(self.base_learner, self.num_experts)
        model.train_experts(self.train_loader)
        print("Expert Training Completed")
        self.trained_experts = model.experts

        # Set requires_grad to True for all parameters of trained experts
        for expert in self.trained_experts:
            for param in expert.parameters():
                param.requires_grad = True

    def _combined_expert_outputs(self, inputs):
        """Concatenate all expert outputs along dim 1, without tracking gradients."""
        # Only the gating network is optimised, so the experts' forward passes
        # do not need an autograd graph.
        with torch.no_grad():
            return torch.cat([expert(inputs) for expert in self.trained_experts], dim=1)

    def gn_training(self, max_epochs=5, lr=1e-4):
        """
        Train the gating network on the concatenated expert outputs.

        Uses ``self.train_loader``. The experts are put in eval mode and are
        not updated; ``WeightExponentiation`` is applied to the gating network
        after every optimiser step.

        Args:
            max_epochs (int): Number of passes over the training data.
            lr (float): Adam learning rate.
        """
        optimizer = torch.optim.Adam(self.gating_network.parameters(), lr=lr, betas=(0.9, 0.9))
        loss_fun = nn.CrossEntropyLoss()
        enforce_constraint = WeightExponentiation()  # reused for every step

        for expert in self.trained_experts:
            expert.eval()  # experts act as fixed feature extractors here
        self.gating_network.train()

        for epoch in range(1, max_epochs + 1):
            print(f'Epoch {epoch}')
            current_loss = []
            current_acc = []

            # Training
            for inputs, targets in self.train_loader:
                inputs, targets = inputs.to(device), targets.to(device)
                optimizer.zero_grad()

                combined_outputs = self._combined_expert_outputs(inputs)

                outputs = self.gating_network(combined_outputs)
                loss = loss_fun(outputs, targets)

                loss.backward()
                optimizer.step()
                self.gating_network.apply(enforce_constraint)
                current_loss.append(loss.item())

                pred = outputs.argmax(dim=1)
                acc = (targets == pred).sum().item()
                current_acc.append(acc / targets.shape[0])

            # Performance Evaluation (mean over batches; guarded against an empty loader)
            print('Loss : %.3f' % (sum(current_loss) / max(len(current_loss), 1)))
            print('Training_Acc  : %.3f' % (sum(current_acc) / max(len(current_acc), 1)))

        print("Training of Gating Network Completed")

    def testing(self):
        """
        Evaluate the ensemble and print the mean per-batch accuracy.

        Uses the ``test_loader`` given to the constructor, falling back to the
        notebook-level ``test_loader`` when none was supplied.

        Returns:
            float: Mean of the per-batch accuracies (0.0 for an empty loader).
        """
        loader = getattr(self, "test_loader", None)
        if loader is None:
            loader = test_loader  # notebook-level fallback

        for expert in self.trained_experts:
            expert.eval()
        self.gating_network.eval()

        test_acc = []
        with torch.no_grad():  # evaluation only
            for inputs, targets in loader:
                inputs, targets = inputs.to(device), targets.to(device)

                combined_outputs = self._combined_expert_outputs(inputs)
                test_outputs = self.gating_network(combined_outputs)

                pred = test_outputs.argmax(dim=1)
                acc = (targets == pred).sum().item()
                test_acc.append(acc / targets.shape[0])

        accuracy = sum(test_acc) / max(len(test_acc), 1)
        print('Test_Acc  : %.3f' % accuracy)
        return accuracy

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from collections import OrderedDict
from typing import Tuple

class Dense_Gating(nn.Module):
    """
    Gating network based on DenseNet architecture for IOC_Densenet ensemble.

    The concatenated expert outputs are projected by a linear layer to
    ``3 * 32 * 32`` values, reshaped to ``(-1, 3, 32, 32)`` and then processed
    by a convolutional stem, dense blocks and a linear classifier.

    NOTE(review): ``Boosting_Ensemble`` builds this network as
    ``gating_network(num_experts)``, which binds the expert count to the first
    positional parameter, ``growth_rate`` - confirm that this is intended.

    Args:
        growth_rate (int): Growth rate for DenseNet.
        block_config (Tuple[int, ...]): Number of layers in each dense block.
        num_init_features (int): Kept for signature compatibility; the stem
            always produces 128 channels and the first dense block uses that width.
        bn_size (int): Batch normalization size.
        drop_rate (float): Dropout rate.
        num_classes (int): Number of output classes.
        memory_efficient (bool): Flag for memory-efficient computation.
        num_experts (int): Number of experts whose outputs are concatenated.
        expert_num_classes (int): Width of each expert's output.
    """

    def __init__(
        self,
        growth_rate: int = 12,
        block_config: Tuple[int, ...] = (6, 12),
        num_init_features: int = 64,
        bn_size: int = 4,
        drop_rate: float = 0,
        num_classes: int = 10,
        memory_efficient: bool = True,
        num_experts: int = 3,
        expert_num_classes: int = 10,
    ) -> None:
        super().__init__()

        stem_channels = 128  # output width of conv0 / input width of the first dense block
        gating_in_features = num_experts * expert_num_classes  # defaults give 3 * 10

        # Define the input projection and the convolutional stem
        self.conv = OrderedDict(
            [
                ("fc0", nn.Linear(gating_in_features, 3 * 32 * 32)),  # Fully connected layer
                ("reshape", Reshape((-1, 3, 32, 32))),  # Reshape to (batch_size, 3, 32, 32)
                ("conv0", nn.Conv2d(3, stem_channels, kernel_size=7, stride=2, padding=3, bias=False)),
                ("norm0", nn.BatchNorm2d(stem_channels)),
                ("elu0", nn.ELU(inplace=True)),
                ("pool0", nn.MaxPool2d(kernel_size=3, stride=2, padding=1)),
            ]
        )

        # First convolution
        self.features = nn.Sequential(self.conv)

        # Each denseblock; the running channel count starts at the stem width
        num_features = stem_channels
        last_block = len(block_config) - 1
        for i, num_layers in enumerate(block_config):
            block = _DenseBlock(
                num_layers=num_layers,
                num_input_features=num_features,
                bn_size=bn_size,
                growth_rate=growth_rate,
                drop_rate=drop_rate,
                memory_efficient=memory_efficient,
            )
            self.features.add_module("denseblock%d" % (i + 1), block)
            num_features = num_features + num_layers * growth_rate
            if i != last_block:
                # A transition halves the channel count between consecutive blocks
                trans = _Transition(num_input_features=num_features, num_output_features=num_features // 2)
                self.features.add_module("transition%d" % (i + 1), trans)
                num_features = num_features // 2

        # Final batch norm
        self.features.add_module("norm5", nn.BatchNorm2d(num_features))

        # Linear layer
        self.classifier = nn.Linear(num_features, num_classes)

        # Initialize parameters
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight)
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                nn.init.constant_(m.bias, 0)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """
        Forward pass of the gating network.

        Args:
            x (torch.Tensor): Concatenated expert outputs; the last dimension
                must match the input width of ``fc0``.

        Returns:
            torch.Tensor: Class scores produced by the classifier.
        """
        features = self.features(x)
        out = F.relu(features, inplace=True)
        out = F.adaptive_avg_pool2d(out, (1, 1))  # global average pool
        out = torch.flatten(out, 1)
        out = self.classifier(out)
        return out

In [None]:
# Run the full pipeline: three IOC-DenseNet experts combined by a Dense_Gating network.
Boosting_Ensemble(
    num_experts=3,
    base_learner=IOC_DenseNet,
    gating_network=Dense_Gating,
    train_loader=train_loader,
)

STAGE:2023-11-20 13:59:34 47:47 ActivityProfilerController.cpp:311] Completed Stage: Warm Up


Expert: 0
Epoch 1
Loss: 2.268
Training Accuracy: 0.150
Epoch 2
