In [3]:
#Test of the SPLIT ENSEMBLE MODEL

import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.models as models
from torch.utils.data import DataLoader

# Define a submodel class with a flexible backbone architecture
class Submodel(nn.Module):
    """One ensemble member: a pretrained torchvision network with a resized head.

    The last linear layer of the selected architecture is swapped for a new
    ``nn.Linear`` that emits ``num_classes + 1`` logits; the additional logit
    is the out-of-distribution (OOD) slot.

    Args:
        backbone: Architecture key: ``"resnet"``, ``"efficientnet"`` or ``"vgg"``.
        num_classes: Number of in-distribution classes this submodel handles.

    Raises:
        ValueError: If ``backbone`` is not one of the supported keys.
    """

    def __init__(self, backbone, num_classes):
        super().__init__()
        head_width = num_classes + 1  # one extra logit reserved for OOD

        if backbone == "resnet":
            net = models.resnet50(pretrained=True)
            net.fc = self._resized_head(net.fc, head_width)
        elif backbone == "efficientnet":
            net = models.efficientnet_b0(pretrained=True)
            net.classifier[1] = self._resized_head(net.classifier[1], head_width)
        elif backbone == "vgg":
            net = models.vgg16(pretrained=True)
            net.classifier[6] = self._resized_head(net.classifier[6], head_width)
        else:
            raise ValueError("Unsupported backbone model")

        # Registered only once the head has been replaced.
        self.backbone = net

    @staticmethod
    def _resized_head(old_head, out_features):
        """Build a linear layer with the same input width as ``old_head``."""
        return nn.Linear(old_head.in_features, out_features)

    def forward(self, x):
        """Return the backbone's logits for ``x``."""
        return self.backbone(x)

# Define the Split-Ensemble model with multiple submodels of different architectures
class SplitEnsemble(nn.Module):
    """Holds several submodels and evaluates every one of them on the same input.

    Args:
        submodel_specs: Iterable of dicts. Each dict supplies the ``'backbone'``
            and ``'num_classes'`` values used to construct one ``Submodel``.
    """

    def __init__(self, submodel_specs):
        super().__init__()
        members = []
        for spec in submodel_specs:
            members.append(
                Submodel(backbone=spec['backbone'], num_classes=spec['num_classes'])
            )
        # ModuleList registers the members so their parameters are tracked.
        self.submodels = nn.ModuleList(members)

    def forward(self, x):
        """Return a list holding each submodel's output for ``x``, in construction order."""
        per_model_outputs = []
        for member in self.submodels:
            per_model_outputs.append(member(x))
        return per_model_outputs

# Training function for Split-Ensemble model
def train_split_ensemble(model, dataloaders, optimizer, criterion, epochs, device):
    """Train a split ensemble in which each sample is routed to one submodel.

    Every submodel is run on the whole batch, but only the rows whose task id
    equals the submodel's index contribute to that submodel's loss. The
    per-submodel losses are summed before a single backward pass.

    Args:
        model: Module whose forward pass returns a list of per-submodel outputs.
        dataloaders: Mapping with a ``'train'`` entry that yields
            ``(inputs, targets, task_ids)`` batches.
        optimizer: Optimizer; ``zero_grad()`` and ``step()`` are called per batch.
        criterion: Callable invoked as ``criterion(outputs, targets)``.
        epochs: Number of passes over ``dataloaders['train']``.
        device: Device that inputs, targets and task ids are moved to.

    Returns:
        None. The mean loss per batch is printed after every epoch.
    """
    for epoch in range(epochs):
        model.train()
        total_loss = 0.0
        num_batches = 0
        for inputs, targets, task_ids in dataloaders['train']:
            num_batches += 1
            inputs, targets = inputs.to(device), targets.to(device)
            # Keep the routing mask on the same device as the tensors it indexes.
            task_ids = torch.as_tensor(task_ids, device=device)
            optimizer.zero_grad()
            outputs = model(inputs)

            # Sum the loss of each submodel over the samples assigned to it.
            loss = None
            for i, submodel_output in enumerate(outputs):
                task_mask = task_ids == i
                if task_mask.any():
                    subtask_loss = criterion(submodel_output[task_mask], targets[task_mask])
                    loss = subtask_loss if loss is None else loss + subtask_loss

            if loss is None:
                # No sample matched any submodel, so there is nothing to
                # backpropagate; skip instead of failing on an int "loss".
                continue

            total_loss += loss.item()
            loss.backward()
            optimizer.step()

        # max(..., 1) keeps an empty loader from raising ZeroDivisionError.
        print(f"Epoch [{epoch+1}/{epochs}], Loss: {total_loss / max(num_batches, 1)}")

# Inference with the Split-Ensemble model
def inference_split_ensemble(model, inputs, device):
    """Run the ensemble in eval mode and join all submodel logits along dim 1.

    Args:
        model: Module whose forward pass returns a sequence of tensors.
        inputs: Input tensor; it is moved to ``device`` before the forward pass.
        device: Device on which to run the forward pass.

    Returns:
        A single tensor made by concatenating the submodel outputs along dim 1.
    """
    model.eval()
    device_inputs = inputs.to(device)
    # Gradients are disabled for both the forward pass and the concatenation.
    with torch.no_grad():
        per_model_logits = model(device_inputs)
        joined = torch.cat(per_model_logits, dim=1)
    return joined

# Example usage
# Use the GPU when PyTorch can see one, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# One entry per submodel: which architecture to use and how many classes it owns.
submodel_specs = [
    dict(backbone="resnet", num_classes=10),        # Submodel 1: ResNet, 10 classes
    dict(backbone="efficientnet", num_classes=15),  # Submodel 2: EfficientNet, 15 classes
    dict(backbone="vgg", num_classes=8),            # Submodel 3: VGG, 8 classes
]

# Build the ensemble, then move it to the selected device.
model = SplitEnsemble(submodel_specs=submodel_specs)
model = model.to(device)

# Optimizer over every submodel's parameters, plus the classification loss.
optimizer = optim.Adam(model.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()

# Mock training data: ten random [3, 224, 224] images. Every sample carries
# target 0 and task id 0, so only the first submodel ever receives a loss.
mock_samples = [
    (torch.randn(3, 224, 224), torch.tensor(0), torch.tensor(0))
    for _ in range(10)
]
dataloaders = {'train': DataLoader(mock_samples, batch_size=2)}

# Train on the mock data.
train_split_ensemble(model, dataloaders, optimizer, criterion, epochs=5, device=device)

# Run a single random image through the trained ensemble and show the joined logits.
test_input = torch.randn(1, 3, 224, 224)
ensemble_logits = inference_split_ensemble(model, test_input, device)
print("Ensemble Logits:", ensemble_logits)


Epoch [1/5], Loss: 0.7811585616320371
Epoch [2/5], Loss: 0.0007943323820654768
Epoch [3/5], Loss: 9.918132536768099e-06
Epoch [4/5], Loss: 9.77515446720645e-07
Epoch [5/5], Loss: 1.072883321739937e-07
Ensemble Logits: tensor([[  9.2261,  -9.3512,  -9.6399, -10.0210,  -8.6745,  -9.4568,  -9.1432,
          -9.1403,  -9.0571,  -9.4104,  -9.7417,  -0.2302,  -0.0568,  -0.2581,
          -0.3108,   0.1483,   0.1117,   0.0422,   0.0698,  -0.1165,   0.1311,
           0.0566,   0.0993,  -0.0205,  -0.1021,   0.2123,   0.2022,   0.0805,
          -0.4968,   0.0593,   0.0549,   0.1940,   0.1330,   0.2528,  -0.1128,
           0.2171]])


In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import models, transforms
from torch.utils.data import DataLoader
from torchvision.datasets import ImageFolder
import numpy
numpy.get_include() 

# Define data transformations
# Preprocessing applied to every image: resize to 224x224, convert to a tensor,
# then normalise each channel with mean 0.5 and std 0.5.
# NOTE(review): the backbones built later in this notebook are loaded with
# pretrained=True; confirm that 0.5/0.5 normalisation (rather than the statistics
# those weights were trained with) is intended.
preprocessing_steps = [
    transforms.Resize(size=(224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
]
transform = transforms.Compose(preprocessing_steps)

# Use the GPU when PyTorch can see one, otherwise stay on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Datasets are read from class-per-folder directories relative to the working directory.
train_dataset = ImageFolder(root='split3/Train', transform=transform)
test_dataset = ImageFolder(root='split3/Test', transform=transform)

# Shuffle only the training batches; the test loader keeps dataset order.
train_loader = DataLoader(dataset=train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=32, shuffle=False)

# Define a submodel class for each model
class Submodel(nn.Module):
    """Wrap an already-built torchvision backbone and give it a fresh head.

    The backbone's own final linear layer is replaced in place with
    ``nn.Identity`` (so the object passed in by the caller is modified), and a
    new ``nn.Linear`` producing ``num_classes + 1`` logits is stored on this
    module as ``fc``. The extra logit is presumably the OOD slot, as in the
    string-keyed ``Submodel`` defined in the earlier cell (TODO confirm).

    Args:
        backbone: Instance of ``models.ResNet``, ``models.VGG`` or
            ``models.EfficientNet``.
        num_classes: Number of dataset classes; the head emits one more logit
            than this.

    Raises:
        ValueError: If ``backbone`` is not one of the supported architectures.
            Previously such a backbone was accepted silently and the failure
            only appeared in ``forward`` as a missing ``fc`` attribute.
    """

    def __init__(self, backbone, num_classes):
        super().__init__()

        # Find the layer to strip first, so an unsupported backbone is rejected
        # before anything is registered on this module.
        if isinstance(backbone, models.ResNet):
            in_features = backbone.fc.in_features
            backbone.fc = nn.Identity()  # backbone now returns pooled features
        elif isinstance(backbone, models.VGG):
            in_features = backbone.classifier[6].in_features
            backbone.classifier[6] = nn.Identity()  # drop the last classifier layer
        elif isinstance(backbone, models.EfficientNet):
            in_features = backbone.classifier[1].in_features
            backbone.classifier[1] = nn.Identity()  # drop the final linear layer
        else:
            raise ValueError(
                f"Unsupported backbone model: {type(backbone).__name__}"
            )

        self.backbone = backbone
        self.fc = nn.Linear(in_features, num_classes + 1)

    def forward(self, x):
        """Run the headless backbone on ``x`` and pass the result through the new head."""
        x = self.backbone(x)
        return self.fc(x)

# Define the Split-Ensemble model with multiple submodels
class SplitEnsemble(nn.Module):
    """Holds pre-built submodels and evaluates each one on the same input.

    Args:
        submodels: Iterable of modules; they are registered in an ``nn.ModuleList``.
    """

    def __init__(self, submodels):
        super().__init__()
        self.submodels = nn.ModuleList(submodels)

    def forward(self, x):
        """Return a list holding each submodel's output for ``x``, in registration order."""
        per_model_outputs = []
        for member in self.submodels:
            per_model_outputs.append(member(x))
        return per_model_outputs

# Initialize models with different architectures
# The number of classes comes from the class list ImageFolder exposes.
num_classes = len(train_dataset.classes)

# Build all three pretrained backbones first, then wrap each in a Submodel.
backbone_factories = (models.resnet50, models.vgg16, models.efficientnet_b0)
backbones = [build(pretrained=True) for build in backbone_factories]
submodels = [Submodel(net, num_classes) for net in backbones]

# Combine the submodels and move the whole ensemble to the selected device.
ensemble_model = SplitEnsemble(submodels)
ensemble_model = ensemble_model.to(device)

# Define the training function
def train_split_ensemble(model, dataloader, optimizer, criterion, epochs, device=None):
    """Train every submodel of the ensemble on every batch.

    Each submodel's output is scored against the same targets, and the
    per-submodel losses are summed before a single backward pass.

    Args:
        model: Module whose forward pass returns a list of per-submodel outputs.
        dataloader: Iterable yielding ``(inputs, targets)`` batches.
        optimizer: Optimizer; ``zero_grad()`` and ``step()`` are called per batch.
        criterion: Callable invoked as ``criterion(output, targets)``.
        epochs: Number of passes over ``dataloader``.
        device: Device that batches are moved to. When omitted, the device of
            the model's first parameter is used (CPU if the model has none),
            so the function no longer depends on a notebook-global ``device``.

    Returns:
        None. The mean loss per batch is printed after every epoch.
    """
    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device("cpu")

    for epoch in range(epochs):
        model.train()
        total_loss = 0.0
        num_batches = 0
        for inputs, targets in dataloader:
            num_batches += 1
            inputs, targets = inputs.to(device), targets.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)

            # Sum the loss of every submodel against the same targets.
            loss = sum(criterion(output, targets) for output in outputs)
            total_loss += loss.item()
            loss.backward()
            optimizer.step()

        # max(..., 1) keeps an empty loader from raising ZeroDivisionError.
        print(f"Epoch [{epoch+1}/{epochs}], Loss: {total_loss / max(num_batches, 1)}")

# Initialize optimizer and loss function
optimizer = optim.Adam(params=ensemble_model.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()

# Fine-tune all submodels together for five epochs.
train_split_ensemble(ensemble_model, train_loader, optimizer, criterion, epochs=5)

# Inference over the test set: labels are ignored; each batch's submodel logits
# are joined along dim 1 and printed.
ensemble_model.eval()
with torch.no_grad():
    for inputs, _ in test_loader:
        inputs = inputs.to(device)
        outputs = ensemble_model(inputs)
        ensemble_logits = torch.cat(outputs, dim=1)
        print("Ensemble Logits:", ensemble_logits)


Downloading: "https://download.pytorch.org/models/efficientnet_b0_rwightman-7f5810bc.pth" to C:\Users\Monis/.cache\torch\hub\checkpoints\efficientnet_b0_rwightman-7f5810bc.pth
100%|██████████| 20.5M/20.5M [00:06<00:00, 3.13MB/s]


Epoch [1/5], Loss: 5.571129071194193
Epoch [2/5], Loss: 4.663950025517008
Epoch [3/5], Loss: 4.260779917758444
Epoch [4/5], Loss: 3.9619379634442535
Epoch [5/5], Loss: 3.7908892776655114
Ensemble Logits: tensor([[  6.5421,  -3.4923,   2.9002,  ...,  -1.5194,  -2.7867,  -5.9313],
        [ -0.4400,  -1.0768,  -0.7765,  ...,  -4.5453,  -5.1013, -10.3125],
        [  2.0469,  -1.7483,   1.2423,  ...,  -1.9759,  -3.6533,  -6.4030],
        ...,
        [ -0.8568,  -3.3298,   0.1668,  ...,   4.1477,  -2.9081,  -5.1651],
        [  2.9173,  -2.6368,   1.4887,  ...,   3.7092,  -5.5010,  -9.9419],
        [ -1.7530,  -0.2741,  -1.7451,  ...,  -2.3296,  -7.5166,  -9.3226]],
       device='cuda:0')
Ensemble Logits: tensor([[ -1.9890,   1.3639,  -0.7395,  ...,   1.9522,  -5.0666, -10.5160],
        [ -1.8085,   0.7615,  -0.8429,  ...,   2.0292, -10.5428, -14.2019],
        [ -2.0430,   4.5818,  -0.4075,  ...,   0.9516,  -5.5364, -12.4551],
        ...,
        [  1.2307,  -0.7155,   2.6061,  ...,

In [5]:
!pip install numpy

Collecting numpy
  Using cached numpy-2.0.2-cp39-cp39-win_amd64.whl.metadata (59 kB)
Using cached numpy-2.0.2-cp39-cp39-win_amd64.whl (15.9 MB)
Installing collected packages: numpy
Successfully installed numpy-2.0.2
