# Necessary package import

In [1]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
from einops import rearrange
import torch.nn.functional as F
from torchsummary import summary
from torch.utils.data import DataLoader
from torchvision import datasets, models, transforms
from timm.models.efficientnet import efficientnetv2_s

  from .autonotebook import tqdm as notebook_tqdm


# Modified efficientnetv2_s architecture (Convolution + Involution Kernels)

## Open source Involution (https://github.com/shuuchen/involution.pytorch/blob/main/involution.py)

In [2]:
class Involution(nn.Module):
    """
    Implementation of `Involution: Inverting the Inherence of Convolution for Visual Recognition`.

    A 1x1 bottleneck (``reduce`` followed by ``span``) produces one
    ``kernel_size * kernel_size`` kernel per group for every output location.
    That kernel is multiplied with the unfolded input neighbourhood and summed
    over the kernel positions.

    Args:
        in_channels: Number of input channels. It has to be divisible by
            ``groups`` for the channel split done in ``forward``.
        out_channels: Number of output channels. When it differs from
            ``in_channels`` a 1x1 convolution resamples the result.
        groups: Number of channel groups that share one generated kernel.
        kernel_size: Side length of the square involution kernel.
        stride: Stride of the sliding window.
        reduction_ratio: Channel reduction factor of the bottleneck.
    """
    def __init__(self, in_channels, out_channels, groups=1, kernel_size=3, stride=1, reduction_ratio=2):

        super().__init__()

        channels_reduced = max(1, in_channels // reduction_ratio)
        padding = kernel_size // 2

        # Bottleneck that feeds the kernel generator
        self.reduce = nn.Sequential(
            nn.Conv2d(in_channels, channels_reduced, 1),
            nn.BatchNorm2d(channels_reduced),
            nn.ReLU(inplace=True))

        # One value per kernel position and group for every spatial location
        self.span = nn.Conv2d(channels_reduced, kernel_size * kernel_size * groups, 1)
        self.unfold = nn.Unfold(kernel_size, padding=padding, stride=stride)

        self.resampling = None if in_channels == out_channels else nn.Conv2d(in_channels, out_channels, 1)

        self.kernel_size = kernel_size
        self.stride = stride
        self.padding = padding
        self.groups = groups

    @classmethod
    def get_name(cls):
        """
        Return this layer name.

        Returns:
            str: layer name.
        """
        return 'Involution'

    def _out_size(self, size):
        """Return the number of sliding-window positions along one axis of length ``size``."""
        return (size + 2 * self.padding - self.kernel_size) // self.stride + 1

    def forward(self, input_tensor):
        """
        Calculate Involution.

        override function from PyTorch.

        Args:
            input_tensor: 4-D tensor unpacked as (batch, channels, height, width).

        Returns:
            Contiguous tensor with ``out_channels`` channels on the output grid.
        """
        _, _, height, width = input_tensor.size()

        # Always derive the output grid from the unfold geometry. With an even
        # kernel_size the grid differs from the input even at stride 1, so it
        # must not be assumed equal to (height, width).
        out_height, out_width = self._out_size(height), self._out_size(width)

        uf_x = rearrange(self.unfold(input_tensor), 'b (g d k j) (h w) -> b g d (k j) h w',
                         g=self.groups, k=self.kernel_size, j=self.kernel_size, h=out_height, w=out_width)

        # The kernels are generated from the input itself, so bring it onto the
        # output grid first whenever the two grids differ.
        if (out_height, out_width) != (height, width):
            input_tensor = F.adaptive_avg_pool2d(input_tensor, (out_height, out_width))
        kernel = rearrange(self.span(self.reduce(input_tensor)), 'b (k j g) h w -> b g (k j) h w',
                           k=self.kernel_size, j=self.kernel_size)

        # Weighted sum over the kernel positions (x), then merge groups back into channels
        out = rearrange(torch.einsum('bgdxhw, bgxhw -> bgdhw', uf_x, kernel), 'b g d h w -> b (g d) h w')

        if self.resampling is not None:
            out = self.resampling(out)

        return out.contiguous()

## Replace every 10th Conv2D within the EfficientNet-v2-s blocks with a Conv2D + Inv2D block

In [3]:
import torch
import torch.nn as nn

class ConvInvolutionBlock(nn.Module):
    """Convolution stage followed by an involution stage.

    Each stage is followed by batch normalisation and a SiLU activation:
    ``Conv2d -> BN -> SiLU -> Involution -> BN -> SiLU``.

    Args:
        in_channels: Number of channels of the input tensor.
        out_channels: Number of channels produced by both stages.
        kernel_size: Kernel size shared by the convolution and the involution.
        stride: Stride of the block. It is applied once, by the convolution.
        padding: Padding of the convolution.
    """

    def __init__(self, in_channels, out_channels, kernel_size, stride, padding):
        super().__init__()

        # Convolution block
        self.conv = nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding)
        self.conv_bn = nn.BatchNorm2d(out_channels)
        self.conv_act = nn.SiLU(inplace=True)

        # Involution block.
        # The convolution above has already applied `stride`, so the involution
        # runs at stride 1. Passing `stride` here as well would downsample twice
        # (by stride * stride overall) instead of matching a conv with this stride.
        self.involution = Involution(in_channels=out_channels, out_channels=out_channels,
                                     kernel_size=kernel_size, stride=1)
        self.inv_bn = nn.BatchNorm2d(out_channels)
        self.inv_act = nn.SiLU(inplace=True)

    def forward(self, x):
        """Run the convolution stage and then the involution stage on ``x``."""
        # Apply convolution block
        x = self.conv_act(self.conv_bn(self.conv(x)))

        # Apply involution block
        x = self.inv_act(self.inv_bn(self.involution(x)))

        return x


# Define the updated EfficientNetV2SWithInvolution class
class EfficientNetV2SWithInvolution(nn.Module):
    """EfficientNetV2-S variant with periodic Conv2d -> ConvInvolutionBlock swaps.

    The stem and the block stages are taken from a freshly initialised
    ``efficientnetv2_s``. Every stage is rebuilt as an ``nn.Sequential`` in
    which every ``replace_every``-th ``nn.Conv2d`` (counted across the whole
    network) is replaced by a ``ConvInvolutionBlock`` with the same channels,
    kernel size, stride and padding.

    Args:
        num_classes: Number of outputs of the final linear classifier.
        replace_every: Replace every N-th Conv2d that is encountered.
            Must be >= 1. Defaults to 10.

    Raises:
        ValueError: If ``replace_every`` is smaller than 1.

    NOTE(review): each sub-block is rebuilt as a plain ``nn.Sequential`` of its
    direct children, so the sub-block's own ``forward`` is not executed; any
    logic that lives there (for example skip connections) is not reproduced.
    Confirm this is intended.
    """

    def __init__(self, num_classes, replace_every=10):
        super().__init__()

        if replace_every < 1:
            raise ValueError(f"replace_every must be >= 1, got {replace_every}")
        self.replace_every = replace_every

        # Load the base EfficientNetV2-S model with randomly initialised weights
        base_model = efficientnetv2_s(pretrained=False)

        # Stem convolution and the original block stages.
        # `self.blocks` is kept for backward compatibility; layers that are not
        # replaced are the same module objects in `blocks` and `modified_blocks`.
        self.stem = base_model.conv_stem
        self.blocks = base_model.blocks

        # Rebuild every stage while carrying one running Conv2d count across
        # all stages, so "every N-th" is counted over the whole network rather
        # than restarting at zero for each stage.
        self.modified_blocks = nn.ModuleList()
        conv_count = 0
        for block in self.blocks:
            block_children = self._replace_conv_in_block(block, conv_count)
            conv_count = self._conv_count
            self.modified_blocks.append(nn.Sequential(*block_children))

        # Final classifier layer; 256 is the channel width expected after the last stage
        self.classifier = nn.Linear(256, num_classes)

    def _replace_conv_in_block(self, block, conv_count):
        """Rebuild the sub-blocks of ``block`` with periodic Conv2d replacement.

        Args:
            block: Module whose direct children are the sub-blocks of one stage.
            conv_count: Number of Conv2d layers already counted before this
                stage. The updated total is left in ``self._conv_count``.

        Returns:
            list: One ``nn.Sequential`` per sub-block of ``block``.
        """
        self._conv_count = conv_count
        return [nn.Sequential(*self._convert_children(sub_block)) for sub_block in block.children()]

    def _convert_children(self, module):
        """Return the direct children of ``module`` with Conv2d layers counted and replaced."""
        converted = []
        for child in module.children():
            if isinstance(child, nn.Conv2d):
                converted.append(self._maybe_replace_conv(child))
            elif isinstance(child, nn.Sequential):
                # Descend into nested Sequential containers and rebuild them as
                # a module (a bare list cannot be passed to nn.Sequential).
                converted.append(nn.Sequential(*self._convert_children(child)))
            else:
                # Anything else is kept as it is
                converted.append(child)
        return converted

    def _maybe_replace_conv(self, conv):
        """Count ``conv`` and return either it or a ConvInvolutionBlock replacing it."""
        self._conv_count += 1
        if self._conv_count % self.replace_every != 0:
            return conv
        return ConvInvolutionBlock(conv.in_channels, conv.out_channels,
                                   conv.kernel_size[0], conv.stride[0], conv.padding[0])

    def forward(self, x):
        """Run stem, modified stages, global average pooling and the classifier."""
        # Forward pass through stem
        x = self.stem(x)

        # Forward pass through modified blocks
        for block in self.modified_blocks:
            x = block(x)

        # Global Average Pooling over the two spatial axes, then classify
        x = x.mean([2, 3])
        x = self.classifier(x)
        return x

# Instantiate the model with a fixed number of output classes
model = EfficientNetV2SWithInvolution(num_classes=1486)  # NOTE(review): 1486 is hard-coded here, not derived from the dataset

## Check torch summary

In [None]:
import gc

import torch

# Clear the CUDA cache and run a garbage-collection pass before building the model
torch.cuda.empty_cache()
gc.collect()

# Use the GPU when one is available, otherwise fall back to the CPU
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Build the network with 1486 output classes and move it to the selected device
# (plain baseline for comparison: model = efficientnetv2_s(pretrained=False))
model = EfficientNetV2SWithInvolution(num_classes=1486).to(device)

# Print the model summary for a 3 x 256 x 256 input
summary(model, input_size=(3, 256, 256))

# Model training

In [4]:
import gc

import torch

# Clear the CUDA cache
torch.cuda.empty_cache()

# Optional garbage-collection pass (useful if memory is fragmented).
# Kept as the last expression so the notebook still displays its return value.
gc.collect()

16

In [None]:
import torch
import os
import matplotlib.pyplot as plt
import torch.nn as nn
import torch.optim as optim
from torch.amp import GradScaler, autocast
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

# Set device to GPU if available, otherwise use CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Training on: {device}")

# FP16 autocast and gradient scaling are only enabled when running on CUDA;
# on CPU both become no-ops so the same loop runs in full precision.
use_amp = device.type == "cuda"

# Hyperparameters
batch_size = 32  # Batch size
epochs = 10  # Set the number of epochs

# Dataset paths
train_dir = "/home/sashankh-ravi/Documents/Datasets/iNet_Bird_Small/train_transformed"
val_dir = "/home/sashankh-ravi/Documents/Datasets/iNet_Bird_Small/val_transformed"

# Image resizing and transformation (only resizing to 256x256)
transform = transforms.Compose([
    transforms.Resize((256, 256)),  # Resize images to 256x256
    transforms.ToTensor(),  # Convert image to tensor
])

# Custom dataset loading
train_dataset = datasets.ImageFolder(train_dir, transform=transform)
val_dataset = datasets.ImageFolder(val_dir, transform=transform)

# Number of classes as discovered by ImageFolder, so the classifier width
# always matches the label indices the loader produces.
num_classes = len(train_dataset.classes)

# Create DataLoader with shuffling enabled for the training set
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=4, pin_memory=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=4, pin_memory=True)

# Initialize the model with one output per dataset class
# (plain baseline for comparison: model = efficientnetv2_s(pretrained=False))
model = EfficientNetV2SWithInvolution(num_classes=num_classes)

# Move model to GPU if available
model = model.to(device)

# Define the loss function
criterion = nn.CrossEntropyLoss()

# Define the optimizer (Adam)
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Initialize the gradient scaler for FP16 precision (disabled without CUDA)
scaler = GradScaler(enabled=use_amp)

# Per-epoch average training and validation losses
train_losses = []
val_losses = []

# Training loop
for epoch in range(epochs):
    model.train()  # Set model to training mode
    running_train_loss = 0.0
    correct_preds = 0
    total_preds = 0

    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)

        # Zero the gradients
        optimizer.zero_grad()

        # Forward pass using mixed precision (autocast) on the active device
        with autocast(device_type=device.type, dtype=torch.float16, enabled=use_amp):
            outputs = model(images)
            loss = criterion(outputs, labels)

        # Backward pass and gradient scaling
        scaler.scale(loss).backward()

        # Unscale the gradients, update the weights and adapt the scale factor
        scaler.step(optimizer)
        scaler.update()

        # Track loss and accuracy as plain Python numbers
        running_train_loss += loss.item()
        preds = outputs.argmax(dim=1)
        correct_preds += (preds == labels).sum().item()
        total_preds += labels.size(0)

    avg_train_loss = running_train_loss / len(train_loader)
    train_accuracy = 100 * correct_preds / total_preds
    train_losses.append(avg_train_loss)

    print(f"Epoch [{epoch + 1}/{epochs}], Loss: {avg_train_loss:.4f}, Accuracy: {train_accuracy:.2f}%")

    # Validation loop
    model.eval()  # Set model to evaluation mode
    running_val_loss = 0.0
    correct_preds = 0
    total_preds = 0
    with torch.no_grad():
        for images, labels in val_loader:
            images, labels = images.to(device), labels.to(device)

            # Forward pass and loss under the same autocast settings as training.
            # torch.amp.autocast requires device_type; calling it without
            # arguments raises a TypeError.
            with autocast(device_type=device.type, dtype=torch.float16, enabled=use_amp):
                outputs = model(images)
                val_loss = criterion(outputs, labels)
            running_val_loss += val_loss.item()

            # Track accuracy
            preds = outputs.argmax(dim=1)
            correct_preds += (preds == labels).sum().item()
            total_preds += labels.size(0)

    avg_val_loss = running_val_loss / len(val_loader)
    val_accuracy = 100 * correct_preds / total_preds
    val_losses.append(avg_val_loss)

    print(f"Validation Accuracy: {val_accuracy:.2f}%")

# Plot the per-epoch training and validation loss curves
plt.figure(figsize=(10, 5))
plt.plot(range(1, epochs + 1), train_losses, label="Training Loss", color='blue', marker='o')
plt.plot(range(1, epochs + 1), val_losses, label="Validation Loss", color='red', marker='o')
plt.xlabel('Epochs')
plt.ylabel('Cumulative Loss')
plt.title('Cumulative Training and Validation Loss per Epoch')
plt.legend()
plt.grid(True)
plt.show()


Training on: cuda
