<a href="https://colab.research.google.com/github/protagora/learnable-activation-function/blob/dev/laf_using_ecdf.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.utils.data import DataLoader
import math

import cupy as xp  # Import CuPy as xp

# try:
#     import cupy as xp  # Import CuPy as xp
# except ImportError:
#     import numpy as xp  # Fall back to NumPy if CuPy isn't available

# Gaussian-CDF activation, named `laf` ("learnable activation function").
# Note: this variant has no trainable parameters.
class laf(nn.Module):
    """Activation that maps every element through the standard normal CDF.

    The module holds no parameters or buffers; it is a fixed, element-wise
    squashing function with outputs in the range [0, 1].
    """

    def __init__(self):
        super().__init__()

    def forward(self, x):
        """Return ``Phi(x) = 0.5 * (1 + erf(x / sqrt(2)))`` element-wise."""
        # erf argument: rescale so that erf yields the unit-variance normal CDF
        erf_input = x / math.sqrt(2)
        return 0.5 * (1 + torch.erf(erf_input))

class EmpiricalCDFActivation(nn.Module):
    """Replace each element by its normalized rank within the whole tensor.

    All elements of the input (across every dimension) are pooled, sorted,
    and each element is mapped to ``rank / N`` where ``rank`` is the number
    of pooled values strictly smaller than it and ``N`` is the element count.
    Outputs therefore lie in ``[0, (N - 1) / N]``.

    The result is derived from the integer indices returned by
    ``torch.searchsorted``, so it carries no gradient with respect to ``x``.
    """

    def __init__(self):
        super().__init__()

    def forward(self, x):
        """Return a float tensor shaped like ``x`` holding normalized ranks.

        Args:
            x: Tensor of any shape; it does not need to be contiguous.
        """
        # reshape (not view) so that non-contiguous inputs, e.g. transposed
        # tensors, are flattened instead of raising a RuntimeError.
        flat = x.reshape(-1)
        sorted_x, _ = torch.sort(flat)
        # Index of the first sorted entry >= value, i.e. the count of
        # strictly smaller elements (ties share the lowest rank).
        ranks = torch.searchsorted(sorted_x, flat)
        empirical_cdf = ranks.float() / len(sorted_x)

        # Restore the caller's original shape
        return empirical_cdf.view_as(x)

# Define a CNN model that pairs standard nn.BatchNorm layers with the `laf` activation
class CNNWithLAF(nn.Module):
    """Three-block CNN classifier using the ``laf`` activation after each BatchNorm.

    Layout: conv(3->32) -> conv(32->64) -> pool -> conv(64->128) -> pool
    -> fc(256) -> fc(10). Every conv/fc layer except the last is followed by
    batch normalization and the ``laf`` activation.

    All layers are created in ``__init__`` so that ``model.parameters()``
    already contains every trainable tensor when an optimizer is built.
    (Creating ``fc1``/``bn4`` lazily inside ``forward`` would hide them from
    an optimizer constructed before the first forward pass.)

    Args:
        input_size: Spatial size of the input images, either a single int for
            square images or a ``(height, width)`` pair. Defaults to 32, the
            CIFAR-10 image size used in this file.
    """

    def __init__(self, input_size=32):
        super().__init__()

        # Convolutional layers, each followed by standard batch normalization
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, padding=1)
        self.bn1 = nn.BatchNorm2d(32)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm2d(64)
        self.conv3 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, padding=1)
        self.bn3 = nn.BatchNorm2d(128)
        self.laf = laf()

        # Pooling layer
        self.pool = nn.MaxPool2d(2, 2)  # Downsampling by 2

        if isinstance(input_size, int):
            height, width = input_size, input_size
        else:
            height, width = input_size

        # The 3x3 convs use padding=1 (size-preserving); forward() applies the
        # 2x2 pool twice, and each application floors the spatial size in half.
        flat_features = 128 * (height // 2 // 2) * (width // 2 // 2)

        self.fc1 = nn.Linear(flat_features, 256)
        self.bn4 = nn.BatchNorm1d(256)
        self.fc2 = nn.Linear(256, 10)  # CIFAR-10 has 10 classes

    def forward(self, x):
        """Return the 10 class logits for a batch of images.

        Args:
            x: Image batch with 3 channels whose spatial size matches the
                ``input_size`` given at construction.
        """
        # Convolutional blocks: conv -> batch norm -> laf (pool after 2 and 3)
        x = self.laf(self.bn1(self.conv1(x)))
        x = self.pool(self.laf(self.bn2(self.conv2(x))))
        x = self.pool(self.laf(self.bn3(self.conv3(x))))

        # Flatten everything except the batch dimension
        x = torch.flatten(x, 1)

        # Fully connected head: fc -> batch norm -> laf, then the output layer
        x = self.laf(self.bn4(self.fc1(x)))
        return self.fc2(x)

# Check for CUDA: use the GPU when one is available, otherwise the CPU.
# `device` is a module-level name read by later code in this file.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load CIFAR-10 dataset
# Preprocessing applied to every image: convert to a tensor, then shift and
# scale each of the three channels with mean 0.5 and std 0.5.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))  # Normalize to [-1, 1]
])

# Both splits are stored under ./data and downloaded if not already present.
train_dataset = datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
test_dataset = datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)
# Training batches are reshuffled every epoch; test batches keep dataset order.
train_loader = DataLoader(train_dataset, batch_size=512, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

# Initialize the CNN model, loss function, and optimizer
model = CNNWithLAF().to(device)
criterion = nn.CrossEntropyLoss()
# Adam is constructed over the parameters registered on the model at this point.
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training function
def train(model, train_loader, criterion, optimizer, num_epochs=10, device=None):
    """Train ``model`` and print the mean loss and accuracy after each epoch.

    Args:
        model: Network to optimize; it is switched to training mode.
        train_loader: Sized iterable yielding ``(images, labels)`` batches.
        criterion: Callable returning a scalar loss from ``(outputs, labels)``.
        optimizer: Optimizer that updates the model's parameters.
        num_epochs: Number of full passes over ``train_loader``.
        device: Device each batch is moved to. When ``None`` (the default) the
            device of the model's first parameter is used, so the function no
            longer relies on a module-level ``device`` variable.

    Returns:
        None. Progress is reported through ``print``.
    """
    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device("cpu")

    model.train()
    for epoch in range(num_epochs):
        running_loss = 0.0
        correct_train = 0
        total_train = 0
        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)

            # Forward pass
            outputs = model(images)
            loss = criterion(outputs, labels)

            # Backward pass and optimization
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            running_loss += loss.item()

            # Accuracy bookkeeping; detach() replaces the legacy `.data` access
            predicted = outputs.detach().argmax(dim=1)
            total_train += labels.size(0)
            correct_train += (predicted == labels).sum().item()

        # Loss is averaged over batches, accuracy over individual samples
        train_accuracy = 100 * correct_train / total_train
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(train_loader):.4f}, Training Accuracy: {train_accuracy:.2f}%')

# Evaluation function
def evaluate(model, test_loader, device=None):
    """Print the classification accuracy of ``model`` over ``test_loader``.

    The model is switched to evaluation mode (and left in it), and gradient
    tracking is disabled for the duration of the pass.

    Args:
        model: Network to evaluate.
        test_loader: Iterable yielding ``(images, labels)`` batches.
        device: Device each batch is moved to. When ``None`` (the default) the
            device of the model's first parameter is used, so the function no
            longer relies on a module-level ``device`` variable.

    Returns:
        None. The accuracy is reported through ``print``.
    """
    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device("cpu")

    model.eval()
    correct_test = 0
    total_test = 0
    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            # Predicted class = index of the largest logit in each row
            predicted = outputs.argmax(dim=1)
            total_test += labels.size(0)
            correct_test += (predicted == labels).sum().item()

    test_accuracy = 100 * correct_test / total_test
    print(f'Accuracy of the model on the test set: {test_accuracy:.2f}%')

# Train and evaluate the CNN model
train(model, train_loader, criterion, optimizer, num_epochs=10)
evaluate(model, test_loader)



Files already downloaded and verified
Files already downloaded and verified
Epoch [1/10], Loss: 1.7317, Training Accuracy: 42.03%
Epoch [2/10], Loss: 1.3222, Training Accuracy: 55.60%
Epoch [3/10], Loss: 1.1236, Training Accuracy: 62.45%
Epoch [4/10], Loss: 1.0089, Training Accuracy: 66.12%
Epoch [5/10], Loss: 0.9286, Training Accuracy: 68.79%
Epoch [6/10], Loss: 0.8718, Training Accuracy: 70.65%
Epoch [7/10], Loss: 0.8301, Training Accuracy: 72.32%
Epoch [8/10], Loss: 0.7836, Training Accuracy: 73.87%
Epoch [9/10], Loss: 0.7480, Training Accuracy: 75.06%
Epoch [10/10], Loss: 0.7189, Training Accuracy: 76.05%
Accuracy of the model on the test set: 62.79%
