In [None]:
!pip install torch torchvision


# on complete data set baseline model without weight decay

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, random_split

# Preprocessing: convert each image to a tensor, then standardise it with the
# single-channel mean/std constants used throughout this notebook.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,))
])

# Load the MNIST dataset (downloaded into ./data on first use)
train_dataset = datasets.MNIST(root='./data', train=True, download=True, transform=transform)
test_dataset = datasets.MNIST(root='./data', train=False, download=True, transform=transform)

# Split the training dataset into training and validation sets.
# A seeded generator keeps the 50k/10k partition identical across kernel
# restarts, so reported numbers stay comparable between runs.
split_generator = torch.Generator().manual_seed(42)
train_dataset, val_dataset = random_split(train_dataset, [50000, 10000], generator=split_generator)

# Define data loaders; only the training loader shuffles.
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
# NOTE(review): val_loader is built here but not consumed by the training cell below.
val_loader = DataLoader(val_dataset, batch_size=64)
test_loader = DataLoader(test_dataset, batch_size=64)

# Define LeNet-5 model
class LeNet5(nn.Module):
    """LeNet-5 style CNN with dropout after the first fully connected layer.

    Two 5x5 convolutions, each followed by ReLU and 2x2 max-pooling, feed three
    linear layers that end in 10 outputs. ``fc1`` takes ``16 * 4 * 4`` features,
    which is what the convolutional stack yields for 1x28x28 inputs.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 6, kernel_size=5)
        self.conv2 = nn.Conv2d(6, 16, kernel_size=5)
        self.fc1 = nn.Linear(16 * 4 * 4, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)
        self.relu = nn.ReLU()
        self.maxpool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.dropout = nn.Dropout(0.5)  # Dropout with probability 0.5

    def forward(self, x):
        """Return raw class scores of shape (batch, 10) for a batch of images."""
        x = self.maxpool(self.relu(self.conv1(x)))
        x = self.maxpool(self.relu(self.conv2(x)))
        # Flatten per sample. The previous view(-1, 256) silently regrouped
        # elements across the batch when the spatial size was not 4x4; now a
        # wrongly sized input fails loudly at fc1 instead.
        x = x.flatten(1)
        x = self.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.relu(self.fc2(x))
        x = self.fc3(x)
        return x

# Initialize the model
model = LeNet5()

# Print total parameter count
total_params = sum(p.numel() for p in model.parameters())
print("Total parameters:", total_params)

# Define loss function and optimizer (Adam, no weight decay in this baseline)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Train the model
epochs = 10
for epoch in range(epochs):
    model.train()  # enable dropout for training
    for batch_idx, (data, target) in enumerate(train_loader):
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        # Progress line every 100 mini-batches
        if batch_idx % 100 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))

# Evaluate the model on test set
model.eval()  # disable dropout for evaluation
test_loss = 0.0
correct = 0
with torch.no_grad():
    for data, target in test_loader:
        output = model(data)
        # criterion returns the mean over the batch; weight it by the batch size
        # so that dividing by the dataset size below gives a per-sample average
        # (previously the sum of batch means was divided by the sample count).
        test_loss += criterion(output, target).item() * data.size(0)
        pred = output.argmax(dim=1, keepdim=True)
        correct += pred.eq(target.view_as(pred)).sum().item()

test_loss /= len(test_loader.dataset)
accuracy = 100. * correct / len(test_loader.dataset)
print('Test set: Average loss: {:.4f}, Accuracy: {}/{} ({:.2f}%)\n'.format(
    test_loss, correct, len(test_loader.dataset), accuracy))


# Small MNIST data set without weight decay (baseline)

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from torchvision import transforms

# Define a custom dataset class
class CustomDataset(Dataset):
    """Dataset over a DataFrame whose first column is the label and whose
    remaining 784 columns are the pixels of a 28x28 image.

    Args:
        data: DataFrame laid out as described above.
        transform: optional callable applied to each (28, 28, 1) image array.
    """

    def __init__(self, data, transform=None):
        self.data = data
        self.transform = transform
        # Convert once up front instead of doing a DataFrame row lookup and a
        # dtype conversion on every __getitem__ call. float32 matches the
        # model's parameter dtype (the original produced float64 arrays).
        self._images = data.iloc[:, 1:].to_numpy(dtype="float32").reshape(len(data), 28, 28, 1)
        self._labels = data.iloc[:, 0].to_numpy()

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for the row at position ``idx``."""
        # Copy so callers never receive a view into the cached array.
        image = self._images[idx].copy()
        label = int(self._labels[idx])

        if self.transform:
            image = self.transform(image)

        return image, label

# Read the reduced MNIST table and hold out 20% of the rows for testing
# (fixed random_state so the partition is repeatable).
mnist_df = pd.read_csv("small_mnist.csv")
train_df, test_df = train_test_split(mnist_df, random_state=42, test_size=0.2)

# Tensor conversion followed by standardisation.
# NOTE(review): the dataset hands float arrays to ToTensor; confirm the pixel
# range in the CSV matches what these mean/std constants assume.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,)),
])

# Wrap each partition in the custom dataset, sharing the same transform
train_dataset, test_dataset = (
    CustomDataset(frame, transform=transform) for frame in (train_df, test_df)
)

# Mini-batch loaders; only the training loader shuffles
train_loader = DataLoader(dataset=train_dataset, shuffle=True, batch_size=64)
test_loader = DataLoader(dataset=test_dataset, batch_size=64)

# Define LeNet-5 model
class LeNet5(nn.Module):
    """LeNet-5 style CNN with dropout after the first fully connected layer.

    Inputs are cast to float32 in ``forward`` so batches of another floating
    dtype can be fed directly. ``fc1`` takes ``16 * 4 * 4`` features, which is
    what the convolutional stack yields for 1x28x28 inputs.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 6, kernel_size=5)
        self.conv2 = nn.Conv2d(6, 16, kernel_size=5)
        self.fc1 = nn.Linear(16 * 4 * 4, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)
        self.relu = nn.ReLU()
        self.maxpool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.dropout = nn.Dropout(0.5)  # Dropout with probability 0.5

    def forward(self, x):
        """Return raw class scores of shape (batch, 10) for a batch of images."""
        x = self.maxpool(self.relu(self.conv1(x.float())))
        x = self.maxpool(self.relu(self.conv2(x)))
        # Flatten per sample. The previous view(-1, 256) silently regrouped
        # elements across the batch when the spatial size was not 4x4; now a
        # wrongly sized input fails loudly at fc1 instead.
        x = x.flatten(1)
        x = self.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.relu(self.fc2(x))
        x = self.fc3(x)
        return x

# Initialize the model
model = LeNet5()

# Print total parameter count
total_params = sum(p.numel() for p in model.parameters())
print("Total parameters:", total_params)

# Define loss function and optimizer (Adam, no weight decay in this baseline)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Train the model
epochs = 100
train_losses = []  # loss of every mini-batch from epoch 1 onwards
for epoch in range(epochs):
    model.train()  # enable dropout for training
    for batch_idx, (data, target) in enumerate(train_loader):
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        # Record every batch after epoch 0. Previously only batches that hit
        # the logging condition below were recorded, so the reported average
        # was based on a small sample of the batches.
        if epoch > 0:
            train_losses.append(loss.item())
        if batch_idx % 100 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))
    # Print loss of the last mini-batch every 10 epochs
    if epoch % 10 == 0:
        print('Epoch {}, Loss: {:.6f}'.format(epoch, loss.item()))

# Evaluate the model on test set
model.eval()  # disable dropout for evaluation
test_loss = 0.0
correct = 0
with torch.no_grad():
    for data, target in test_loader:
        output = model(data)
        # criterion returns the batch mean; weight by batch size so the
        # division by the dataset size below yields a per-sample average.
        test_loss += criterion(output, target).item() * data.size(0)
        pred = output.argmax(dim=1, keepdim=True)
        correct += pred.eq(target.view_as(pred)).sum().item()

test_loss /= len(test_loader.dataset)
accuracy = 100. * correct / len(test_loader.dataset)

# Calculate average train loss (excluding epoch 0); NaN instead of a
# ZeroDivisionError when nothing was recorded (e.g. epochs <= 1).
average_train_loss = sum(train_losses) / len(train_losses) if train_losses else float('nan')

print('Test set: Average loss: {:.4f}, Accuracy: {}/{} ({:.2f}%)\n'.format(
    test_loss, correct, len(test_loader.dataset), accuracy))
print('Average train loss (excluding epoch 0): {:.4f}'.format(average_train_loss))



#small_mnist data set with weight decay = 0.0005 (baseline model)

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from torchvision import transforms

# Define a custom dataset class
class CustomDataset(Dataset):
    """Dataset over a DataFrame whose first column is the label and whose
    remaining 784 columns are the pixels of a 28x28 image.

    Args:
        data: DataFrame laid out as described above.
        transform: optional callable applied to each (28, 28, 1) image array.
    """

    def __init__(self, data, transform=None):
        self.data = data
        self.transform = transform
        # Convert once up front instead of doing a DataFrame row lookup and a
        # dtype conversion on every __getitem__ call. float32 matches the
        # model's parameter dtype (the original produced float64 arrays).
        self._images = data.iloc[:, 1:].to_numpy(dtype="float32").reshape(len(data), 28, 28, 1)
        self._labels = data.iloc[:, 0].to_numpy()

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for the row at position ``idx``."""
        # Copy so callers never receive a view into the cached array.
        image = self._images[idx].copy()
        label = int(self._labels[idx])

        if self.transform:
            image = self.transform(image)

        return image, label

# Read the reduced MNIST table and hold out 20% of the rows for testing
# (fixed random_state so the partition is repeatable).
mnist_df = pd.read_csv("small_mnist.csv")
train_df, test_df = train_test_split(mnist_df, random_state=42, test_size=0.2)

# Tensor conversion followed by standardisation.
# NOTE(review): the dataset hands float arrays to ToTensor; confirm the pixel
# range in the CSV matches what these mean/std constants assume.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,)),
])

# Wrap each partition in the custom dataset, sharing the same transform
train_dataset, test_dataset = (
    CustomDataset(frame, transform=transform) for frame in (train_df, test_df)
)

# Mini-batch loaders; only the training loader shuffles
train_loader = DataLoader(dataset=train_dataset, shuffle=True, batch_size=64)
test_loader = DataLoader(dataset=test_dataset, batch_size=64)
# Define LeNet-5 model
class LeNet5(nn.Module):
    """LeNet-5 style CNN with dropout after the first fully connected layer.

    Inputs are cast to float32 in ``forward`` so batches of another floating
    dtype can be fed directly. ``fc1`` takes ``16 * 4 * 4`` features, which is
    what the convolutional stack yields for 1x28x28 inputs.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 6, kernel_size=5)
        self.conv2 = nn.Conv2d(6, 16, kernel_size=5)
        self.fc1 = nn.Linear(16 * 4 * 4, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)
        self.relu = nn.ReLU()
        self.maxpool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.dropout = nn.Dropout(0.5)  # Dropout with probability 0.5

    def forward(self, x):
        """Return raw class scores of shape (batch, 10) for a batch of images."""
        x = self.maxpool(self.relu(self.conv1(x.float())))
        x = self.maxpool(self.relu(self.conv2(x)))
        # Flatten per sample. The previous view(-1, 256) silently regrouped
        # elements across the batch when the spatial size was not 4x4; now a
        # wrongly sized input fails loudly at fc1 instead.
        x = x.flatten(1)
        x = self.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.relu(self.fc2(x))
        x = self.fc3(x)
        return x

# Initialize the model
model = LeNet5()

# Define loss function
criterion = nn.CrossEntropyLoss()

# Define optimizer with weight decay
optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=0.0005)

# Train the model
epochs = 100
train_losses = []  # loss of every mini-batch from epoch 1 onwards
for epoch in range(epochs):
    model.train()  # enable dropout for training
    for batch_idx, (data, target) in enumerate(train_loader):
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        # Record every batch after epoch 0. Previously only batches that hit
        # the logging condition below were recorded, so the reported average
        # was based on a small sample of the batches.
        if epoch > 0:
            train_losses.append(loss.item())
        if batch_idx % 100 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))
    # Print loss of the last mini-batch every 10 epochs
    if epoch % 10 == 0:
        print('Epoch {}, Loss: {:.6f}'.format(epoch, loss.item()))

# Evaluate the model on test set
model.eval()  # disable dropout for evaluation
test_loss = 0.0
correct = 0
with torch.no_grad():
    for data, target in test_loader:
        output = model(data)
        # criterion returns the batch mean; weight by batch size so the
        # division by the dataset size below yields a per-sample average.
        test_loss += criterion(output, target).item() * data.size(0)
        pred = output.argmax(dim=1, keepdim=True)
        correct += pred.eq(target.view_as(pred)).sum().item()

test_loss /= len(test_loader.dataset)
accuracy = 100. * correct / len(test_loader.dataset)

# Calculate average train loss (excluding epoch 0); NaN instead of a
# ZeroDivisionError when nothing was recorded (e.g. epochs <= 1).
average_train_loss = sum(train_losses) / len(train_losses) if train_losses else float('nan')

print('Test set: Average loss: {:.4f}, Accuracy: {}/{} ({:.2f}%)\n'.format(
    test_loss, correct, len(test_loader.dataset), accuracy))
print('Average train loss (excluding epoch 0): {:.4f}'.format(average_train_loss))


# Implementing Data_grad with all default parameters on MNIST data set

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms

# Define LeNet architecture
class LeNet(nn.Module):
    """LeNet-style CNN: two conv+ReLU+pool stages and three linear layers.

    ``fc1`` takes ``16 * 4 * 4`` features, which is what the convolutional
    stack yields for 1x28x28 inputs; the network emits 10 raw class scores.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 4 * 4, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        """Return raw class scores of shape (batch, 10) for a batch of images."""
        x = self.pool(torch.relu(self.conv1(x)))
        x = self.pool(torch.relu(self.conv2(x)))
        # Flatten per sample. The previous view(-1, 256) silently regrouped
        # elements across the batch when the spatial size was not 4x4; now a
        # wrongly sized input fails loudly at fc1 instead.
        x = x.flatten(1)
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = self.fc3(x)
        return x

# Preprocessing: tensor conversion followed by standardisation
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,))
])

# Load MNIST dataset with 50,000 images for training and 10,000 images for testing.
# The split is seeded so the same 50k training images are used on every run.
trainset = torchvision.datasets.MNIST(root='./data', train=True, download=True, transform=transform)
trainset, _ = torch.utils.data.random_split(
    trainset, [50000, 10000], generator=torch.Generator().manual_seed(42))
trainloader = torch.utils.data.DataLoader(trainset, batch_size=64, shuffle=True)

testset = torchvision.datasets.MNIST(root='./data', train=False, download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=64, shuffle=False)

# Initialize LeNet model
net = LeNet()

# Define loss function
criterion = nn.CrossEntropyLoss()

# Define Adam optimizer with default parameters
optimizer = optim.Adam(net.parameters())

# NOTE(review): the objective below is plain cross-entropy; no input-gradient
# penalty term is added anywhere in this loop — confirm against the cell heading.

# Training loop
for epoch in range(5):  # loop over the dataset multiple times
    # Explicit mode switching, consistent with the other training cells.
    net.train()
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        inputs, labels = data

        # Zero the parameter gradients
        optimizer.zero_grad()

        # Forward pass
        outputs = net(inputs)
        loss = criterion(outputs, labels)

        # Backward pass and optimize
        loss.backward()
        optimizer.step()

        # Print statistics
        running_loss += loss.item()
        if i % 100 == 99:    # print every 100 mini-batches
            print('[%d, %5d] loss: %.3f' % (epoch + 1, i + 1, running_loss / 100))
            running_loss = 0.0

    # Evaluate on test set after every epoch
    net.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for data in testloader:
            images, labels = data
            outputs = net(images)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    print('Accuracy of the network on the 10000 test images: %.2f %%' % (100 * correct / total))

print('Finished Training')


#with optimal data_grad weight 50 + without weight decay

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
import torchvision.transforms as transforms

# Custom Dataset Class
class CustomMNISTDataset(Dataset):
    """Dataset backed by a CSV file whose first column is the label and whose
    remaining 784 columns are the pixels of a 28x28 image.

    Args:
        csv_file: path passed to ``pandas.read_csv``.
        transform: optional callable applied to each (28, 28) image array.
    """

    def __init__(self, csv_file, transform=None):
        self.data = pd.read_csv(csv_file)
        self.transform = transform
        # Convert once up front instead of doing a DataFrame row lookup and a
        # dtype conversion on every __getitem__ call. float32 matches the
        # model's parameter dtype (the original produced float64 arrays).
        self._images = self.data.iloc[:, 1:].to_numpy(dtype="float32").reshape(len(self.data), 28, 28)
        self._labels = self.data.iloc[:, 0].to_numpy()

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for the row at position ``idx``."""
        # Copy so callers never receive a view into the cached array.
        image = self._images[idx].copy()
        label = self._labels[idx]
        if self.transform:
            image = self.transform(image)
        return image, label

# Preprocessing pipeline: tensor conversion, then standardisation
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,)),
])

# Build the CSV-backed dataset
dataset = CustomMNISTDataset(csv_file='small_mnist.csv', transform=transform)

# 80/20 train/test partition with a fixed random_state.
# NOTE(review): the Dataset object itself (not a DataFrame) is handed to
# train_test_split here — confirm the returned splits are what the loaders
# below are meant to consume.
trainset, testset = train_test_split(dataset, random_state=42, test_size=0.2)

# Mini-batch loaders; only the training loader shuffles
trainloader = DataLoader(dataset=trainset, shuffle=True, batch_size=64)
testloader = DataLoader(dataset=testset, shuffle=False, batch_size=64)

# Define LeNet architecture
class LeNet(nn.Module):
    """LeNet-style CNN: two conv+ReLU+pool stages and three linear layers.

    Inputs are cast to float32 in ``forward``. ``fc1`` takes ``16 * 4 * 4``
    features, which is what the convolutional stack yields for 1x28x28 inputs.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 4 * 4, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        """Return raw class scores of shape (batch, 10) for a batch of images."""
        x = self.pool(torch.relu(self.conv1(x.float())))  # Cast input to float to match bias data type
        x = self.pool(torch.relu(self.conv2(x)))
        # Flatten per sample. The previous view(-1, 256) silently regrouped
        # elements across the batch when the spatial size was not 4x4; now a
        # wrongly sized input fails loudly at fc1 instead.
        x = x.flatten(1)
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = self.fc3(x)
        return x

# Initialize LeNet model
net = LeNet()

# Define loss function
criterion = nn.CrossEntropyLoss()

# Adam with its default hyper-parameters; no weight_decay is passed in this run
# (the previous comment claimed weight decay).
optimizer = optim.Adam(net.parameters())

# NOTE(review): the objective below is plain cross-entropy; no input-gradient
# penalty term is added anywhere in this loop — confirm against the cell heading.

# Training loop
for epoch in range(100):  # loop over the dataset multiple times
    # Explicit mode switching, consistent with the other training cells.
    net.train()
    running_loss = 0.0
    for inputs, labels in trainloader:
        # Zero the parameter gradients
        optimizer.zero_grad()

        # Forward pass
        outputs = net(inputs)
        loss = criterion(outputs, labels)

        # Backward pass and optimize
        loss.backward()
        optimizer.step()

        # Accumulate the batch loss for the per-epoch average
        running_loss += loss.item()

    # Print epoch and training loss
    print('[Epoch %d] Training Loss: %.3f' % (epoch + 1, running_loss / len(trainloader)))

print('Finished Training')

# Evaluate on test set after completing training
net.eval()
correct = 0
total = 0
with torch.no_grad():
    for images, labels in testloader:
        outputs = net(images)
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy on Test Set: %.2f %%' % (100 * correct / total))


#Data grad   50 + weight decay = 0.0005

In [None]:
import torch.optim.lr_scheduler as lr_scheduler

# Define LeNet architecture
class LeNet(nn.Module):
    """LeNet-style CNN: two conv+ReLU+pool stages and three linear layers.

    Inputs are cast to float32 in ``forward``. ``fc1`` takes ``16 * 4 * 4``
    features, which is what the convolutional stack yields for 1x28x28 inputs.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 4 * 4, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        """Return raw class scores of shape (batch, 10) for a batch of images."""
        x = self.pool(torch.relu(self.conv1(x.float())))  # Cast input to float to match bias data type
        x = self.pool(torch.relu(self.conv2(x)))
        # Flatten per sample. The previous view(-1, 256) silently regrouped
        # elements across the batch when the spatial size was not 4x4; now a
        # wrongly sized input fails loudly at fc1 instead.
        x = x.flatten(1)
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = self.fc3(x)
        return x

# NOTE: this cell reuses `torch`, `nn`, `optim`, `trainloader` and `testloader`
# from earlier cells; it only runs after those cells have been executed.

# Initialize LeNet model
net = LeNet()

# Define loss function
criterion = nn.CrossEntropyLoss()

# Define Adam optimizer with weight decay
optimizer = optim.Adam(net.parameters(), lr=0.001, weight_decay=0.0005)

# Learning rate scheduler
# NOTE(review): `verbose` is deprecated in recent PyTorch releases — confirm
# against the installed version.
scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=5, verbose=True, min_lr=1e-6)

# Upper bound on the global gradient norm (used as max_norm for clipping below)
target_grad = 50

# Training loop
for epoch in range(100):  # loop over the dataset multiple times
    # Explicit mode switching, consistent with the other training cells.
    net.train()
    running_loss = 0.0
    for inputs, labels in trainloader:
        # Zero the parameter gradients
        optimizer.zero_grad()

        # Forward pass
        outputs = net(inputs)
        loss = criterion(outputs, labels)

        # Backward pass
        loss.backward()

        # Clip gradients to target_grad; the call returns the pre-clip total norm.
        grad_norm = nn.utils.clip_grad_norm_(net.parameters(), max_norm=target_grad)

        # Both original branches stepped the optimizer, so step once here.
        optimizer.step()

        # The plateau scheduler is only fed on steps whose gradient was clipped.
        # NOTE(review): the scheduler therefore sees a single noisy batch loss at
        # irregular intervals rather than one metric per epoch — confirm intent.
        if grad_norm > target_grad:
            scheduler.step(loss.item())

        # Accumulate the batch loss for the per-epoch average
        running_loss += loss.item()

    # Print epoch and training loss
    print('[Epoch %d] Training Loss: %.3f' % (epoch + 1, running_loss / len(trainloader)))

print('Finished Training')

# Evaluate on test set after completing training
net.eval()
correct = 0
total = 0
with torch.no_grad():
    for images, labels in testloader:
        outputs = net(images)
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy on Test Set: %.2f %%' % (100 * correct / total))


#Spect Reg + with weight decay on small_mnist

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
from torch.utils.data import Dataset, DataLoader
import pandas as pd
import torch.optim.lr_scheduler as lr_scheduler

# Custom Dataset Class
class CustomMNISTDataset(Dataset):
    """Dataset backed by a CSV file whose first column is the label and whose
    remaining 784 columns are the pixels of a 28x28 image.

    Args:
        csv_file: path passed to ``pandas.read_csv``.
        transform: optional callable applied to each (28, 28) image array.
    """

    def __init__(self, csv_file, transform=None):
        self.data = pd.read_csv(csv_file)
        self.transform = transform
        # Convert once up front instead of doing a DataFrame row lookup and a
        # dtype conversion on every __getitem__ call. float32 matches the
        # model's parameter dtype (the original produced float64 arrays).
        self._images = self.data.iloc[:, 1:].to_numpy(dtype="float32").reshape(len(self.data), 28, 28)
        self._labels = self.data.iloc[:, 0].to_numpy()

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for the row at position ``idx``."""
        # Copy so callers never receive a view into the cached array.
        image = self._images[idx].copy()
        label = self._labels[idx]
        if self.transform:
            image = self.transform(image)
        return image, label

# Preprocessing pipeline: tensor conversion, then standardisation
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,))
])

# Load the dataset
dataset = CustomMNISTDataset(csv_file='small_mnist.csv', transform=transform)

# Split dataset into training and testing sets (80% train, 20% test).
# The generator is seeded so the partition is identical on every run; an
# unseeded split makes accuracy figures from different runs incomparable.
train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size
split_generator = torch.Generator().manual_seed(42)
trainset, testset = torch.utils.data.random_split(
    dataset, [train_size, test_size], generator=split_generator)

# Create data loaders for training and testing sets
trainloader = DataLoader(trainset, batch_size=64, shuffle=True)
testloader = DataLoader(testset, batch_size=64, shuffle=False)

# Define LeNet architecture
class LeNet(nn.Module):
    """LeNet-style CNN: two conv+ReLU+pool stages and three linear layers.

    Inputs are cast to float32 in ``forward``. ``fc1`` takes ``16 * 4 * 4``
    features, which is what the convolutional stack yields for 1x28x28 inputs.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 4 * 4, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        """Return raw class scores of shape (batch, 10) for a batch of images."""
        x = self.pool(torch.relu(self.conv1(x.float())))  # Cast input to float to match bias data type
        x = self.pool(torch.relu(self.conv2(x)))
        # Flatten per sample. The previous view(-1, 256) silently regrouped
        # elements across the batch when the spatial size was not 4x4; now a
        # wrongly sized input fails loudly at fc1 instead.
        x = x.flatten(1)
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = self.fc3(x)
        return x

# Initialize LeNet model
net = LeNet()

# Apply spectral normalization to the linear layers. Done before the optimizer
# is created so the optimizer is built from the final set of parameters.
net.fc1 = nn.utils.spectral_norm(net.fc1)
net.fc2 = nn.utils.spectral_norm(net.fc2)
net.fc3 = nn.utils.spectral_norm(net.fc3)

# Define loss function
criterion = nn.CrossEntropyLoss()

# Define Adam optimizer with weight decay
optimizer = optim.Adam(net.parameters(), lr=0.001, weight_decay=0.0005)

# Learning rate scheduler
# NOTE(review): `verbose` is deprecated in recent PyTorch releases — confirm
# against the installed version.
scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=5, verbose=True, min_lr=1e-6)

# Upper bound on the global gradient norm (used as max_norm for clipping below)
target_grad = 50

# Training loop
for epoch in range(10):  # loop over the dataset multiple times
    # Training mode: spectral_norm refreshes its power-iteration estimate on
    # each forward pass in this mode.
    net.train()
    running_loss = 0.0
    for inputs, labels in trainloader:
        # Zero the parameter gradients
        optimizer.zero_grad()

        # Forward pass
        outputs = net(inputs)
        loss = criterion(outputs, labels)

        # Extra loss term summed over the spectrally-normalised linear layers.
        # NOTE(review): `weight_u` is the vector spectral_norm keeps for its
        # power iteration rather than a trainable weight, so this term looks
        # constant with respect to the parameters — confirm the intended
        # regulariser.
        spectreg_loss = 0
        for module in net.modules():
            if isinstance(module, nn.Linear):
                spectreg_loss += torch.norm(module.weight_u, p=2)

        # Add the extra term to the main loss, weighted by 0.05
        loss = loss + 0.05 * spectreg_loss

        # Backward pass
        loss.backward()

        # Clip gradients to target_grad; the call returns the pre-clip total norm.
        grad_norm = nn.utils.clip_grad_norm_(net.parameters(), max_norm=target_grad)

        # Both original branches stepped the optimizer, so step once here.
        optimizer.step()

        # The plateau scheduler is only fed on steps whose gradient was clipped.
        if grad_norm > target_grad:
            scheduler.step(loss.item())

        # Accumulate the (regularised) batch loss for the per-epoch average
        running_loss += loss.item()

    # Print epoch and training loss
    print('[Epoch %d] Training Loss: %.3f' % (epoch + 1, running_loss / len(trainloader)))

print('Finished Training')

# Evaluate on test set after completing training.
# eval() matters here: left in training mode, the spectral-norm layers keep
# updating their internal buffers while the test set is scored.
net.eval()
correct = 0
total = 0
with torch.no_grad():
    for images, labels in testloader:
        outputs = net(images)
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy on Test Set: %.2f %%' % (100 * correct / total))


# spectreg without weight decay

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
from torch.utils.data import Dataset, DataLoader
import pandas as pd
import torch.optim.lr_scheduler as lr_scheduler

# Custom Dataset Class
class CustomMNISTDataset(Dataset):
    """Dataset backed by a CSV file whose first column is the label and whose
    remaining 784 columns are the pixels of a 28x28 image.

    Args:
        csv_file: path passed to ``pandas.read_csv``.
        transform: optional callable applied to each (28, 28) image array.
    """

    def __init__(self, csv_file, transform=None):
        self.data = pd.read_csv(csv_file)
        self.transform = transform
        # Convert once up front instead of doing a DataFrame row lookup and a
        # dtype conversion on every __getitem__ call. float32 matches the
        # model's parameter dtype (the original produced float64 arrays).
        self._images = self.data.iloc[:, 1:].to_numpy(dtype="float32").reshape(len(self.data), 28, 28)
        self._labels = self.data.iloc[:, 0].to_numpy()

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for the row at position ``idx``."""
        # Copy so callers never receive a view into the cached array.
        image = self._images[idx].copy()
        label = self._labels[idx]
        if self.transform:
            image = self.transform(image)
        return image, label

# Preprocessing pipeline: tensor conversion, then standardisation
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,))
])

# Load the dataset
dataset = CustomMNISTDataset(csv_file='small_mnist.csv', transform=transform)

# Split dataset into training and testing sets (80% train, 20% test).
# The generator is seeded so the partition is identical on every run; an
# unseeded split makes accuracy figures from different runs incomparable.
train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size
split_generator = torch.Generator().manual_seed(42)
trainset, testset = torch.utils.data.random_split(
    dataset, [train_size, test_size], generator=split_generator)

# Create data loaders for training and testing sets
trainloader = DataLoader(trainset, batch_size=64, shuffle=True)
testloader = DataLoader(testset, batch_size=64, shuffle=False)

# Define LeNet architecture
class LeNet(nn.Module):
    """LeNet-style CNN: two conv+ReLU+pool stages and three linear layers.

    Inputs are cast to float32 in ``forward``. ``fc1`` takes ``16 * 4 * 4``
    features, which is what the convolutional stack yields for 1x28x28 inputs.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 4 * 4, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        """Return raw class scores of shape (batch, 10) for a batch of images."""
        x = self.pool(torch.relu(self.conv1(x.float())))  # Cast input to float to match bias data type
        x = self.pool(torch.relu(self.conv2(x)))
        # Flatten per sample. The previous view(-1, 256) silently regrouped
        # elements across the batch when the spatial size was not 4x4; now a
        # wrongly sized input fails loudly at fc1 instead.
        x = x.flatten(1)
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = self.fc3(x)
        return x

# Initialize LeNet model
net = LeNet()

# Apply spectral normalization to the linear layers. Done before the optimizer
# is created so the optimizer is built from the final set of parameters.
net.fc1 = nn.utils.spectral_norm(net.fc1)
net.fc2 = nn.utils.spectral_norm(net.fc2)
net.fc3 = nn.utils.spectral_norm(net.fc3)

# Define loss function
criterion = nn.CrossEntropyLoss()

# Adam without weight decay (the previous comment claimed weight decay, but
# none is passed in this run).
optimizer = optim.Adam(net.parameters(), lr=0.001)

# Learning rate scheduler
# NOTE(review): `verbose` is deprecated in recent PyTorch releases — confirm
# against the installed version.
scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=5, verbose=True, min_lr=1e-6)

# Upper bound on the global gradient norm (used as max_norm for clipping below)
target_grad = 50

# Training loop
for epoch in range(10):  # loop over the dataset multiple times
    # Training mode: spectral_norm refreshes its power-iteration estimate on
    # each forward pass in this mode.
    net.train()
    running_loss = 0.0
    for inputs, labels in trainloader:
        # Zero the parameter gradients
        optimizer.zero_grad()

        # Forward pass
        outputs = net(inputs)
        loss = criterion(outputs, labels)

        # Extra loss term summed over the spectrally-normalised linear layers.
        # NOTE(review): `weight_u` is the vector spectral_norm keeps for its
        # power iteration rather than a trainable weight, so this term looks
        # constant with respect to the parameters — confirm the intended
        # regulariser.
        spectreg_loss = 0
        for module in net.modules():
            if isinstance(module, nn.Linear):
                spectreg_loss += torch.norm(module.weight_u, p=2)

        # Add the extra term to the main loss, weighted by 0.03
        loss = loss + 0.03 * spectreg_loss

        # Backward pass
        loss.backward()

        # Clip gradients to target_grad; the call returns the pre-clip total norm.
        grad_norm = nn.utils.clip_grad_norm_(net.parameters(), max_norm=target_grad)

        # Both original branches stepped the optimizer, so step once here.
        optimizer.step()

        # The plateau scheduler is only fed on steps whose gradient was clipped.
        if grad_norm > target_grad:
            scheduler.step(loss.item())

        # Accumulate the (regularised) batch loss for the per-epoch average
        running_loss += loss.item()

    # Print epoch and training loss
    print('[Epoch %d] Training Loss: %.3f' % (epoch + 1, running_loss / len(trainloader)))

print('Finished Training')

# Evaluate on test set after completing training.
# eval() matters here: left in training mode, the spectral-norm layers keep
# updating their internal buffers while the test set is scored.
net.eval()
correct = 0
total = 0
with torch.no_grad():
    for images, labels in testloader:
        outputs = net(images)
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy on Test Set: %.2f %%' % (100 * correct / total))


#CIFAR-10

# base line model without weight decay

In [None]:
### CIFAR-10 baseline model
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.utils.data import DataLoader, Subset
import torchvision.models as models

# Preprocessing: tensor conversion, then per-channel normalisation with
# mean 0.5 and std 0.5.
# NOTE(review): 0.5/0.5 look like generic scaling constants rather than
# measured CIFAR-10 channel statistics — confirm this is intended.
to_tensor = transforms.ToTensor()
normalize = transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))
transform = transforms.Compose([to_tensor, normalize])

# Both CIFAR-10 splits share the same root directory and preprocessing
cifar_kwargs = dict(root='./data', download=True, transform=transform)
trainset = datasets.CIFAR10(train=True, **cifar_kwargs)
testset = datasets.CIFAR10(train=False, **cifar_kwargs)

# Function to get indices of limited samples per class
def get_limited_indices(dataset, limit_per_class, num_classes=10):
    """Return the indices of the first ``limit_per_class`` samples of each class.

    The dataset is scanned in index order and scanning stops as soon as every
    class has reached its quota.

    Args:
        dataset: indexable collection whose items are ``(sample, label)`` pairs.
            If it exposes a ``targets`` sequence and has no ``target_transform``,
            labels are read from ``targets`` so samples are never loaded.
        limit_per_class: maximum number of indices to keep per class.
        num_classes: number of distinct labels, assumed to be ``0..num_classes-1``.

    Returns:
        List of selected indices in ascending order.
    """
    indices = []
    if limit_per_class <= 0 or num_classes <= 0:
        return indices

    # Fast path: avoid loading/transforming every sample just to read its label.
    # Skipped when a target_transform is set, because `targets` would then not
    # match the labels returned by dataset[i].
    labels = None
    if getattr(dataset, "target_transform", None) is None:
        labels = getattr(dataset, "targets", None)

    count_per_class = [0] * num_classes
    classes_remaining = num_classes  # classes still below their quota
    for i in range(len(dataset)):
        label = int(labels[i]) if labels is not None else dataset[i][1]
        if count_per_class[label] < limit_per_class:
            indices.append(i)
            count_per_class[label] += 1
            if count_per_class[label] == limit_per_class:
                classes_remaining -= 1
                if classes_remaining == 0:
                    break
    return indices

# Restrict training to the first 200 images of every class
train_indices = get_limited_indices(dataset=trainset, limit_per_class=200)
trainset_limited = Subset(dataset=trainset, indices=train_indices)

# Loaders: shuffled mini-batches of the reduced training set, ordered
# mini-batches of the full test set
trainloader = DataLoader(dataset=trainset_limited, shuffle=True, batch_size=64)
testloader = DataLoader(dataset=testset, shuffle=False, batch_size=64)

# Define ResNet-18 model
class ResNet18(nn.Module):
    """Thin wrapper around torchvision's ResNet-18 with a 10-way output layer."""

    def __init__(self):
        super().__init__()
        # Randomly initialised network. The deprecated `pretrained=False`
        # argument is dropped: not loading weights is the default, so omitting
        # it gives the same model without the deprecation warning.
        self.resnet = models.resnet18(num_classes=10)  # CIFAR-10 has 10 classes

    def forward(self, x):
        """Return the wrapped network's raw class scores for batch ``x``."""
        return self.resnet(x)

# Model and objective
model = ResNet18()
criterion = nn.CrossEntropyLoss()

# SGD with momentum; no weight_decay is passed in this baseline run
optimizer = optim.SGD(model.parameters(), lr=0.1, momentum=0.9)

# Step-wise learning-rate decay.
# NOTE(review): milestones 150 and 250 lie beyond the 30 epochs run below, so
# the learning rate is never reduced in this cell — confirm intended schedule.
scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=[150, 250], gamma=0.1)

# Training loop
for epoch in range(30):  # Adjust the number of epochs as needed
    model.train()
    running_loss = 0.0
    for inputs, labels in trainloader:
        # Fresh gradients, forward pass, loss
        optimizer.zero_grad()
        loss = criterion(model(inputs), labels)

        # Backward pass and parameter update
        loss.backward()
        optimizer.step()

        # Accumulate the batch loss for the per-epoch average
        running_loss += loss.item()
    # Advance the schedule once per epoch
    scheduler.step()

    # Print epoch and training loss
    mean_epoch_loss = running_loss / len(trainloader)
    print('[Epoch %d] Training Loss: %.3f' % (epoch + 1, mean_epoch_loss))

print('Finished Training')

# Score the trained model on the full test set
total = 0
correct = 0
model.eval()
with torch.no_grad():
    for images, labels in testloader:
        outputs = model(images)
        predicted = torch.max(outputs.data, 1)[1]  # index of the highest score
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy on Test Set: %.2f %%' % (100 * correct / total))


# baseline model with weight decay = 0.003

In [None]:
# import torch
# import torch.nn as nn
# import torch.optim as optim
# import torchvision.transforms as transforms
# import torchvision.datasets as datasets
# from torch.utils.data import DataLoader, Subset
# import torchvision.models as models

# # Transformations for the dataset
# transform = transforms.Compose([
#     transforms.ToTensor(),
#     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))  # CIFAR-10 mean and std for each channel
# ])

# # Load CIFAR-10 training and testing datasets
# trainset = datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
# testset = datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)

# Function to get indices of limited samples per class
def get_limited_indices(dataset, limit_per_class, num_classes=10):
    """Return the indices of the first ``limit_per_class`` samples of each class.

    Args:
        dataset: Indexable dataset whose items are ``(sample, label)`` pairs.
            When it exposes a ``targets`` sequence of the same length, labels
            are read from it so no sample has to be loaded or transformed.
        limit_per_class: Maximum number of indices kept per label.
        num_classes: Number of distinct labels; every label must lie in
            ``range(num_classes)``. Defaults to 10.

    Returns:
        list[int]: Selected indices in ascending dataset order.
    """
    if limit_per_class <= 0 or num_classes <= 0:
        return []

    labels = getattr(dataset, "targets", None)
    if labels is None or len(labels) != len(dataset):
        # No usable label list: fetch labels lazily, one item at a time.
        labels = (dataset[i][1] for i in range(len(dataset)))

    indices = []
    count_per_class = [0] * num_classes
    classes_remaining = num_classes  # classes that still need samples
    for i, label in enumerate(labels):
        label = int(label)
        if count_per_class[label] >= limit_per_class:
            continue
        indices.append(i)
        count_per_class[label] += 1
        if count_per_class[label] == limit_per_class:
            classes_remaining -= 1
            if classes_remaining == 0:
                # Every class is full; stop before touching further items.
                break
    return indices

# Keep at most 200 training samples per class.
# NOTE: trainset / testset / Subset / DataLoader are not defined in this cell
# (the lines that would create them are commented out above); they must
# already exist in the kernel from an earlier cell.
train_indices = get_limited_indices(trainset, 200)
trainset_limited = Subset(trainset, train_indices)

# Loaders: the reduced training subset is shuffled, the complete test set
# is read in its stored order
trainloader = DataLoader(trainset_limited, batch_size=64, shuffle=True)
testloader = DataLoader(testset, batch_size=64, shuffle=False)

# Define ResNet-18 model
class ResNet18(nn.Module):
    """Thin ``nn.Module`` wrapper around torchvision's ``resnet18``.

    The wrapped network is requested without pretrained weights and with a
    10-way output layer.
    """

    def __init__(self):
        super().__init__()
        # CIFAR-10 has 10 classes
        backbone = models.resnet18(pretrained=False, num_classes=10)
        self.resnet = backbone

    def forward(self, x):
        """Pass ``x`` through the wrapped network and return its output."""
        scores = self.resnet(x)
        return scores

# Fresh ResNet-18 wrapper for this run
model = ResNet18()

# Cross-entropy on the raw class scores
criterion = nn.CrossEntropyLoss()

# SGD with momentum and L2 weight decay of 0.003
optimizer = optim.SGD(model.parameters(), lr=0.1, momentum=0.9, weight_decay=0.003)

# Multiplies the LR by 0.1 at scheduler steps 150 and 250 (one step per epoch below)
scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=[150, 250], gamma=0.1)

# Training loop
for epoch in range(33):  # Adjust the number of epochs as needed
    model.train()
    running_loss = 0.0
    for inputs, labels in trainloader:
        optimizer.zero_grad()
        loss = criterion(model(inputs), labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    scheduler.step()

    # Mean of the per-batch losses seen in this epoch
    epoch_loss = running_loss / len(trainloader)
    print('[Epoch %d] Training Loss: %.3f' % (epoch + 1, epoch_loss))

print('Finished Training')

# Test-set accuracy of the final model
correct = 0
total = 0
model.eval()
with torch.no_grad():
    for images, labels in testloader:
        predicted = model(images).argmax(dim=1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy on Test Set: %.2f %%' % (100 * correct / total))


# spect reg = 0.001 without weight decay


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.utils.data import DataLoader, Subset
import torchvision.models as models

# Transformations for the dataset: convert to a tensor, then shift/scale
# every channel with mean 0.5 and std 0.5
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))  # generic 0.5 mean/std per channel, not CIFAR-10's measured statistics
])

# Load CIFAR-10 training and testing datasets (stored under ./data, download enabled)
trainset = datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
testset = datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)

# Function to get indices of limited samples per class
def get_limited_indices(dataset, limit_per_class, num_classes=10):
    """Return the indices of the first ``limit_per_class`` samples of each class.

    Args:
        dataset: Indexable dataset whose items are ``(sample, label)`` pairs.
            When it exposes a ``targets`` sequence of the same length, labels
            are read from it so no sample has to be loaded or transformed.
        limit_per_class: Maximum number of indices kept per label.
        num_classes: Number of distinct labels; every label must lie in
            ``range(num_classes)``. Defaults to 10.

    Returns:
        list[int]: Selected indices in ascending dataset order.
    """
    if limit_per_class <= 0 or num_classes <= 0:
        return []

    labels = getattr(dataset, "targets", None)
    if labels is None or len(labels) != len(dataset):
        # No usable label list: fetch labels lazily, one item at a time.
        labels = (dataset[i][1] for i in range(len(dataset)))

    indices = []
    count_per_class = [0] * num_classes
    classes_remaining = num_classes  # classes that still need samples
    for i, label in enumerate(labels):
        label = int(label)
        if count_per_class[label] >= limit_per_class:
            continue
        indices.append(i)
        count_per_class[label] += 1
        if count_per_class[label] == limit_per_class:
            classes_remaining -= 1
            if classes_remaining == 0:
                # Every class is full; stop before touching further items.
                break
    return indices

# Keep at most 200 training samples per class
train_indices = get_limited_indices(trainset, 200)
trainset_limited = Subset(trainset, train_indices)

# Loaders: the reduced training subset is shuffled, the complete test set
# is read in its stored order
trainloader = DataLoader(trainset_limited, batch_size=64, shuffle=True)
testloader = DataLoader(testset, batch_size=64, shuffle=False)

# Define ResNet-18 model
class ResNet18(nn.Module):
    """Thin ``nn.Module`` wrapper around torchvision's ``resnet18``.

    The wrapped network is requested without pretrained weights and with a
    10-way output layer.
    """

    def __init__(self):
        super().__init__()
        # CIFAR-10 has 10 classes
        backbone = models.resnet18(pretrained=False, num_classes=10)
        self.resnet = backbone

    def forward(self, x):
        """Pass ``x`` through the wrapped network and return its output."""
        scores = self.resnet(x)
        return scores

# Initialize ResNet-18 model
model = ResNet18()

# Define loss function
criterion = nn.CrossEntropyLoss()

# SGD with momentum; no weight decay in this run
optimizer = optim.SGD(model.parameters(), lr=0.1, momentum=0.9)

# Learning rate scheduler (epoch based: stepped once per epoch below)
scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=[150, 250], gamma=0.1)

# Spectral regularization weight
spectral_weight = 0.001


def spectral_penalty(net):
    """Sum of the largest singular values of every Conv2d / Linear weight in ``net``.

    Each weight is flattened to ``(out_features, -1)`` before its spectral
    norm is taken. The result stays attached to the autograd graph so it can
    be added to the training loss.
    """
    penalty = 0
    for module in net.modules():
        if isinstance(module, (nn.Conv2d, nn.Linear)):
            weight = module.weight.flatten(1)
            penalty = penalty + torch.linalg.matrix_norm(weight, ord=2)
    return penalty


# Training loop
for epoch in range(33):  # Adjust the number of epochs as needed
    model.train()
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        inputs, labels = data

        # Zero the parameter gradients
        optimizer.zero_grad()

        # Forward pass
        outputs = model(inputs)

        # The penalty has to be part of the loss before backward() is called;
        # added afterwards it would change the logged value but not the gradients.
        spectreg_loss = spectral_penalty(model)
        loss = criterion(outputs, labels) + spectral_weight * spectreg_loss

        # Backward pass and parameter update
        loss.backward()
        optimizer.step()

        # Accumulate the regularised loss for the epoch report
        running_loss += loss.item()

    # MultiStepLR counts epochs: advance it once per epoch, with no argument
    scheduler.step()

    # Print epoch and training loss
    print('[Epoch %d] Training Loss: %.3f' % (epoch + 1, running_loss / len(trainloader)))

print('Finished Training')

# Evaluate on test set after completing training
correct = 0
total = 0
model.eval()
with torch.no_grad():
    for data in testloader:
        images, labels = data
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy on Test Set: %.2f %%' % (100 * correct / total))


# spect reg = 0.03 with weight decay = 0.0005


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.utils.data import DataLoader, Subset
import torchvision.models as models

# Transformations for the dataset: convert to a tensor, then shift/scale
# every channel with mean 0.5 and std 0.5
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))  # generic 0.5 mean/std per channel, not CIFAR-10's measured statistics
])

# Load CIFAR-10 training and testing datasets (stored under ./data, download enabled)
trainset = datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
testset = datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)

# Function to get indices of limited samples per class
def get_limited_indices(dataset, limit_per_class, num_classes=10):
    """Return the indices of the first ``limit_per_class`` samples of each class.

    Args:
        dataset: Indexable dataset whose items are ``(sample, label)`` pairs.
            When it exposes a ``targets`` sequence of the same length, labels
            are read from it so no sample has to be loaded or transformed.
        limit_per_class: Maximum number of indices kept per label.
        num_classes: Number of distinct labels; every label must lie in
            ``range(num_classes)``. Defaults to 10.

    Returns:
        list[int]: Selected indices in ascending dataset order.
    """
    if limit_per_class <= 0 or num_classes <= 0:
        return []

    labels = getattr(dataset, "targets", None)
    if labels is None or len(labels) != len(dataset):
        # No usable label list: fetch labels lazily, one item at a time.
        labels = (dataset[i][1] for i in range(len(dataset)))

    indices = []
    count_per_class = [0] * num_classes
    classes_remaining = num_classes  # classes that still need samples
    for i, label in enumerate(labels):
        label = int(label)
        if count_per_class[label] >= limit_per_class:
            continue
        indices.append(i)
        count_per_class[label] += 1
        if count_per_class[label] == limit_per_class:
            classes_remaining -= 1
            if classes_remaining == 0:
                # Every class is full; stop before touching further items.
                break
    return indices

# Keep at most 200 training samples per class
train_indices = get_limited_indices(trainset, 200)
trainset_limited = Subset(trainset, train_indices)

# Loaders: the reduced training subset is shuffled, the complete test set
# is read in its stored order
trainloader = DataLoader(trainset_limited, batch_size=64, shuffle=True)
testloader = DataLoader(testset, batch_size=64, shuffle=False)

# Define ResNet-18 model
class ResNet18(nn.Module):
    """Thin ``nn.Module`` wrapper around torchvision's ``resnet18``.

    The wrapped network is requested without pretrained weights and with a
    10-way output layer.
    """

    def __init__(self):
        super().__init__()
        # CIFAR-10 has 10 classes
        backbone = models.resnet18(pretrained=False, num_classes=10)
        self.resnet = backbone

    def forward(self, x):
        """Pass ``x`` through the wrapped network and return its output."""
        scores = self.resnet(x)
        return scores

# Initialize ResNet-18 model
model = ResNet18()

# Define loss function
criterion = nn.CrossEntropyLoss()

# Define SGD optimizer with weight decay
optimizer = optim.SGD(model.parameters(), lr=0.1, momentum=0.9, weight_decay=0.0005)

# Learning rate scheduler (epoch based: stepped once per epoch below)
scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=[150, 250], gamma=0.1)

# Spectral regularization weight
spectral_weight = 0.03


def spectral_penalty(net):
    """Sum of the largest singular values of every Conv2d / Linear weight in ``net``.

    Each weight is flattened to ``(out_features, -1)`` before its spectral
    norm is taken. The result stays attached to the autograd graph so it can
    be added to the training loss.
    """
    penalty = 0
    for module in net.modules():
        if isinstance(module, (nn.Conv2d, nn.Linear)):
            weight = module.weight.flatten(1)
            penalty = penalty + torch.linalg.matrix_norm(weight, ord=2)
    return penalty


# Training loop
for epoch in range(33):  # Adjust the number of epochs as needed
    model.train()
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        inputs, labels = data

        # Zero the parameter gradients
        optimizer.zero_grad()

        # Forward pass
        outputs = model(inputs)

        # The penalty has to be part of the loss before backward() is called;
        # added afterwards it would change the logged value but not the gradients.
        spectreg_loss = spectral_penalty(model)
        loss = criterion(outputs, labels) + spectral_weight * spectreg_loss

        # Backward pass and parameter update
        loss.backward()
        optimizer.step()

        # Accumulate the regularised loss for the epoch report
        running_loss += loss.item()

    # MultiStepLR counts epochs: advance it once per epoch, with no argument
    scheduler.step()

    # Print epoch and training loss
    print('[Epoch %d] Training Loss: %.3f' % (epoch + 1, running_loss / len(trainloader)))

print('Finished Training')

# Evaluate on test set after completing training
correct = 0
total = 0
model.eval()
with torch.no_grad():
    for data in testloader:
        images, labels = data
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy on Test Set: %.2f %%' % (100 * correct / total))


# CIFAR-100

# base line + without weight decay

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.utils.data import DataLoader, Subset
import torchvision.models as models

# Transformations for the dataset: convert to a tensor, then shift/scale
# every channel with mean 0.5 and std 0.5
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))  # generic 0.5 mean/std per channel, not CIFAR-100's measured statistics
])

# Load CIFAR-100 training and testing datasets (stored under ./data, download enabled)
trainset = datasets.CIFAR100(root='./data', train=True, download=True, transform=transform)
testset = datasets.CIFAR100(root='./data', train=False, download=True, transform=transform)

# Function to get indices of limited samples per class
def get_limited_indices(dataset, limit_per_class, num_classes=100):
    """Return the indices of the first ``limit_per_class`` samples of each class.

    Args:
        dataset: Indexable dataset whose items are ``(sample, label)`` pairs.
            When it exposes a ``targets`` sequence of the same length, labels
            are read from it so no sample has to be loaded or transformed.
        limit_per_class: Maximum number of indices kept per label.
        num_classes: Number of distinct labels; every label must lie in
            ``range(num_classes)``. Defaults to 100 (CIFAR-100).

    Returns:
        list[int]: Selected indices in ascending dataset order.
    """
    if limit_per_class <= 0 or num_classes <= 0:
        return []

    labels = getattr(dataset, "targets", None)
    if labels is None or len(labels) != len(dataset):
        # No usable label list: fetch labels lazily, one item at a time.
        labels = (dataset[i][1] for i in range(len(dataset)))

    indices = []
    count_per_class = [0] * num_classes
    classes_remaining = num_classes  # classes that still need samples
    for i, label in enumerate(labels):
        label = int(label)
        if count_per_class[label] >= limit_per_class:
            continue
        indices.append(i)
        count_per_class[label] += 1
        if count_per_class[label] == limit_per_class:
            classes_remaining -= 1
            if classes_remaining == 0:
                # Every class is full; stop before touching further items.
                break
    return indices

# Keep at most 20 training samples per class
train_indices = get_limited_indices(trainset, 20)
trainset_limited = Subset(trainset, train_indices)

# Shuffled loader over the reduced training subset
trainloader = DataLoader(trainset_limited, batch_size=64, shuffle=True)

# Evaluate on the first 1000 test samples only (taken in stored order,
# with no per-class balancing)
test_indices = range(1000)
testset_limited = Subset(testset, test_indices)
testloader = DataLoader(testset_limited, batch_size=64, shuffle=False)

# Define ResNet-18 model
class ResNet18(nn.Module):
    """Thin ``nn.Module`` wrapper around torchvision's ``resnet18``.

    The wrapped network is requested without pretrained weights and with a
    100-way output layer.
    """

    def __init__(self):
        super().__init__()
        # CIFAR-100 has 100 classes
        backbone = models.resnet18(pretrained=False, num_classes=100)
        self.resnet = backbone

    def forward(self, x):
        """Pass ``x`` through the wrapped network and return its output."""
        scores = self.resnet(x)
        return scores

# Fresh ResNet-18 wrapper for this run
model = ResNet18()

# Cross-entropy on the raw class scores
criterion = nn.CrossEntropyLoss()

# SGD with momentum; no weight_decay argument is passed in this run
optimizer = optim.SGD(model.parameters(), lr=0.1, momentum=0.9)

# Multiplies the LR by 0.1 at scheduler steps 150 and 250 (one step per epoch below)
scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=[150, 250], gamma=0.1)

# Training loop
for epoch in range(33):  # Adjust the number of epochs as needed
    model.train()
    running_loss = 0.0
    for inputs, labels in trainloader:
        optimizer.zero_grad()
        loss = criterion(model(inputs), labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    scheduler.step()

    # Mean of the per-batch losses seen in this epoch
    epoch_loss = running_loss / len(trainloader)
    print('[Epoch %d] Training Loss: %.3f' % (epoch + 1, epoch_loss))

print('Finished Training')

# Test-set accuracy of the final model
correct = 0
total = 0
model.eval()
with torch.no_grad():
    for images, labels in testloader:
        predicted = model(images).argmax(dim=1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy on Test Set: %.2f %%' % (100 * correct / total))


#base line + with weight decay = 0.003

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.utils.data import DataLoader, Subset
import torchvision.models as models

# Transformations for the dataset: convert to a tensor, then shift/scale
# every channel with mean 0.5 and std 0.5
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))  # generic 0.5 mean/std per channel, not CIFAR-100's measured statistics
])

# Load CIFAR-100 training and testing datasets (stored under ./data, download enabled)
trainset = datasets.CIFAR100(root='./data', train=True, download=True, transform=transform)
testset = datasets.CIFAR100(root='./data', train=False, download=True, transform=transform)

# Function to get indices of limited samples per class
def get_limited_indices(dataset, limit_per_class, num_classes=100):
    """Return the indices of the first ``limit_per_class`` samples of each class.

    Args:
        dataset: Indexable dataset whose items are ``(sample, label)`` pairs.
            When it exposes a ``targets`` sequence of the same length, labels
            are read from it so no sample has to be loaded or transformed.
        limit_per_class: Maximum number of indices kept per label.
        num_classes: Number of distinct labels; every label must lie in
            ``range(num_classes)``. Defaults to 100 (CIFAR-100).

    Returns:
        list[int]: Selected indices in ascending dataset order.
    """
    if limit_per_class <= 0 or num_classes <= 0:
        return []

    labels = getattr(dataset, "targets", None)
    if labels is None or len(labels) != len(dataset):
        # No usable label list: fetch labels lazily, one item at a time.
        labels = (dataset[i][1] for i in range(len(dataset)))

    indices = []
    count_per_class = [0] * num_classes
    classes_remaining = num_classes  # classes that still need samples
    for i, label in enumerate(labels):
        label = int(label)
        if count_per_class[label] >= limit_per_class:
            continue
        indices.append(i)
        count_per_class[label] += 1
        if count_per_class[label] == limit_per_class:
            classes_remaining -= 1
            if classes_remaining == 0:
                # Every class is full; stop before touching further items.
                break
    return indices

# Keep at most 20 training samples per class
train_indices = get_limited_indices(trainset, 20)
trainset_limited = Subset(trainset, train_indices)

# Shuffled loader over the reduced training subset
trainloader = DataLoader(trainset_limited, batch_size=64, shuffle=True)

# Evaluate on the first 1000 test samples only (taken in stored order,
# with no per-class balancing)
test_indices = range(1000)
testset_limited = Subset(testset, test_indices)
testloader = DataLoader(testset_limited, batch_size=64, shuffle=False)

# Define ResNet-18 model
class ResNet18(nn.Module):
    """Thin ``nn.Module`` wrapper around torchvision's ``resnet18``.

    The wrapped network is requested without pretrained weights and with a
    100-way output layer.
    """

    def __init__(self):
        super().__init__()
        # CIFAR-100 has 100 classes
        backbone = models.resnet18(pretrained=False, num_classes=100)
        self.resnet = backbone

    def forward(self, x):
        """Pass ``x`` through the wrapped network and return its output."""
        scores = self.resnet(x)
        return scores

# Fresh ResNet-18 wrapper for this run
model = ResNet18()

# Cross-entropy on the raw class scores
criterion = nn.CrossEntropyLoss()

# SGD with momentum and L2 weight decay of 0.003
optimizer = optim.SGD(model.parameters(), lr=0.1, momentum=0.9, weight_decay=0.003)

# Multiplies the LR by 0.1 at scheduler steps 150 and 250 (one step per epoch below)
scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=[150, 250], gamma=0.1)

# Training loop
for epoch in range(33):  # Adjust the number of epochs as needed
    model.train()
    running_loss = 0.0
    for inputs, labels in trainloader:
        optimizer.zero_grad()
        loss = criterion(model(inputs), labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    scheduler.step()

    # Mean of the per-batch losses seen in this epoch
    epoch_loss = running_loss / len(trainloader)
    print('[Epoch %d] Training Loss: %.3f' % (epoch + 1, epoch_loss))

print('Finished Training')

# Test-set accuracy of the final model
correct = 0
total = 0
model.eval()
with torch.no_grad():
    for images, labels in testloader:
        predicted = model(images).argmax(dim=1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy on Test Set: %.2f %%' % (100 * correct / total))


#spectreg = 0.001 + without weight decay

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.utils.data import DataLoader, Subset
import torchvision.models as models

# Transformations for the dataset: convert to a tensor, then shift/scale
# every channel with mean 0.5 and std 0.5
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))  # generic 0.5 mean/std per channel, not CIFAR-100's measured statistics
])

# Load CIFAR-100 training and testing datasets (stored under ./data, download enabled)
trainset = datasets.CIFAR100(root='./data', train=True, download=True, transform=transform)
testset = datasets.CIFAR100(root='./data', train=False, download=True, transform=transform)

# Function to get indices of limited samples per class
def get_limited_indices(dataset, limit_per_class, num_classes=100):
    """Return the indices of the first ``limit_per_class`` samples of each class.

    Args:
        dataset: Indexable dataset whose items are ``(sample, label)`` pairs.
            When it exposes a ``targets`` sequence of the same length, labels
            are read from it so no sample has to be loaded or transformed.
        limit_per_class: Maximum number of indices kept per label.
        num_classes: Number of distinct labels; every label must lie in
            ``range(num_classes)``. Defaults to 100 (CIFAR-100).

    Returns:
        list[int]: Selected indices in ascending dataset order.
    """
    if limit_per_class <= 0 or num_classes <= 0:
        return []

    labels = getattr(dataset, "targets", None)
    if labels is None or len(labels) != len(dataset):
        # No usable label list: fetch labels lazily, one item at a time.
        labels = (dataset[i][1] for i in range(len(dataset)))

    indices = []
    count_per_class = [0] * num_classes
    classes_remaining = num_classes  # classes that still need samples
    for i, label in enumerate(labels):
        label = int(label)
        if count_per_class[label] >= limit_per_class:
            continue
        indices.append(i)
        count_per_class[label] += 1
        if count_per_class[label] == limit_per_class:
            classes_remaining -= 1
            if classes_remaining == 0:
                # Every class is full; stop before touching further items.
                break
    return indices

# Keep at most 20 training samples per class
train_indices = get_limited_indices(trainset, 20)
trainset_limited = Subset(trainset, train_indices)

# Shuffled loader over the reduced training subset
trainloader = DataLoader(trainset_limited, batch_size=64, shuffle=True)

# Evaluate on the first 1000 test samples only (taken in stored order,
# with no per-class balancing)
test_indices = range(1000)
testset_limited = Subset(testset, test_indices)
testloader = DataLoader(testset_limited, batch_size=64, shuffle=False)

# Define ResNet-18 model
class ResNet18(nn.Module):
    """Thin ``nn.Module`` wrapper around torchvision's ``resnet18``.

    The wrapped network is requested without pretrained weights and with a
    100-way output layer.
    """

    def __init__(self):
        super().__init__()
        # CIFAR-100 has 100 classes
        backbone = models.resnet18(pretrained=False, num_classes=100)
        self.resnet = backbone

    def forward(self, x):
        """Pass ``x`` through the wrapped network and return its output."""
        scores = self.resnet(x)
        return scores

# Initialize ResNet-18 model
model = ResNet18()

# Define loss function
criterion = nn.CrossEntropyLoss()

# SGD with momentum; no weight_decay argument is passed in this run
optimizer = optim.SGD(model.parameters(), lr=0.1, momentum=0.9)

# Learning rate scheduler: LR is multiplied by 0.1 at steps 150 and 250
scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=[150, 250], gamma=0.1)

# Function to calculate spectral norm of weights
def calculate_spectral_norm(model):
    """Sum the spectral norms of all multi-dimensional parameters of ``model``.

    Each parameter with more than one dimension is flattened to a matrix of
    shape ``(param.size(0), -1)`` and contributes its largest singular value.
    One-dimensional parameters (e.g. biases) are skipped.

    Args:
        model: ``nn.Module`` whose parameters are inspected.

    Returns:
        A scalar tensor attached to the autograd graph, or the int ``0`` when
        the model has no multi-dimensional parameter.
    """
    spectral_norm = 0
    for param in model.parameters():
        if param.dim() > 1:  # Exclude biases
            # ord=2 is the largest singular value; the Frobenius norm used
            # before is only an upper bound on it.
            matrix = param.flatten(1)
            spectral_norm = spectral_norm + torch.linalg.matrix_norm(matrix, ord=2)
    return spectral_norm

# Strength of the weight-norm penalty added to every batch loss
spectral_weight = 0.001

# Training loop
for epoch in range(33):  # Adjust the number of epochs as needed
    model.train()
    running_loss = 0.0
    for inputs, labels in trainloader:
        optimizer.zero_grad()

        # Data term plus weighted penalty, combined before backward()
        data_loss = criterion(model(inputs), labels)
        spectral_norm = calculate_spectral_norm(model)
        loss = data_loss + spectral_weight * spectral_norm

        loss.backward()
        optimizer.step()

        # The logged loss includes the penalty term
        running_loss += loss.item()
    scheduler.step()

    epoch_loss = running_loss / len(trainloader)
    print('[Epoch %d] Training Loss: %.3f' % (epoch + 1, epoch_loss))

print('Finished Training')

# Test-set accuracy of the final model
correct = 0
total = 0
model.eval()
with torch.no_grad():
    for images, labels in testloader:
        predicted = model(images).argmax(dim=1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy on Test Set: %.2f %%' % (100 * correct / total))


#spectreg = 0.0003 + weight decay = 0.003

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.utils.data import DataLoader, Subset
import torchvision.models as models

# Transformations for the dataset: convert to a tensor, then shift/scale
# every channel with mean 0.5 and std 0.5
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))  # generic 0.5 mean/std per channel, not CIFAR-100's measured statistics
])

# Load CIFAR-100 training and testing datasets (stored under ./data, download enabled)
trainset = datasets.CIFAR100(root='./data', train=True, download=True, transform=transform)
testset = datasets.CIFAR100(root='./data', train=False, download=True, transform=transform)

# Function to get indices of limited samples per class
def get_limited_indices(dataset, limit_per_class, num_classes=100):
    """Return the indices of the first ``limit_per_class`` samples of each class.

    Args:
        dataset: Indexable dataset whose items are ``(sample, label)`` pairs.
            When it exposes a ``targets`` sequence of the same length, labels
            are read from it so no sample has to be loaded or transformed.
        limit_per_class: Maximum number of indices kept per label.
        num_classes: Number of distinct labels; every label must lie in
            ``range(num_classes)``. Defaults to 100 (CIFAR-100).

    Returns:
        list[int]: Selected indices in ascending dataset order.
    """
    if limit_per_class <= 0 or num_classes <= 0:
        return []

    labels = getattr(dataset, "targets", None)
    if labels is None or len(labels) != len(dataset):
        # No usable label list: fetch labels lazily, one item at a time.
        labels = (dataset[i][1] for i in range(len(dataset)))

    indices = []
    count_per_class = [0] * num_classes
    classes_remaining = num_classes  # classes that still need samples
    for i, label in enumerate(labels):
        label = int(label)
        if count_per_class[label] >= limit_per_class:
            continue
        indices.append(i)
        count_per_class[label] += 1
        if count_per_class[label] == limit_per_class:
            classes_remaining -= 1
            if classes_remaining == 0:
                # Every class is full; stop before touching further items.
                break
    return indices

# Keep at most 20 training samples per class
train_indices = get_limited_indices(trainset, 20)
trainset_limited = Subset(trainset, train_indices)

# Shuffled loader over the reduced training subset
trainloader = DataLoader(trainset_limited, batch_size=64, shuffle=True)

# Evaluate on the first 1000 test samples only (taken in stored order,
# with no per-class balancing)
test_indices = range(1000)
testset_limited = Subset(testset, test_indices)
testloader = DataLoader(testset_limited, batch_size=64, shuffle=False)

# Define ResNet-18 model
class ResNet18(nn.Module):
    """Thin ``nn.Module`` wrapper around torchvision's ``resnet18``.

    The wrapped network is requested without pretrained weights and with a
    100-way output layer.
    """

    def __init__(self):
        super().__init__()
        # CIFAR-100 has 100 classes
        backbone = models.resnet18(pretrained=False, num_classes=100)
        self.resnet = backbone

    def forward(self, x):
        """Pass ``x`` through the wrapped network and return its output."""
        scores = self.resnet(x)
        return scores

# Initialize ResNet-18 model
model = ResNet18()

# Define loss function
criterion = nn.CrossEntropyLoss()

# SGD with momentum and L2 weight decay of 0.003
optimizer = optim.SGD(model.parameters(), lr=0.1, momentum=0.9, weight_decay=0.003)

# Learning rate scheduler: LR is multiplied by 0.1 at steps 150 and 250
scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=[150, 250], gamma=0.1)

# Function to calculate spectral norm of weights
def calculate_spectral_norm(model):
    """Sum the spectral norms of all multi-dimensional parameters of ``model``.

    Each parameter with more than one dimension is flattened to a matrix of
    shape ``(param.size(0), -1)`` and contributes its largest singular value.
    One-dimensional parameters (e.g. biases) are skipped.

    Args:
        model: ``nn.Module`` whose parameters are inspected.

    Returns:
        A scalar tensor attached to the autograd graph, or the int ``0`` when
        the model has no multi-dimensional parameter.
    """
    spectral_norm = 0
    for param in model.parameters():
        if param.dim() > 1:  # Exclude biases
            # ord=2 is the largest singular value; the Frobenius norm used
            # before is only an upper bound on it.
            matrix = param.flatten(1)
            spectral_norm = spectral_norm + torch.linalg.matrix_norm(matrix, ord=2)
    return spectral_norm

# Strength of the weight-norm penalty added to every batch loss
spectral_weight = 0.0003

# Training loop
for epoch in range(33):  # Adjust the number of epochs as needed
    model.train()
    running_loss = 0.0
    for inputs, labels in trainloader:
        optimizer.zero_grad()

        # Data term plus weighted penalty, combined before backward()
        data_loss = criterion(model(inputs), labels)
        spectral_norm = calculate_spectral_norm(model)
        loss = data_loss + spectral_weight * spectral_norm

        loss.backward()
        optimizer.step()

        # The logged loss includes the penalty term
        running_loss += loss.item()
    scheduler.step()

    epoch_loss = running_loss / len(trainloader)
    print('[Epoch %d] Training Loss: %.3f' % (epoch + 1, epoch_loss))

print('Finished Training')

# Test-set accuracy of the final model
correct = 0
total = 0
model.eval()
with torch.no_grad():
    for images, labels in testloader:
        predicted = model(images).argmax(dim=1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy on Test Set: %.2f %%' % (100 * correct / total))


# Small_MNIST

# Baseline Model

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from torchvision import transforms

# Define a custom dataset class
class CustomDataset(Dataset):
    """Dataset over a DataFrame whose first column is the label and whose
    remaining 784 columns are the pixels of one 28x28 image.

    Args:
        data: DataFrame holding one sample per row.
        transform: Optional callable applied to the image array.
    """

    def __init__(self, data, transform=None):
        self.transform = transform
        self.data = data

    def __len__(self):
        """Number of rows in the wrapped DataFrame."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for row ``idx``.

        The image is a float array of shape (28, 28, 1), passed through
        ``transform`` when one is set; the label is a Python ``int``.
        """
        label = int(self.data.iloc[idx, 0])
        pixels = self.data.iloc[idx, 1:].values.astype(float)
        image = pixels.reshape((28, 28, 1))

        if not self.transform:
            return image, label
        return self.transform(image), label

# Load the small MNIST dataset from CSV (first column = label, remaining
# columns = pixels, as CustomDataset reads it)
mnist_df = pd.read_csv("small_mnist.csv")

# Split dataset into train and test sets (80% / 20%, fixed seed for repeatability)
train_df, test_df = train_test_split(mnist_df, test_size=0.2, random_state=42)

# Define transforms to normalize the data
# NOTE(review): CustomDataset hands ToTensor a float array; presumably ToTensor
# only rescales uint8 input to [0, 1], so pixel values would keep the CSV's
# range before Normalize — confirm these constants match that range.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,))
])

# Create custom datasets
train_dataset = CustomDataset(train_df, transform=transform)
test_dataset = CustomDataset(test_df, transform=transform)

# Create data loaders (only the training loader shuffles)
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64)

# Define LeNet-5 model
class LeNet5(nn.Module):
    """LeNet-5 style CNN: two 5x5 conv + 2x2 max-pool stages followed by
    three linear layers (120, 84, 10 outputs).

    ``forward`` flattens the conv features to 16 * 4 * 4 values per sample
    and applies dropout (p=0.5) after the first linear layer.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 6, kernel_size=5)
        self.conv2 = nn.Conv2d(6, 16, kernel_size=5)
        self.fc1 = nn.Linear(16 * 4 * 4, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)
        self.relu = nn.ReLU()
        self.maxpool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.dropout = nn.Dropout(0.5)  # Dropout with probability 0.5

    def forward(self, x):
        """Return the 10 raw class scores per sample; ``x`` is cast with ``.float()`` first."""
        features = x.float()
        for conv in (self.conv1, self.conv2):
            features = self.maxpool(self.relu(conv(features)))
        flat = features.view(-1, 16 * 4 * 4)
        hidden = self.dropout(self.relu(self.fc1(flat)))
        hidden = self.relu(self.fc2(hidden))
        return self.fc3(hidden)

# Initialize the model
model = LeNet5()

# Print total parameter count
total_params = sum(p.numel() for p in model.parameters())
print("Total parameters:", total_params)

# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Train the model
epochs = 100
train_losses = []  # Logged batch losses from every epoch after the first, averaged below
for epoch in range(epochs):
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        # Log (and, after epoch 0, record) the loss of every 100th batch
        if batch_idx % 100 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))
            if epoch > 0:  # Append loss only after epoch 0
                train_losses.append(loss.item())
    # Every 10 epochs, print the loss of the epoch's last batch
    if epoch % 10 == 0:
        print('Epoch {}, Loss: {:.6f}'.format(epoch, loss.item()))

# Evaluate the model on test set
model.eval()
test_loss = 0
correct = 0
with torch.no_grad():
    for data, target in test_loader:
        output = model(data)
        # criterion returns the mean over the batch; weight it by the batch
        # size so that dividing by the dataset size below yields the true
        # per-sample average (the last batch may be smaller).
        test_loss += criterion(output, target).item() * data.size(0)
        pred = output.argmax(dim=1, keepdim=True)
        correct += pred.eq(target.view_as(pred)).sum().item()

test_loss /= len(test_loader.dataset)
accuracy = 100. * correct / len(test_loader.dataset)

# Calculate average train loss (excluding epoch 0); NaN if nothing was recorded
average_train_loss = sum(train_losses) / len(train_losses) if train_losses else float('nan')

print('Test set: Average loss: {:.4f}, Accuracy: {}/{} ({:.2f}%)\n'.format(
    test_loss, correct, len(test_loader.dataset), accuracy))
print('Average train loss (excluding epoch 0): {:.4f}'.format(average_train_loss))



#Double Back with optimal weight = 50

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
import torchvision.transforms as transforms

# Custom Dataset Class
class CustomMNISTDataset(Dataset):
    """Dataset backed by a CSV file whose first column is the label and whose
    remaining 784 columns are the pixels of one 28x28 image.

    Args:
        csv_file: Path of the CSV file; it is read in full on construction.
        transform: Optional callable applied to the image array.
    """

    def __init__(self, csv_file, transform=None):
        self.transform = transform
        self.data = pd.read_csv(csv_file)

    def __len__(self):
        """Number of rows read from the CSV file."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for row ``idx``.

        The image is a float array of shape (28, 28), passed through
        ``transform`` when one is set; the label is the raw value of the
        first column.
        """
        label = self.data.iloc[idx, 0]
        pixels = self.data.iloc[idx, 1:].values.astype(float)
        image = pixels.reshape((28, 28))
        if not self.transform:
            return image, label
        return self.transform(image), label

# Image transform: tensor conversion followed by single-channel normalisation
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,))
])

# Load the dataset (the whole CSV is read when the dataset object is built)
dataset = CustomMNISTDataset(csv_file='small_mnist.csv', transform=transform)

# Split dataset into training and testing sets (80% train, 20% test)
# NOTE(review): train_test_split is given the Dataset object itself; presumably
# it indexes every item eagerly and returns plain Python lists of
# (image, label) pairs rather than Dataset objects — confirm.
trainset, testset = train_test_split(dataset, test_size=0.2, random_state=42)

# Create data loaders for training and testing sets (only training is shuffled)
trainloader = DataLoader(trainset, batch_size=64, shuffle=True)
testloader = DataLoader(testset, batch_size=64, shuffle=False)
import torch.optim.lr_scheduler as lr_scheduler

# Define LeNet architecture
class LeNet(nn.Module):
    """Small CNN: two convolution + max-pool stages followed by three linear layers."""

    def __init__(self):
        super().__init__()
        # Layer creation order determines how seeded random initialisation is consumed.
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5)
        self.fc1 = nn.Linear(in_features=16 * 4 * 4, out_features=120)
        self.fc2 = nn.Linear(in_features=120, out_features=84)
        self.fc3 = nn.Linear(in_features=84, out_features=10)

    def forward(self, x):
        """Map a batch of single-channel images to 10 output scores per sample."""
        # Cast to float32 so the input dtype matches the layer parameters.
        features = x.float()
        for conv in (self.conv1, self.conv2):
            features = self.pool(torch.relu(conv(features)))
        flat = features.view(-1, 16 * 4 * 4)
        hidden = torch.relu(self.fc1(flat))
        hidden = torch.relu(self.fc2(hidden))
        return self.fc3(hidden)

# Build the network and the loss.
net = LeNet()
criterion = nn.CrossEntropyLoss()

# Adam with lr=1e-3; no weight_decay argument is supplied.
optimizer = optim.Adam(net.parameters(), lr=0.001)

# Cuts the learning rate by 10x when the metric passed to step() stops improving.
scheduler = lr_scheduler.ReduceLROnPlateau(
    optimizer, mode='min', factor=0.1, patience=5, verbose=True, min_lr=1e-6
)

# Gradient-norm threshold: used both as the clipping limit and as the trigger
# for stepping the scheduler.
target_grad = 50

# Training loop
for epoch in range(100):
    running_loss = 0.0
    for inputs, labels in trainloader:
        optimizer.zero_grad()

        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()

        # clip_grad_norm_ clips in place and returns the total gradient norm.
        grad_norm = nn.utils.clip_grad_norm_(net.parameters(), max_norm=target_grad)
        optimizer.step()
        # The scheduler only sees the batch loss on steps whose norm exceeded the threshold.
        if grad_norm > target_grad:
            scheduler.step(loss)

        running_loss += loss.item()

    # Mean batch loss for the epoch.
    print('[Epoch %d] Training Loss: %.3f' % (epoch + 1, running_loss / len(trainloader)))

print('Finished Training')

# Count correct top-1 predictions over the held-out loader without tracking gradients.
correct = 0
total = 0
with torch.no_grad():
    for images, labels in testloader:
        # Index of the largest output per sample is the predicted class.
        predicted = torch.max(net(images).data, 1)[1]
        correct += (predicted == labels).sum().item()
        total += labels.size(0)

print('Accuracy on Test Set: %.2f %%' % (100 * correct / total))


#SpectReg with optimal weight = 0.03

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
from torch.utils.data import Dataset, DataLoader
import pandas as pd
import torch.optim.lr_scheduler as lr_scheduler

# Custom Dataset Class
class CustomMNISTDataset(Dataset):
    """Image dataset backed by a CSV file.

    Column 0 of every row is returned as the label; the remaining columns are
    reshaped into a 28x28 array that forms the image.
    """

    def __init__(self, csv_file, transform=None):
        """Load ``csv_file`` into a DataFrame and keep the optional image transform."""
        self.data = pd.read_csv(csv_file)
        self.transform = transform

    def __len__(self):
        """Number of rows (samples) in the loaded table."""
        return self.data.shape[0]

    def __getitem__(self, idx):
        """Return ``(image, label)`` for row ``idx``; the transform, when set, is applied to the image."""
        pixel_values = self.data.iloc[idx, 1:].values
        image = pixel_values.astype(float).reshape((28, 28))
        label = self.data.iloc[idx, 0]
        if not self.transform:
            return image, label
        return self.transform(image), label

# Preprocessing pipeline: tensor conversion followed by normalisation with the
# given mean and standard deviation.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,))
])

# Load the dataset
dataset = CustomMNISTDataset(csv_file='small_mnist.csv', transform=transform)

# Split dataset into training and testing sets (80% train, 20% test).
# A seeded generator makes the split identical on every run, so the reported
# test accuracy is measured on the same held-out samples each time.
train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size
trainset, testset = torch.utils.data.random_split(
    dataset, [train_size, test_size], generator=torch.Generator().manual_seed(42)
)

# Create data loaders for training and testing sets; only training batches are shuffled.
trainloader = DataLoader(trainset, batch_size=64, shuffle=True)
testloader = DataLoader(testset, batch_size=64, shuffle=False)

# Define LeNet architecture
class LeNet(nn.Module):
    """Small CNN: two convolution + max-pool stages followed by three linear layers."""

    def __init__(self):
        super().__init__()
        # Layer creation order determines how seeded random initialisation is consumed.
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5)
        self.fc1 = nn.Linear(in_features=16 * 4 * 4, out_features=120)
        self.fc2 = nn.Linear(in_features=120, out_features=84)
        self.fc3 = nn.Linear(in_features=84, out_features=10)

    def forward(self, x):
        """Map a batch of single-channel images to 10 output scores per sample."""
        # Cast to float32 so the input dtype matches the layer parameters.
        features = x.float()
        for conv in (self.conv1, self.conv2):
            features = self.pool(torch.relu(conv(features)))
        flat = features.view(-1, 16 * 4 * 4)
        hidden = torch.relu(self.fc1(flat))
        hidden = torch.relu(self.fc2(hidden))
        return self.fc3(hidden)

# Build the network and the loss.
net = LeNet()
criterion = nn.CrossEntropyLoss()

# Adam with lr=1e-3; no weight_decay argument is supplied.
optimizer = optim.Adam(net.parameters(), lr=0.001)

# Cuts the learning rate by 10x when the metric passed to step() stops improving.
scheduler = lr_scheduler.ReduceLROnPlateau(
    optimizer, mode='min', factor=0.1, patience=5, verbose=True, min_lr=1e-6
)

# Gradient-norm threshold: clipping limit and trigger for stepping the scheduler.
target_grad = 50

# Wrap the three fully connected layers with spectral normalisation.
for layer_name in ('fc1', 'fc2', 'fc3'):
    setattr(net, layer_name, nn.utils.spectral_norm(getattr(net, layer_name)))

# Training loop
for epoch in range(10):
    running_loss = 0.0
    for inputs, labels in trainloader:
        optimizer.zero_grad()

        outputs = net(inputs)
        loss = criterion(outputs, labels)

        # Regularisation term: sum of the L2 norms of each linear layer's `weight_u`.
        # NOTE(review): `weight_u` appears to be the normalised power-iteration
        # buffer kept by spectral_norm, in which case this term is a constant
        # with no gradient - confirm this is the intended regulariser.
        spectreg_loss = sum(
            torch.norm(module.weight_u, p=2)
            for module in net.modules()
            if isinstance(module, nn.Linear)
        )

        # The term enters the loss with weight 0.03.
        loss += 0.03 * spectreg_loss

        loss.backward()

        # clip_grad_norm_ clips in place and returns the total gradient norm.
        grad_norm = nn.utils.clip_grad_norm_(net.parameters(), max_norm=target_grad)
        optimizer.step()
        # The scheduler only sees the batch loss on steps whose norm exceeded the threshold.
        if grad_norm > target_grad:
            scheduler.step(loss)

        running_loss += loss.item()

    # Mean batch loss (including the regularisation term) for the epoch.
    print('[Epoch %d] Training Loss: %.3f' % (epoch + 1, running_loss / len(trainloader)))

print('Finished Training')

# Evaluate on test set after completing training.
# Switch to eval mode first: the spectral-norm wrapped layers otherwise keep
# running power iteration (and updating their buffers) on every forward pass,
# so the weights would drift while the test set is being scored.
net.eval()
correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data
        outputs = net(images)
        # Index of the largest output per sample is the predicted class.
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy on Test Set: %.2f %%' % (100 * correct / total))


#CP with optimal weight = 0.01

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from torchvision import transforms

# Define a custom dataset class
class CustomDataset(Dataset):
    """Image dataset over an in-memory table.

    ``data`` is indexed with ``.iloc``: column 0 is the label and the remaining
    columns are reshaped into a 28x28x1 image.
    """

    def __init__(self, data, transform=None):
        """Keep the table and the optional image transform."""
        self.data = data
        self.transform = transform

    def __len__(self):
        """Number of rows (samples) in the table."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for row ``idx``; the label is converted to a Python int."""
        label = int(self.data.iloc[idx, 0])
        pixel_values = self.data.iloc[idx, 1:].values
        image = pixel_values.astype(float).reshape((28, 28, 1))
        if not self.transform:
            return image, label
        return self.transform(image), label

# Preprocessing pipeline: tensor conversion followed by normalisation with the
# given mean and standard deviation.
transform = transforms.Compose(
    [transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))]
)

# Read the whole CSV once, then split its rows 80/20 with a fixed seed.
mnist_df = pd.read_csv("small_mnist.csv")
train_df, test_df = train_test_split(mnist_df, random_state=42, test_size=0.2)

# Wrap each split in a dataset that applies the preprocessing.
train_dataset = CustomDataset(train_df, transform=transform)
test_dataset = CustomDataset(test_df, transform=transform)

# Batch both splits; only the training batches are shuffled.
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=64)
test_loader = DataLoader(test_dataset, batch_size=64)

# Define LeNet-5 model
class LeNet5(nn.Module):
    """LeNet-5 style CNN with dropout (p=0.5) after the first fully connected layer."""

    def __init__(self):
        super().__init__()
        # Layer creation order determines how seeded random initialisation is consumed.
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5)
        self.fc1 = nn.Linear(in_features=16 * 4 * 4, out_features=120)
        self.fc2 = nn.Linear(in_features=120, out_features=84)
        self.fc3 = nn.Linear(in_features=84, out_features=10)
        self.relu = nn.ReLU()
        self.maxpool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.dropout = nn.Dropout(p=0.5)

    def forward(self, x):
        """Map a batch of single-channel images to 10 output scores per sample."""
        # Cast to float32 so the input dtype matches the layer parameters.
        features = x.float()
        for conv in (self.conv1, self.conv2):
            features = self.maxpool(self.relu(conv(features)))
        flat = features.view(-1, 16 * 4 * 4)
        hidden = self.dropout(self.relu(self.fc1(flat)))
        hidden = self.relu(self.fc2(hidden))
        return self.fc3(hidden)

# Initialize the model
model = LeNet5()

# Print total parameter count
total_params = sum(p.numel() for p in model.parameters())
print("Total parameters:", total_params)

# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Confidence Penalty (CP) regularization coefficient
cp_coefficient = 0.01  # You can adjust this value as needed

# Train the model
epochs = 100
train_losses = []  # Sampled batch losses (every 100th batch, epochs > 0) for the later average
for epoch in range(epochs):
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)

        # Confidence Penalty (CP) regularization.
        # `entropy` is the mean entropy H(p) of the predicted distributions. The
        # penalty is the *negative* entropy, so minimising the loss pushes the
        # entropy up and discourages over-confident outputs. (Adding +H instead
        # would reward confident, low-entropy predictions.)
        entropy = -torch.mean(torch.sum(torch.softmax(output, dim=1) * torch.log_softmax(output, dim=1), dim=1))
        confidence_penalty = -entropy
        loss += cp_coefficient * confidence_penalty

        loss.backward()
        optimizer.step()
        if batch_idx % 100 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))
            if epoch > 0:  # Append loss only after epoch 0
                train_losses.append(loss.item())
    # Print the last batch loss every 10 epochs
    if epoch % 10 == 0:
        print('Epoch {}, Loss: {:.6f}'.format(epoch, loss.item()))

# Evaluate the model on test set
model.eval()
test_loss = 0
correct = 0
with torch.no_grad():
    for data, target in test_loader:
        output = model(data)
        # `criterion` returns the mean loss of the batch; weight it by the batch
        # size so that dividing by the dataset size below yields a true
        # per-sample average instead of a value ~batch_size times too small.
        test_loss += criterion(output, target).item() * target.size(0)
        pred = output.argmax(dim=1, keepdim=True)
        correct += pred.eq(target.view_as(pred)).sum().item()

test_loss /= len(test_loader.dataset)
accuracy = 100. * correct / len(test_loader.dataset)

# Calculate average train loss (excluding epoch 0); NaN when nothing was recorded
# rather than a ZeroDivisionError.
average_train_loss = sum(train_losses) / len(train_losses) if train_losses else float('nan')

print('Test set: Average loss: {:.4f}, Accuracy: {}/{} ({:.2f}%)\n'.format(
    test_loss, correct, len(test_loader.dataset), accuracy))
print('Average train loss (excluding epoch 0): {:.4f}'.format(average_train_loss))


#CP= 0.01 + DoubleBack = 10

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
import torchvision.transforms as transforms
import torch.optim.lr_scheduler as lr_scheduler

# Custom Dataset Class
class CustomMNISTDataset(Dataset):
    """Image dataset backed by a CSV file.

    Column 0 of every row is returned as the label; the remaining columns are
    reshaped into a 28x28 array that forms the image.
    """

    def __init__(self, csv_file, transform=None):
        """Load ``csv_file`` into a DataFrame and keep the optional image transform."""
        self.data = pd.read_csv(csv_file)
        self.transform = transform

    def __len__(self):
        """Number of rows (samples) in the loaded table."""
        return self.data.shape[0]

    def __getitem__(self, idx):
        """Return ``(image, label)`` for row ``idx``; the transform, when set, is applied to the image."""
        pixel_values = self.data.iloc[idx, 1:].values
        image = pixel_values.astype(float).reshape((28, 28))
        label = self.data.iloc[idx, 0]
        if not self.transform:
            return image, label
        return self.transform(image), label

# Preprocessing pipeline: tensor conversion followed by normalisation with the
# given mean and standard deviation.
transform = transforms.Compose(
    [transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))]
)

# Read the CSV-backed dataset with the preprocessing attached.
dataset = CustomMNISTDataset('small_mnist.csv', transform=transform)

# Hold out 20% of the samples for testing; the seed is fixed so the split repeats.
# NOTE(review): train_test_split receives the Dataset object itself rather than
# an array - verify which container types it returns for trainset/testset.
trainset, testset = train_test_split(dataset, random_state=42, test_size=0.2)

# Batch both splits; only the training batches are shuffled.
trainloader = DataLoader(trainset, shuffle=True, batch_size=64)
testloader = DataLoader(testset, shuffle=False, batch_size=64)

# Define LeNet architecture
class LeNet(nn.Module):
    """Small CNN: two convolution + max-pool stages followed by three linear layers."""

    def __init__(self):
        super().__init__()
        # Layer creation order determines how seeded random initialisation is consumed.
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5)
        self.fc1 = nn.Linear(in_features=16 * 4 * 4, out_features=120)
        self.fc2 = nn.Linear(in_features=120, out_features=84)
        self.fc3 = nn.Linear(in_features=84, out_features=10)

    def forward(self, x):
        """Map a batch of single-channel images to 10 output scores per sample."""
        # Cast to float32 so the input dtype matches the layer parameters.
        features = x.float()
        for conv in (self.conv1, self.conv2):
            features = self.pool(torch.relu(conv(features)))
        flat = features.view(-1, 16 * 4 * 4)
        hidden = torch.relu(self.fc1(flat))
        hidden = torch.relu(self.fc2(hidden))
        return self.fc3(hidden)

# Initialize LeNet model
net = LeNet()

# Define loss function
criterion = nn.CrossEntropyLoss()

# Adam with lr=1e-3; no weight_decay argument is supplied.
optimizer = optim.Adam(net.parameters(), lr=0.001)

# Learning rate scheduler (created here but never stepped by the loop below).
scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=5, verbose=True, min_lr=1e-6)

# Define the parameters for double back and confidence penalty regularization
double_back_weight = 10
cp_weight = 0.01

# Clipping threshold for the gradient norm
target_grad = 50

# Training loop
for epoch in range(100):  # loop over the dataset multiple times
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        inputs, labels = data

        # Zero the parameter gradients
        optimizer.zero_grad()

        # Forward pass
        outputs = net(inputs)
        loss = criterion(outputs, labels)

        # Confidence Penalty (CP) regularization. It has to be part of the loss
        # *before* backward() is called, otherwise it never reaches the gradients.
        # The penalty is the negative mean entropy, so minimising the loss
        # discourages over-confident (low-entropy) predictions.
        entropy = -torch.mean(torch.sum(torch.softmax(outputs, dim=1) * torch.log_softmax(outputs, dim=1), dim=1))
        loss = loss - cp_weight * entropy

        # Backward pass
        loss.backward()

        # Measure (and clip) the gradient norm now that gradients exist; doing
        # this before backward() always yields a norm of 0.
        grad_norm = nn.utils.clip_grad_norm_(net.parameters(), max_norm=target_grad)

        # Rescale the gradients by double_back_weight / measured norm
        # (epsilon guards against a zero norm).
        grad_scale = double_back_weight / (grad_norm + 1e-8)
        for param in net.parameters():
            if param.grad is not None:
                param.grad *= grad_scale

        # Update parameters
        optimizer.step()

        # Accumulate the regularised batch loss for reporting
        running_loss += loss.item()

    # Print epoch and training loss
    print('[Epoch %d] Training Loss: %.3f' % (epoch + 1, running_loss / len(trainloader)))

print('Finished Training')

# Count correct top-1 predictions over the held-out loader without tracking gradients.
correct = 0
total = 0
with torch.no_grad():
    for images, labels in testloader:
        # Index of the largest output per sample is the predicted class.
        predicted = torch.max(net(images).data, 1)[1]
        correct += (predicted == labels).sum().item()
        total += labels.size(0)

print('Accuracy on Test Set: %.2f %%' % (100 * correct / total))


#CP = 0.01 + Spectral weight = 0.03

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
from torch.utils.data import Dataset, DataLoader
import pandas as pd
import torch.optim.lr_scheduler as lr_scheduler

# Custom Dataset Class
class CustomMNISTDataset(Dataset):
    """Image dataset backed by a CSV file.

    Column 0 of every row is returned as the label; the remaining columns are
    reshaped into a 28x28 array that forms the image.
    """

    def __init__(self, csv_file, transform=None):
        """Load ``csv_file`` into a DataFrame and keep the optional image transform."""
        self.data = pd.read_csv(csv_file)
        self.transform = transform

    def __len__(self):
        """Number of rows (samples) in the loaded table."""
        return self.data.shape[0]

    def __getitem__(self, idx):
        """Return ``(image, label)`` for row ``idx``; the transform, when set, is applied to the image."""
        pixel_values = self.data.iloc[idx, 1:].values
        image = pixel_values.astype(float).reshape((28, 28))
        label = self.data.iloc[idx, 0]
        if not self.transform:
            return image, label
        return self.transform(image), label

# Preprocessing pipeline: tensor conversion followed by normalisation with the
# given mean and standard deviation.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,))
])

# Load the dataset
dataset = CustomMNISTDataset(csv_file='small_mnist.csv', transform=transform)

# Split dataset into training and testing sets (80% train, 20% test).
# A seeded generator makes the split identical on every run, so the reported
# test accuracy is measured on the same held-out samples each time.
train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size
trainset, testset = torch.utils.data.random_split(
    dataset, [train_size, test_size], generator=torch.Generator().manual_seed(42)
)

# Create data loaders for training and testing sets; only training batches are shuffled.
trainloader = DataLoader(trainset, batch_size=64, shuffle=True)
testloader = DataLoader(testset, batch_size=64, shuffle=False)

# Define LeNet architecture
class LeNet(nn.Module):
    """Small CNN: two convolution + max-pool stages followed by three linear layers."""

    def __init__(self):
        super().__init__()
        # Layer creation order determines how seeded random initialisation is consumed.
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5)
        self.fc1 = nn.Linear(in_features=16 * 4 * 4, out_features=120)
        self.fc2 = nn.Linear(in_features=120, out_features=84)
        self.fc3 = nn.Linear(in_features=84, out_features=10)

    def forward(self, x):
        """Map a batch of single-channel images to 10 output scores per sample."""
        # Cast to float32 so the input dtype matches the layer parameters.
        features = x.float()
        for conv in (self.conv1, self.conv2):
            features = self.pool(torch.relu(conv(features)))
        flat = features.view(-1, 16 * 4 * 4)
        hidden = torch.relu(self.fc1(flat))
        hidden = torch.relu(self.fc2(hidden))
        return self.fc3(hidden)

# Initialize LeNet model
net = LeNet()

# Define loss function
criterion = nn.CrossEntropyLoss()

# Adam with lr=1e-3; no weight_decay argument is supplied.
optimizer = optim.Adam(net.parameters(), lr=0.001)

# Learning rate scheduler
scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=5, verbose=True, min_lr=1e-6)

# Gradient-norm threshold: clipping limit and trigger for stepping the scheduler.
target_grad = 50

# Apply spectral normalization to the linear layers
net.fc1 = nn.utils.spectral_norm(net.fc1)
net.fc2 = nn.utils.spectral_norm(net.fc2)
net.fc3 = nn.utils.spectral_norm(net.fc3)

# Training loop
for epoch in range(10):  # loop over the dataset multiple times
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        inputs, labels = data

        # Zero the parameter gradients
        optimizer.zero_grad()

        # Forward pass
        outputs = net(inputs)
        loss = criterion(outputs, labels)

        # Confidence Penalty (CP) regularization.
        # `entropy` is the mean entropy of the predicted distributions; it is
        # *subtracted* so that minimising the loss raises the entropy and
        # discourages over-confident outputs. (Adding it would reward confidence.)
        entropy = -torch.mean(torch.sum(torch.softmax(outputs, dim=1) * torch.log_softmax(outputs, dim=1), dim=1))
        loss = loss - 0.01 * entropy

        # Spectral regularization: sum of the L2 norms of each linear layer's `weight_u`.
        # NOTE(review): `weight_u` appears to be the normalised power-iteration
        # buffer kept by spectral_norm, in which case this term is a constant
        # with no gradient - confirm this is the intended regulariser.
        spectreg_loss = 0
        for name, module in net.named_modules():
            if isinstance(module, nn.Linear):
                spectreg_loss += torch.norm(module.weight_u, p=2)

        # Add the spectral regularization loss to the main loss
        loss += 0.03 * spectreg_loss

        # Backward pass
        loss.backward()

        # clip_grad_norm_ clips in place and returns the total gradient norm;
        # the scheduler is only stepped when that norm exceeded the threshold.
        grad_norm = nn.utils.clip_grad_norm_(net.parameters(), max_norm=target_grad)
        optimizer.step()
        if grad_norm > target_grad:
            scheduler.step(loss)

        # Accumulate the regularised batch loss for reporting
        running_loss += loss.item()

    # Print epoch and training loss
    print('[Epoch %d] Training Loss: %.3f' % (epoch + 1, running_loss / len(trainloader)))

print('Finished Training')

# Evaluate on test set after completing training.
# Switch to eval mode first: the spectral-norm wrapped layers otherwise keep
# running power iteration (and updating their buffers) on every forward pass,
# so the weights would drift while the test set is being scored.
net.eval()
correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data
        outputs = net(images)
        # Index of the largest output per sample is the predicted class.
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy on Test Set: %.2f %%' % (100 * correct / total))


#cifar-10

#Baseline Model

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.utils.data import DataLoader, Subset
import torchvision.models as models

# Preprocessing: tensor conversion, then per-channel normalisation with mean 0.5
# and std 0.5 (fixed constants, not statistics computed from the data).
transform = transforms.Compose(
    [transforms.ToTensor(), transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]
)

# CIFAR-10 train and test splits, downloaded into ./data when missing.
trainset = datasets.CIFAR10(root='./data', train=True, transform=transform, download=True)
testset = datasets.CIFAR10(root='./data', train=False, transform=transform, download=True)

# Function to get indices of limited samples per class
def get_limited_indices(dataset, limit_per_class):
    """Collect indices of the first ``limit_per_class`` samples of each of 10 classes.

    ``dataset[i]`` must yield a ``(sample, label)`` pair whose label is an
    integer usable as an index into a 10-slot counter. Scanning stops as soon as
    every class has reached the limit. Returns the selected indices in
    ascending order.
    """
    per_class_counts = [0 for _ in range(10)]
    selected = []
    for position in range(len(dataset)):
        _sample, label = dataset[position]
        if per_class_counts[label] < limit_per_class:
            per_class_counts[label] += 1
            selected.append(position)
        # The least-filled class reaching the limit means all classes have.
        if min(per_class_counts) >= limit_per_class:
            break
    return selected

# Restrict training to the first 200 samples found for each class.
train_indices = get_limited_indices(trainset, limit_per_class=200)
trainset_limited = Subset(trainset, train_indices)

# Batch the reduced training set (shuffled) and the full test set (in order).
trainloader = DataLoader(trainset_limited, shuffle=True, batch_size=64)
testloader = DataLoader(testset, shuffle=False, batch_size=64)

# Define ResNet-18 model
class ResNet18(nn.Module):
    """Thin wrapper around ``models.resnet18`` built without pretrained weights and with 10 outputs."""

    def __init__(self):
        super().__init__()
        self.resnet = models.resnet18(num_classes=10, pretrained=False)

    def forward(self, x):
        """Run the wrapped network on ``x`` and return its output."""
        backbone = self.resnet
        return backbone(x)

# Build the network and the loss.
model = ResNet18()
criterion = nn.CrossEntropyLoss()

# SGD with momentum and weight decay.
optimizer = optim.SGD(model.parameters(), lr=0.1, momentum=0.9, weight_decay=0.003)

# Step schedule with drops at epochs 150 and 250; with the 33 epochs run below
# neither milestone is reached.
scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=[150, 250], gamma=0.1)

# Training loop
for epoch in range(33):
    model.train()
    running_loss = 0.0
    for inputs, labels in trainloader:
        optimizer.zero_grad()

        outputs = model(inputs)
        loss = criterion(outputs, labels)

        loss.backward()
        optimizer.step()

        running_loss += loss.item()
    # Advance the schedule once per epoch.
    scheduler.step()

    # Mean batch loss for the epoch.
    print('[Epoch %d] Training Loss: %.3f' % (epoch + 1, running_loss / len(trainloader)))

print('Finished Training')

# Count correct top-1 predictions over the test loader in eval mode, without gradients.
correct = 0
total = 0
model.eval()
with torch.no_grad():
    for images, labels in testloader:
        # Index of the largest output per sample is the predicted class.
        predicted = torch.max(model(images).data, 1)[1]
        correct += (predicted == labels).sum().item()
        total += labels.size(0)

print('Accuracy on Test Set: %.2f %%' % (100 * correct / total))


#Double back optimal weight = 1

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.utils.data import DataLoader, Subset
import torchvision.models as models
import torch.optim.lr_scheduler as lr_scheduler
import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
import torchvision.transforms as transforms

# Preprocessing: tensor conversion, then per-channel normalisation with mean 0.5
# and std 0.5 (fixed constants, not statistics computed from the data).
transform = transforms.Compose(
    [transforms.ToTensor(), transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]
)

# CIFAR-10 train and test splits, downloaded into ./data when missing.
trainset = datasets.CIFAR10(root='./data', train=True, transform=transform, download=True)
testset = datasets.CIFAR10(root='./data', train=False, transform=transform, download=True)

# Function to get indices of limited samples per class
def get_limited_indices(dataset, limit_per_class):
    """Collect indices of the first ``limit_per_class`` samples of each of 10 classes.

    ``dataset[i]`` must yield a ``(sample, label)`` pair whose label is an
    integer usable as an index into a 10-slot counter. Scanning stops as soon as
    every class has reached the limit. Returns the selected indices in
    ascending order.
    """
    per_class_counts = [0 for _ in range(10)]
    selected = []
    for position in range(len(dataset)):
        _sample, label = dataset[position]
        if per_class_counts[label] < limit_per_class:
            per_class_counts[label] += 1
            selected.append(position)
        # The least-filled class reaching the limit means all classes have.
        if min(per_class_counts) >= limit_per_class:
            break
    return selected

# Restrict training to the first 200 samples found for each class.
train_indices = get_limited_indices(trainset, limit_per_class=200)
trainset_limited = Subset(trainset, train_indices)

# Batch the reduced training set (shuffled) and the full test set (in order).
trainloader = DataLoader(trainset_limited, shuffle=True, batch_size=64)
testloader = DataLoader(testset, shuffle=False, batch_size=64)

# Define ResNet-18 model
class ResNet18(nn.Module):
    """Thin wrapper around ``models.resnet18`` built without pretrained weights and with 10 outputs."""

    def __init__(self):
        super().__init__()
        self.resnet = models.resnet18(num_classes=10, pretrained=False)

    def forward(self, x):
        """Run the wrapped network on ``x`` and return its output."""
        backbone = self.resnet
        return backbone(x)

# Initialize ResNet-18 model
model = ResNet18()

# Define loss function
criterion = nn.CrossEntropyLoss()

# Define SGD optimizer with weight decay
optimizer = optim.SGD(model.parameters(), lr=0.1, momentum=0.9, weight_decay=0.003)

# Step schedule with drops at epochs 150 and 250; with the 33 epochs run below
# neither milestone is reached.
scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=[150, 250], gamma=0.1)

# Gradient-norm clipping threshold
target_grad = 1

# Training loop
for epoch in range(33):  # Adjust the number of epochs as needed
    model.train()
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        inputs, labels = data
        # NOTE(review): gradients w.r.t. the inputs are enabled here but never
        # read below, so no input-gradient penalty is actually applied - confirm
        # whether one was intended.
        inputs.requires_grad_(True)

        # Zero the parameter gradients
        optimizer.zero_grad()

        # Forward pass
        outputs = model(inputs)
        loss = criterion(outputs, labels)

        # Backward pass
        loss.backward()

        # Clip the parameter gradients to the threshold, then update.
        grad_norm = nn.utils.clip_grad_norm_(model.parameters(), max_norm=target_grad)
        optimizer.step()

        # Accumulate the batch loss for reporting
        running_loss += loss.item()

    # MultiStepLR counts epochs: step it once per epoch and without arguments.
    # (Passing the loss to step() would be interpreted as an epoch index.)
    scheduler.step()

    # Print epoch and training loss
    print('[Epoch %d] Training Loss: %.3f' % (epoch + 1, running_loss / len(trainloader)))

print('Finished Training')

# Count correct top-1 predictions over the test loader in eval mode, without gradients.
correct = 0
total = 0
model.eval()
with torch.no_grad():
    for images, labels in testloader:
        # Index of the largest output per sample is the predicted class.
        predicted = torch.max(model(images).data, 1)[1]
        correct += (predicted == labels).sum().item()
        total += labels.size(0)

print('Accuracy on Test Set: %.2f %%' % (100 * correct / total))


#spectral = 0.03

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.utils.data import DataLoader, Subset
import torchvision.models as models

# Preprocessing: tensor conversion, then per-channel normalisation with mean 0.5
# and std 0.5 (fixed constants, not statistics computed from the data).
transform = transforms.Compose(
    [transforms.ToTensor(), transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]
)

# CIFAR-10 train and test splits, downloaded into ./data when missing.
trainset = datasets.CIFAR10(root='./data', train=True, transform=transform, download=True)
testset = datasets.CIFAR10(root='./data', train=False, transform=transform, download=True)

# Function to get indices of limited samples per class
def get_limited_indices(dataset, limit_per_class):
    """Collect indices of the first ``limit_per_class`` samples of each of 10 classes.

    ``dataset[i]`` must yield a ``(sample, label)`` pair whose label is an
    integer usable as an index into a 10-slot counter. Scanning stops as soon as
    every class has reached the limit. Returns the selected indices in
    ascending order.
    """
    per_class_counts = [0 for _ in range(10)]
    selected = []
    for position in range(len(dataset)):
        _sample, label = dataset[position]
        if per_class_counts[label] < limit_per_class:
            per_class_counts[label] += 1
            selected.append(position)
        # The least-filled class reaching the limit means all classes have.
        if min(per_class_counts) >= limit_per_class:
            break
    return selected

# Restrict training to the first 200 samples found for each class.
train_indices = get_limited_indices(trainset, limit_per_class=200)
trainset_limited = Subset(trainset, train_indices)

# Batch the reduced training set (shuffled) and the full test set (in order).
trainloader = DataLoader(trainset_limited, shuffle=True, batch_size=64)
testloader = DataLoader(testset, shuffle=False, batch_size=64)

# Define ResNet-18 model
class ResNet18(nn.Module):
    """Thin wrapper around ``models.resnet18`` built without pretrained weights and with 10 outputs."""

    def __init__(self):
        super().__init__()
        self.resnet = models.resnet18(num_classes=10, pretrained=False)

    def forward(self, x):
        """Run the wrapped network on ``x`` and return its output."""
        backbone = self.resnet
        return backbone(x)

# Initialize ResNet-18 model
model = ResNet18()

# Define loss function
criterion = nn.CrossEntropyLoss()

# Define SGD optimizer with weight decay
optimizer = optim.SGD(model.parameters(), lr=0.1, momentum=0.9, weight_decay=0.003)

# Step schedule with drops at epochs 150 and 250; with the 33 epochs run below
# neither milestone is reached.
scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=[150, 250], gamma=0.1)

# Gradient-norm clipping threshold
target_grad = 1

# Spectral regularization weight
spectral_weight = 0.03

# Training loop
for epoch in range(33):  # Adjust the number of epochs as needed
    model.train()
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        inputs, labels = data
        inputs.requires_grad_(True)  # Set requires_grad=True for inputs

        # Zero the parameter gradients
        optimizer.zero_grad()

        # Forward pass
        outputs = model(inputs)
        loss = criterion(outputs, labels)

        # Spectral regularization: sum of the largest singular value of every
        # conv / linear weight, each flattened to (out_channels, -1).
        # The norm of the U factor of an SVD is constant, so it cannot act as a
        # regulariser; the spectral norm itself is penalised instead.
        spectreg_loss = 0
        for name, module in model.named_modules():
            if isinstance(module, (nn.Conv2d, nn.Linear)):
                weight = module.weight.view(module.weight.size(0), -1)
                spectreg_loss = spectreg_loss + torch.linalg.matrix_norm(weight, ord=2)

        # The penalty must be part of the loss before backward() so that it
        # contributes to the gradients.
        loss = loss + spectral_weight * spectreg_loss

        # Backward pass
        loss.backward()

        # Clip the parameter gradients to the threshold, then update.
        grad_norm = nn.utils.clip_grad_norm_(model.parameters(), max_norm=target_grad)
        optimizer.step()

        # Accumulate the regularised batch loss for reporting
        running_loss += loss.item()

    # MultiStepLR counts epochs: step it once per epoch and without arguments.
    # (Passing the loss to step() would be interpreted as an epoch index.)
    scheduler.step()

    # Print epoch and training loss
    print('[Epoch %d] Training Loss: %.3f' % (epoch + 1, running_loss / len(trainloader)))

print('Finished Training')

# Count correct top-1 predictions over the test loader in eval mode, without gradients.
correct = 0
total = 0
model.eval()
with torch.no_grad():
    for images, labels in testloader:
        # Index of the largest output per sample is the predicted class.
        predicted = torch.max(model(images).data, 1)[1]
        correct += (predicted == labels).sum().item()
        total += labels.size(0)

print('Accuracy on Test Set: %.2f %%' % (100 * correct / total))


#Cp with optimal weight = 0.003

In [None]:
# import torch
# import torch.nn as nn
# import torch.optim as optim
# import torchvision.transforms as transforms
# import torchvision.datasets as datasets
# from torch.utils.data import DataLoader, Subset
# import torchvision.models as models

# # Transformations for the dataset
# transform = transforms.Compose([
#     transforms.ToTensor(),
#     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))  # CIFAR-10 mean and std for each channel
# ])

# # Load CIFAR-10 training and testing datasets
# trainset = datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
# testset = datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)

# Function to get indices of limited samples per class
def get_limited_indices(dataset, limit_per_class, num_classes=10):
    """Collect the first `limit_per_class` sample indices of every class.

    The dataset is scanned in index order; scanning stops as soon as all
    `num_classes` classes have reached their quota.

    Args:
        dataset: Sized, indexable collection whose items unpack as
            ``(sample, label)``; each label indexes a per-class counter list.
        limit_per_class: Maximum number of indices kept for each class.
        num_classes: Number of labels to track. Defaults to 10, the value
            that was previously hard-coded.

    Returns:
        list: Selected dataset indices in ascending order.
    """
    indices = []
    if limit_per_class <= 0:
        # No sample can be selected, so there is nothing to scan.
        return indices

    count_per_class = [0] * num_classes
    classes_filled = 0  # classes that already hold `limit_per_class` samples
    for i in range(len(dataset)):
        _, label = dataset[i]
        if count_per_class[label] < limit_per_class:
            indices.append(i)
            count_per_class[label] += 1
            if count_per_class[label] >= limit_per_class:
                # This class just became full; a running counter avoids
                # re-scanning every class after each sample.
                classes_filled += 1
                if classes_filled == num_classes:
                    break
    return indices

# Keep at most 200 training samples per class
train_indices = get_limited_indices(trainset, limit_per_class=200)
trainset_limited = Subset(dataset=trainset, indices=train_indices)

# Batches of 64: the reduced training set is reshuffled every epoch,
# the full test set is read in a fixed order
trainloader = DataLoader(dataset=trainset_limited, batch_size=64, shuffle=True)
testloader = DataLoader(dataset=testset, batch_size=64, shuffle=False)

# ResNet-18 wrapper module
class ResNet18(nn.Module):
    """Wraps torchvision's `resnet18` built with a 10-way output layer."""

    def __init__(self):
        super().__init__()
        # pretrained=False: no pretrained weights are loaded; num_classes=10 for CIFAR-10
        backbone = models.resnet18(pretrained=False, num_classes=10)
        self.resnet = backbone

    def forward(self, x):
        """Run `x` through the wrapped network and return its output."""
        logits = self.resnet(x)
        return logits

# Instantiate the ResNet18 wrapper defined above; this is the network that the
# rest of this cell trains and evaluates.
model = ResNet18()

# Define loss function with CP regularization
class CPRegularizedLoss(nn.Module):
    """Base criterion with a confidence penalty on the predicted distribution.

    The loss is ``base_loss - weight * H(softmax(outputs))``, where ``H`` is
    the batch-mean entropy of the predictions. Subtracting the entropy makes
    low-entropy (over-confident) outputs more expensive. Adding it, as this
    class previously did, rewards confident outputs instead.

    Args:
        base_criterion: Callable ``(outputs, labels) -> scalar loss``.
        confidence_penalty_weight: Non-negative weight of the entropy term.
    """

    def __init__(self, base_criterion, confidence_penalty_weight):
        super(CPRegularizedLoss, self).__init__()
        self.base_criterion = base_criterion
        self.confidence_penalty_weight = confidence_penalty_weight

    def forward(self, outputs, labels):
        """Return the penalised loss for class scores `outputs` (classes on dim 1)."""
        base_loss = self.base_criterion(outputs, labels)
        softmax_outputs = nn.functional.softmax(outputs, dim=1)
        # Batch-mean entropy; the 1e-10 offset keeps log() finite when a probability is 0.
        entropy = -torch.mean(torch.sum(softmax_outputs * torch.log(softmax_outputs + 1e-10), dim=1))
        # Confidence penalty = negative entropy, so the entropy is subtracted.
        return base_loss - self.confidence_penalty_weight * entropy

# Cross-entropy wrapped in the CP-regularised loss (penalty weight 0.003)
criterion = CPRegularizedLoss(
    base_criterion=nn.CrossEntropyLoss(),
    confidence_penalty_weight=0.003,
)

# SGD with momentum and weight decay
optimizer = optim.SGD(model.parameters(), lr=0.1, momentum=0.9, weight_decay=0.003)

# Step schedule: LR is multiplied by 0.1 at epochs 150 and 250
scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=[150, 250], gamma=0.1)

# Train for 33 epochs
for epoch in range(33):  # Adjust the number of epochs as needed
    model.train()
    running_loss = 0.0
    for inputs, labels in trainloader:
        # Clear gradients left over from the previous batch
        optimizer.zero_grad()

        # Forward, loss, backward, parameter update
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Accumulate the batch loss for the epoch average
        running_loss += loss.item()

    # The scheduler counts epochs, so it advances once per epoch
    scheduler.step()

    # Report the mean batch loss of this epoch
    print('[Epoch %d] Training Loss: %.3f' % (epoch + 1, running_loss / len(trainloader)))

print('Finished Training')

# Measure top-1 accuracy on the test loader once training is done
correct = 0
total = 0
model.eval()
with torch.no_grad():
    for images, labels in testloader:
        outputs = model(images)
        # Index of the highest-scoring class for every sample in the batch
        predicted = torch.max(outputs, dim=1).indices
        total += labels.shape[0]
        correct += (predicted == labels).sum().item()

print('Accuracy on Test Set: %.2f %%' % (100 * correct / total))


# cp + Double back used

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.utils.data import DataLoader, Subset
import torchvision.models as models

# Preprocessing: convert to tensor, then normalise every RGB channel with mean 0.5 / std 0.5.
# NOTE(review): 0.5/0.5 look like generic constants rather than measured CIFAR-10 statistics — confirm.
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
    ]
)

# CIFAR-10 train and test splits (root ./data, download=True), sharing the same transform
trainset, testset = (
    datasets.CIFAR10(root='./data', train=is_train, download=True, transform=transform)
    for is_train in (True, False)
)

# Function to get indices of limited samples per class
def get_limited_indices(dataset, limit_per_class, num_classes=10):
    """Return indices of the first `limit_per_class` samples of each class.

    Samples are visited in index order and the scan ends early once all
    `num_classes` classes are full.

    Args:
        dataset: Sized, indexable collection whose items unpack as
            ``(sample, label)``; each label indexes a per-class counter list.
        limit_per_class: Maximum number of indices kept for each class.
        num_classes: Number of labels to track. Defaults to 10, the value
            that was previously hard-coded.

    Returns:
        list: Selected dataset indices in ascending order.
    """
    indices = []
    if limit_per_class <= 0:
        # No sample can be selected, so there is nothing to scan.
        return indices

    count_per_class = [0] * num_classes
    classes_filled = 0  # classes that already hold `limit_per_class` samples
    for i in range(len(dataset)):
        _, label = dataset[i]
        if count_per_class[label] < limit_per_class:
            indices.append(i)
            count_per_class[label] += 1
            if count_per_class[label] >= limit_per_class:
                # Track full classes incrementally instead of re-checking all
                # counters after every sample.
                classes_filled += 1
                if classes_filled == num_classes:
                    break
    return indices

# Keep at most 200 training samples per class
train_indices = get_limited_indices(trainset, limit_per_class=200)
trainset_limited = Subset(dataset=trainset, indices=train_indices)

# Batches of 64: the reduced training set is reshuffled every epoch,
# the full test set is read in a fixed order
trainloader = DataLoader(dataset=trainset_limited, batch_size=64, shuffle=True)
testloader = DataLoader(dataset=testset, batch_size=64, shuffle=False)

# ResNet-18 wrapper module
class ResNet18(nn.Module):
    """Wraps torchvision's `resnet18` built with a 10-way output layer."""

    def __init__(self):
        super().__init__()
        # pretrained=False: no pretrained weights are loaded; num_classes=10 for CIFAR-10
        backbone = models.resnet18(pretrained=False, num_classes=10)
        self.resnet = backbone

    def forward(self, x):
        """Run `x` through the wrapped network and return its output."""
        logits = self.resnet(x)
        return logits

# Initialize ResNet-18 model
model = ResNet18()

# Define loss function
criterion = nn.CrossEntropyLoss()

# Define SGD optimizer with weight decay
optimizer = optim.SGD(model.parameters(), lr=0.1, momentum=0.9, weight_decay=0.003)

# Learning rate scheduler: LR is multiplied by 0.1 at epochs 150 and 250.
# It is advanced once per epoch at the end of the epoch loop below.
scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=[150, 250], gamma=0.1)

# Weight of the auxiliary "CP" term added to the loss
cp_weight = 0.003

# Maximum global gradient norm used for clipping.
# NOTE(review): this cell is labelled "double back", but no input-gradient
# (double-backpropagation) term is computed here; only gradient-norm clipping
# is applied. Confirm the intent.
target_grad = 1

# Training loop
for epoch in range(33):  # Adjust the number of epochs as needed
    model.train()
    running_loss = 0.0
    for inputs, labels in trainloader:
        # Zero the parameter gradients
        optimizer.zero_grad()

        # Forward pass
        outputs = model(inputs)
        loss = criterion(outputs, labels)

        # Auxiliary term: mean squared difference between the softmax output
        # and the one-hot target. The identity matrix is sized from the
        # logits and created on their device instead of a fixed CPU eye(10).
        # NOTE(review): this squared-error term rewards confident correct
        # predictions; an entropy-based confidence penalty would behave
        # differently — confirm which one is intended.
        probs = torch.softmax(outputs, dim=1)
        one_hot = torch.eye(outputs.size(1), dtype=probs.dtype, device=probs.device)[labels]
        penalty = cp_weight * torch.mean((probs - one_hot) ** 2)

        # Add penalty to the loss
        loss = loss + penalty

        # Backward pass
        loss.backward()

        # Rescale gradients so their global norm does not exceed target_grad
        nn.utils.clip_grad_norm_(model.parameters(), max_norm=target_grad)

        optimizer.step()

        # Accumulate the batch loss for the epoch average
        running_loss += loss.item()

    # The scheduler was previously never stepped; advance it once per epoch
    scheduler.step()

    # Print epoch and training loss
    print('[Epoch %d] Training Loss: %.3f' % (epoch + 1, running_loss / len(trainloader)))

print('Finished Training')

# Evaluate on test set after completing training
correct = 0
total = 0
model.eval()
with torch.no_grad():
    for data in testloader:
        images, labels = data
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy on Test Set: %.2f %%' % (100 * correct / total))


# cp +spectreg

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.utils.data import DataLoader, Subset
import torchvision.models as models

# Preprocessing: convert to tensor, then normalise every RGB channel with mean 0.5 / std 0.5.
# NOTE(review): 0.5/0.5 look like generic constants rather than measured CIFAR-10 statistics — confirm.
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
    ]
)

# CIFAR-10 train and test splits (root ./data, download=True), sharing the same transform
trainset, testset = (
    datasets.CIFAR10(root='./data', train=is_train, download=True, transform=transform)
    for is_train in (True, False)
)

# Function to get indices of limited samples per class
def get_limited_indices(dataset, limit_per_class, num_classes=10):
    """Return indices of the first `limit_per_class` samples of each class.

    Samples are visited in index order and the scan ends early once all
    `num_classes` classes are full.

    Args:
        dataset: Sized, indexable collection whose items unpack as
            ``(sample, label)``; each label indexes a per-class counter list.
        limit_per_class: Maximum number of indices kept for each class.
        num_classes: Number of labels to track. Defaults to 10, the value
            that was previously hard-coded.

    Returns:
        list: Selected dataset indices in ascending order.
    """
    indices = []
    if limit_per_class <= 0:
        # No sample can be selected, so there is nothing to scan.
        return indices

    count_per_class = [0] * num_classes
    classes_filled = 0  # classes that already hold `limit_per_class` samples
    for i in range(len(dataset)):
        _, label = dataset[i]
        if count_per_class[label] < limit_per_class:
            indices.append(i)
            count_per_class[label] += 1
            if count_per_class[label] >= limit_per_class:
                # Track full classes incrementally instead of re-checking all
                # counters after every sample.
                classes_filled += 1
                if classes_filled == num_classes:
                    break
    return indices

# Keep at most 200 training samples per class
train_indices = get_limited_indices(trainset, limit_per_class=200)
trainset_limited = Subset(dataset=trainset, indices=train_indices)

# Batches of 64: the reduced training set is reshuffled every epoch,
# the full test set is read in a fixed order
trainloader = DataLoader(dataset=trainset_limited, batch_size=64, shuffle=True)
testloader = DataLoader(dataset=testset, batch_size=64, shuffle=False)

# ResNet-18 wrapper module
class ResNet18(nn.Module):
    """Wraps torchvision's `resnet18` built with a 10-way output layer."""

    def __init__(self):
        super().__init__()
        # pretrained=False: no pretrained weights are loaded; num_classes=10 for CIFAR-10
        backbone = models.resnet18(pretrained=False, num_classes=10)
        self.resnet = backbone

    def forward(self, x):
        """Run `x` through the wrapped network and return its output."""
        logits = self.resnet(x)
        return logits

# Initialize ResNet-18 model
model = ResNet18()

# Define loss function
criterion = nn.CrossEntropyLoss()

# Define SGD optimizer with weight decay
optimizer = optim.SGD(model.parameters(), lr=0.1, momentum=0.9, weight_decay=0.003)

# Learning rate scheduler: LR is multiplied by 0.1 at epochs 150 and 250.
# It is advanced once per epoch at the end of the epoch loop below.
scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=[150, 250], gamma=0.1)

# Maximum global gradient norm used for clipping
target_grad = 1

# Weight of the auxiliary "CP" term added to the loss
cp_weight = 0.003

# Spectral regularization weight.
# NOTE(review): the regulariser below now actually depends on the weights, so
# this value may need retuning.
spectral_weight = 0.03

# Training loop
for epoch in range(33):  # Adjust the number of epochs as needed
    model.train()
    running_loss = 0.0
    for inputs, labels in trainloader:
        # Zero the parameter gradients
        optimizer.zero_grad()

        # Forward pass
        outputs = model(inputs)
        loss = criterion(outputs, labels)

        # Spectral regularization: sum of the largest singular value of every
        # conv / linear weight, flattened to (out_channels, -1).
        # The norm of the SVD's U factor must not be used here: U has
        # orthonormal columns, so its norm is a constant that does not
        # depend on the weights.
        spectreg_loss = 0
        for module in model.modules():
            if isinstance(module, (nn.Conv2d, nn.Linear)):
                weight = module.weight.reshape(module.weight.size(0), -1)
                spectreg_loss = spectreg_loss + torch.linalg.svdvals(weight).max()

        # Add spectral regularization loss to the main loss
        loss = loss + spectreg_loss * spectral_weight

        # Auxiliary term: mean squared difference between the softmax output
        # and the one-hot target. The identity matrix is sized from the
        # logits and created on their device instead of a fixed CPU eye(10).
        # NOTE(review): this squared-error term rewards confident correct
        # predictions; an entropy-based confidence penalty would behave
        # differently — confirm which one is intended.
        probs = torch.softmax(outputs, dim=1)
        one_hot = torch.eye(outputs.size(1), dtype=probs.dtype, device=probs.device)[labels]
        penalty = cp_weight * torch.mean((probs - one_hot) ** 2)

        # Add penalty to the loss
        loss = loss + penalty

        # Backward pass
        loss.backward()

        # Rescale gradients so their global norm does not exceed target_grad,
        # then update the parameters (done on every batch).
        nn.utils.clip_grad_norm_(model.parameters(), max_norm=target_grad)
        optimizer.step()

        # Accumulate the batch loss for the epoch average
        running_loss += loss.item()

    # MultiStepLR counts epochs: step it once per epoch and without arguments.
    # Passing the loss would be interpreted as an epoch index.
    scheduler.step()

    # Print epoch and training loss
    print('[Epoch %d] Training Loss: %.3f' % (epoch + 1, running_loss / len(trainloader)))

print('Finished Training')

# Evaluate on test set after completing training
correct = 0
total = 0
model.eval()
with torch.no_grad():
    for data in testloader:
        images, labels = data
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy on Test Set: %.2f %%' % (100 * correct / total))


#CIFAR-100

#baseline

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.utils.data import DataLoader, Subset
import torchvision.models as models

# Preprocessing: convert to tensor, then normalise every RGB channel with mean 0.5 / std 0.5.
# NOTE(review): 0.5/0.5 look like generic constants rather than measured CIFAR-100 statistics — confirm.
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
    ]
)

# CIFAR-100 train and test splits (root ./data, download=True), sharing the same transform
trainset, testset = (
    datasets.CIFAR100(root='./data', train=is_train, download=True, transform=transform)
    for is_train in (True, False)
)

# Function to get indices of limited samples per class
def get_limited_indices(dataset, limit_per_class, num_classes=100):
    """Return indices of the first `limit_per_class` samples of each class.

    Samples are visited in index order and the scan ends early once all
    `num_classes` classes are full.

    Args:
        dataset: Sized, indexable collection whose items unpack as
            ``(sample, label)``; each label indexes a per-class counter list.
        limit_per_class: Maximum number of indices kept for each class.
        num_classes: Number of labels to track. Defaults to 100, the value
            that was previously hard-coded for CIFAR-100.

    Returns:
        list: Selected dataset indices in ascending order.
    """
    indices = []
    if limit_per_class <= 0:
        # No sample can be selected, so there is nothing to scan.
        return indices

    count_per_class = [0] * num_classes
    classes_filled = 0  # classes that already hold `limit_per_class` samples
    for i in range(len(dataset)):
        _, label = dataset[i]
        if count_per_class[label] < limit_per_class:
            indices.append(i)
            count_per_class[label] += 1
            if count_per_class[label] >= limit_per_class:
                # Track full classes incrementally instead of re-checking all
                # counters after every sample.
                classes_filled += 1
                if classes_filled == num_classes:
                    break
    return indices

# Keep at most 20 training samples per class
train_indices = get_limited_indices(trainset, limit_per_class=20)
trainset_limited = Subset(dataset=trainset, indices=train_indices)

# Training batches of 64, reshuffled every epoch
trainloader = DataLoader(dataset=trainset_limited, batch_size=64, shuffle=True)

# Evaluate on the first 1000 test samples only, read in a fixed order
test_indices = range(1000)
testset_limited = Subset(dataset=testset, indices=test_indices)
testloader = DataLoader(dataset=testset_limited, batch_size=64, shuffle=False)

# ResNet-18 wrapper module
class ResNet18(nn.Module):
    """Wraps torchvision's `resnet18` built with a 100-way output layer."""

    def __init__(self):
        super().__init__()
        # pretrained=False: no pretrained weights are loaded; num_classes=100 for CIFAR-100
        backbone = models.resnet18(pretrained=False, num_classes=100)
        self.resnet = backbone

    def forward(self, x):
        """Run `x` through the wrapped network and return its output."""
        logits = self.resnet(x)
        return logits

# Network to train
model = ResNet18()

# Plain cross-entropy loss
criterion = nn.CrossEntropyLoss()

# SGD with momentum and weight decay
optimizer = optim.SGD(model.parameters(), lr=0.1, momentum=0.9, weight_decay=0.003)

# Step schedule: LR is multiplied by 0.1 at epochs 150 and 250
scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=[150, 250], gamma=0.1)

# Train for 33 epochs
for epoch in range(33):  # Adjust the number of epochs as needed
    model.train()
    running_loss = 0.0
    for inputs, labels in trainloader:
        # Clear gradients left over from the previous batch
        optimizer.zero_grad()

        # Forward, loss, backward, parameter update
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Accumulate the batch loss for the epoch average
        running_loss += loss.item()

    # The scheduler counts epochs, so it advances once per epoch
    scheduler.step()

    # Report the mean batch loss of this epoch
    print('[Epoch %d] Training Loss: %.3f' % (epoch + 1, running_loss / len(trainloader)))

print('Finished Training')

# Measure top-1 accuracy on the test loader once training is done
correct = 0
total = 0
model.eval()
with torch.no_grad():
    for images, labels in testloader:
        outputs = model(images)
        # Index of the highest-scoring class for every sample in the batch
        predicted = torch.max(outputs, dim=1).indices
        total += labels.shape[0]
        correct += (predicted == labels).sum().item()

print('Accuracy on Test Set: %.2f %%' % (100 * correct / total))


#Double back without cp

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.utils.data import DataLoader, Subset
import torchvision.models as models
import torch.optim.lr_scheduler as lr_scheduler

# Preprocessing: convert to tensor, then normalise every RGB channel with mean 0.5 / std 0.5.
# NOTE(review): 0.5/0.5 look like generic constants rather than measured CIFAR-100 statistics — confirm.
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
    ]
)

# CIFAR-100 train and test splits (root ./data, download=True), sharing the same transform
trainset, testset = (
    datasets.CIFAR100(root='./data', train=is_train, download=True, transform=transform)
    for is_train in (True, False)
)

# Function to get indices of limited samples per class
def get_limited_indices(dataset, limit_per_class, num_classes=100):
    """Return indices of the first `limit_per_class` samples of each class.

    Samples are visited in index order and the scan ends early once all
    `num_classes` classes are full.

    Args:
        dataset: Sized, indexable collection whose items unpack as
            ``(sample, label)``; each label indexes a per-class counter list.
        limit_per_class: Maximum number of indices kept for each class.
        num_classes: Number of labels to track. Defaults to 100, the value
            that was previously hard-coded.

    Returns:
        list: Selected dataset indices in ascending order.
    """
    indices = []
    if limit_per_class <= 0:
        # No sample can be selected, so there is nothing to scan.
        return indices

    count_per_class = [0] * num_classes
    classes_filled = 0  # classes that already hold `limit_per_class` samples
    for i in range(len(dataset)):
        _, label = dataset[i]
        if count_per_class[label] < limit_per_class:
            indices.append(i)
            count_per_class[label] += 1
            if count_per_class[label] >= limit_per_class:
                # Track full classes incrementally instead of re-checking all
                # counters after every sample.
                classes_filled += 1
                if classes_filled == num_classes:
                    break
    return indices

# Keep at most 20 training samples per class
train_indices = get_limited_indices(trainset, limit_per_class=20)
trainset_limited = Subset(dataset=trainset, indices=train_indices)

# Training batches of 64, reshuffled every epoch
trainloader = DataLoader(dataset=trainset_limited, batch_size=64, shuffle=True)

# Evaluate on the first 1000 test samples only, read in a fixed order
test_indices = range(1000)
testset_limited = Subset(dataset=testset, indices=test_indices)
testloader = DataLoader(dataset=testset_limited, batch_size=64, shuffle=False)

# ResNet-18 wrapper module
class ResNet18(nn.Module):
    """Wraps torchvision's `resnet18` built with a 100-way output layer."""

    def __init__(self):
        super().__init__()
        # pretrained=False: no pretrained weights are loaded; num_classes=100 for CIFAR-100
        backbone = models.resnet18(pretrained=False, num_classes=100)
        self.resnet = backbone

    def forward(self, x):
        """Run `x` through the wrapped network and return its output."""
        logits = self.resnet(x)
        return logits

# Initialize ResNet-18 model
model = ResNet18()

# Define loss function
criterion = nn.CrossEntropyLoss()

# SGD with momentum (no weight decay is configured in this cell)
optimizer = optim.SGD(model.parameters(), lr=0.1, momentum=0.9)

# Learning rate scheduler: LR is multiplied by 0.1 at epochs 150 and 250.
# It is advanced once per epoch at the end of the epoch loop below.
scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=[150, 250], gamma=0.1)

# Maximum global gradient norm used for clipping.
# NOTE(review): this cell is labelled "double back", but no input-gradient
# (double-backpropagation) term is computed here; only gradient-norm clipping
# is applied. Confirm the intent.
target_grad = 0.003

# Training loop
for epoch in range(33):  # Adjust the number of epochs as needed
    model.train()
    running_loss = 0.0
    for inputs, labels in trainloader:
        # Zero the parameter gradients
        optimizer.zero_grad()

        # Forward pass
        outputs = model(inputs)
        loss = criterion(outputs, labels)

        # Backward pass
        loss.backward()

        # Rescale gradients so their global norm does not exceed target_grad,
        # then update the parameters (done on every batch).
        nn.utils.clip_grad_norm_(model.parameters(), max_norm=target_grad)
        optimizer.step()

        # Accumulate the batch loss for the epoch average
        running_loss += loss.item()

    # MultiStepLR counts epochs: step it once per epoch and without arguments.
    # Passing the loss would be interpreted as an epoch index.
    scheduler.step()

    # Print epoch and training loss
    print('[Epoch %d] Training Loss: %.3f' % (epoch + 1, running_loss / len(trainloader)))

print('Finished Training')

# Evaluate on test set after completing training
correct = 0
total = 0
model.eval()
with torch.no_grad():
    for data in testloader:
        images, labels = data
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy on Test Set: %.2f %%' % (100 * correct / total))


#Spectreg without cp

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.utils.data import DataLoader, Subset
import torchvision.models as models
import torch.optim.lr_scheduler as lr_scheduler

# Preprocessing: convert to tensor, then normalise every RGB channel with mean 0.5 / std 0.5.
# NOTE(review): 0.5/0.5 look like generic constants rather than measured CIFAR-100 statistics — confirm.
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
    ]
)

# CIFAR-100 train and test splits (root ./data, download=True), sharing the same transform
trainset, testset = (
    datasets.CIFAR100(root='./data', train=is_train, download=True, transform=transform)
    for is_train in (True, False)
)

# Function to get indices of limited samples per class
def get_limited_indices(dataset, limit_per_class, num_classes=100):
    """Return indices of the first `limit_per_class` samples of each class.

    Samples are visited in index order and the scan ends early once all
    `num_classes` classes are full.

    Args:
        dataset: Sized, indexable collection whose items unpack as
            ``(sample, label)``; each label indexes a per-class counter list.
        limit_per_class: Maximum number of indices kept for each class.
        num_classes: Number of labels to track. Defaults to 100, the value
            that was previously hard-coded.

    Returns:
        list: Selected dataset indices in ascending order.
    """
    indices = []
    if limit_per_class <= 0:
        # No sample can be selected, so there is nothing to scan.
        return indices

    count_per_class = [0] * num_classes
    classes_filled = 0  # classes that already hold `limit_per_class` samples
    for i in range(len(dataset)):
        _, label = dataset[i]
        if count_per_class[label] < limit_per_class:
            indices.append(i)
            count_per_class[label] += 1
            if count_per_class[label] >= limit_per_class:
                # Track full classes incrementally instead of re-checking all
                # counters after every sample.
                classes_filled += 1
                if classes_filled == num_classes:
                    break
    return indices

# Keep at most 20 training samples per class
train_indices = get_limited_indices(trainset, limit_per_class=20)
trainset_limited = Subset(dataset=trainset, indices=train_indices)

# Training batches of 64, reshuffled every epoch
trainloader = DataLoader(dataset=trainset_limited, batch_size=64, shuffle=True)

# Evaluate on the first 1000 test samples only, read in a fixed order
test_indices = range(1000)
testset_limited = Subset(dataset=testset, indices=test_indices)
testloader = DataLoader(dataset=testset_limited, batch_size=64, shuffle=False)

# ResNet-18 wrapper module
class ResNet18(nn.Module):
    """Wraps torchvision's `resnet18` built with a 100-way output layer."""

    def __init__(self):
        super().__init__()
        # pretrained=False: no pretrained weights are loaded; num_classes=100 for CIFAR-100
        backbone = models.resnet18(pretrained=False, num_classes=100)
        self.resnet = backbone

    def forward(self, x):
        """Run `x` through the wrapped network and return its output."""
        logits = self.resnet(x)
        return logits

# Initialize ResNet-18 model
model = ResNet18()

# Define loss function
criterion = nn.CrossEntropyLoss()

# SGD with momentum (no weight decay is configured in this cell)
optimizer = optim.SGD(model.parameters(), lr=0.1, momentum=0.9)

# Learning rate scheduler: LR is multiplied by 0.1 at epochs 150 and 250.
# It is advanced once per epoch at the end of the epoch loop below.
scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=[150, 250], gamma=0.1)

# Spectral Regularization weight
spectreg_weight = 0.00003

# Maximum global gradient norm used for clipping. The training loop reads this
# name, so it is defined here rather than inherited from an earlier cell.
# 0.003 is the value assigned by the preceding cell.
target_grad = 0.003

# Training loop
for epoch in range(33):  # Adjust the number of epochs as needed
    model.train()
    running_loss = 0.0
    for inputs, labels in trainloader:
        # Zero the parameter gradients
        optimizer.zero_grad()

        # Forward pass
        outputs = model(inputs)
        loss = criterion(outputs, labels)

        # Regulariser: sum of the Frobenius norms of every parameter whose
        # name contains 'weight'.
        # NOTE(review): this is a Frobenius-norm penalty, not a spectral-norm
        # (largest singular value) penalty — confirm which one is intended.
        spectral_loss = 0
        for name, param in model.named_parameters():
            if 'weight' in name:
                spectral_loss = spectral_loss + torch.norm(param, p='fro')

        loss = loss + spectreg_weight * spectral_loss

        # Backward pass
        loss.backward()

        # Rescale gradients so their global norm does not exceed target_grad,
        # then update the parameters (done on every batch).
        nn.utils.clip_grad_norm_(model.parameters(), max_norm=target_grad)
        optimizer.step()

        # Accumulate the batch loss for the epoch average
        running_loss += loss.item()

    # MultiStepLR counts epochs: step it once per epoch and without arguments.
    # Passing the loss would be interpreted as an epoch index.
    scheduler.step()

    # Print epoch and training loss
    print('[Epoch %d] Training Loss: %.3f' % (epoch + 1, running_loss / len(trainloader)))

print('Finished Training')

# Evaluate on test set after completing training
correct = 0
total = 0
model.eval()
with torch.no_grad():
    for data in testloader:
        images, labels = data
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy on Test Set: %.2f %%' % (100 * correct / total))


# Implementation of CP (weight = 0.00001) + double back (target gradient = 0.003)

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.utils.data import DataLoader, Subset
import torchvision.models as models
import torch.optim.lr_scheduler as lr_scheduler

# Preprocessing: convert to tensor, then normalise every RGB channel with mean 0.5 / std 0.5.
# NOTE(review): 0.5/0.5 look like generic constants rather than measured CIFAR-100 statistics — confirm.
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
    ]
)

# CIFAR-100 train and test splits (root ./data, download=True), sharing the same transform
trainset, testset = (
    datasets.CIFAR100(root='./data', train=is_train, download=True, transform=transform)
    for is_train in (True, False)
)

# Function to get indices of limited samples per class
def get_limited_indices(dataset, limit_per_class, num_classes=100):
    """Return indices of the first `limit_per_class` samples of each class.

    Samples are visited in index order and the scan ends early once all
    `num_classes` classes are full.

    Args:
        dataset: Sized, indexable collection whose items unpack as
            ``(sample, label)``; each label indexes a per-class counter list.
        limit_per_class: Maximum number of indices kept for each class.
        num_classes: Number of labels to track. Defaults to 100, the value
            that was previously hard-coded.

    Returns:
        list: Selected dataset indices in ascending order.
    """
    indices = []
    if limit_per_class <= 0:
        # No sample can be selected, so there is nothing to scan.
        return indices

    count_per_class = [0] * num_classes
    classes_filled = 0  # classes that already hold `limit_per_class` samples
    for i in range(len(dataset)):
        _, label = dataset[i]
        if count_per_class[label] < limit_per_class:
            indices.append(i)
            count_per_class[label] += 1
            if count_per_class[label] >= limit_per_class:
                # Track full classes incrementally instead of re-checking all
                # counters after every sample.
                classes_filled += 1
                if classes_filled == num_classes:
                    break
    return indices

# Keep at most 20 training samples per class
train_indices = get_limited_indices(trainset, limit_per_class=20)
trainset_limited = Subset(dataset=trainset, indices=train_indices)

# Training batches of 64, reshuffled every epoch
trainloader = DataLoader(dataset=trainset_limited, batch_size=64, shuffle=True)

# Evaluate on the first 1000 test samples only, read in a fixed order
test_indices = range(1000)
testset_limited = Subset(dataset=testset, indices=test_indices)
testloader = DataLoader(dataset=testset_limited, batch_size=64, shuffle=False)

# ResNet-18 wrapper module
class ResNet18(nn.Module):
    """Wraps torchvision's `resnet18` built with a 100-way output layer."""

    def __init__(self):
        super().__init__()
        # pretrained=False: no pretrained weights are loaded; num_classes=100 for CIFAR-100
        backbone = models.resnet18(pretrained=False, num_classes=100)
        self.resnet = backbone

    def forward(self, x):
        """Run `x` through the wrapped network and return its output."""
        logits = self.resnet(x)
        return logits

# Initialize ResNet-18 model
model = ResNet18()

# Define loss function
criterion = nn.CrossEntropyLoss()

# SGD with momentum (no weight decay is configured in this cell)
optimizer = optim.SGD(model.parameters(), lr=0.1, momentum=0.9)

# Learning rate scheduler: LR is multiplied by 0.1 at epochs 150 and 250.
# It is advanced once per epoch at the end of the epoch loop below.
scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=[150, 250], gamma=0.1)

# Maximum global gradient norm used for clipping
target_grad = 0.003

# Weight of the input-gradient penalty added to the loss
cp_weight = 0.00001

# Training loop
for epoch in range(33):  # Adjust the number of epochs as needed
    model.train()
    running_loss = 0.0
    for inputs, labels in trainloader:
        # The penalty below differentiates the loss w.r.t. the inputs
        inputs.requires_grad_(True)

        # Zero the parameter gradients
        optimizer.zero_grad()

        # Forward pass
        outputs = model(inputs)
        loss = criterion(outputs, labels)

        # Input-gradient (double-backpropagation) penalty: mean per-sample L2
        # norm of d(loss)/d(inputs). The cross-entropy loss is differentiated
        # rather than the sum of the softmax outputs, because that sum is
        # always 1 and its input gradient is therefore ~0.
        # create_graph=True keeps the graph so the penalty itself can be
        # back-propagated.
        input_grad = torch.autograd.grad(outputs=loss, inputs=inputs, create_graph=True)[0]
        # Flatten everything but the batch axis so the norm is per sample
        per_sample_norm = torch.norm(input_grad.reshape(input_grad.size(0), -1), p=2, dim=1)
        cp_loss = torch.mean(per_sample_norm)

        loss = loss + cp_weight * cp_loss

        # Backward pass
        loss.backward()

        # Rescale gradients so their global norm does not exceed target_grad,
        # then update the parameters (done on every batch).
        nn.utils.clip_grad_norm_(model.parameters(), max_norm=target_grad)
        optimizer.step()

        # Accumulate the batch loss for the epoch average
        running_loss += loss.item()

    # MultiStepLR counts epochs: step it once per epoch and without arguments.
    # Passing the loss would be interpreted as an epoch index.
    scheduler.step()

    # Print epoch and training loss
    print('[Epoch %d] Training Loss: %.3f' % (epoch + 1, running_loss / len(trainloader)))

print('Finished Training')

# Evaluate on test set after completing training
correct = 0
total = 0
model.eval()
with torch.no_grad():
    for data in testloader:
        images, labels = data
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy on Test Set: %.2f %%' % (100 * correct / total))


# cp = 0.00001

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.utils.data import DataLoader, Subset
import torchvision.models as models

# Preprocessing: convert to tensor, then normalise every RGB channel with mean 0.5 / std 0.5.
# NOTE(review): 0.5/0.5 look like generic constants rather than measured CIFAR-100 statistics — confirm.
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
    ]
)

# CIFAR-100 train and test splits (root ./data, download=True), sharing the same transform
trainset, testset = (
    datasets.CIFAR100(root='./data', train=is_train, download=True, transform=transform)
    for is_train in (True, False)
)

# Function to get indices of limited samples per class
def get_limited_indices(dataset, limit_per_class, num_classes=100):
    """Return indices of the first `limit_per_class` samples of each class.

    Samples are visited in index order and the scan ends early once all
    `num_classes` classes are full.

    Args:
        dataset: Sized, indexable collection whose items unpack as
            ``(sample, label)``; each label indexes a per-class counter list.
        limit_per_class: Maximum number of indices kept for each class.
        num_classes: Number of labels to track. Defaults to 100, the value
            that was previously hard-coded.

    Returns:
        list: Selected dataset indices in ascending order.
    """
    indices = []
    if limit_per_class <= 0:
        # No sample can be selected, so there is nothing to scan.
        return indices

    count_per_class = [0] * num_classes
    classes_filled = 0  # classes that already hold `limit_per_class` samples
    for i in range(len(dataset)):
        _, label = dataset[i]
        if count_per_class[label] < limit_per_class:
            indices.append(i)
            count_per_class[label] += 1
            if count_per_class[label] >= limit_per_class:
                # Track full classes incrementally instead of re-checking all
                # counters after every sample.
                classes_filled += 1
                if classes_filled == num_classes:
                    break
    return indices

# Keep at most 20 training samples per class
train_indices = get_limited_indices(trainset, limit_per_class=20)
trainset_limited = Subset(dataset=trainset, indices=train_indices)

# Training batches of 64, reshuffled every epoch
trainloader = DataLoader(dataset=trainset_limited, batch_size=64, shuffle=True)

# Evaluate on the first 1000 test samples only, read in a fixed order
test_indices = range(1000)
testset_limited = Subset(dataset=testset, indices=test_indices)
testloader = DataLoader(dataset=testset_limited, batch_size=64, shuffle=False)

# ResNet-18 wrapper module
class ResNet18(nn.Module):
    """Wraps torchvision's `resnet18` built with a 100-way output layer."""

    def __init__(self):
        super().__init__()
        # pretrained=False: no pretrained weights are loaded; num_classes=100 for CIFAR-100
        backbone = models.resnet18(pretrained=False, num_classes=100)
        self.resnet = backbone

    def forward(self, x):
        """Run `x` through the wrapped network and return its output."""
        logits = self.resnet(x)
        return logits

# Initialize ResNet-18 model
model = ResNet18()

# Define loss function
criterion = nn.CrossEntropyLoss()

# SGD with momentum (no weight decay is configured in this cell)
optimizer = optim.SGD(model.parameters(), lr=0.1, momentum=0.9)

# Learning rate scheduler: LR is multiplied by 0.1 at epochs 150 and 250.
# It is advanced once per epoch at the end of the epoch loop below.
scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=[150, 250], gamma=0.1)

# Confidence Penalty weight
cp_weight = 0.00001

# Training loop
for epoch in range(33):  # Adjust the number of epochs as needed
    model.train()
    running_loss = 0.0
    for inputs, labels in trainloader:
        # Zero the parameter gradients
        optimizer.zero_grad()

        # Forward pass
        outputs = model(inputs)
        loss = criterion(outputs, labels)

        # Confidence penalty: batch mean of the largest softmax probability,
        # so predictions with a very high top-class probability cost more.
        max_prob, _ = torch.max(nn.functional.softmax(outputs, dim=1), dim=1)
        cp_loss = torch.mean(max_prob)

        loss = loss + cp_weight * cp_loss

        # Backward pass
        loss.backward()

        # Rescale gradients so their global norm does not exceed 0.003,
        # then update the parameters.
        nn.utils.clip_grad_norm_(model.parameters(), max_norm=0.003)
        optimizer.step()

        # Accumulate the batch loss for the epoch average
        running_loss += loss.item()

    # MultiStepLR counts epochs: step it once per epoch and without arguments.
    # Passing the loss would be interpreted as an epoch index.
    scheduler.step()

    # Print epoch and training loss
    print('[Epoch %d] Training Loss: %.3f' % (epoch + 1, running_loss / len(trainloader)))

print('Finished Training')

# Evaluate on test set after completing training
correct = 0
total = 0
model.eval()
with torch.no_grad():
    for data in testloader:
        images, labels = data
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy on Test Set: %.2f %%' % (100 * correct / total))


#Spect Reg = 0.00003 + Cp = 0.00001

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.utils.data import DataLoader, Subset
import torchvision.models as models

# Preprocessing: convert to tensor, then normalise every RGB channel with mean 0.5 / std 0.5.
# NOTE(review): 0.5/0.5 look like generic constants rather than measured CIFAR-100 statistics — confirm.
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
    ]
)

# CIFAR-100 train and test splits (root ./data, download=True), sharing the same transform
trainset, testset = (
    datasets.CIFAR100(root='./data', train=is_train, download=True, transform=transform)
    for is_train in (True, False)
)

# Function to get indices of limited samples per class
def get_limited_indices(dataset, limit_per_class, num_classes=100):
    """Return indices of the first `limit_per_class` samples of each class.

    Samples are visited in index order and the scan ends early once all
    `num_classes` classes are full.

    Args:
        dataset: Sized, indexable collection whose items unpack as
            ``(sample, label)``; each label indexes a per-class counter list.
        limit_per_class: Maximum number of indices kept for each class.
        num_classes: Number of labels to track. Defaults to 100, the value
            that was previously hard-coded.

    Returns:
        list: Selected dataset indices in ascending order.
    """
    indices = []
    if limit_per_class <= 0:
        # No sample can be selected, so there is nothing to scan.
        return indices

    count_per_class = [0] * num_classes
    classes_filled = 0  # classes that already hold `limit_per_class` samples
    for i in range(len(dataset)):
        _, label = dataset[i]
        if count_per_class[label] < limit_per_class:
            indices.append(i)
            count_per_class[label] += 1
            if count_per_class[label] >= limit_per_class:
                # Track full classes incrementally instead of re-checking all
                # counters after every sample.
                classes_filled += 1
                if classes_filled == num_classes:
                    break
    return indices

# Keep at most 20 training samples per class
train_indices = get_limited_indices(trainset, limit_per_class=20)
trainset_limited = Subset(dataset=trainset, indices=train_indices)

# Training batches of 64, reshuffled every epoch
trainloader = DataLoader(dataset=trainset_limited, batch_size=64, shuffle=True)

# Evaluate on the first 1000 test samples only, read in a fixed order
test_indices = range(1000)
testset_limited = Subset(dataset=testset, indices=test_indices)
testloader = DataLoader(dataset=testset_limited, batch_size=64, shuffle=False)

# ResNet-18 wrapper module
class ResNet18(nn.Module):
    """Wraps torchvision's `resnet18` built with a 100-way output layer."""

    def __init__(self):
        super().__init__()
        # pretrained=False: no pretrained weights are loaded; num_classes=100 for CIFAR-100
        backbone = models.resnet18(pretrained=False, num_classes=100)
        self.resnet = backbone

    def forward(self, x):
        """Run `x` through the wrapped network and return its output."""
        logits = self.resnet(x)
        return logits

# Initialize ResNet-18 model
model = ResNet18()

# Define loss function
criterion = nn.CrossEntropyLoss()

# SGD with momentum; no weight_decay argument is passed, so the optimizer itself
# applies no weight decay (the only regularisers are the ones added in the training loop)
optimizer = optim.SGD(model.parameters(), lr=0.1, momentum=0.9)

# Learning rate scheduler: multiplies the learning rate by gamma=0.1 at milestones 150 and 250.
# NOTE(review): the training loop below runs 33 epochs, so with per-epoch stepping
# neither milestone would be reached — confirm the intended schedule.
scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=[150, 250], gamma=0.1)

# Spectral Regularization weight (coefficient of the weight-norm penalty in the loop below)
spectreg_weight = 0.00003

# Confidence Penalty weight (coefficient of the mean max-softmax term in the loop below)
cp_weight = 0.00001

# Training loop: cross-entropy + weight-norm penalty + confidence penalty.
for epoch in range(33):  # Adjust the number of epochs as needed
    model.train()
    running_loss = 0.0
    # Inputs do not need gradients: only the model parameters are optimised,
    # so the former inputs.requires_grad_(True) call was wasted work.
    for inputs, labels in trainloader:
        # Zero the parameter gradients
        optimizer.zero_grad()

        # Forward pass
        outputs = model(inputs)
        loss = criterion(outputs, labels)

        # "Spectral" regularisation as implemented here: the sum of the Frobenius
        # norms of every parameter whose name contains 'weight'.
        spectral_loss = sum(
            torch.norm(param, p='fro')
            for name, param in model.named_parameters()
            if 'weight' in name
        )
        loss = loss + spectreg_weight * spectral_loss

        # Confidence penalty: mean of the largest softmax probability per sample.
        max_prob = nn.functional.softmax(outputs, dim=1).max(dim=1)[0]
        cp_loss = torch.mean(max_prob)
        loss = loss + cp_weight * cp_loss

        # Backward pass and parameter update
        loss.backward()
        optimizer.step()

        # Accumulate the total (regularised) loss for the epoch average
        running_loss += loss.item()

    # MultiStepLR counts epochs: step it once per epoch and without arguments.
    # Its optional argument is an epoch index, not a loss value, so the previous
    # per-batch scheduler.step(loss) fed the loss in as the epoch number.
    scheduler.step()

    # Print epoch and training loss
    print('[Epoch %d] Training Loss: %.3f' % (epoch + 1, running_loss / len(trainloader)))

print('Finished Training')

# Measure top-1 accuracy of the trained model on the test loader.
model.eval()
correct, total = 0, 0
with torch.no_grad():
    for images, labels in testloader:
        outputs = model(images)
        # Index of the largest logit in each row is the predicted class.
        predicted = torch.max(outputs.data, 1)[1]
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy on Test Set: %.2f %%' % (100 * correct / total))


In [None]:
!pip install numpy torch torchvision pytorch-ignite tensorboardX tensorboard opendatasets efficientnet-pytorch

# Import dependencies
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime as dt

import torch
from torch import optim, nn
from torch.utils.data import DataLoader, TensorDataset, Dataset
from torchvision.utils import make_grid
from torchvision import models, datasets
from torchvision import transforms as T

from ignite.engine import Events, create_supervised_trainer, create_supervised_evaluator
from ignite.metrics import Accuracy, Loss, Precision, Recall
from ignite.handlers import LRScheduler, ModelCheckpoint, global_step_from_engine
from ignite.contrib.handlers import ProgressBar, TensorboardLogger
import ignite.contrib.engines.common as common

import opendatasets as od
import os
from random import randint
import urllib
import zipfile

# Select the compute device: the GPU when PyTorch reports CUDA support, else the CPU.
use_cuda = torch.cuda.is_available()
device = torch.device("cuda") if use_cuda else torch.device("cpu")

In [None]:
# Download the Tiny ImageNet dataset from Kaggle with opendatasets.
od.download("https://www.kaggle.com/akash2sharma/tiny-imagenet")
# Root folder of the dataset, relative to the working directory.
DATA_DIR = 'tiny-imagenet/tiny-imagenet-200'
# Define training and validation data paths
TRAIN_DIR = os.path.join(DATA_DIR, 'train')
VALID_DIR = os.path.join(DATA_DIR, 'val')

In [None]:
# Dataset root used by the cells below (same path string as DATA_DIR).
directory = 'tiny-imagenet/tiny-imagenet-200'
# Expected number of classes; checked against the training folder once it is loaded.
num_classes = 200

In [None]:
# modify this depending on memory constraints
batch_size = 64

# the magic normalization parameters come from the example
# (per-channel mean / std; reused later to undo the normalisation for plotting)
transform_mean = np.array([ 0.485, 0.456, 0.406 ])
transform_std = np.array([ 0.229, 0.224, 0.225 ])

# NOTE(review): this section's import cell binds torchvision transforms as `T`;
# the `transforms` name used here comes from an earlier cell's import.
# Training pipeline: random 224x224 crop + random horizontal flip, then normalise.
train_transform = transforms.Compose([
    transforms.RandomResizedCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean = transform_mean, std = transform_std),
])

# Validation pipeline: deterministic resize to 256 and central 224x224 crop.
val_transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean = transform_mean, std = transform_std),
])

traindir = os.path.join(directory, "train")
# be careful with this set, the labels are not defined using the directory structure
valdir = os.path.join(directory, "val")

train = datasets.ImageFolder(traindir, train_transform)
# Labels assigned here are placeholders; a later cell rewrites them from val_annotations.txt.
val = datasets.ImageFolder(valdir, val_transform)

train_loader = torch.utils.data.DataLoader(train, batch_size=batch_size, shuffle=True)
# NOTE(review): the validation loader is shuffled as well; shuffling is not needed for evaluation.
val_loader = torch.utils.data.DataLoader(val, batch_size=batch_size, shuffle=True)

# Sanity check: the training folder must contain exactly num_classes class sub-folders.
assert num_classes == len(train_loader.dataset.classes)

In [None]:
# Parse words.txt: each line holds a label id and its label text separated by a tab.
small_labels = {}
with open(os.path.join(directory, "words.txt"), "r") as dictionary_file:
    for line in dictionary_file:
        label_id, label = line.strip().split("\t")
        small_labels[label_id] = label

In [None]:

# Preview the first five (label id, label text) pairs parsed from words.txt.
list(small_labels.items())[:5]


In [None]:
# Show five entries of the training directory (os.listdir order is arbitrary).
os.listdir(traindir)[:5]


In [None]:
# Class index <-> label lookups, following the class order of the training ImageFolder.
class_ids = train_loader.dataset.classes
# class index -> label text
labels = {label_index: small_labels[label_id] for label_index, label_id in enumerate(class_ids)}
# label id (folder name) -> class index
label_ids = {label_id: label_index for label_index, label_id in enumerate(class_ids)}

In [None]:
# Preview the first five (label id, label text) pairs.
# NOTE(review): identical to the earlier small_labels preview; possibly meant to
# show `labels`, which the previous cell just built — confirm.
list(small_labels.items())[:5]


In [None]:
# Preview the first five (label id, class index) pairs.
list(label_ids.items())[:5]


In [None]:
# Parse val_annotations.txt: six tab-separated fields per line, of which only the
# first two (image file name, label id) are kept.
val_label_map = {}
with open(os.path.join(directory, "val/val_annotations.txt"), "r") as val_label_file:
    for line in val_label_file:
        file_name, label_id, _, _, _, _ = line.strip().split("\t")
        val_label_map[file_name] = label_id

In [None]:
# Preview the first five (validation file name, label id) pairs.
list(val_label_map.items())[:5]


In [None]:
# First five (path, class index) entries of the validation set before relabelling.
val_loader.dataset.imgs[:5]


In [None]:
# Replace each validation entry's class index with the one derived from the
# annotation file: file name -> label id -> training class index.
val_samples = val_loader.dataset.imgs
for i, sample in enumerate(val_samples):
    file_path = sample[0]
    file_name = os.path.basename(file_path)
    label_id = val_label_map[file_name]
    val_samples[i] = (file_path, label_ids[label_id])

In [None]:
# First five (path, class index) entries of the validation set after relabelling.
val_loader.dataset.imgs[:5]


# Baseline model

In [None]:
def initialize_weights(m):
    """Re-initialise a layer in place with Xavier/Glorot-uniform weights and zero bias.

    Weights are drawn from U(-b, b) with b = sqrt(6 / (fan_in + fan_out)). For a
    linear layer this is the same bound the previous hand-written version used;
    ``nn.init.xavier_uniform_`` additionally accounts for the kernel size of
    convolutional weights.

    Args:
        m: Module with a ``weight`` tensor of at least two dimensions and an
            optional ``bias`` (layers built with ``bias=False`` are accepted).
    """
    nn.init.xavier_uniform_(m.weight)
    # Layers created with bias=False expose bias=None; there is nothing to reset then.
    if getattr(m, "bias", None) is not None:
        nn.init.zeros_(m.bias)

In [None]:
# Start from AlexNet pretrained weights, dropping the final classifier layer's
# tensors so they are not copied into the new model.
pre_trained_model = models.alexnet(pretrained=True)
state_dict = pre_trained_model.state_dict()
state_dict.pop("classifier.6.weight")
state_dict.pop("classifier.6.bias")
# Fresh AlexNet with a num_classes-way head; strict=False tolerates the two popped keys.
model = models.alexnet(num_classes=num_classes)
model.load_state_dict(state_dict, strict=False)

# Only the final classifier layer's parameters are handed to the optimizer below.
parameters = model.classifier[6].parameters()
initialize_weights(model.classifier[6])


criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(parameters, lr=0.001, momentum=0.9) # SGD with momentum (not Adam), updating the final classifier layer only
def top_k_error(top_k, total):
    """Return the error rate in percent given ``top_k`` hits out of ``total`` samples."""
    hit_percentage = top_k / total * 100.0
    return 100.0 - hit_percentage

In [None]:
def run_epoch(loader, train=True, log_every=100):
    """Run one pass over ``loader`` with the notebook-level model.

    Relies on the notebook globals ``model``, ``criterion``, ``optimizer`` and
    ``top_k_error``.

    Args:
        loader: Iterable yielding ``(inputs, labels)`` batches.
        train: When True the model is put in training mode and updated on every
            batch; when False it is only evaluated, with autograd disabled.
        log_every: Print running training statistics every this many batches.

    Returns:
        Tuple ``(top-1 error, top-5 error)`` over the whole pass, as computed by
        ``top_k_error``.
    """
    running_loss = 0.0
    running_top_1 = 0.0
    running_top_5 = 0.0
    running_total = 0.0

    epoch_top_1 = 0.0
    epoch_top_5 = 0.0
    epoch_total = 0.0

    model.train(mode=train)

    # Tensors are used directly: the deprecated torch.autograd.Variable wrapper
    # is not needed (and is not imported before this cell).
    for batch_number, (batch_inputs, batch_labels) in enumerate(loader):
        # Autograd bookkeeping is only needed while training.
        with torch.set_grad_enabled(train):
            if train:
                optimizer.zero_grad()

            batch_logits = model(batch_inputs)

            if train:
                batch_loss = criterion(batch_logits, batch_labels)
                batch_loss.backward()

                optimizer.step()

                running_loss += batch_loss.item()

        # hits[i, j] is True when the j-th ranked prediction of sample i equals
        # its label; top-k indices are distinct, so each row has at most one hit.
        batch_predictions = batch_logits.detach().topk(5)[1]
        hits = batch_predictions.eq(batch_labels.reshape(-1, 1))
        batch_top_1 = hits[:, 0].sum().item()
        batch_top_5 = hits.any(dim=1).sum().item()
        batch_count = batch_labels.size(0)

        running_top_1 += batch_top_1
        running_top_5 += batch_top_5
        epoch_top_1 += batch_top_1
        epoch_top_5 += batch_top_5

        running_total += batch_count
        epoch_total += batch_count

        if batch_number % log_every == log_every - 1:
            if train:
                print("[Batch {:5d}] Loss: {:.3f} Top-1 Error: {:.3f} Top-5 Error: {:.3f}".format(
                    batch_number + 1,
                    running_loss / log_every,
                    top_k_error(running_top_1, running_total),
                    top_k_error(running_top_5, running_total)
                ))

            # Running statistics cover one logging window only.
            running_loss = 0.0
            running_top_1 = 0.0
            running_top_5 = 0.0
            running_total = 0.0

    return top_k_error(epoch_top_1, epoch_total), top_k_error(epoch_top_5, epoch_total)

In [None]:
num_epochs = 1 # modify this to run different number of epochs

# Each epoch: one training pass over train_loader, then one evaluation pass over val_loader.
for epoch_number in range(num_epochs):

    train_top_1_error, train_top_5_error = run_epoch(train_loader, train=True)
    print("[Epoch {:3d}] Training Top-1 Error: {:.3f} Top-5 Error: {:.3f}".format(
        epoch_number + 1, train_top_1_error, train_top_5_error))

    # train=False: the model is evaluated only, no parameter updates.
    val_top_1_error, val_top_5_error = run_epoch(val_loader, train=False)
    print("[Epoch {:3d}] Validation Top-1 Error: {:.3f} Top-5 Error: {:.3f}".format(
        epoch_number + 1, val_top_1_error, val_top_5_error))

print("Finished Training")

The accuracy we get here at the 8th batch is:
Top-1 (baseline) = 24.188 and Top-5 = 48.625.
Because of limited computation power, I am stopping at batch 8. Accuracy could be higher with more batches, say 20.

In [None]:
# Show a few validation images next to their label and the model's top-5 predictions.
model.eval()

num_images = 5 # modify the number of images shown

batch_inputs, batch_labels = next(iter(val_loader))

# Inference only: torch.no_grad() replaces Variable(..., volatile=True), whose
# `volatile` flag no longer has any effect in current PyTorch.
with torch.no_grad():
    batch_logits = model(batch_inputs)

batch_labels = batch_labels.numpy()
batch_predictions = batch_logits.topk(5)[1].cpu().numpy()

cell_number = 1

plt.figure(figsize=(4, num_images * 2))

for image_number in range(num_images):
    # CHW -> HWC for matplotlib, then undo the per-channel normalisation.
    image = np.transpose(batch_inputs[image_number].cpu().numpy(), (1, 2, 0))
    image = image * transform_std + transform_mean
    # Keep pixel values inside the [0, 1] range that imshow expects for floats.
    image = np.clip(image, 0.0, 1.0)

    label = labels[batch_labels[image_number]]

    # Left column: the image itself.
    plt.subplot(num_images, 2, cell_number)

    plt.imshow(image)
    plt.xticks([])
    plt.yticks([])

    cell_number += 1

    # Right column: ground-truth label followed by the five predictions.
    plt.subplot(num_images, 2, cell_number)
    plt.axis("off")
    plt.xlim(0, 1)
    plt.ylim(0, 1)
    plt.text(0, 0.85, "Label: {}".format(label))
    for prediction_number in range(5):
        plt.text(0, 0.85 - 0.15 * (prediction_number + 1), "Prediction-{:d}: {}".format(
            prediction_number + 1, labels[batch_predictions[image_number, prediction_number]]))

    cell_number += 1

plt.show()

# Spectral regularization with optimal weight = 20,000 and weight decay = 0.0001

In [None]:
# AdamW now comes from PyTorch: the transformers implementation is deprecated in
# favour of torch.optim.AdamW, and transformers is not installed by this notebook.
from torch.optim import AdamW
import torch.nn as nn
import torch.optim as optim
import torchvision.models as models

# Define your model and load pre-trained weights, dropping the final classifier
# layer's tensors so the new num_classes-way head keeps its fresh initialisation
pre_trained_model = models.alexnet(pretrained=True)
state_dict = pre_trained_model.state_dict()
state_dict.pop("classifier.6.weight")
state_dict.pop("classifier.6.bias")
model = models.alexnet(num_classes=num_classes)
# strict=False tolerates the two keys removed above
model.load_state_dict(state_dict, strict=False)

# Initialize the optimizer with the entire model parameters
optimizer = AdamW(model.parameters(), lr=0.001, betas=(0.9, 0.999), weight_decay=0.0001)

# Define your loss function
criterion = nn.CrossEntropyLoss()

def top_k_error(top_k, total):
    """Return the error rate in percent given ``top_k`` hits out of ``total`` samples."""
    hit_percentage = top_k / total * 100.0
    return 100.0 - hit_percentage


In [None]:
import torch
from torch.autograd import Variable

def run_epoch(loader, model, criterion, optimizer=None, train=True, log_every=100, spectral_weight=20000):
    """Run one training or evaluation pass of ``model`` over ``loader``.

    Relies on the notebook-level ``top_k_error`` helper.

    Args:
        loader: Iterable yielding ``(inputs, labels)`` batches.
        model: Network to train or evaluate.
        criterion: Loss applied to ``(logits, labels)``; only used when training.
        optimizer: Optimizer updating ``model``; required when ``train`` is True.
        train: When True the model is updated on every batch; when False it is
            only evaluated, with autograd disabled.
        log_every: Print running training statistics every this many batches.
        spectral_weight: Coefficient of the weight penalty added to the training
            loss. As implemented, the penalty is the sum of squared entries of
            every parameter whose name contains 'weight'.

    Returns:
        Tuple ``(top-1 error, top-5 error)`` over the whole pass, as computed by
        ``top_k_error``.

    Raises:
        ValueError: If ``train`` is True and no optimizer was given.
    """
    if train and optimizer is None:
        raise ValueError("run_epoch(train=True) requires an optimizer")

    running_loss = 0.0
    running_top_1 = 0.0
    running_top_5 = 0.0
    running_total = 0.0

    epoch_top_1 = 0.0
    epoch_top_5 = 0.0
    epoch_total = 0.0

    model.train(mode=train)

    # Tensors are used directly; the deprecated Variable wrapper is not needed.
    for batch_number, (batch_inputs, batch_labels) in enumerate(loader):
        # Autograd bookkeeping is only needed while training.
        with torch.set_grad_enabled(train):
            if train:
                optimizer.zero_grad()

            batch_logits = model(batch_inputs)

            if train:
                batch_loss = criterion(batch_logits, batch_labels)

                # Weight penalty ("spectral regularization" in this notebook)
                weight_sum = sum(
                    torch.sum(param ** 2)
                    for name, param in model.named_parameters()
                    if 'weight' in name
                )
                batch_loss = batch_loss + spectral_weight * weight_sum

                batch_loss.backward()

                optimizer.step()

                # The logged loss includes the penalty term.
                running_loss += batch_loss.item()

        # hits[i, j] is True when the j-th ranked prediction of sample i equals
        # its label; top-k indices are distinct, so each row has at most one hit.
        batch_predictions = batch_logits.detach().topk(5)[1]
        hits = batch_predictions.eq(batch_labels.reshape(-1, 1))
        batch_top_1 = hits[:, 0].sum().item()
        batch_top_5 = hits.any(dim=1).sum().item()
        batch_count = batch_labels.size(0)

        running_top_1 += batch_top_1
        running_top_5 += batch_top_5
        epoch_top_1 += batch_top_1
        epoch_top_5 += batch_top_5

        running_total += batch_count
        epoch_total += batch_count

        if batch_number % log_every == log_every - 1:
            if train:
                print("[Batch {:5d}] Loss: {:.3f} Top-1 Error: {:.3f} Top-5 Error: {:.3f}".format(
                    batch_number + 1,
                    running_loss / log_every,
                    top_k_error(running_top_1, running_total),
                    top_k_error(running_top_5, running_total)
                ))

            # Running statistics cover one logging window only.
            running_loss = 0.0
            running_top_1 = 0.0
            running_top_5 = 0.0
            running_total = 0.0

    return top_k_error(epoch_top_1, epoch_total), top_k_error(epoch_top_5, epoch_total)


In [None]:
num_epochs = 1 # modify this to run different number of epochs

# Each epoch: one training pass over train_loader, then one evaluation pass over val_loader.
for epoch_number in range(num_epochs):
    # spectral_weight is left at run_epoch's default for the training pass.
    train_top_1_error, train_top_5_error = run_epoch(train_loader, model, criterion, optimizer, train=True)
    print("[Epoch {:3d}] Training Top-1 Error: {:.3f} Top-5 Error: {:.3f}".format(
        epoch_number + 1, train_top_1_error, train_top_5_error))

    # No optimizer is passed for the evaluation pass (train=False).
    val_top_1_error, val_top_5_error = run_epoch(val_loader, model, criterion, train=False)
    print("[Epoch {:3d}] Validation Top-1 Error: {:.3f} Top-5 Error: {:.3f}".format(
        epoch_number + 1, val_top_1_error, val_top_5_error))

print("Finished Training")
