# Small_MNIST

# Baseline Model

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from torchvision import transforms

# Define a custom dataset class
class CustomDataset(Dataset):
    def __init__(self, data, transform=None):
        self.data = data
        self.transform = transform

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        image = self.data.iloc[idx, 1:].values.astype(float).reshape((28, 28, 1))
        label = int(self.data.iloc[idx, 0])

        if self.transform:
            image = self.transform(image)

        return image, label

# Load the small MNIST dataset from CSV
mnist_df = pd.read_csv("small_mnist.csv")

# Split dataset into train and test sets
train_df, test_df = train_test_split(mnist_df, test_size=0.2, random_state=42)

# Define transforms to normalize the data
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,))
])

# Create custom datasets
train_dataset = CustomDataset(train_df, transform=transform)
test_dataset = CustomDataset(test_df, transform=transform)

# Create data loaders
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64)

# Define LeNet-5 model
class LeNet5(nn.Module):
    def __init__(self):
        super(LeNet5, self).__init__()
        self.conv1 = nn.Conv2d(1, 6, kernel_size=5)
        self.conv2 = nn.Conv2d(6, 16, kernel_size=5)
        self.fc1 = nn.Linear(16 * 4 * 4, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)
        self.relu = nn.ReLU()
        self.maxpool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.dropout = nn.Dropout(0.5)  # Dropout with probability 0.5

    def forward(self, x):
        x = self.maxpool(self.relu(self.conv1(x.float())))
        x = self.maxpool(self.relu(self.conv2(x)))
        x = x.view(-1, 16 * 4 * 4)
        x = self.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.relu(self.fc2(x))
        x = self.fc3(x)
        return x

# Initialize the model
model = LeNet5()

# Print total parameter count
total_params = sum(p.numel() for p in model.parameters())
print("Total parameters:", total_params)

# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Train the model
epochs = 100
train_losses = []  # To store train losses for calculating average later
for epoch in range(epochs):
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        if batch_idx % 100 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))
            if epoch > 0:  # Append loss only after epoch 0
                train_losses.append(loss.item())
    # Print loss every 10 epochs
    if epoch % 10 == 0:
        print('Epoch {}, Loss: {:.6f}'.format(epoch, loss.item()))

# Evaluate the model on test set
model.eval()
test_loss = 0
correct = 0
with torch.no_grad():
    for data, target in test_loader:
        output = model(data)
        test_loss += criterion(output, target).item()
        pred = output.argmax(dim=1, keepdim=True)
        correct += pred.eq(target.view_as(pred)).sum().item()

test_loss /= len(test_loader.dataset)
accuracy = 100. * correct / len(test_loader.dataset)

# Calculate average train loss (excluding epoch 0)
average_train_loss = sum(train_losses) / len(train_losses)

print('Test set: Average loss: {:.4f}, Accuracy: {}/{} ({:.2f}%)\n'.format(
    test_loss, correct, len(test_loader.dataset), accuracy))
print('Average train loss (excluding epoch 0): {:.4f}'.format(average_train_loss))



Total parameters: 44426
Epoch 0, Loss: 1.780789
Epoch 10, Loss: 0.420563
Epoch 20, Loss: 0.219223
Epoch 30, Loss: 0.061446
Epoch 40, Loss: 0.065515
Epoch 50, Loss: 0.022497
Epoch 60, Loss: 0.086612
Epoch 70, Loss: 0.024234
Epoch 80, Loss: 0.017026
Epoch 90, Loss: 0.009782
Test set: Average loss: 0.0080, Accuracy: 372/400 (93.00%)

Average train loss (excluding epoch 0): 0.1264


#Double Back with optimal weight = 50

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
import torchvision.transforms as transforms

# Custom Dataset Class
class CustomMNISTDataset(Dataset):
    def __init__(self, csv_file, transform=None):
        self.data = pd.read_csv(csv_file)
        self.transform = transform

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        image = self.data.iloc[idx, 1:].values.astype(float).reshape((28, 28))
        label = self.data.iloc[idx, 0]
        if self.transform:
            image = self.transform(image)
        return image, label

# Load Your Dataset
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,))
])

# Load the dataset
dataset = CustomMNISTDataset(csv_file='small_mnist.csv', transform=transform)

# Split dataset into training and testing sets (80% train, 20% test)
trainset, testset = train_test_split(dataset, test_size=0.2, random_state=42)

# Create data loaders for training and testing sets
trainloader = DataLoader(trainset, batch_size=64, shuffle=True)
testloader = DataLoader(testset, batch_size=64, shuffle=False)
import torch.optim.lr_scheduler as lr_scheduler

# Define LeNet architecture
class LeNet(nn.Module):
    def __init__(self):
        super(LeNet, self).__init__()
        self.conv1 = nn.Conv2d(1, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 4 * 4, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        x = self.pool(torch.relu(self.conv1(x.float())))  # Cast input to float to match bias data type
        x = self.pool(torch.relu(self.conv2(x)))
        x = x.view(-1, 16 * 4 * 4)
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = self.fc3(x)
        return x

# Initialize LeNet model
net = LeNet()

# Define loss function
criterion = nn.CrossEntropyLoss()

# Define Adam optimizer with weight decay
optimizer = optim.Adam(net.parameters(), lr=0.001)

# Learning rate scheduler
scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=5, verbose=True, min_lr=1e-6)

target_grad = 50

# Training loop
for epoch in range(100):  # loop over the dataset multiple times
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        inputs, labels = data

        # Zero the parameter gradients
        optimizer.zero_grad()

        # Forward pass
        outputs = net(inputs)
        loss = criterion(outputs, labels)

        # Backward pass
        loss.backward()

        # Get the gradient norm and adjust learning rate
        grad_norm = nn.utils.clip_grad_norm_(net.parameters(), max_norm=target_grad)
        if grad_norm > target_grad:
            optimizer.step()
            scheduler.step(loss)
        else:
            optimizer.step()

        # Print statistics
        running_loss += loss.item()

    # Print epoch and training loss
    print('[Epoch %d] Training Loss: %.3f' % (epoch + 1, running_loss / len(trainloader)))

print('Finished Training')

# Evaluate on test set after completing training
correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data
        outputs = net(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy on Test Set: %.2f %%' % (100 * correct / total))




[Epoch 1] Training Loss: 3.837
[Epoch 2] Training Loss: 0.503
[Epoch 3] Training Loss: 0.234
[Epoch 4] Training Loss: 0.115
[Epoch 5] Training Loss: 0.065
[Epoch 6] Training Loss: 0.037
[Epoch 7] Training Loss: 0.020
[Epoch 8] Training Loss: 0.026
[Epoch 9] Training Loss: 0.027
[Epoch 10] Training Loss: 0.011
[Epoch 11] Training Loss: 0.008
[Epoch 12] Training Loss: 0.002
[Epoch 13] Training Loss: 0.001
[Epoch 14] Training Loss: 0.001
[Epoch 15] Training Loss: 0.001
[Epoch 16] Training Loss: 0.001
[Epoch 17] Training Loss: 0.001
[Epoch 18] Training Loss: 0.000
[Epoch 19] Training Loss: 0.000
[Epoch 20] Training Loss: 0.000
[Epoch 21] Training Loss: 0.000
[Epoch 22] Training Loss: 0.000
[Epoch 23] Training Loss: 0.000
[Epoch 24] Training Loss: 0.000
[Epoch 25] Training Loss: 0.000
[Epoch 26] Training Loss: 0.000
[Epoch 27] Training Loss: 0.000
[Epoch 28] Training Loss: 0.000
[Epoch 29] Training Loss: 0.000
[Epoch 30] Training Loss: 0.000
[Epoch 31] Training Loss: 0.000
[Epoch 32] Traini

#SpectReg with optimal weight = 0.03

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
from torch.utils.data import Dataset, DataLoader
import pandas as pd
import torch.optim.lr_scheduler as lr_scheduler

# Custom Dataset Class
class CustomMNISTDataset(Dataset):
    def __init__(self, csv_file, transform=None):
        self.data = pd.read_csv(csv_file)
        self.transform = transform

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        image = self.data.iloc[idx, 1:].values.astype(float).reshape((28, 28))
        label = self.data.iloc[idx, 0]
        if self.transform:
            image = self.transform(image)
        return image, label

# Load your CSV dataset
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,))
])

# Load the dataset
dataset = CustomMNISTDataset(csv_file='small_mnist.csv', transform=transform)

# Split dataset into training and testing sets (80% train, 20% test)
train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size
trainset, testset = torch.utils.data.random_split(dataset, [train_size, test_size])

# Create data loaders for training and testing sets
trainloader = DataLoader(trainset, batch_size=64, shuffle=True)
testloader = DataLoader(testset, batch_size=64, shuffle=False)

# Define LeNet architecture
class LeNet(nn.Module):
    def __init__(self):
        super(LeNet, self).__init__()
        self.conv1 = nn.Conv2d(1, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 4 * 4, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        x = self.pool(torch.relu(self.conv1(x.float())))  # Cast input to float to match bias data type
        x = self.pool(torch.relu(self.conv2(x)))
        x = x.view(-1, 16 * 4 * 4)
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = self.fc3(x)
        return x

# Initialize LeNet model
net = LeNet()

# Define loss function
criterion = nn.CrossEntropyLoss()

# Define Adam optimizer with weight decay
optimizer = optim.Adam(net.parameters(), lr=0.001)

# Learning rate scheduler
scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=5, verbose=True, min_lr=1e-6)

# Target gradient for spectral regularization
target_grad = 50

# Apply spectral normalization to the linear layers
net.fc1 = nn.utils.spectral_norm(net.fc1)
net.fc2 = nn.utils.spectral_norm(net.fc2)
net.fc3 = nn.utils.spectral_norm(net.fc3)

# Training loop
for epoch in range(10):  # loop over the dataset multiple times
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        inputs, labels = data

        # Zero the parameter gradients
        optimizer.zero_grad()

        # Forward pass
        outputs = net(inputs)
        loss = criterion(outputs, labels)

        # Spectral regularization
        spectreg_loss = 0
        for name, module in net.named_modules():
            if isinstance(module, nn.Linear):
                # Compute the spectral regularization loss
                spectreg_loss += torch.norm(module.weight_u, p=2)

        # Add the spectral regularization loss to the main loss spectral optimal weight = 0.03
        loss += 0.03 * spectreg_loss

        # Backward pass
        loss.backward()

        # Get the gradient norm and adjust learning rate
        grad_norm = nn.utils.clip_grad_norm_(net.parameters(), max_norm=target_grad)
        if grad_norm > target_grad:
            optimizer.step()
            scheduler.step(loss)
        else:
            optimizer.step()

        # Print statistics
        running_loss += loss.item()

    # Print epoch and training loss
    print('[Epoch %d] Training Loss: %.3f' % (epoch + 1, running_loss / len(trainloader)))

print('Finished Training')

# Evaluate on test set after completing training
correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data
        outputs = net(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy on Test Set: %.2f %%' % (100 * correct / total))




[Epoch 1] Training Loss: 5.073
[Epoch 2] Training Loss: 0.637
[Epoch 3] Training Loss: 0.401
[Epoch 4] Training Loss: 0.277
[Epoch 5] Training Loss: 0.208
[Epoch 6] Training Loss: 0.160
[Epoch 7] Training Loss: 0.125
[Epoch 8] Training Loss: 0.116
[Epoch 9] Training Loss: 0.111
[Epoch 10] Training Loss: 0.109
Finished Training
Accuracy on Test Set: 93.50 %


#CP with optimal weight = 0.01

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from torchvision import transforms

# Define a custom dataset class
class CustomDataset(Dataset):
    def __init__(self, data, transform=None):
        self.data = data
        self.transform = transform

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        image = self.data.iloc[idx, 1:].values.astype(float).reshape((28, 28, 1))
        label = int(self.data.iloc[idx, 0])

        if self.transform:
            image = self.transform(image)

        return image, label

# Load the small MNIST dataset from CSV
mnist_df = pd.read_csv("small_mnist.csv")

# Split dataset into train and test sets
train_df, test_df = train_test_split(mnist_df, test_size=0.2, random_state=42)

# Define transforms to normalize the data
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,))
])

# Create custom datasets
train_dataset = CustomDataset(train_df, transform=transform)
test_dataset = CustomDataset(test_df, transform=transform)

# Create data loaders
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64)

# Define LeNet-5 model
class LeNet5(nn.Module):
    def __init__(self):
        super(LeNet5, self).__init__()
        self.conv1 = nn.Conv2d(1, 6, kernel_size=5)
        self.conv2 = nn.Conv2d(6, 16, kernel_size=5)
        self.fc1 = nn.Linear(16 * 4 * 4, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)
        self.relu = nn.ReLU()
        self.maxpool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.dropout = nn.Dropout(0.5)  # Dropout with probability 0.5

    def forward(self, x):
        x = self.maxpool(self.relu(self.conv1(x.float())))
        x = self.maxpool(self.relu(self.conv2(x)))
        x = x.view(-1, 16 * 4 * 4)
        x = self.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.relu(self.fc2(x))
        x = self.fc3(x)
        return x

# Initialize the model
model = LeNet5()

# Print total parameter count
total_params = sum(p.numel() for p in model.parameters())
print("Total parameters:", total_params)

# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Confidence Penalty (CP) regularization coefficient
cp_coefficient = 0.01  # You can adjust this value as needed

# Train the model
epochs = 100
train_losses = []  # To store train losses for calculating average later
for epoch in range(epochs):
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)

        # Confidence Penalty (CP) regularization
        confidence_penalty = -torch.mean(torch.sum(torch.softmax(output, dim=1) * torch.log_softmax(output, dim=1), dim=1))
        loss += cp_coefficient * confidence_penalty

        loss.backward()
        optimizer.step()
        if batch_idx % 100 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))
            if epoch > 0:  # Append loss only after epoch 0
                train_losses.append(loss.item())
    # Print loss every 10 epochs
    if epoch % 10 == 0:
        print('Epoch {}, Loss: {:.6f}'.format(epoch, loss.item()))

# Evaluate the model on test set
model.eval()
test_loss = 0
correct = 0
with torch.no_grad():
    for data, target in test_loader:
        output = model(data)
        test_loss += criterion(output, target).item()
        pred = output.argmax(dim=1, keepdim=True)
        correct += pred.eq(target.view_as(pred)).sum().item()

test_loss /= len(test_loader.dataset)
accuracy = 100. * correct / len(test_loader.dataset)

# Calculate average train loss (excluding epoch 0)
average_train_loss = sum(train_losses) / len(train_losses)

print('Test set: Average loss: {:.4f}, Accuracy: {}/{} ({:.2f}%)\n'.format(
    test_loss, correct, len(test_loader.dataset), accuracy))
print('Average train loss (excluding epoch 0): {:.4f}'.format(average_train_loss))


Total parameters: 44426
Epoch 0, Loss: 1.798444
Epoch 10, Loss: 0.312383
Epoch 20, Loss: 0.042039
Epoch 30, Loss: 0.013317
Epoch 40, Loss: 0.064621
Epoch 50, Loss: 0.014346
Epoch 60, Loss: 0.001703
Epoch 70, Loss: 0.005903
Epoch 80, Loss: 0.041113
Epoch 90, Loss: 0.000777
Test set: Average loss: 0.0069, Accuracy: 372/400 (93.00%)

Average train loss (excluding epoch 0): 0.1071


#CP= 0.01 + DoubleBack = 10

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
import torchvision.transforms as transforms
import torch.optim.lr_scheduler as lr_scheduler

# Custom Dataset Class
class CustomMNISTDataset(Dataset):
    def __init__(self, csv_file, transform=None):
        self.data = pd.read_csv(csv_file)
        self.transform = transform

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        image = self.data.iloc[idx, 1:].values.astype(float).reshape((28, 28))
        label = self.data.iloc[idx, 0]
        if self.transform:
            image = self.transform(image)
        return image, label

# Load Your Dataset
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,))
])

# Load the dataset
dataset = CustomMNISTDataset(csv_file='small_mnist.csv', transform=transform)

# Split dataset into training and testing sets (80% train, 20% test)
trainset, testset = train_test_split(dataset, test_size=0.2, random_state=42)

# Create data loaders for training and testing sets
trainloader = DataLoader(trainset, batch_size=64, shuffle=True)
testloader = DataLoader(testset, batch_size=64, shuffle=False)

# Define LeNet architecture
class LeNet(nn.Module):
    def __init__(self):
        super(LeNet, self).__init__()
        self.conv1 = nn.Conv2d(1, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 4 * 4, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        x = self.pool(torch.relu(self.conv1(x.float())))  # Cast input to float to match bias data type
        x = self.pool(torch.relu(self.conv2(x)))
        x = x.view(-1, 16 * 4 * 4)
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = self.fc3(x)
        return x

# Initialize LeNet model
net = LeNet()

# Define loss function
criterion = nn.CrossEntropyLoss()

# Define Adam optimizer with weight decay
optimizer = optim.Adam(net.parameters(), lr=0.001)

# Learning rate scheduler
scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=5, verbose=True, min_lr=1e-6)

# Define the parameters for double back and confidence penalty regularization
double_back_weight = 10
cp_weight = 0.01

# Target gradient for double back
target_grad = 50

# Training loop
for epoch in range(100):  # loop over the dataset multiple times
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        inputs, labels = data

        # Zero the parameter gradients
        optimizer.zero_grad()

        # Forward pass
        outputs = net(inputs)
        loss = criterion(outputs, labels)

        # Calculate gradient norm for double back regularization
        grad_norm = nn.utils.clip_grad_norm_(net.parameters(), max_norm=target_grad)

        # Backward pass with double back regularization
        loss.backward(retain_graph=True)

        # Apply double back regularization
        for param in net.parameters():
            param.grad *= double_back_weight / (grad_norm + 1e-8)

        # Confidence Penalty (CP) regularization
        confidence_penalty = -torch.mean(torch.sum(torch.softmax(outputs, dim=1) * torch.log_softmax(outputs, dim=1), dim=1))
        loss += cp_weight * confidence_penalty

        # Update parameters
        optimizer.step()

        # Print statistics
        running_loss += loss.item()

    # Print epoch and training loss
    print('[Epoch %d] Training Loss: %.3f' % (epoch + 1, running_loss / len(trainloader)))

print('Finished Training')

# Evaluate on test set after completing training
correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data
        outputs = net(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy on Test Set: %.2f %%' % (100 * correct / total))




[Epoch 1] Training Loss: 2.483
[Epoch 2] Training Loss: 0.509
[Epoch 3] Training Loss: 0.259
[Epoch 4] Training Loss: 0.152
[Epoch 5] Training Loss: 0.105
[Epoch 6] Training Loss: 0.060
[Epoch 7] Training Loss: 0.028
[Epoch 8] Training Loss: 0.016
[Epoch 9] Training Loss: 0.008
[Epoch 10] Training Loss: 0.005
[Epoch 11] Training Loss: 0.003
[Epoch 12] Training Loss: 0.002
[Epoch 13] Training Loss: 0.002
[Epoch 14] Training Loss: 0.002
[Epoch 15] Training Loss: 0.001
[Epoch 16] Training Loss: 0.001
[Epoch 17] Training Loss: 0.001
[Epoch 18] Training Loss: 0.001
[Epoch 19] Training Loss: 0.001
[Epoch 20] Training Loss: 0.001
[Epoch 21] Training Loss: 0.001
[Epoch 22] Training Loss: 0.001
[Epoch 23] Training Loss: 0.001
[Epoch 24] Training Loss: 0.001
[Epoch 25] Training Loss: 0.000
[Epoch 26] Training Loss: 0.000
[Epoch 27] Training Loss: 0.000
[Epoch 28] Training Loss: 0.000
[Epoch 29] Training Loss: 0.000
[Epoch 30] Training Loss: 0.000
[Epoch 31] Training Loss: 0.000
[Epoch 32] Traini

#Cp= = 0.01 + Spectral weight = 0.03

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
from torch.utils.data import Dataset, DataLoader
import pandas as pd
import torch.optim.lr_scheduler as lr_scheduler

# Custom Dataset Class
class CustomMNISTDataset(Dataset):
    def __init__(self, csv_file, transform=None):
        self.data = pd.read_csv(csv_file)
        self.transform = transform

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        image = self.data.iloc[idx, 1:].values.astype(float).reshape((28, 28))
        label = self.data.iloc[idx, 0]
        if self.transform:
            image = self.transform(image)
        return image, label

# Load your CSV dataset
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,))
])

# Load the dataset
dataset = CustomMNISTDataset(csv_file='small_mnist.csv', transform=transform)

# Split dataset into training and testing sets (80% train, 20% test)
train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size
trainset, testset = torch.utils.data.random_split(dataset, [train_size, test_size])

# Create data loaders for training and testing sets
trainloader = DataLoader(trainset, batch_size=64, shuffle=True)
testloader = DataLoader(testset, batch_size=64, shuffle=False)

# Define LeNet architecture
class LeNet(nn.Module):
    def __init__(self):
        super(LeNet, self).__init__()
        self.conv1 = nn.Conv2d(1, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 4 * 4, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        x = self.pool(torch.relu(self.conv1(x.float())))  # Cast input to float to match bias data type
        x = self.pool(torch.relu(self.conv2(x)))
        x = x.view(-1, 16 * 4 * 4)
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = self.fc3(x)
        return x

# Initialize LeNet model
net = LeNet()

# Define loss function
criterion = nn.CrossEntropyLoss()

# Define Adam optimizer with weight decay
optimizer = optim.Adam(net.parameters(), lr=0.001)

# Learning rate scheduler
scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=5, verbose=True, min_lr=1e-6)

# Target gradient for spectral regularization
target_grad = 50

# Apply spectral normalization to the linear layers
net.fc1 = nn.utils.spectral_norm(net.fc1)
net.fc2 = nn.utils.spectral_norm(net.fc2)
net.fc3 = nn.utils.spectral_norm(net.fc3)

# Training loop
for epoch in range(10):  # loop over the dataset multiple times
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        inputs, labels = data

        # Zero the parameter gradients
        optimizer.zero_grad()

        # Forward pass
        outputs = net(inputs)
        loss = criterion(outputs, labels)

        # Confidence Penalty (CP) regularization
        confidence_penalty = -torch.mean(torch.sum(torch.softmax(outputs, dim=1) * torch.log_softmax(outputs, dim=1), dim=1))
        loss += 0.01 * confidence_penalty

        # Spectral regularization
        spectreg_loss = 0
        for name, module in net.named_modules():
            if isinstance(module, nn.Linear):
                # Compute the spectral regularization loss
                spectreg_loss += torch.norm(module.weight_u, p=2)

        # Add the spectral regularization loss to the main loss
        loss += 0.03 * spectreg_loss

        # Backward pass
        loss.backward()

        # Get the gradient norm and adjust learning rate
        grad_norm = nn.utils.clip_grad_norm_(net.parameters(), max_norm=target_grad)
        if grad_norm > target_grad:
            optimizer.step()
            scheduler.step(loss)
        else:
            optimizer.step()

        # Print statistics
        running_loss += loss.item()

    # Print epoch and training loss
    print('[Epoch %d] Training Loss: %.3f' % (epoch + 1, running_loss / len(trainloader)))

print('Finished Training')

# Evaluate on test set after completing training
correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data
        outputs = net(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy on Test Set: %.2f %%' % (100 * correct / total))




[Epoch 1] Training Loss: 4.100
[Epoch 2] Training Loss: 0.509
[Epoch 3] Training Loss: 0.303
[Epoch 4] Training Loss: 0.201
[Epoch 5] Training Loss: 0.147
[Epoch 6] Training Loss: 0.112
[Epoch 7] Training Loss: 0.104
[Epoch 8] Training Loss: 0.099
[Epoch 9] Training Loss: 0.095
[Epoch 10] Training Loss: 0.092
Finished Training
Accuracy on Test Set: 95.50 %


#cifar-10

#Baseline Model

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.utils.data import DataLoader, Subset
import torchvision.models as models

# Transformations for the dataset
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))  # CIFAR-10 mean and std for each channel
])

# Load CIFAR-10 training and testing datasets
trainset = datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
testset = datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)

# Function to get indices of limited samples per class
def get_limited_indices(dataset, limit_per_class):
    indices = []
    count_per_class = [0] * 10
    for i in range(len(dataset)):
        _, label = dataset[i]
        if count_per_class[label] < limit_per_class:
            indices.append(i)
            count_per_class[label] += 1
        if all(count >= limit_per_class for count in count_per_class):
            break
    return indices

# Choose only 200 samples per class for training
train_indices = get_limited_indices(trainset, 200)
trainset_limited = Subset(trainset, train_indices)

# Create data loaders for training and testing sets
trainloader = DataLoader(trainset_limited, batch_size=64, shuffle=True)
testloader = DataLoader(testset, batch_size=64, shuffle=False)

# Define ResNet-18 model
class ResNet18(nn.Module):
    def __init__(self):
        super(ResNet18, self).__init__()
        self.resnet = models.resnet18(pretrained=False, num_classes=10)  # CIFAR-10 has 10 classes

    def forward(self, x):
        return self.resnet(x)

# Initialize ResNet-18 model
model = ResNet18()

# Define loss function
criterion = nn.CrossEntropyLoss()

# Define SGD optimizer with weight decay
optimizer = optim.SGD(model.parameters(), lr=0.1, momentum=0.9, weight_decay=0.003)

# Learning rate scheduler
scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=[150, 250], gamma=0.1)

# Training loop
for epoch in range(33):  # Adjust the number of epochs as needed
    model.train()
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        inputs, labels = data

        # Zero the parameter gradients
        optimizer.zero_grad()

        # Forward pass
        outputs = model(inputs)
        loss = criterion(outputs, labels)

        # Backward pass
        loss.backward()
        optimizer.step()

        # Print statistics
        running_loss += loss.item()
    scheduler.step()

    # Print epoch and training loss
    print('[Epoch %d] Training Loss: %.3f' % (epoch + 1, running_loss / len(trainloader)))

print('Finished Training')

# Evaluate on test set after completing training
correct = 0
total = 0
model.eval()
with torch.no_grad():
    for data in testloader:
        images, labels = data
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy on Test Set: %.2f %%' % (100 * correct / total))


Files already downloaded and verified
Files already downloaded and verified
[Epoch 1] Training Loss: 5.183
[Epoch 2] Training Loss: 2.935
[Epoch 3] Training Loss: 2.302
[Epoch 4] Training Loss: 1.964
[Epoch 5] Training Loss: 1.911
[Epoch 6] Training Loss: 1.793
[Epoch 7] Training Loss: 1.693
[Epoch 8] Training Loss: 1.671
[Epoch 9] Training Loss: 1.654
[Epoch 10] Training Loss: 1.597
[Epoch 11] Training Loss: 1.565
[Epoch 12] Training Loss: 1.495
[Epoch 13] Training Loss: 1.456
[Epoch 14] Training Loss: 1.384
[Epoch 15] Training Loss: 1.304
[Epoch 16] Training Loss: 1.330
[Epoch 17] Training Loss: 1.352
[Epoch 18] Training Loss: 1.171
[Epoch 19] Training Loss: 1.191
[Epoch 20] Training Loss: 1.157
[Epoch 21] Training Loss: 1.054
[Epoch 22] Training Loss: 1.060
[Epoch 23] Training Loss: 1.015
[Epoch 24] Training Loss: 0.877
[Epoch 25] Training Loss: 0.999
[Epoch 26] Training Loss: 0.866
[Epoch 27] Training Loss: 0.894
[Epoch 28] Training Loss: 0.755
[Epoch 29] Training Loss: 1.002
[Epoc

#Double back optimal weight = 1

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.utils.data import DataLoader, Subset
import torchvision.models as models
import torch.optim.lr_scheduler as lr_scheduler
import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
import torchvision.transforms as transforms

# Transformations for the dataset
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))  # CIFAR-10 mean and std for each channel
])

# Load CIFAR-10 training and testing datasets
trainset = datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
testset = datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)

# Function to get indices of limited samples per class
def get_limited_indices(dataset, limit_per_class):
    indices = []
    count_per_class = [0] * 10
    for i in range(len(dataset)):
        _, label = dataset[i]
        if count_per_class[label] < limit_per_class:
            indices.append(i)
            count_per_class[label] += 1
        if all(count >= limit_per_class for count in count_per_class):
            break
    return indices

# Choose only 200 samples per class for training
train_indices = get_limited_indices(trainset, 200)
trainset_limited = Subset(trainset, train_indices)

# Create data loaders for training and testing sets
trainloader = DataLoader(trainset_limited, batch_size=64, shuffle=True)
testloader = DataLoader(testset, batch_size=64, shuffle=False)

# Define ResNet-18 model
class ResNet18(nn.Module):
    def __init__(self):
        super(ResNet18, self).__init__()
        self.resnet = models.resnet18(pretrained=False, num_classes=10)  # CIFAR-10 has 10 classes

    def forward(self, x):
        return self.resnet(x)

# Initialize ResNet-18 model
model = ResNet18()

# Define loss function
criterion = nn.CrossEntropyLoss()

# Define SGD optimizer with weight decay
optimizer = optim.SGD(model.parameters(), lr=0.1, momentum=0.9, weight_decay=0.003)

# Learning rate scheduler
scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=[150, 250], gamma=0.1)

# Target weight for double backpropagation
target_grad = 1

# Training loop
for epoch in range(33):  # Adjust the number of epochs as needed
    model.train()
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        inputs, labels = data
        inputs.requires_grad_(True)  # Set requires_grad=True for inputs

        # Zero the parameter gradients
        optimizer.zero_grad()

        # Forward pass
        outputs = model(inputs)
        loss = criterion(outputs, labels)

        # Backward pass
        loss.backward()

        grad_norm = nn.utils.clip_grad_norm_(model.parameters(), max_norm=target_grad)
        if grad_norm > target_grad:
            optimizer.step()
            scheduler.step(loss)

        else:
          optimizer.step()

        # Print statistics
        running_loss += loss.item()


    # Print epoch and training loss
    print('[Epoch %d] Training Loss: %.3f' % (epoch + 1, running_loss / len(trainloader)))

print('Finished Training')

# Evaluate on test set after completing training
correct = 0
total = 0
model.eval()
with torch.no_grad():
    for data in testloader:
        images, labels = data
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy on Test Set: %.2f %%' % (100 * correct / total))


Files already downloaded and verified
Files already downloaded and verified
[Epoch 1] Training Loss: 2.195
[Epoch 2] Training Loss: 1.887
[Epoch 3] Training Loss: 1.799
[Epoch 4] Training Loss: 1.547
[Epoch 5] Training Loss: 1.362
[Epoch 6] Training Loss: 1.252
[Epoch 7] Training Loss: 1.117
[Epoch 8] Training Loss: 1.019
[Epoch 9] Training Loss: 0.894
[Epoch 10] Training Loss: 0.838
[Epoch 11] Training Loss: 0.664
[Epoch 12] Training Loss: 0.696
[Epoch 13] Training Loss: 0.631
[Epoch 14] Training Loss: 0.620
[Epoch 15] Training Loss: 0.666
[Epoch 16] Training Loss: 0.580
[Epoch 17] Training Loss: 0.547
[Epoch 18] Training Loss: 0.533
[Epoch 19] Training Loss: 0.500
[Epoch 20] Training Loss: 0.523
[Epoch 21] Training Loss: 0.487
[Epoch 22] Training Loss: 0.507
[Epoch 23] Training Loss: 0.462
[Epoch 24] Training Loss: 0.466
[Epoch 25] Training Loss: 0.458
[Epoch 26] Training Loss: 0.409
[Epoch 27] Training Loss: 0.484
[Epoch 28] Training Loss: 0.507
[Epoch 29] Training Loss: 0.443
[Epoc

#spectral = 0.03

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.utils.data import DataLoader, Subset
import torchvision.models as models

# Transformations for the dataset
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))  # CIFAR-10 mean and std for each channel
])

# Load CIFAR-10 training and testing datasets
trainset = datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
testset = datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)

# Function to get indices of limited samples per class
def get_limited_indices(dataset, limit_per_class):
    indices = []
    count_per_class = [0] * 10
    for i in range(len(dataset)):
        _, label = dataset[i]
        if count_per_class[label] < limit_per_class:
            indices.append(i)
            count_per_class[label] += 1
        if all(count >= limit_per_class for count in count_per_class):
            break
    return indices

# Choose only 200 samples per class for training
train_indices = get_limited_indices(trainset, 200)
trainset_limited = Subset(trainset, train_indices)

# Create data loaders for training and testing sets
trainloader = DataLoader(trainset_limited, batch_size=64, shuffle=True)
testloader = DataLoader(testset, batch_size=64, shuffle=False)

# Define ResNet-18 model
class ResNet18(nn.Module):
    def __init__(self):
        super(ResNet18, self).__init__()
        self.resnet = models.resnet18(pretrained=False, num_classes=10)  # CIFAR-10 has 10 classes

    def forward(self, x):
        return self.resnet(x)

# Initialize ResNet-18 model
model = ResNet18()

# Define loss function
criterion = nn.CrossEntropyLoss()

# Define SGD optimizer with weight decay
optimizer = optim.SGD(model.parameters(), lr=0.1, momentum=0.9, weight_decay=0.003)

# Learning rate scheduler
scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=[150, 250], gamma=0.1)

# Target weight for double backpropagation
target_grad = 1

# Spectral regularization weight
spectral_weight = 0.03

# Training loop
for epoch in range(33):  # Adjust the number of epochs as needed
    model.train()
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        inputs, labels = data
        inputs.requires_grad_(True)  # Set requires_grad=True for inputs

        # Zero the parameter gradients
        optimizer.zero_grad()

        # Forward pass
        outputs = model(inputs)
        loss = criterion(outputs, labels)

        # Backward pass
        loss.backward()

        # Compute spectral regularization loss
        spectreg_loss = 0
        for name, module in model.named_modules():
            if isinstance(module, nn.Conv2d) or isinstance(module, nn.Linear):
                weight = module.weight.view(module.weight.size(0), -1)
                u, _, _ = torch.svd(weight)
                spectreg_loss += torch.norm(u, p=2)

        # Add spectral regularization loss to the main loss
        loss += spectreg_loss * spectral_weight

        grad_norm = nn.utils.clip_grad_norm_(model.parameters(), max_norm=target_grad)
        if grad_norm > target_grad:
            optimizer.step()
            scheduler.step(loss)

        else:
          optimizer.step()

        # Print statistics
        running_loss += loss.item()

    # Print epoch and training loss
    print('[Epoch %d] Training Loss: %.3f' % (epoch + 1, running_loss / len(trainloader)))

print('Finished Training')

# Evaluate on test set after completing training
correct = 0
total = 0
model.eval()
with torch.no_grad():
    for data in testloader:
        images, labels = data
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy on Test Set: %.2f %%' % (100 * correct / total))


Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


100%|██████████| 170498071/170498071 [00:02<00:00, 79272327.65it/s]


Extracting ./data/cifar-10-python.tar.gz to ./data
Files already downloaded and verified




[Epoch 1] Training Loss: 10.542
[Epoch 2] Training Loss: 10.285
[Epoch 3] Training Loss: 10.150
[Epoch 4] Training Loss: 9.963
[Epoch 5] Training Loss: 9.756
[Epoch 6] Training Loss: 9.622
[Epoch 7] Training Loss: 9.455
[Epoch 8] Training Loss: 9.356
[Epoch 9] Training Loss: 9.231
[Epoch 10] Training Loss: 9.162
[Epoch 11] Training Loss: 9.070
[Epoch 12] Training Loss: 9.073
[Epoch 13] Training Loss: 9.032
[Epoch 14] Training Loss: 8.992
[Epoch 15] Training Loss: 8.940
[Epoch 16] Training Loss: 8.909
[Epoch 17] Training Loss: 8.881
[Epoch 18] Training Loss: 8.798
[Epoch 19] Training Loss: 8.866
[Epoch 20] Training Loss: 8.842
[Epoch 21] Training Loss: 8.830
[Epoch 22] Training Loss: 8.807
[Epoch 23] Training Loss: 8.816
[Epoch 24] Training Loss: 8.758
[Epoch 25] Training Loss: 8.798
[Epoch 26] Training Loss: 8.861
[Epoch 27] Training Loss: 8.745
[Epoch 28] Training Loss: 8.852
[Epoch 29] Training Loss: 8.753
[Epoch 30] Training Loss: 8.761
[Epoch 31] Training Loss: 8.769
[Epoch 32] Tra

#Cp with optimal weight = 0.003

In [None]:
# import torch
# import torch.nn as nn
# import torch.optim as optim
# import torchvision.transforms as transforms
# import torchvision.datasets as datasets
# from torch.utils.data import DataLoader, Subset
# import torchvision.models as models

# # Transformations for the dataset
# transform = transforms.Compose([
#     transforms.ToTensor(),
#     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))  # CIFAR-10 mean and std for each channel
# ])

# # Load CIFAR-10 training and testing datasets
# trainset = datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
# testset = datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)

# Function to get indices of limited samples per class
def get_limited_indices(dataset, limit_per_class):
    indices = []
    count_per_class = [0] * 10
    for i in range(len(dataset)):
        _, label = dataset[i]
        if count_per_class[label] < limit_per_class:
            indices.append(i)
            count_per_class[label] += 1
        if all(count >= limit_per_class for count in count_per_class):
            break
    return indices

# Choose only 200 samples per class for training
train_indices = get_limited_indices(trainset, 200)
trainset_limited = Subset(trainset, train_indices)

# Create data loaders for training and testing sets
trainloader = DataLoader(trainset_limited, batch_size=64, shuffle=True)
testloader = DataLoader(testset, batch_size=64, shuffle=False)

# Define ResNet-18 model
class ResNet18(nn.Module):
    def __init__(self):
        super(ResNet18, self).__init__()
        self.resnet = models.resnet18(pretrained=False, num_classes=10)  # CIFAR-10 has 10 classes

    def forward(self, x):
        return self.resnet(x)

# Initialize ResNet-18 model
model = ResNet18()

# Define loss function with CP regularization
class CPRegularizedLoss(nn.Module):
    def __init__(self, base_criterion, confidence_penalty_weight):
        super(CPRegularizedLoss, self).__init__()
        self.base_criterion = base_criterion
        self.confidence_penalty_weight = confidence_penalty_weight

    def forward(self, outputs, labels):
        base_loss = self.base_criterion(outputs, labels)
        softmax_outputs = nn.functional.softmax(outputs, dim=1)
        confidence_penalty = -torch.mean(torch.sum(softmax_outputs * torch.log(softmax_outputs + 1e-10), dim=1))
        return base_loss + self.confidence_penalty_weight * confidence_penalty

criterion = CPRegularizedLoss(nn.CrossEntropyLoss(), confidence_penalty_weight=0.003)

# Define SGD optimizer with weight decay
optimizer = optim.SGD(model.parameters(), lr=0.1, momentum=0.9, weight_decay=0.003)

# Learning rate scheduler
scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=[150, 250], gamma=0.1)

# Training loop
for epoch in range(33):  # Adjust the number of epochs as needed
    model.train()
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        inputs, labels = data

        # Zero the parameter gradients
        optimizer.zero_grad()

        # Forward pass
        outputs = model(inputs)
        loss = criterion(outputs, labels)

        # Backward pass
        loss.backward()
        optimizer.step()

        # Print statistics
        running_loss += loss.item()
    scheduler.step()

    # Print epoch and training loss
    print('[Epoch %d] Training Loss: %.3f' % (epoch + 1, running_loss / len(trainloader)))

print('Finished Training')

# Evaluate on test set after completing training
correct = 0
total = 0
model.eval()
with torch.no_grad():
    for data in testloader:
        images, labels = data
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy on Test Set: %.2f %%' % (100 * correct / total))


[Epoch 1] Training Loss: 4.194
[Epoch 2] Training Loss: 3.026
[Epoch 3] Training Loss: 2.197
[Epoch 4] Training Loss: 1.905
[Epoch 5] Training Loss: 1.853
[Epoch 6] Training Loss: 1.739
[Epoch 7] Training Loss: 1.628
[Epoch 8] Training Loss: 1.610
[Epoch 9] Training Loss: 1.599
[Epoch 10] Training Loss: 1.578
[Epoch 11] Training Loss: 1.543
[Epoch 12] Training Loss: 1.412
[Epoch 13] Training Loss: 1.335
[Epoch 14] Training Loss: 1.314
[Epoch 15] Training Loss: 1.303
[Epoch 16] Training Loss: 1.346
[Epoch 17] Training Loss: 1.156
[Epoch 18] Training Loss: 1.105
[Epoch 19] Training Loss: 1.037
[Epoch 20] Training Loss: 1.056
[Epoch 21] Training Loss: 1.002
[Epoch 22] Training Loss: 0.920
[Epoch 23] Training Loss: 0.887
[Epoch 24] Training Loss: 0.800
[Epoch 25] Training Loss: 0.792
[Epoch 26] Training Loss: 0.766
[Epoch 27] Training Loss: 0.763
[Epoch 28] Training Loss: 0.668
[Epoch 29] Training Loss: 0.848
[Epoch 30] Training Loss: 0.702
[Epoch 31] Training Loss: 0.676
[Epoch 32] Traini

# cp + Double back used

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.utils.data import DataLoader, Subset
import torchvision.models as models

# Transformations for the dataset
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))  # CIFAR-10 mean and std for each channel
])

# Load CIFAR-10 training and testing datasets
trainset = datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
testset = datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)

# Function to get indices of limited samples per class
def get_limited_indices(dataset, limit_per_class):
    indices = []
    count_per_class = [0] * 10
    for i in range(len(dataset)):
        _, label = dataset[i]
        if count_per_class[label] < limit_per_class:
            indices.append(i)
            count_per_class[label] += 1
        if all(count >= limit_per_class for count in count_per_class):
            break
    return indices

# Choose only 200 samples per class for training
train_indices = get_limited_indices(trainset, 200)
trainset_limited = Subset(trainset, train_indices)

# Create data loaders for training and testing sets
trainloader = DataLoader(trainset_limited, batch_size=64, shuffle=True)
testloader = DataLoader(testset, batch_size=64, shuffle=False)

# Define ResNet-18 model
class ResNet18(nn.Module):
    def __init__(self):
        super(ResNet18, self).__init__()
        self.resnet = models.resnet18(pretrained=False, num_classes=10)  # CIFAR-10 has 10 classes

    def forward(self, x):
        return self.resnet(x)

# Initialize ResNet-18 model
model = ResNet18()

# Define loss function
criterion = nn.CrossEntropyLoss()

# Define SGD optimizer with weight decay
optimizer = optim.SGD(model.parameters(), lr=0.1, momentum=0.9, weight_decay=0.003)

# Learning rate scheduler
scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=[150, 250], gamma=0.1)

# Confidence Penalty weight
cp_weight = 0.003

# Target gradient for clipping
target_grad = 1

# Training loop
for epoch in range(33):  # Adjust the number of epochs as needed
    model.train()
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        inputs, labels = data
        inputs.requires_grad_(True)  # Set requires_grad=True for inputs

        # Zero the parameter gradients
        optimizer.zero_grad()

        # Forward pass
        outputs = model(inputs)
        loss = criterion(outputs, labels)

        # Confidence Penalty term
        penalty = cp_weight * torch.mean((torch.softmax(outputs, dim=1) - torch.eye(10)[labels]) ** 2)

        # Add penalty to the loss
        loss += penalty

        # Backward pass
        loss.backward()

        # Clip gradients
        nn.utils.clip_grad_norm_(model.parameters(), max_norm=target_grad)

        optimizer.step()

        # Print statistics
        running_loss += loss.item()

    # Print epoch and training loss
    print('[Epoch %d] Training Loss: %.3f' % (epoch + 1, running_loss / len(trainloader)))

print('Finished Training')

# Evaluate on test set after completing training
correct = 0
total = 0
model.eval()
with torch.no_grad():
    for data in testloader:
        images, labels = data
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy on Test Set: %.2f %%' % (100 * correct / total))


[Epoch 1] Training Loss: 4.757
[Epoch 2] Training Loss: 2.919
[Epoch 3] Training Loss: 2.392
[Epoch 4] Training Loss: 2.026
[Epoch 5] Training Loss: 1.836
[Epoch 6] Training Loss: 1.717
[Epoch 7] Training Loss: 1.769
[Epoch 8] Training Loss: 1.651
[Epoch 9] Training Loss: 1.560
[Epoch 10] Training Loss: 1.474
[Epoch 11] Training Loss: 1.484
[Epoch 12] Training Loss: 1.416
[Epoch 13] Training Loss: 1.342
[Epoch 14] Training Loss: 1.248
[Epoch 15] Training Loss: 1.286
[Epoch 16] Training Loss: 1.183
[Epoch 17] Training Loss: 1.080
[Epoch 18] Training Loss: 1.125
[Epoch 19] Training Loss: 1.035
[Epoch 20] Training Loss: 0.993
[Epoch 21] Training Loss: 0.988
[Epoch 22] Training Loss: 0.863
[Epoch 23] Training Loss: 0.894
[Epoch 24] Training Loss: 0.828
[Epoch 25] Training Loss: 0.811
[Epoch 26] Training Loss: 0.766
[Epoch 27] Training Loss: 0.644
[Epoch 28] Training Loss: 0.690
[Epoch 29] Training Loss: 0.641
[Epoch 30] Training Loss: 0.650
[Epoch 31] Training Loss: 0.710
[Epoch 32] Traini

# cp +spectreg

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.utils.data import DataLoader, Subset
import torchvision.models as models

# Transformations for the dataset
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))  # CIFAR-10 mean and std for each channel
])

# Load CIFAR-10 training and testing datasets
trainset = datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
testset = datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)

# Function to get indices of limited samples per class
def get_limited_indices(dataset, limit_per_class):
    indices = []
    count_per_class = [0] * 10
    for i in range(len(dataset)):
        _, label = dataset[i]
        if count_per_class[label] < limit_per_class:
            indices.append(i)
            count_per_class[label] += 1
        if all(count >= limit_per_class for count in count_per_class):
            break
    return indices

# Choose only 200 samples per class for training
train_indices = get_limited_indices(trainset, 200)
trainset_limited = Subset(trainset, train_indices)

# Create data loaders for training and testing sets
trainloader = DataLoader(trainset_limited, batch_size=64, shuffle=True)
testloader = DataLoader(testset, batch_size=64, shuffle=False)

# Define ResNet-18 model
class ResNet18(nn.Module):
    def __init__(self):
        super(ResNet18, self).__init__()
        self.resnet = models.resnet18(pretrained=False, num_classes=10)  # CIFAR-10 has 10 classes

    def forward(self, x):
        return self.resnet(x)

# Initialize ResNet-18 model
model = ResNet18()

# Define loss function
criterion = nn.CrossEntropyLoss()

# Define SGD optimizer with weight decay
optimizer = optim.SGD(model.parameters(), lr=0.1, momentum=0.9, weight_decay=0.003)

# Learning rate scheduler
scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=[150, 250], gamma=0.1)

# Target weight for double backpropagation
target_grad = 1

# Confidence Penalty weight
cp_weight = 0.003

# Spectral regularization weight
spectral_weight = 0.03

# Training loop
for epoch in range(33):  # Adjust the number of epochs as needed
    model.train()
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        inputs, labels = data
        inputs.requires_grad_(True)  # Set requires_grad=True for inputs

        # Zero the parameter gradients
        optimizer.zero_grad()

        # Forward pass
        outputs = model(inputs)
        loss = criterion(outputs, labels)

        # Compute spectral regularization loss
        spectreg_loss = 0
        for name, module in model.named_modules():
            if isinstance(module, nn.Conv2d) or isinstance(module, nn.Linear):
                weight = module.weight.view(module.weight.size(0), -1)
                u, _, _ = torch.svd(weight)
                spectreg_loss += torch.norm(u, p=2)

        # Add spectral regularization loss to the main loss
        loss += spectreg_loss * spectral_weight

        # Confidence Penalty term
        penalty = cp_weight * torch.mean((torch.softmax(outputs, dim=1) - torch.eye(10)[labels]) ** 2)


        # Add penalty to the loss
        loss += penalty

        # Backward pass
        loss.backward()

        grad_norm = nn.utils.clip_grad_norm_(model.parameters(), max_norm=target_grad)
        if grad_norm > target_grad:
            optimizer.step()
            scheduler.step(loss)

        else:
            optimizer.step()

        # Print statistics
        running_loss += loss.item()

    # Print epoch and training loss
    print('[Epoch %d] Training Loss: %.3f' % (epoch + 1, running_loss / len(trainloader)))

print('Finished Training')

# Evaluate on test set after completing training
correct = 0
total = 0
model.eval()
with torch.no_grad():
    for data in testloader:
        images, labels = data
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy on Test Set: %.2f %%' % (100 * correct / total))


Files already downloaded and verified
Files already downloaded and verified
[Epoch 1] Training Loss: 10.570
[Epoch 2] Training Loss: 10.284
[Epoch 3] Training Loss: 10.189
[Epoch 4] Training Loss: 10.040
[Epoch 5] Training Loss: 9.822
[Epoch 6] Training Loss: 9.709
[Epoch 7] Training Loss: 9.535
[Epoch 8] Training Loss: 9.323
[Epoch 9] Training Loss: 9.286
[Epoch 10] Training Loss: 9.193
[Epoch 11] Training Loss: 9.136
[Epoch 12] Training Loss: 9.059
[Epoch 13] Training Loss: 9.057
[Epoch 14] Training Loss: 8.980
[Epoch 15] Training Loss: 8.959
[Epoch 16] Training Loss: 8.910
[Epoch 17] Training Loss: 8.941
[Epoch 18] Training Loss: 8.876
[Epoch 19] Training Loss: 8.864
[Epoch 20] Training Loss: 8.808
[Epoch 21] Training Loss: 8.855
[Epoch 22] Training Loss: 8.798
[Epoch 23] Training Loss: 8.802
[Epoch 24] Training Loss: 8.782
[Epoch 25] Training Loss: 8.821
[Epoch 26] Training Loss: 8.821
[Epoch 27] Training Loss: 8.766
[Epoch 28] Training Loss: 8.826
[Epoch 29] Training Loss: 8.803
[

#CIPHER-100

#base line

In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.utils.data import DataLoader, Subset
import torchvision.models as models

# Transformations for the dataset
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))  # CIFAR-100 mean and std for each channel
])

# Load CIFAR-100 training and testing datasets
trainset = datasets.CIFAR100(root='./data', train=True, download=True, transform=transform)
testset = datasets.CIFAR100(root='./data', train=False, download=True, transform=transform)

# Function to get indices of limited samples per class
def get_limited_indices(dataset, limit_per_class):
    indices = []
    count_per_class = [0] * 100  # Adjusted for CIFAR-100 which has 100 classes
    for i in range(len(dataset)):
        _, label = dataset[i]
        if count_per_class[label] < limit_per_class:
            indices.append(i)
            count_per_class[label] += 1
        if all(count >= limit_per_class for count in count_per_class):
            break
    return indices

# Choose only 20 samples per class for training
train_indices = get_limited_indices(trainset, 20)
trainset_limited = Subset(trainset, train_indices)

# Create data loaders for training and testing sets
trainloader = DataLoader(trainset_limited, batch_size=64, shuffle=True)

# Choose only 1000 samples for testing
test_indices = range(1000)
testset_limited = Subset(testset, test_indices)
testloader = DataLoader(testset_limited, batch_size=64, shuffle=False)

# Define ResNet-18 model
class ResNet18(nn.Module):
    def __init__(self):
        super(ResNet18, self).__init__()
        self.resnet = models.resnet18(pretrained=False, num_classes=100)  # CIFAR-100 has 100 classes

    def forward(self, x):
        return self.resnet(x)

# Initialize ResNet-18 model
model = ResNet18()

# Define loss function
criterion = nn.CrossEntropyLoss()

# Define SGD optimizer with weight decay
optimizer = optim.SGD(model.parameters(), lr=0.1, momentum=0.9, weight_decay=0.003)

# Learning rate scheduler
scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=[150, 250], gamma=0.1)

# Training loop
for epoch in range(33):  # Adjust the number of epochs as needed
    model.train()
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        inputs, labels = data

        # Zero the parameter gradients
        optimizer.zero_grad()

        # Forward pass
        outputs = model(inputs)
        loss = criterion(outputs, labels)

        # Backward pass
        loss.backward()
        optimizer.step()

        # Print statistics
        running_loss += loss.item()
    scheduler.step()

    # Print epoch and training loss
    print('[Epoch %d] Training Loss: %.3f' % (epoch + 1, running_loss / len(trainloader)))

print('Finished Training')

# Evaluate on test set after completing training
correct = 0
total = 0
model.eval()
with torch.no_grad():
    for data in testloader:
        images, labels = data
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy on Test Set: %.2f %%' % (100 * correct / total))


Files already downloaded and verified
Files already downloaded and verified
[Epoch 1] Training Loss: 5.577
[Epoch 2] Training Loss: 4.901
[Epoch 3] Training Loss: 4.421
[Epoch 4] Training Loss: 4.146
[Epoch 5] Training Loss: 4.008
[Epoch 6] Training Loss: 4.041
[Epoch 7] Training Loss: 3.848
[Epoch 8] Training Loss: 3.695
[Epoch 9] Training Loss: 3.605
[Epoch 10] Training Loss: 3.495
[Epoch 11] Training Loss: 3.367
[Epoch 12] Training Loss: 3.222
[Epoch 13] Training Loss: 3.058
[Epoch 14] Training Loss: 2.945
[Epoch 15] Training Loss: 2.793
[Epoch 16] Training Loss: 2.635
[Epoch 17] Training Loss: 2.462
[Epoch 18] Training Loss: 2.292
[Epoch 19] Training Loss: 2.259
[Epoch 20] Training Loss: 2.156
[Epoch 21] Training Loss: 1.959
[Epoch 22] Training Loss: 1.749
[Epoch 23] Training Loss: 1.709
[Epoch 24] Training Loss: 1.589
[Epoch 25] Training Loss: 1.401
[Epoch 26] Training Loss: 1.242
[Epoch 27] Training Loss: 1.338
[Epoch 28] Training Loss: 1.117
[Epoch 29] Training Loss: 0.959
[Epoc

#Doule back without cp

In [4]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.utils.data import DataLoader, Subset
import torchvision.models as models
import torch.optim.lr_scheduler as lr_scheduler

# Transformations for the dataset
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))  # CIFAR-100 mean and std for each channel
])

# Load CIFAR-100 training and testing datasets
trainset = datasets.CIFAR100(root='./data', train=True, download=True, transform=transform)
testset = datasets.CIFAR100(root='./data', train=False, download=True, transform=transform)

# Function to get indices of limited samples per class
def get_limited_indices(dataset, limit_per_class):
    indices = []
    count_per_class = [0] * 100
    for i in range(len(dataset)):
        _, label = dataset[i]
        if count_per_class[label] < limit_per_class:
            indices.append(i)
            count_per_class[label] += 1
        if all(count >= limit_per_class for count in count_per_class):
            break
    return indices

# Choose only 20 samples per class for training
train_indices = get_limited_indices(trainset, 20)
trainset_limited = Subset(trainset, train_indices)

# Create data loaders for training and testing sets
trainloader = DataLoader(trainset_limited, batch_size=64, shuffle=True)
test_indices = range(1000)  # Take only 1000 samples for testing
testset_limited = Subset(testset, test_indices)
testloader = DataLoader(testset_limited, batch_size=64, shuffle=False)

# Define ResNet-18 model
class ResNet18(nn.Module):
    def __init__(self):
        super(ResNet18, self).__init__()
        self.resnet = models.resnet18(pretrained=False, num_classes=100)  # CIFAR-100 has 100 classes

    def forward(self, x):
        return self.resnet(x)

# Initialize ResNet-18 model
model = ResNet18()

# Define loss function
criterion = nn.CrossEntropyLoss()

# Define SGD optimizer with weight decay
optimizer = optim.SGD(model.parameters(), lr=0.1, momentum=0.9)

# Learning rate scheduler
scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=[150, 250], gamma=0.1)

# Target weight for double backpropagation
target_grad = 0.003

# Training loop
for epoch in range(33):  # Adjust the number of epochs as needed
    model.train()
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        inputs, labels = data
        inputs.requires_grad_(True)  # Set requires_grad=True for inputs

        # Zero the parameter gradients
        optimizer.zero_grad()

        # Forward pass
        outputs = model(inputs)
        loss = criterion(outputs, labels)

        # Backward pass
        loss.backward()

        grad_norm = nn.utils.clip_grad_norm_(model.parameters(), max_norm=target_grad)
        if grad_norm > target_grad:
            optimizer.step()
            scheduler.step(loss)

        else:
          optimizer.step()

        # Print statistics
        running_loss += loss.item()

    # Print epoch and training loss
    print('[Epoch %d] Training Loss: %.3f' % (epoch + 1, running_loss / len(trainloader)))

print('Finished Training')

# Evaluate on test set after completing training
correct = 0
total = 0
model.eval()
with torch.no_grad():
    for data in testloader:
        images, labels = data
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy on Test Set: %.2f %%' % (100 * correct / total))


Files already downloaded and verified
Files already downloaded and verified
[Epoch 1] Training Loss: 4.875
[Epoch 2] Training Loss: 4.836
[Epoch 3] Training Loss: 4.799
[Epoch 4] Training Loss: 4.779
[Epoch 5] Training Loss: 4.740
[Epoch 6] Training Loss: 4.722
[Epoch 7] Training Loss: 4.698
[Epoch 8] Training Loss: 4.663
[Epoch 9] Training Loss: 4.642
[Epoch 10] Training Loss: 4.632
[Epoch 11] Training Loss: 4.610
[Epoch 12] Training Loss: 4.581
[Epoch 13] Training Loss: 4.582
[Epoch 14] Training Loss: 4.545
[Epoch 15] Training Loss: 4.516
[Epoch 16] Training Loss: 4.495
[Epoch 17] Training Loss: 4.489
[Epoch 18] Training Loss: 4.457
[Epoch 19] Training Loss: 4.430
[Epoch 20] Training Loss: 4.419
[Epoch 21] Training Loss: 4.414
[Epoch 22] Training Loss: 4.370
[Epoch 23] Training Loss: 4.372
[Epoch 24] Training Loss: 4.363
[Epoch 25] Training Loss: 4.324
[Epoch 26] Training Loss: 4.316
[Epoch 27] Training Loss: 4.293
[Epoch 28] Training Loss: 4.286
[Epoch 29] Training Loss: 4.256
[Epoc

#Spectreg without cp

In [6]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.utils.data import DataLoader, Subset
import torchvision.models as models
import torch.optim.lr_scheduler as lr_scheduler

# Transformations for the dataset
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))  # CIFAR-100 mean and std for each channel
])

# Load CIFAR-100 training and testing datasets
trainset = datasets.CIFAR100(root='./data', train=True, download=True, transform=transform)
testset = datasets.CIFAR100(root='./data', train=False, download=True, transform=transform)

# Function to get indices of limited samples per class
def get_limited_indices(dataset, limit_per_class):
    indices = []
    count_per_class = [0] * 100
    for i in range(len(dataset)):
        _, label = dataset[i]
        if count_per_class[label] < limit_per_class:
            indices.append(i)
            count_per_class[label] += 1
        if all(count >= limit_per_class for count in count_per_class):
            break
    return indices

# Choose only 20 samples per class for training
train_indices = get_limited_indices(trainset, 20)
trainset_limited = Subset(trainset, train_indices)

# Create data loaders for training and testing sets
trainloader = DataLoader(trainset_limited, batch_size=64, shuffle=True)
test_indices = range(1000)  # Take only 1000 samples for testing
testset_limited = Subset(testset, test_indices)
testloader = DataLoader(testset_limited, batch_size=64, shuffle=False)

# Define ResNet-18 model
class ResNet18(nn.Module):
    def __init__(self):
        super(ResNet18, self).__init__()
        self.resnet = models.resnet18(pretrained=False, num_classes=100)  # CIFAR-100 has 100 classes

    def forward(self, x):
        return self.resnet(x)

# Initialize ResNet-18 model
model = ResNet18()

# Define loss function
criterion = nn.CrossEntropyLoss()

# Define SGD optimizer with weight decay
optimizer = optim.SGD(model.parameters(), lr=0.1, momentum=0.9)

# Learning rate scheduler
scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=[150, 250], gamma=0.1)

# Spectral Regularization weight
spectreg_weight = 0.00003

# Training loop
for epoch in range(33):  # Adjust the number of epochs as needed
    model.train()
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        inputs, labels = data
        inputs.requires_grad_(True)  # Set requires_grad=True for inputs

        # Zero the parameter gradients
        optimizer.zero_grad()

        # Forward pass
        outputs = model(inputs)
        loss = criterion(outputs, labels)

        # Spectral Regularization
        spectral_loss = 0
        for name, param in model.named_parameters():
            if 'weight' in name:
                spectral_loss += torch.norm(param, p='fro')

        loss += spectreg_weight * spectral_loss

        # Backward pass
        loss.backward()

        grad_norm = nn.utils.clip_grad_norm_(model.parameters(), max_norm=target_grad)
        if grad_norm > target_grad:
            optimizer.step()
            scheduler.step(loss)

        else:
            optimizer.step()

        # Print statistics
        running_loss += loss.item()

    # Print epoch and training loss
    print('[Epoch %d] Training Loss: %.3f' % (epoch + 1, running_loss / len(trainloader)))

print('Finished Training')

# Evaluate on test set after completing training
correct = 0
total = 0
model.eval()
with torch.no_grad():
    for data in testloader:
        images, labels = data
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy on Test Set: %.2f %%' % (100 * correct / total))


Files already downloaded and verified
Files already downloaded and verified
[Epoch 1] Training Loss: 4.892
[Epoch 2] Training Loss: 4.867
[Epoch 3] Training Loss: 4.845
[Epoch 4] Training Loss: 4.791
[Epoch 5] Training Loss: 4.775
[Epoch 6] Training Loss: 4.755
[Epoch 7] Training Loss: 4.727
[Epoch 8] Training Loss: 4.703
[Epoch 9] Training Loss: 4.674
[Epoch 10] Training Loss: 4.647
[Epoch 11] Training Loss: 4.647
[Epoch 12] Training Loss: 4.621
[Epoch 13] Training Loss: 4.606
[Epoch 14] Training Loss: 4.584
[Epoch 15] Training Loss: 4.570
[Epoch 16] Training Loss: 4.526
[Epoch 17] Training Loss: 4.528
[Epoch 18] Training Loss: 4.497
[Epoch 19] Training Loss: 4.487
[Epoch 20] Training Loss: 4.454
[Epoch 21] Training Loss: 4.464
[Epoch 22] Training Loss: 4.433
[Epoch 23] Training Loss: 4.402
[Epoch 24] Training Loss: 4.401
[Epoch 25] Training Loss: 4.358
[Epoch 26] Training Loss: 4.346
[Epoch 27] Training Loss: 4.349
[Epoch 28] Training Loss: 4.324
[Epoch 29] Training Loss: 4.310
[Epoc

# implementation of CP = 0.00001 + double back =  0.003  = 0.00001

In [7]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.utils.data import DataLoader, Subset
import torchvision.models as models
import torch.optim.lr_scheduler as lr_scheduler

# Transformations for the dataset
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))  # CIFAR-100 mean and std for each channel
])

# Load CIFAR-100 training and testing datasets
trainset = datasets.CIFAR100(root='./data', train=True, download=True, transform=transform)
testset = datasets.CIFAR100(root='./data', train=False, download=True, transform=transform)

# Function to get indices of limited samples per class
def get_limited_indices(dataset, limit_per_class):
    indices = []
    count_per_class = [0] * 100
    for i in range(len(dataset)):
        _, label = dataset[i]
        if count_per_class[label] < limit_per_class:
            indices.append(i)
            count_per_class[label] += 1
        if all(count >= limit_per_class for count in count_per_class):
            break
    return indices

# Choose only 200 samples per class for training
train_indices = get_limited_indices(trainset, 20)
trainset_limited = Subset(trainset, train_indices)

# Create data loaders for training and testing sets
trainloader = DataLoader(trainset_limited, batch_size=64, shuffle=True)
test_indices = range(1000)  # Take only 1000 samples for testing
testset_limited = Subset(testset, test_indices)
testloader = DataLoader(testset_limited, batch_size=64, shuffle=False)

# Define ResNet-18 model
class ResNet18(nn.Module):
    def __init__(self):
        super(ResNet18, self).__init__()
        self.resnet = models.resnet18(pretrained=False, num_classes=100)  # CIFAR-100 has 100 classes

    def forward(self, x):
        return self.resnet(x)

# Initialize ResNet-18 model
model = ResNet18()

# Define loss function
criterion = nn.CrossEntropyLoss()

# Define SGD optimizer with weight decay
optimizer = optim.SGD(model.parameters(), lr=0.1, momentum=0.9)

# Learning rate scheduler
scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=[150, 250], gamma=0.1)

# Target weight for double backpropagation
target_grad = 0.003

# Confidence Penalty weight
cp_weight = 0.00001

# Training loop
for epoch in range(33):  # Adjust the number of epochs as needed
    model.train()
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        inputs, labels = data
        inputs.requires_grad_(True)  # Set requires_grad=True for inputs

        # Zero the parameter gradients
        optimizer.zero_grad()

        # Forward pass
        outputs = model(inputs)
        loss = criterion(outputs, labels)

        # Confidence Penalty
        outputs_softmax = nn.functional.softmax(outputs, dim=1)
        grad_outputs = torch.autograd.grad(outputs=outputs_softmax, inputs=inputs,
                                           grad_outputs=torch.ones_like(outputs_softmax),
                                           create_graph=True)[0]
        cp_loss = torch.mean(torch.norm(grad_outputs, p=2, dim=1))

        loss += cp_weight * cp_loss

        # Backward pass
        loss.backward()

        grad_norm = nn.utils.clip_grad_norm_(model.parameters(), max_norm=target_grad)
        if grad_norm > target_grad:
            optimizer.step()
            scheduler.step(loss)

        else:
            optimizer.step()

        # Print statistics
        running_loss += loss.item()

    # Print epoch and training loss
    print('[Epoch %d] Training Loss: %.3f' % (epoch + 1, running_loss / len(trainloader)))

print('Finished Training')

# Evaluate on test set after completing training
correct = 0
total = 0
model.eval()
with torch.no_grad():
    for data in testloader:
        images, labels = data
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy on Test Set: %.2f %%' % (100 * correct / total))


Files already downloaded and verified
Files already downloaded and verified
[Epoch 1] Training Loss: 4.877
[Epoch 2] Training Loss: 4.835
[Epoch 3] Training Loss: 4.829
[Epoch 4] Training Loss: 4.777
[Epoch 5] Training Loss: 4.772
[Epoch 6] Training Loss: 4.731
[Epoch 7] Training Loss: 4.706
[Epoch 8] Training Loss: 4.699
[Epoch 9] Training Loss: 4.660
[Epoch 10] Training Loss: 4.647
[Epoch 11] Training Loss: 4.630
[Epoch 12] Training Loss: 4.601
[Epoch 13] Training Loss: 4.578
[Epoch 14] Training Loss: 4.562
[Epoch 15] Training Loss: 4.542
[Epoch 16] Training Loss: 4.523
[Epoch 17] Training Loss: 4.493
[Epoch 18] Training Loss: 4.477
[Epoch 19] Training Loss: 4.461
[Epoch 20] Training Loss: 4.447
[Epoch 21] Training Loss: 4.427
[Epoch 22] Training Loss: 4.403
[Epoch 23] Training Loss: 4.392
[Epoch 24] Training Loss: 4.375
[Epoch 25] Training Loss: 4.368
[Epoch 26] Training Loss: 4.343
[Epoch 27] Training Loss: 4.324
[Epoch 28] Training Loss: 4.308
[Epoch 29] Training Loss: 4.297
[Epoc

# cp = 0.00001

In [8]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.utils.data import DataLoader, Subset
import torchvision.models as models

# Transformations for the dataset
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))  # CIFAR-100 mean and std for each channel
])

# Load CIFAR-100 training and testing datasets
trainset = datasets.CIFAR100(root='./data', train=True, download=True, transform=transform)
testset = datasets.CIFAR100(root='./data', train=False, download=True, transform=transform)

# Function to get indices of limited samples per class
def get_limited_indices(dataset, limit_per_class):
    indices = []
    count_per_class = [0] * 100
    for i in range(len(dataset)):
        _, label = dataset[i]
        if count_per_class[label] < limit_per_class:
            indices.append(i)
            count_per_class[label] += 1
        if all(count >= limit_per_class for count in count_per_class):
            break
    return indices

# Choose only 20 samples per class for training
train_indices = get_limited_indices(trainset, 20)
trainset_limited = Subset(trainset, train_indices)

# Create data loaders for training and testing sets
trainloader = DataLoader(trainset_limited, batch_size=64, shuffle=True)
test_indices = range(1000)  # Take only 1000 samples for testing
testset_limited = Subset(testset, test_indices)
testloader = DataLoader(testset_limited, batch_size=64, shuffle=False)

# Define ResNet-18 model
class ResNet18(nn.Module):
    def __init__(self):
        super(ResNet18, self).__init__()
        self.resnet = models.resnet18(pretrained=False, num_classes=100)  # CIFAR-100 has 100 classes

    def forward(self, x):
        return self.resnet(x)

# Initialize ResNet-18 model
model = ResNet18()

# Define loss function
criterion = nn.CrossEntropyLoss()

# Define SGD optimizer with weight decay
optimizer = optim.SGD(model.parameters(), lr=0.1, momentum=0.9)

# Learning rate scheduler
scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=[150, 250], gamma=0.1)

# Confidence Penalty weight
cp_weight = 0.00001

# Training loop
for epoch in range(33):  # Adjust the number of epochs as needed
    model.train()
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        inputs, labels = data
        inputs.requires_grad_(True)  # Set requires_grad=True for inputs

        # Zero the parameter gradients
        optimizer.zero_grad()

        # Forward pass
        outputs = model(inputs)
        loss = criterion(outputs, labels)

        # Confidence Penalty
        max_prob, _ = torch.max(nn.functional.softmax(outputs, dim=1), dim=1)
        cp_loss = torch.mean(max_prob)

        loss += cp_weight * cp_loss

        # Backward pass
        loss.backward()

        grad_norm = nn.utils.clip_grad_norm_(model.parameters(), max_norm=0.003)
        optimizer.step()
        scheduler.step(loss)

        # Print statistics
        running_loss += loss.item()

    # Print epoch and training loss
    print('[Epoch %d] Training Loss: %.3f' % (epoch + 1, running_loss / len(trainloader)))

print('Finished Training')

# Evaluate on test set after completing training
correct = 0
total = 0
model.eval()
with torch.no_grad():
    for data in testloader:
        images, labels = data
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy on Test Set: %.2f %%' % (100 * correct / total))


Files already downloaded and verified
Files already downloaded and verified
[Epoch 1] Training Loss: 4.880
[Epoch 2] Training Loss: 4.851
[Epoch 3] Training Loss: 4.819
[Epoch 4] Training Loss: 4.806
[Epoch 5] Training Loss: 4.772
[Epoch 6] Training Loss: 4.755
[Epoch 7] Training Loss: 4.714
[Epoch 8] Training Loss: 4.700
[Epoch 9] Training Loss: 4.682
[Epoch 10] Training Loss: 4.633
[Epoch 11] Training Loss: 4.631
[Epoch 12] Training Loss: 4.616
[Epoch 13] Training Loss: 4.591
[Epoch 14] Training Loss: 4.558
[Epoch 15] Training Loss: 4.546
[Epoch 16] Training Loss: 4.533
[Epoch 17] Training Loss: 4.510
[Epoch 18] Training Loss: 4.482
[Epoch 19] Training Loss: 4.472
[Epoch 20] Training Loss: 4.455
[Epoch 21] Training Loss: 4.430
[Epoch 22] Training Loss: 4.428
[Epoch 23] Training Loss: 4.397
[Epoch 24] Training Loss: 4.400
[Epoch 25] Training Loss: 4.366
[Epoch 26] Training Loss: 4.356
[Epoch 27] Training Loss: 4.327
[Epoch 28] Training Loss: 4.286
[Epoch 29] Training Loss: 4.290
[Epoc

#Spect Reg = 0.00003 + Cp = 0.00001

In [9]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.utils.data import DataLoader, Subset
import torchvision.models as models

# Transformations for the dataset
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))  # CIFAR-100 mean and std for each channel
])

# Load CIFAR-100 training and testing datasets
trainset = datasets.CIFAR100(root='./data', train=True, download=True, transform=transform)
testset = datasets.CIFAR100(root='./data', train=False, download=True, transform=transform)

# Function to get indices of limited samples per class
def get_limited_indices(dataset, limit_per_class):
    indices = []
    count_per_class = [0] * 100
    for i in range(len(dataset)):
        _, label = dataset[i]
        if count_per_class[label] < limit_per_class:
            indices.append(i)
            count_per_class[label] += 1
        if all(count >= limit_per_class for count in count_per_class):
            break
    return indices

# Choose only 200 samples per class for training
train_indices = get_limited_indices(trainset, 20)
trainset_limited = Subset(trainset, train_indices)

# Create data loaders for training and testing sets
trainloader = DataLoader(trainset_limited, batch_size=64, shuffle=True)
test_indices = range(1000)  # Take only 1000 samples for testing
testset_limited = Subset(testset, test_indices)
testloader = DataLoader(testset_limited, batch_size=64, shuffle=False)

# Define ResNet-18 model
class ResNet18(nn.Module):
    def __init__(self):
        super(ResNet18, self).__init__()
        self.resnet = models.resnet18(pretrained=False, num_classes=100)  # CIFAR-100 has 100 classes

    def forward(self, x):
        return self.resnet(x)

# Initialize ResNet-18 model
model = ResNet18()

# Define loss function
criterion = nn.CrossEntropyLoss()

# Define SGD optimizer with weight decay
optimizer = optim.SGD(model.parameters(), lr=0.1, momentum=0.9)

# Learning rate scheduler
scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=[150, 250], gamma=0.1)

# Spectral Regularization weight
spectreg_weight = 0.00003

# Confidence Penalty weight
cp_weight = 0.00001

# Training loop
for epoch in range(33):  # Adjust the number of epochs as needed
    model.train()
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        inputs, labels = data
        inputs.requires_grad_(True)  # Set requires_grad=True for inputs

        # Zero the parameter gradients
        optimizer.zero_grad()

        # Forward pass
        outputs = model(inputs)
        loss = criterion(outputs, labels)

        # Spectral Regularization
        spectral_loss = 0
        for name, param in model.named_parameters():
            if 'weight' in name:
                spectral_loss += torch.norm(param, p='fro')

        loss += spectreg_weight * spectral_loss

        # Confidence Penalty
        max_prob, _ = torch.max(nn.functional.softmax(outputs, dim=1), dim=1)
        cp_loss = torch.mean(max_prob)

        loss += cp_weight * cp_loss

        # Backward pass
        loss.backward()

        optimizer.step()
        scheduler.step(loss)

        # Print statistics
        running_loss += loss.item()

    # Print epoch and training loss
    print('[Epoch %d] Training Loss: %.3f' % (epoch + 1, running_loss / len(trainloader)))

print('Finished Training')

# Evaluate on test set after completing training
correct = 0
total = 0
model.eval()
with torch.no_grad():
    for data in testloader:
        images, labels = data
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy on Test Set: %.2f %%' % (100 * correct / total))


Files already downloaded and verified
Files already downloaded and verified
[Epoch 1] Training Loss: 5.893
[Epoch 2] Training Loss: 5.113
[Epoch 3] Training Loss: 4.489
[Epoch 4] Training Loss: 4.240
[Epoch 5] Training Loss: 4.082
[Epoch 6] Training Loss: 3.940
[Epoch 7] Training Loss: 3.798
[Epoch 8] Training Loss: 3.637
[Epoch 9] Training Loss: 3.517
[Epoch 10] Training Loss: 3.421
[Epoch 11] Training Loss: 3.246
[Epoch 12] Training Loss: 3.029
[Epoch 13] Training Loss: 2.847
[Epoch 14] Training Loss: 2.681
[Epoch 15] Training Loss: 2.436
[Epoch 16] Training Loss: 2.215
[Epoch 17] Training Loss: 1.903
[Epoch 18] Training Loss: 1.638
[Epoch 19] Training Loss: 1.447
[Epoch 20] Training Loss: 1.262
[Epoch 21] Training Loss: 1.129
[Epoch 22] Training Loss: 0.934
[Epoch 23] Training Loss: 1.084
[Epoch 24] Training Loss: 0.730
[Epoch 25] Training Loss: 0.522
[Epoch 26] Training Loss: 0.451
[Epoch 27] Training Loss: 0.312
[Epoch 28] Training Loss: 0.249
[Epoch 29] Training Loss: 0.240
[Epoc