In [1]:
!pip install torch torchvision


Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m23.7/23.7 MB[0m [31m30.1 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting nvidia-cuda-runtime-cu12==12.1.105 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (823 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m823.6/823.6 kB[0m [31m11.6 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting nvidia-cuda-cupti-cu12==12.1.105 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (14.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m14.1/14.1 MB[0m [31m48.4 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting nvidia-cudnn-cu12==8.9.2.26 (from torch)
  Downloading nvidia_cudnn_cu12-8.9.2.26-py3-none-manylinux1_x86_64.whl (731.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

# on complete data set baseline model without weight decay

In [5]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, random_split

# Define transforms to normalize the data
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,))
])

# Load the MNIST dataset
train_dataset = datasets.MNIST(root='./data', train=True, download=True, transform=transform)
test_dataset = datasets.MNIST(root='./data', train=False, download=True, transform=transform)

# Split the training dataset into training and validation sets
train_dataset, val_dataset = random_split(train_dataset, [50000, 10000])

# Define data loaders
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=64)
test_loader = DataLoader(test_dataset, batch_size=64)

# Define LeNet-5 model
class LeNet5(nn.Module):
    def __init__(self):
        super(LeNet5, self).__init__()
        self.conv1 = nn.Conv2d(1, 6, kernel_size=5)
        self.conv2 = nn.Conv2d(6, 16, kernel_size=5)
        self.fc1 = nn.Linear(16 * 4 * 4, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)
        self.relu = nn.ReLU()
        self.maxpool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.dropout = nn.Dropout(0.5)  # Dropout with probability 0.5

    def forward(self, x):
        x = self.maxpool(self.relu(self.conv1(x)))
        x = self.maxpool(self.relu(self.conv2(x)))
        x = x.view(-1, 16 * 4 * 4)
        x = self.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.relu(self.fc2(x))
        x = self.fc3(x)
        return x

# Initialize the model
model = LeNet5()

# Print total parameter count
total_params = sum(p.numel() for p in model.parameters())
print("Total parameters:", total_params)

# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Train the model
epochs = 10
for epoch in range(epochs):
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        if batch_idx % 100 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))

# Evaluate the model on test set
model.eval()
test_loss = 0
correct = 0
with torch.no_grad():
    for data, target in test_loader:
        output = model(data)
        test_loss += criterion(output, target).item()
        pred = output.argmax(dim=1, keepdim=True)
        correct += pred.eq(target.view_as(pred)).sum().item()

test_loss /= len(test_loader.dataset)
accuracy = 100. * correct / len(test_loader.dataset)
print('Test set: Average loss: {:.4f}, Accuracy: {}/{} ({:.2f}%)\n'.format(
    test_loss, correct, len(test_loader.dataset), accuracy))


Total parameters: 44426
Test set: Average loss: 0.0005, Accuracy: 9900/10000 (99.00%)



#Small_mnist data set without weight (baseline)

In [61]:
import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from torchvision import transforms

# Define a custom dataset class
class CustomDataset(Dataset):
    def __init__(self, data, transform=None):
        self.data = data
        self.transform = transform

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        image = self.data.iloc[idx, 1:].values.astype(float).reshape((28, 28, 1))
        label = int(self.data.iloc[idx, 0])

        if self.transform:
            image = self.transform(image)

        return image, label

# Load the small MNIST dataset from CSV
mnist_df = pd.read_csv("small_mnist.csv")

# Split dataset into train and test sets
train_df, test_df = train_test_split(mnist_df, test_size=0.2, random_state=42)

# Define transforms to normalize the data
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,))
])

# Create custom datasets
train_dataset = CustomDataset(train_df, transform=transform)
test_dataset = CustomDataset(test_df, transform=transform)

# Create data loaders
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64)

# Define LeNet-5 model
class LeNet5(nn.Module):
    def __init__(self):
        super(LeNet5, self).__init__()
        self.conv1 = nn.Conv2d(1, 6, kernel_size=5)
        self.conv2 = nn.Conv2d(6, 16, kernel_size=5)
        self.fc1 = nn.Linear(16 * 4 * 4, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)
        self.relu = nn.ReLU()
        self.maxpool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.dropout = nn.Dropout(0.5)  # Dropout with probability 0.5

    def forward(self, x):
        x = self.maxpool(self.relu(self.conv1(x.float())))
        x = self.maxpool(self.relu(self.conv2(x)))
        x = x.view(-1, 16 * 4 * 4)
        x = self.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.relu(self.fc2(x))
        x = self.fc3(x)
        return x

# Initialize the model
model = LeNet5()

# Print total parameter count
total_params = sum(p.numel() for p in model.parameters())
print("Total parameters:", total_params)

# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Train the model
epochs = 100
train_losses = []  # To store train losses for calculating average later
for epoch in range(epochs):
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        if batch_idx % 100 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))
            if epoch > 0:  # Append loss only after epoch 0
                train_losses.append(loss.item())
    # Print loss every 10 epochs
    if epoch % 10 == 0:
        print('Epoch {}, Loss: {:.6f}'.format(epoch, loss.item()))

# Evaluate the model on test set
model.eval()
test_loss = 0
correct = 0
with torch.no_grad():
    for data, target in test_loader:
        output = model(data)
        test_loss += criterion(output, target).item()
        pred = output.argmax(dim=1, keepdim=True)
        correct += pred.eq(target.view_as(pred)).sum().item()

test_loss /= len(test_loader.dataset)
accuracy = 100. * correct / len(test_loader.dataset)

# Calculate average train loss (excluding epoch 0)
average_train_loss = sum(train_losses) / len(train_losses)

print('Test set: Average loss: {:.4f}, Accuracy: {}/{} ({:.2f}%)\n'.format(
    test_loss, correct, len(test_loader.dataset), accuracy))
print('Average train loss (excluding epoch 0): {:.4f}'.format(average_train_loss))



Total parameters: 44426
Epoch 0, Loss: 1.831928
Epoch 10, Loss: 0.241944
Epoch 20, Loss: 0.145906
Epoch 30, Loss: 0.098997
Epoch 40, Loss: 0.025753
Epoch 50, Loss: 0.006105
Epoch 60, Loss: 0.049248
Epoch 70, Loss: 0.000126
Epoch 80, Loss: 0.312149
Epoch 90, Loss: 0.027915
Test set: Average loss: 0.0091, Accuracy: 371/400 (92.75%)

Average train loss (excluding epoch 0): 0.1072


#small_mnist data set with weight decay = 0.0005 (baseline model)

In [62]:
import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from torchvision import transforms

# Define a custom dataset class
class CustomDataset(Dataset):
    def __init__(self, data, transform=None):
        self.data = data
        self.transform = transform

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        image = self.data.iloc[idx, 1:].values.astype(float).reshape((28, 28, 1))
        label = int(self.data.iloc[idx, 0])

        if self.transform:
            image = self.transform(image)

        return image, label

# Load the small MNIST dataset from CSV
mnist_df = pd.read_csv("small_mnist.csv")

# Split dataset into train and test sets
train_df, test_df = train_test_split(mnist_df, test_size=0.2, random_state=42)

# Define transforms to normalize the data
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,))
])

# Create custom datasets
train_dataset = CustomDataset(train_df, transform=transform)
test_dataset = CustomDataset(test_df, transform=transform)

# Create data loaders
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64)
# Define LeNet-5 model
class LeNet5(nn.Module):
    def __init__(self):
        super(LeNet5, self).__init__()
        self.conv1 = nn.Conv2d(1, 6, kernel_size=5)
        self.conv2 = nn.Conv2d(6, 16, kernel_size=5)
        self.fc1 = nn.Linear(16 * 4 * 4, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)
        self.relu = nn.ReLU()
        self.maxpool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.dropout = nn.Dropout(0.5)  # Dropout with probability 0.5

    def forward(self, x):
        x = self.maxpool(self.relu(self.conv1(x.float())))
        x = self.maxpool(self.relu(self.conv2(x)))
        x = x.view(-1, 16 * 4 * 4)
        x = self.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.relu(self.fc2(x))
        x = self.fc3(x)
        return x

# Initialize the model
model = LeNet5()

# Define loss function
criterion = nn.CrossEntropyLoss()

# Define optimizer with weight decay
optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=0.0005)

# Train the model
epochs = 100
train_losses = []  # To store train losses for calculating average later
for epoch in range(epochs):
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        if batch_idx % 100 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))
            if epoch > 0:  # Append loss only after epoch 0
                train_losses.append(loss.item())
    # Print loss every 10 epochs
    if epoch % 10 == 0:
        print('Epoch {}, Loss: {:.6f}'.format(epoch, loss.item()))

# Evaluate the model on test set
model.eval()
test_loss = 0
correct = 0
with torch.no_grad():
    for data, target in test_loader:
        output = model(data)
        test_loss += criterion(output, target).item()
        pred = output.argmax(dim=1, keepdim=True)
        correct += pred.eq(target.view_as(pred)).sum().item()

test_loss /= len(test_loader.dataset)
accuracy = 100. * correct / len(test_loader.dataset)

# Calculate average train loss (excluding epoch 0)
average_train_loss = sum(train_losses) / len(train_losses)

print('Test set: Average loss: {:.4f}, Accuracy: {}/{} ({:.2f}%)\n'.format(
    test_loss, correct, len(test_loader.dataset), accuracy))
print('Average train loss (excluding epoch 0): {:.4f}'.format(average_train_loss))


Epoch 0, Loss: 1.906633
Epoch 10, Loss: 0.065698
Epoch 20, Loss: 0.049440
Epoch 30, Loss: 0.097521
Epoch 40, Loss: 0.018282
Epoch 50, Loss: 0.010948
Epoch 60, Loss: 0.089113
Epoch 70, Loss: 0.133155
Epoch 80, Loss: 0.003373
Epoch 90, Loss: 0.105161
Test set: Average loss: 0.0085, Accuracy: 379/400 (94.75%)

Average train loss (excluding epoch 0): 0.0886


# Implementing Data_grad with all default parameters on MNIST data set

In [55]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms

# Define LeNet architecture
class LeNet(nn.Module):
    def __init__(self):
        super(LeNet, self).__init__()
        self.conv1 = nn.Conv2d(1, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 4 * 4, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        x = self.pool(torch.relu(self.conv1(x)))
        x = self.pool(torch.relu(self.conv2(x)))
        x = x.view(-1, 16 * 4 * 4)
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = self.fc3(x)
        return x

# Load MNIST dataset
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,))
])

# Load MNIST dataset with 50,000 images for training and 10,000 images for testing
trainset = torchvision.datasets.MNIST(root='./data', train=True, download=True, transform=transform)
trainset, _ = torch.utils.data.random_split(trainset, [50000, 10000])
trainloader = torch.utils.data.DataLoader(trainset, batch_size=64, shuffle=True)

testset = torchvision.datasets.MNIST(root='./data', train=False, download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=64, shuffle=False)

# Initialize LeNet model
net = LeNet()

# Define loss function
criterion = nn.CrossEntropyLoss()

# Define Adam optimizer with default parameters
optimizer = optim.Adam(net.parameters())

# Training loop
for epoch in range(5):  # loop over the dataset multiple times
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        inputs, labels = data

        # Zero the parameter gradients
        optimizer.zero_grad()

        # Forward pass
        outputs = net(inputs)
        loss = criterion(outputs, labels)

        # Backward pass and optimize
        loss.backward()
        optimizer.step()

        # Print statistics
        running_loss += loss.item()
        if i % 100 == 99:    # print every 100 mini-batches
            print('[%d, %5d] loss: %.3f' % (epoch + 1, i + 1, running_loss / 100))
            running_loss = 0.0

    # Evaluate on test set
    correct = 0
    total = 0
    with torch.no_grad():
        for data in testloader:
            images, labels = data
            outputs = net(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    print('Accuracy of the network on the 10000 test images: %.2f %%' % (100 * correct / total))

print('Finished Training')


[1,   100] loss: 1.123
[1,   200] loss: 0.269
[1,   300] loss: 0.189
[1,   400] loss: 0.142
[1,   500] loss: 0.128
[1,   600] loss: 0.117
[1,   700] loss: 0.106
Accuracy of the network on the 10000 test images: 97.05 %
[2,   100] loss: 0.076
[2,   200] loss: 0.081
[2,   300] loss: 0.083
[2,   400] loss: 0.076
[2,   500] loss: 0.062
[2,   600] loss: 0.076
[2,   700] loss: 0.063
Accuracy of the network on the 10000 test images: 98.45 %
[3,   100] loss: 0.050
[3,   200] loss: 0.054
[3,   300] loss: 0.058
[3,   400] loss: 0.057
[3,   500] loss: 0.053
[3,   600] loss: 0.047
[3,   700] loss: 0.053
Accuracy of the network on the 10000 test images: 98.62 %
[4,   100] loss: 0.033
[4,   200] loss: 0.046
[4,   300] loss: 0.038
[4,   400] loss: 0.051
[4,   500] loss: 0.040
[4,   600] loss: 0.041
[4,   700] loss: 0.042
Accuracy of the network on the 10000 test images: 98.52 %
[5,   100] loss: 0.025
[5,   200] loss: 0.036
[5,   300] loss: 0.025
[5,   400] loss: 0.042
[5,   500] loss: 0.032
[5,   600

#with optimal data_grad weight 50 + without weight decay

In [89]:
import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
import torchvision.transforms as transforms

# Custom Dataset Class
class CustomMNISTDataset(Dataset):
    def __init__(self, csv_file, transform=None):
        self.data = pd.read_csv(csv_file)
        self.transform = transform

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        image = self.data.iloc[idx, 1:].values.astype(float).reshape((28, 28))
        label = self.data.iloc[idx, 0]
        if self.transform:
            image = self.transform(image)
        return image, label

# Load Your Dataset
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,))
])

# Load the dataset
dataset = CustomMNISTDataset(csv_file='small_mnist.csv', transform=transform)

# Split dataset into training and testing sets (80% train, 20% test)
trainset, testset = train_test_split(dataset, test_size=0.2, random_state=42)

# Create data loaders for training and testing sets
trainloader = DataLoader(trainset, batch_size=64, shuffle=True)
testloader = DataLoader(testset, batch_size=64, shuffle=False)

# Define LeNet architecture
class LeNet(nn.Module):
    def __init__(self):
        super(LeNet, self).__init__()
        self.conv1 = nn.Conv2d(1, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 4 * 4, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        x = self.pool(torch.relu(self.conv1(x.float())))  # Cast input to float to match bias data type
        x = self.pool(torch.relu(self.conv2(x)))
        x = x.view(-1, 16 * 4 * 4)
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = self.fc3(x)
        return x

# Initialize LeNet model
net = LeNet()

# Define loss function
criterion = nn.CrossEntropyLoss()

# Define Adam optimizer with weight decay
optimizer = optim.Adam(net.parameters())

# Training loop
for epoch in range(100):  # loop over the dataset multiple times
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        inputs, labels = data

        # Zero the parameter gradients
        optimizer.zero_grad()

        # Forward pass
        outputs = net(inputs)
        loss = criterion(outputs, labels)

        # Backward pass and optimize
        loss.backward()
        optimizer.step()

        # Print statistics
        running_loss += loss.item()

    # Print epoch and training loss
    print('[Epoch %d] Training Loss: %.3f' % (epoch + 1, running_loss / len(trainloader)))

print('Finished Training')

# Evaluate on test set after completing training
correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data
        outputs = net(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy on Test Set: %.2f %%' % (100 * correct / total))


[Epoch 1] Training Loss: 2.696
[Epoch 2] Training Loss: 0.625
[Epoch 3] Training Loss: 0.317
[Epoch 4] Training Loss: 0.158
[Epoch 5] Training Loss: 0.069
[Epoch 6] Training Loss: 0.034
[Epoch 7] Training Loss: 0.018
[Epoch 8] Training Loss: 0.009
[Epoch 9] Training Loss: 0.005
[Epoch 10] Training Loss: 0.003
[Epoch 11] Training Loss: 0.002
[Epoch 12] Training Loss: 0.002
[Epoch 13] Training Loss: 0.002
[Epoch 14] Training Loss: 0.001
[Epoch 15] Training Loss: 0.001
[Epoch 16] Training Loss: 0.001
[Epoch 17] Training Loss: 0.001
[Epoch 18] Training Loss: 0.001
[Epoch 19] Training Loss: 0.001
[Epoch 20] Training Loss: 0.001
[Epoch 21] Training Loss: 0.001
[Epoch 22] Training Loss: 0.001
[Epoch 23] Training Loss: 0.001
[Epoch 24] Training Loss: 0.000
[Epoch 25] Training Loss: 0.000
[Epoch 26] Training Loss: 0.000
[Epoch 27] Training Loss: 0.000
[Epoch 28] Training Loss: 0.000
[Epoch 29] Training Loss: 0.000
[Epoch 30] Training Loss: 0.000
[Epoch 31] Training Loss: 0.000
[Epoch 32] Traini

#Data grad   50 + weight decay = 0.0005

In [91]:
import torch.optim.lr_scheduler as lr_scheduler

# Define LeNet architecture
class LeNet(nn.Module):
    def __init__(self):
        super(LeNet, self).__init__()
        self.conv1 = nn.Conv2d(1, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 4 * 4, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        x = self.pool(torch.relu(self.conv1(x.float())))  # Cast input to float to match bias data type
        x = self.pool(torch.relu(self.conv2(x)))
        x = x.view(-1, 16 * 4 * 4)
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = self.fc3(x)
        return x

# Initialize LeNet model
net = LeNet()

# Define loss function
criterion = nn.CrossEntropyLoss()

# Define Adam optimizer with weight decay
optimizer = optim.Adam(net.parameters(), lr=0.001, weight_decay=0.0005)

# Learning rate scheduler
scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=5, verbose=True, min_lr=1e-6)

target_grad = 50

# Training loop
for epoch in range(100):  # loop over the dataset multiple times
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        inputs, labels = data

        # Zero the parameter gradients
        optimizer.zero_grad()

        # Forward pass
        outputs = net(inputs)
        loss = criterion(outputs, labels)

        # Backward pass
        loss.backward()

        # Get the gradient norm and adjust learning rate
        grad_norm = nn.utils.clip_grad_norm_(net.parameters(), max_norm=target_grad)
        if grad_norm > target_grad:
            optimizer.step()
            scheduler.step(loss)
        else:
            optimizer.step()

        # Print statistics
        running_loss += loss.item()

    # Print epoch and training loss
    print('[Epoch %d] Training Loss: %.3f' % (epoch + 1, running_loss / len(trainloader)))

print('Finished Training')

# Evaluate on test set after completing training
correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data
        outputs = net(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy on Test Set: %.2f %%' % (100 * correct / total))




[Epoch 1] Training Loss: 2.667
[Epoch 2] Training Loss: 0.462
[Epoch 3] Training Loss: 0.231
[Epoch 4] Training Loss: 0.134
[Epoch 5] Training Loss: 0.087
[Epoch 6] Training Loss: 0.045
[Epoch 7] Training Loss: 0.024
[Epoch 8] Training Loss: 0.020
[Epoch 9] Training Loss: 0.011
[Epoch 10] Training Loss: 0.005
[Epoch 11] Training Loss: 0.002
[Epoch 12] Training Loss: 0.001
[Epoch 13] Training Loss: 0.001
[Epoch 14] Training Loss: 0.001
[Epoch 15] Training Loss: 0.001
[Epoch 16] Training Loss: 0.001
[Epoch 17] Training Loss: 0.001
[Epoch 18] Training Loss: 0.001
[Epoch 19] Training Loss: 0.000
[Epoch 20] Training Loss: 0.000
[Epoch 21] Training Loss: 0.000
[Epoch 22] Training Loss: 0.000
[Epoch 23] Training Loss: 0.000
[Epoch 24] Training Loss: 0.000
[Epoch 25] Training Loss: 0.000
[Epoch 26] Training Loss: 0.000
[Epoch 27] Training Loss: 0.000
[Epoch 28] Training Loss: 0.000
[Epoch 29] Training Loss: 0.000
[Epoch 30] Training Loss: 0.000
[Epoch 31] Training Loss: 0.000
[Epoch 32] Traini

#Spect Reg + with weight decay on small_mnist

In [108]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
from torch.utils.data import Dataset, DataLoader
import pandas as pd
import torch.optim.lr_scheduler as lr_scheduler

# Custom Dataset Class
class CustomMNISTDataset(Dataset):
    def __init__(self, csv_file, transform=None):
        self.data = pd.read_csv(csv_file)
        self.transform = transform

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        image = self.data.iloc[idx, 1:].values.astype(float).reshape((28, 28))
        label = self.data.iloc[idx, 0]
        if self.transform:
            image = self.transform(image)
        return image, label

# Load your CSV dataset
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,))
])

# Load the dataset
dataset = CustomMNISTDataset(csv_file='small_mnist.csv', transform=transform)

# Split dataset into training and testing sets (80% train, 20% test)
train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size
trainset, testset = torch.utils.data.random_split(dataset, [train_size, test_size])

# Create data loaders for training and testing sets
trainloader = DataLoader(trainset, batch_size=64, shuffle=True)
testloader = DataLoader(testset, batch_size=64, shuffle=False)

# Define LeNet architecture
class LeNet(nn.Module):
    def __init__(self):
        super(LeNet, self).__init__()
        self.conv1 = nn.Conv2d(1, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 4 * 4, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        x = self.pool(torch.relu(self.conv1(x.float())))  # Cast input to float to match bias data type
        x = self.pool(torch.relu(self.conv2(x)))
        x = x.view(-1, 16 * 4 * 4)
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = self.fc3(x)
        return x

# Initialize LeNet model
net = LeNet()

# Define loss function
criterion = nn.CrossEntropyLoss()

# Define Adam optimizer with weight decay
optimizer = optim.Adam(net.parameters(), lr=0.001, weight_decay=0.0005)

# Learning rate scheduler
scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=5, verbose=True, min_lr=1e-6)

# Target gradient for spectral regularization
target_grad = 50

# Apply spectral normalization to the linear layers
net.fc1 = nn.utils.spectral_norm(net.fc1)
net.fc2 = nn.utils.spectral_norm(net.fc2)
net.fc3 = nn.utils.spectral_norm(net.fc3)

# Training loop
for epoch in range(10):  # loop over the dataset multiple times
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        inputs, labels = data

        # Zero the parameter gradients
        optimizer.zero_grad()

        # Forward pass
        outputs = net(inputs)
        loss = criterion(outputs, labels)

        # Spectral regularization
        spectreg_loss = 0
        for name, module in net.named_modules():
            if isinstance(module, nn.Linear):
                # Compute the spectral regularization loss
                spectreg_loss += torch.norm(module.weight_u, p=2)

        # Add the spectral regularization loss to the main loss spectral weight = 0.05
        loss += 0.05 * spectreg_loss

        # Backward pass
        loss.backward()

        # Get the gradient norm and adjust learning rate
        grad_norm = nn.utils.clip_grad_norm_(net.parameters(), max_norm=target_grad)
        if grad_norm > target_grad:
            optimizer.step()
            scheduler.step(loss)
        else:
            optimizer.step()

        # Print statistics
        running_loss += loss.item()

    # Print epoch and training loss
    print('[Epoch %d] Training Loss: %.3f' % (epoch + 1, running_loss / len(trainloader)))

print('Finished Training')

# Evaluate on test set after completing training
correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data
        outputs = net(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy on Test Set: %.2f %%' % (100 * correct / total))




[Epoch 1] Training Loss: 5.278
[Epoch 2] Training Loss: 0.731
[Epoch 3] Training Loss: 0.409
[Epoch 4] Training Loss: 0.283
[Epoch 5] Training Loss: 0.209
[Epoch 6] Training Loss: 0.181
[Epoch 7] Training Loss: 0.165
[Epoch 8] Training Loss: 0.156
[Epoch 9] Training Loss: 0.154
[Epoch 10] Training Loss: 0.151
Finished Training
Accuracy on Test Set: 94.00 %


# spectreg without weight decay

In [109]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
from torch.utils.data import Dataset, DataLoader
import pandas as pd
import torch.optim.lr_scheduler as lr_scheduler

# Custom Dataset Class
class CustomMNISTDataset(Dataset):
    def __init__(self, csv_file, transform=None):
        self.data = pd.read_csv(csv_file)
        self.transform = transform

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        image = self.data.iloc[idx, 1:].values.astype(float).reshape((28, 28))
        label = self.data.iloc[idx, 0]
        if self.transform:
            image = self.transform(image)
        return image, label

# Load your CSV dataset
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,))
])

# Load the dataset
dataset = CustomMNISTDataset(csv_file='small_mnist.csv', transform=transform)

# Split dataset into training and testing sets (80% train, 20% test)
train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size
trainset, testset = torch.utils.data.random_split(dataset, [train_size, test_size])

# Create data loaders for training and testing sets
trainloader = DataLoader(trainset, batch_size=64, shuffle=True)
testloader = DataLoader(testset, batch_size=64, shuffle=False)

# Define LeNet architecture
class LeNet(nn.Module):
    def __init__(self):
        super(LeNet, self).__init__()
        self.conv1 = nn.Conv2d(1, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 4 * 4, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        x = self.pool(torch.relu(self.conv1(x.float())))  # Cast input to float to match bias data type
        x = self.pool(torch.relu(self.conv2(x)))
        x = x.view(-1, 16 * 4 * 4)
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = self.fc3(x)
        return x

# Initialize LeNet model
net = LeNet()

# Define loss function
criterion = nn.CrossEntropyLoss()

# Define Adam optimizer with weight decay
optimizer = optim.Adam(net.parameters(), lr=0.001)

# Learning rate scheduler
scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=5, verbose=True, min_lr=1e-6)

# Target gradient for spectral regularization
target_grad = 50

# Apply spectral normalization to the linear layers
net.fc1 = nn.utils.spectral_norm(net.fc1)
net.fc2 = nn.utils.spectral_norm(net.fc2)
net.fc3 = nn.utils.spectral_norm(net.fc3)

# Training loop
for epoch in range(10):  # loop over the dataset multiple times
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        inputs, labels = data

        # Zero the parameter gradients
        optimizer.zero_grad()

        # Forward pass
        outputs = net(inputs)
        loss = criterion(outputs, labels)

        # Spectral regularization
        spectreg_loss = 0
        for name, module in net.named_modules():
            if isinstance(module, nn.Linear):
                # Compute the spectral regularization loss
                spectreg_loss += torch.norm(module.weight_u, p=2)

        # Add the spectral regularization loss to the main loss with spectral weight = 0.03
        loss += 0.03 * spectreg_loss

        # Backward pass
        loss.backward()

        # Get the gradient norm and adjust learning rate
        grad_norm = nn.utils.clip_grad_norm_(net.parameters(), max_norm=target_grad)
        if grad_norm > target_grad:
            optimizer.step()
            scheduler.step(loss)
        else:
            optimizer.step()

        # Print statistics
        running_loss += loss.item()

    # Print epoch and training loss
    print('[Epoch %d] Training Loss: %.3f' % (epoch + 1, running_loss / len(trainloader)))

print('Finished Training')

# Evaluate on test set after completing training
correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data
        outputs = net(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy on Test Set: %.2f %%' % (100 * correct / total))




[Epoch 1] Training Loss: 3.172
[Epoch 2] Training Loss: 0.721
[Epoch 3] Training Loss: 0.388
[Epoch 4] Training Loss: 0.217
[Epoch 5] Training Loss: 0.155
[Epoch 6] Training Loss: 0.119
[Epoch 7] Training Loss: 0.122
[Epoch 8] Training Loss: 0.114
[Epoch 9] Training Loss: 0.094
[Epoch 10] Training Loss: 0.091
Finished Training
Accuracy on Test Set: 93.50 %


#CIFAR-10

# base line model without weight decay

In [2]:
### ciphar 10 base line model
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.utils.data import DataLoader, Subset
import torchvision.models as models

# Transformations for the dataset
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))  # CIFAR-10 mean and std for each channel
])

# Load CIFAR-10 training and testing datasets
trainset = datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
testset = datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)

# Function to get indices of limited samples per class
def get_limited_indices(dataset, limit_per_class):
    indices = []
    count_per_class = [0] * 10
    for i in range(len(dataset)):
        _, label = dataset[i]
        if count_per_class[label] < limit_per_class:
            indices.append(i)
            count_per_class[label] += 1
        if all(count >= limit_per_class for count in count_per_class):
            break
    return indices

# Choose only 200 samples per class for training
train_indices = get_limited_indices(trainset, 200)
trainset_limited = Subset(trainset, train_indices)

# Create data loaders for training and testing sets
trainloader = DataLoader(trainset_limited, batch_size=64, shuffle=True)
testloader = DataLoader(testset, batch_size=64, shuffle=False)

# Define ResNet-18 model
class ResNet18(nn.Module):
    def __init__(self):
        super(ResNet18, self).__init__()
        self.resnet = models.resnet18(pretrained=False, num_classes=10)  # CIFAR-10 has 10 classes

    def forward(self, x):
        return self.resnet(x)

# Initialize ResNet-18 model
model = ResNet18()

# Define loss function
criterion = nn.CrossEntropyLoss()

# Define SGD optimizer with weight decay
optimizer = optim.SGD(model.parameters(), lr=0.1, momentum=0.9)

# Learning rate scheduler
scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=[150, 250], gamma=0.1)

# Training loop
for epoch in range(30):  # Adjust the number of epochs as needed
    model.train()
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        inputs, labels = data

        # Zero the parameter gradients
        optimizer.zero_grad()

        # Forward pass
        outputs = model(inputs)
        loss = criterion(outputs, labels)

        # Backward pass
        loss.backward()
        optimizer.step()

        # Print statistics
        running_loss += loss.item()
    scheduler.step()

    # Print epoch and training loss
    print('[Epoch %d] Training Loss: %.3f' % (epoch + 1, running_loss / len(trainloader)))

print('Finished Training')

# Evaluate on test set after completing training
correct = 0
total = 0
model.eval()
with torch.no_grad():
    for data in testloader:
        images, labels = data
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy on Test Set: %.2f %%' % (100 * correct / total))


Files already downloaded and verified
Files already downloaded and verified
[Epoch 1] Training Loss: 5.280
[Epoch 2] Training Loss: 3.162
[Epoch 3] Training Loss: 2.529
[Epoch 4] Training Loss: 2.148
[Epoch 5] Training Loss: 2.098
[Epoch 6] Training Loss: 2.113
[Epoch 7] Training Loss: 1.952
[Epoch 8] Training Loss: 1.737
[Epoch 9] Training Loss: 1.687
[Epoch 10] Training Loss: 1.586
[Epoch 11] Training Loss: 1.582
[Epoch 12] Training Loss: 1.510
[Epoch 13] Training Loss: 1.476
[Epoch 14] Training Loss: 1.528
[Epoch 15] Training Loss: 1.361
[Epoch 16] Training Loss: 1.283
[Epoch 17] Training Loss: 1.224
[Epoch 18] Training Loss: 1.136
[Epoch 19] Training Loss: 1.050
[Epoch 20] Training Loss: 0.926
[Epoch 21] Training Loss: 0.873
[Epoch 22] Training Loss: 0.932
[Epoch 23] Training Loss: 0.869
[Epoch 24] Training Loss: 0.827
[Epoch 25] Training Loss: 0.578
[Epoch 26] Training Loss: 0.633
[Epoch 27] Training Loss: 0.518
[Epoch 28] Training Loss: 0.446
[Epoch 29] Training Loss: 0.456
[Epoc

#bse line model with weight decay = 0.003

In [4]:
# import torch
# import torch.nn as nn
# import torch.optim as optim
# import torchvision.transforms as transforms
# import torchvision.datasets as datasets
# from torch.utils.data import DataLoader, Subset
# import torchvision.models as models

# # Transformations for the dataset
# transform = transforms.Compose([
#     transforms.ToTensor(),
#     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))  # CIFAR-10 mean and std for each channel
# ])

# # Load CIFAR-10 training and testing datasets
# trainset = datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
# testset = datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)

# Function to get indices of limited samples per class
def get_limited_indices(dataset, limit_per_class):
    indices = []
    count_per_class = [0] * 10
    for i in range(len(dataset)):
        _, label = dataset[i]
        if count_per_class[label] < limit_per_class:
            indices.append(i)
            count_per_class[label] += 1
        if all(count >= limit_per_class for count in count_per_class):
            break
    return indices

# Choose only 200 samples per class for training
train_indices = get_limited_indices(trainset, 200)
trainset_limited = Subset(trainset, train_indices)

# Create data loaders for training and testing sets
trainloader = DataLoader(trainset_limited, batch_size=64, shuffle=True)
testloader = DataLoader(testset, batch_size=64, shuffle=False)

# Define ResNet-18 model
class ResNet18(nn.Module):
    def __init__(self):
        super(ResNet18, self).__init__()
        self.resnet = models.resnet18(pretrained=False, num_classes=10)  # CIFAR-10 has 10 classes

    def forward(self, x):
        return self.resnet(x)

# Initialize ResNet-18 model
model = ResNet18()

# Define loss function
criterion = nn.CrossEntropyLoss()

# Define SGD optimizer with weight decay
optimizer = optim.SGD(model.parameters(), lr=0.1, momentum=0.9, weight_decay= 0.003)

# Learning rate scheduler
scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=[150, 250], gamma=0.1)

# Training loop
for epoch in range(33):  # Adjust the number of epochs as needed
    model.train()
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        inputs, labels = data

        # Zero the parameter gradients
        optimizer.zero_grad()

        # Forward pass
        outputs = model(inputs)
        loss = criterion(outputs, labels)

        # Backward pass
        loss.backward()
        optimizer.step()

        # Print statistics
        running_loss += loss.item()
    scheduler.step()

    # Print epoch and training loss
    print('[Epoch %d] Training Loss: %.3f' % (epoch + 1, running_loss / len(trainloader)))

print('Finished Training')

# Evaluate on test set after completing training
correct = 0
total = 0
model.eval()
with torch.no_grad():
    for data in testloader:
        images, labels = data
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy on Test Set: %.2f %%' % (100 * correct / total))


[Epoch 1] Training Loss: 4.893
[Epoch 2] Training Loss: 3.340
[Epoch 3] Training Loss: 2.222
[Epoch 4] Training Loss: 2.084
[Epoch 5] Training Loss: 1.924
[Epoch 6] Training Loss: 1.873
[Epoch 7] Training Loss: 1.798
[Epoch 8] Training Loss: 1.753
[Epoch 9] Training Loss: 1.660
[Epoch 10] Training Loss: 1.599
[Epoch 11] Training Loss: 1.557
[Epoch 12] Training Loss: 1.496
[Epoch 13] Training Loss: 1.420
[Epoch 14] Training Loss: 1.395
[Epoch 15] Training Loss: 1.340
[Epoch 16] Training Loss: 1.339
[Epoch 17] Training Loss: 1.217
[Epoch 18] Training Loss: 1.205
[Epoch 19] Training Loss: 1.232
[Epoch 20] Training Loss: 1.092
[Epoch 21] Training Loss: 1.167
[Epoch 22] Training Loss: 0.967
[Epoch 23] Training Loss: 1.003
[Epoch 24] Training Loss: 0.938
[Epoch 25] Training Loss: 0.923
[Epoch 26] Training Loss: 1.000
[Epoch 27] Training Loss: 0.797
[Epoch 28] Training Loss: 0.889
[Epoch 29] Training Loss: 0.776
[Epoch 30] Training Loss: 0.753
[Epoch 31] Training Loss: 0.693
[Epoch 32] Traini

# spect reg = 0.001 without weight decay


In [5]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.utils.data import DataLoader, Subset
import torchvision.models as models

# Transformations for the dataset
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))  # CIFAR-10 mean and std for each channel
])

# Load CIFAR-10 training and testing datasets
trainset = datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
testset = datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)

# Function to get indices of limited samples per class
def get_limited_indices(dataset, limit_per_class):
    indices = []
    count_per_class = [0] * 10
    for i in range(len(dataset)):
        _, label = dataset[i]
        if count_per_class[label] < limit_per_class:
            indices.append(i)
            count_per_class[label] += 1
        if all(count >= limit_per_class for count in count_per_class):
            break
    return indices

# Choose only 200 samples per class for training
train_indices = get_limited_indices(trainset, 200)
trainset_limited = Subset(trainset, train_indices)

# Create data loaders for training and testing sets
trainloader = DataLoader(trainset_limited, batch_size=64, shuffle=True)
testloader = DataLoader(testset, batch_size=64, shuffle=False)

# Define ResNet-18 model
class ResNet18(nn.Module):
    def __init__(self):
        super(ResNet18, self).__init__()
        self.resnet = models.resnet18(pretrained=False, num_classes=10)  # CIFAR-10 has 10 classes

    def forward(self, x):
        return self.resnet(x)

# Initialize ResNet-18 model
model = ResNet18()

# Define loss function
criterion = nn.CrossEntropyLoss()

# Define SGD optimizer with weight decay
optimizer = optim.SGD(model.parameters(), lr=0.1, momentum=0.9)

# Learning rate scheduler
scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=[150, 250], gamma=0.1)

# Spectral regularization weight
spectral_weight = 0.001

# Training loop
for epoch in range(33):  # Adjust the number of epochs as needed
    model.train()
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        inputs, labels = data
        inputs.requires_grad_(True)  # Set requires_grad=True for inputs

        # Zero the parameter gradients
        optimizer.zero_grad()

        # Forward pass
        outputs = model(inputs)
        loss = criterion(outputs, labels)

        # Backward pass
        loss.backward()

        # Compute spectral regularization loss
        spectreg_loss = 0
        for name, module in model.named_modules():
            if isinstance(module, nn.Conv2d) or isinstance(module, nn.Linear):
                weight = module.weight.view(module.weight.size(0), -1)
                u, _, _ = torch.svd(weight)
                spectreg_loss += torch.norm(u, p=2)

        # Add spectral regularization loss to the main loss
        loss += spectreg_loss * spectral_weight

        # Step with optimizer
        optimizer.step()
        scheduler.step(loss)

        # Print statistics
        running_loss += loss.item()

    # Print epoch and training loss
    print('[Epoch %d] Training Loss: %.3f' % (epoch + 1, running_loss / len(trainloader)))

print('Finished Training')

# Evaluate on test set after completing training
correct = 0
total = 0
model.eval()
with torch.no_grad():
    for data in testloader:
        images, labels = data
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy on Test Set: %.2f %%' % (100 * correct / total))


Files already downloaded and verified
Files already downloaded and verified




[Epoch 1] Training Loss: 4.570
[Epoch 2] Training Loss: 3.377
[Epoch 3] Training Loss: 3.323
[Epoch 4] Training Loss: 2.521
[Epoch 5] Training Loss: 2.353
[Epoch 6] Training Loss: 2.164
[Epoch 7] Training Loss: 2.099
[Epoch 8] Training Loss: 2.009
[Epoch 9] Training Loss: 1.958
[Epoch 10] Training Loss: 1.933
[Epoch 11] Training Loss: 1.834
[Epoch 12] Training Loss: 1.766
[Epoch 13] Training Loss: 1.697
[Epoch 14] Training Loss: 1.635
[Epoch 15] Training Loss: 1.538
[Epoch 16] Training Loss: 1.471
[Epoch 17] Training Loss: 1.440
[Epoch 18] Training Loss: 1.314
[Epoch 19] Training Loss: 1.308
[Epoch 20] Training Loss: 1.176
[Epoch 21] Training Loss: 1.146
[Epoch 22] Training Loss: 1.108
[Epoch 23] Training Loss: 1.112
[Epoch 24] Training Loss: 0.981
[Epoch 25] Training Loss: 0.893
[Epoch 26] Training Loss: 0.885
[Epoch 27] Training Loss: 0.865
[Epoch 28] Training Loss: 0.713
[Epoch 29] Training Loss: 0.728
[Epoch 30] Training Loss: 0.608
[Epoch 31] Training Loss: 0.666
[Epoch 32] Traini

# spect reg = 0.03 with weight decay = 0.0005


In [6]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.utils.data import DataLoader, Subset
import torchvision.models as models

# Transformations for the dataset
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))  # CIFAR-10 mean and std for each channel
])

# Load CIFAR-10 training and testing datasets
trainset = datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
testset = datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)

# Function to get indices of limited samples per class
def get_limited_indices(dataset, limit_per_class):
    indices = []
    count_per_class = [0] * 10
    for i in range(len(dataset)):
        _, label = dataset[i]
        if count_per_class[label] < limit_per_class:
            indices.append(i)
            count_per_class[label] += 1
        if all(count >= limit_per_class for count in count_per_class):
            break
    return indices

# Choose only 200 samples per class for training
train_indices = get_limited_indices(trainset, 200)
trainset_limited = Subset(trainset, train_indices)

# Create data loaders for training and testing sets
trainloader = DataLoader(trainset_limited, batch_size=64, shuffle=True)
testloader = DataLoader(testset, batch_size=64, shuffle=False)

# Define ResNet-18 model
class ResNet18(nn.Module):
    def __init__(self):
        super(ResNet18, self).__init__()
        self.resnet = models.resnet18(pretrained=False, num_classes=10)  # CIFAR-10 has 10 classes

    def forward(self, x):
        return self.resnet(x)

# Initialize ResNet-18 model
model = ResNet18()

# Define loss function
criterion = nn.CrossEntropyLoss()

# Define SGD optimizer with weight decay
optimizer = optim.SGD(model.parameters(), lr=0.1, momentum=0.9, weight_decay=0.0005)

# Learning rate scheduler
scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=[150, 250], gamma=0.1)

# Spectral regularization weight
spectral_weight = 0.03

# Training loop
for epoch in range(33):  # Adjust the number of epochs as needed
    model.train()
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        inputs, labels = data
        inputs.requires_grad_(True)  # Set requires_grad=True for inputs

        # Zero the parameter gradients
        optimizer.zero_grad()

        # Forward pass
        outputs = model(inputs)
        loss = criterion(outputs, labels)

        # Backward pass
        loss.backward()

        # Compute spectral regularization loss
        spectreg_loss = 0
        for name, module in model.named_modules():
            if isinstance(module, nn.Conv2d) or isinstance(module, nn.Linear):
                weight = module.weight.view(module.weight.size(0), -1)
                u, _, _ = torch.svd(weight)
                spectreg_loss += torch.norm(u, p=2)

        # Add spectral regularization loss to the main loss
        loss += spectreg_loss * spectral_weight

        # Step with optimizer
        optimizer.step()
        scheduler.step(loss)

        # Print statistics
        running_loss += loss.item()

    # Print epoch and training loss
    print('[Epoch %d] Training Loss: %.3f' % (epoch + 1, running_loss / len(trainloader)))

print('Finished Training')

# Evaluate on test set after completing training
correct = 0
total = 0
model.eval()
with torch.no_grad():
    for data in testloader:
        images, labels = data
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy on Test Set: %.2f %%' % (100 * correct / total))


Files already downloaded and verified
Files already downloaded and verified
[Epoch 1] Training Loss: 13.418
[Epoch 2] Training Loss: 11.300
[Epoch 3] Training Loss: 11.084
[Epoch 4] Training Loss: 10.645
[Epoch 5] Training Loss: 10.425
[Epoch 6] Training Loss: 10.309
[Epoch 7] Training Loss: 10.194
[Epoch 8] Training Loss: 10.123
[Epoch 9] Training Loss: 10.107
[Epoch 10] Training Loss: 10.041
[Epoch 11] Training Loss: 9.977
[Epoch 12] Training Loss: 9.902
[Epoch 13] Training Loss: 9.895
[Epoch 14] Training Loss: 9.792
[Epoch 15] Training Loss: 9.754
[Epoch 16] Training Loss: 9.674
[Epoch 17] Training Loss: 9.592
[Epoch 18] Training Loss: 9.585
[Epoch 19] Training Loss: 9.484
[Epoch 20] Training Loss: 9.522
[Epoch 21] Training Loss: 9.322
[Epoch 22] Training Loss: 9.280
[Epoch 23] Training Loss: 9.405
[Epoch 24] Training Loss: 9.358
[Epoch 25] Training Loss: 9.099
[Epoch 26] Training Loss: 9.048
[Epoch 27] Training Loss: 9.031
[Epoch 28] Training Loss: 9.071
[Epoch 29] Training Loss: 8

# ciphar-100

# base line + without weight decay

In [8]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.utils.data import DataLoader, Subset
import torchvision.models as models

# Transformations for the dataset
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))  # CIFAR-100 mean and std for each channel
])

# Load CIFAR-100 training and testing datasets
trainset = datasets.CIFAR100(root='./data', train=True, download=True, transform=transform)
testset = datasets.CIFAR100(root='./data', train=False, download=True, transform=transform)

# Function to get indices of limited samples per class
def get_limited_indices(dataset, limit_per_class):
    indices = []
    count_per_class = [0] * 100  # Adjusted for CIFAR-100 which has 100 classes
    for i in range(len(dataset)):
        _, label = dataset[i]
        if count_per_class[label] < limit_per_class:
            indices.append(i)
            count_per_class[label] += 1
        if all(count >= limit_per_class for count in count_per_class):
            break
    return indices

# Choose only 20 samples per class for training
train_indices = get_limited_indices(trainset, 20)
trainset_limited = Subset(trainset, train_indices)

# Create data loaders for training and testing sets
trainloader = DataLoader(trainset_limited, batch_size=64, shuffle=True)

# Choose only 1000 samples for testing
test_indices = range(1000)
testset_limited = Subset(testset, test_indices)
testloader = DataLoader(testset_limited, batch_size=64, shuffle=False)

# Define ResNet-18 model
class ResNet18(nn.Module):
    def __init__(self):
        super(ResNet18, self).__init__()
        self.resnet = models.resnet18(pretrained=False, num_classes=100)  # CIFAR-100 has 100 classes

    def forward(self, x):
        return self.resnet(x)

# Initialize ResNet-18 model
model = ResNet18()

# Define loss function
criterion = nn.CrossEntropyLoss()

# Define SGD optimizer with weight decay
optimizer = optim.SGD(model.parameters(), lr=0.1, momentum=0.9)

# Learning rate scheduler
scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=[150, 250], gamma=0.1)

# Training loop
for epoch in range(33):  # Adjust the number of epochs as needed
    model.train()
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        inputs, labels = data

        # Zero the parameter gradients
        optimizer.zero_grad()

        # Forward pass
        outputs = model(inputs)
        loss = criterion(outputs, labels)

        # Backward pass
        loss.backward()
        optimizer.step()

        # Print statistics
        running_loss += loss.item()
    scheduler.step()

    # Print epoch and training loss
    print('[Epoch %d] Training Loss: %.3f' % (epoch + 1, running_loss / len(trainloader)))

print('Finished Training')

# Evaluate on test set after completing training
correct = 0
total = 0
model.eval()
with torch.no_grad():
    for data in testloader:
        images, labels = data
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy on Test Set: %.2f %%' % (100 * correct / total))


Files already downloaded and verified
Files already downloaded and verified
[Epoch 1] Training Loss: 5.742
[Epoch 2] Training Loss: 4.817
[Epoch 3] Training Loss: 4.547
[Epoch 4] Training Loss: 4.364
[Epoch 5] Training Loss: 4.111
[Epoch 6] Training Loss: 3.886
[Epoch 7] Training Loss: 3.756
[Epoch 8] Training Loss: 3.644
[Epoch 9] Training Loss: 3.516
[Epoch 10] Training Loss: 3.418
[Epoch 11] Training Loss: 3.225
[Epoch 12] Training Loss: 3.003
[Epoch 13] Training Loss: 2.878
[Epoch 14] Training Loss: 2.604
[Epoch 15] Training Loss: 2.448
[Epoch 16] Training Loss: 2.188
[Epoch 17] Training Loss: 1.953
[Epoch 18] Training Loss: 1.748
[Epoch 19] Training Loss: 1.356
[Epoch 20] Training Loss: 1.231
[Epoch 21] Training Loss: 0.933
[Epoch 22] Training Loss: 0.790
[Epoch 23] Training Loss: 0.819
[Epoch 24] Training Loss: 0.872
[Epoch 25] Training Loss: 0.694
[Epoch 26] Training Loss: 0.560
[Epoch 27] Training Loss: 0.431
[Epoch 28] Training Loss: 0.251
[Epoch 29] Training Loss: 0.331
[Epoc

#base line + with weight decay = 0.003

In [9]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.utils.data import DataLoader, Subset
import torchvision.models as models

# Transformations for the dataset
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))  # CIFAR-100 mean and std for each channel
])

# Load CIFAR-100 training and testing datasets
trainset = datasets.CIFAR100(root='./data', train=True, download=True, transform=transform)
testset = datasets.CIFAR100(root='./data', train=False, download=True, transform=transform)

# Function to get indices of limited samples per class
def get_limited_indices(dataset, limit_per_class):
    indices = []
    count_per_class = [0] * 100  # Adjusted for CIFAR-100 which has 100 classes
    for i in range(len(dataset)):
        _, label = dataset[i]
        if count_per_class[label] < limit_per_class:
            indices.append(i)
            count_per_class[label] += 1
        if all(count >= limit_per_class for count in count_per_class):
            break
    return indices

# Choose only 20 samples per class for training
train_indices = get_limited_indices(trainset, 20)
trainset_limited = Subset(trainset, train_indices)

# Create data loaders for training and testing sets
trainloader = DataLoader(trainset_limited, batch_size=64, shuffle=True)

# Choose only 1000 samples for testing
test_indices = range(1000)
testset_limited = Subset(testset, test_indices)
testloader = DataLoader(testset_limited, batch_size=64, shuffle=False)

# Define ResNet-18 model
class ResNet18(nn.Module):
    def __init__(self):
        super(ResNet18, self).__init__()
        self.resnet = models.resnet18(pretrained=False, num_classes=100)  # CIFAR-100 has 100 classes

    def forward(self, x):
        return self.resnet(x)

# Initialize ResNet-18 model
model = ResNet18()

# Define loss function
criterion = nn.CrossEntropyLoss()

# Define SGD optimizer with weight decay
optimizer = optim.SGD(model.parameters(), lr=0.1, momentum=0.9, weight_decay=0.003)

# Learning rate scheduler
scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=[150, 250], gamma=0.1)

# Training loop
for epoch in range(33):  # Adjust the number of epochs as needed
    model.train()
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        inputs, labels = data

        # Zero the parameter gradients
        optimizer.zero_grad()

        # Forward pass
        outputs = model(inputs)
        loss = criterion(outputs, labels)

        # Backward pass
        loss.backward()
        optimizer.step()

        # Print statistics
        running_loss += loss.item()
    scheduler.step()

    # Print epoch and training loss
    print('[Epoch %d] Training Loss: %.3f' % (epoch + 1, running_loss / len(trainloader)))

print('Finished Training')

# Evaluate on test set after completing training
correct = 0
total = 0
model.eval()
with torch.no_grad():
    for data in testloader:
        images, labels = data
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy on Test Set: %.2f %%' % (100 * correct / total))


Files already downloaded and verified
Files already downloaded and verified
[Epoch 1] Training Loss: 5.654
[Epoch 2] Training Loss: 4.804
[Epoch 3] Training Loss: 4.402
[Epoch 4] Training Loss: 4.235
[Epoch 5] Training Loss: 3.961
[Epoch 6] Training Loss: 3.856
[Epoch 7] Training Loss: 3.750
[Epoch 8] Training Loss: 3.588
[Epoch 9] Training Loss: 3.471
[Epoch 10] Training Loss: 3.322
[Epoch 11] Training Loss: 3.187
[Epoch 12] Training Loss: 3.078
[Epoch 13] Training Loss: 2.973
[Epoch 14] Training Loss: 2.728
[Epoch 15] Training Loss: 2.643
[Epoch 16] Training Loss: 2.438
[Epoch 17] Training Loss: 2.305
[Epoch 18] Training Loss: 2.204
[Epoch 19] Training Loss: 1.979
[Epoch 20] Training Loss: 1.842
[Epoch 21] Training Loss: 1.747
[Epoch 22] Training Loss: 1.652
[Epoch 23] Training Loss: 1.499
[Epoch 24] Training Loss: 1.365
[Epoch 25] Training Loss: 1.126
[Epoch 26] Training Loss: 1.188
[Epoch 27] Training Loss: 1.166
[Epoch 28] Training Loss: 1.018
[Epoch 29] Training Loss: 0.912
[Epoc

#spectreg = 0.001 + without weight decay

In [10]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.utils.data import DataLoader, Subset
import torchvision.models as models

# Transformations for the dataset
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))  # CIFAR-100 mean and std for each channel
])

# Load CIFAR-100 training and testing datasets
trainset = datasets.CIFAR100(root='./data', train=True, download=True, transform=transform)
testset = datasets.CIFAR100(root='./data', train=False, download=True, transform=transform)

# Function to get indices of limited samples per class
def get_limited_indices(dataset, limit_per_class):
    indices = []
    count_per_class = [0] * 100  # Adjusted for CIFAR-100 which has 100 classes
    for i in range(len(dataset)):
        _, label = dataset[i]
        if count_per_class[label] < limit_per_class:
            indices.append(i)
            count_per_class[label] += 1
        if all(count >= limit_per_class for count in count_per_class):
            break
    return indices

# Choose only 20 samples per class for training
train_indices = get_limited_indices(trainset, 20)
trainset_limited = Subset(trainset, train_indices)

# Create data loaders for training and testing sets
trainloader = DataLoader(trainset_limited, batch_size=64, shuffle=True)

# Choose only 1000 samples for testing
test_indices = range(1000)
testset_limited = Subset(testset, test_indices)
testloader = DataLoader(testset_limited, batch_size=64, shuffle=False)

# Define ResNet-18 model
class ResNet18(nn.Module):
    def __init__(self):
        super(ResNet18, self).__init__()
        self.resnet = models.resnet18(pretrained=False, num_classes=100)  # CIFAR-100 has 100 classes

    def forward(self, x):
        return self.resnet(x)

# Initialize ResNet-18 model
model = ResNet18()

# Define loss function
criterion = nn.CrossEntropyLoss()

# Define SGD optimizer with weight decay
optimizer = optim.SGD(model.parameters(), lr=0.1, momentum=0.9)

# Learning rate scheduler
scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=[150, 250], gamma=0.1)

# Function to calculate spectral norm of weights
def calculate_spectral_norm(model):
    spectral_norm = 0
    for param in model.parameters():
        if param.dim() > 1:  # Exclude biases
            spectral_norm += torch.norm(param, p='fro')
    return spectral_norm

# Training loop
for epoch in range(33):  # Adjust the number of epochs as needed
    model.train()
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        inputs, labels = data

        # Zero the parameter gradients
        optimizer.zero_grad()

        # Forward pass
        outputs = model(inputs)
        loss = criterion(outputs, labels)

        # Add spectral regularization
        spectral_weight = 0.001
        spectral_norm = calculate_spectral_norm(model)
        loss += spectral_weight * spectral_norm

        # Backward pass
        loss.backward()
        optimizer.step()

        # Print statistics
        running_loss += loss.item()
    scheduler.step()

    # Print epoch and training loss
    print('[Epoch %d] Training Loss: %.3f' % (epoch + 1, running_loss / len(trainloader)))

print('Finished Training')

# Evaluate on test set after completing training
correct = 0
total = 0
model.eval()
with torch.no_grad():
    for data in testloader:
        images, labels = data
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy on Test Set: %.2f %%' % (100 * correct / total))


Files already downloaded and verified
Files already downloaded and verified
[Epoch 1] Training Loss: 6.340
[Epoch 2] Training Loss: 5.367
[Epoch 3] Training Loss: 4.955
[Epoch 4] Training Loss: 4.689
[Epoch 5] Training Loss: 4.465
[Epoch 6] Training Loss: 4.349
[Epoch 7] Training Loss: 4.189
[Epoch 8] Training Loss: 4.044
[Epoch 9] Training Loss: 3.932
[Epoch 10] Training Loss: 3.747
[Epoch 11] Training Loss: 4.076
[Epoch 12] Training Loss: 3.685
[Epoch 13] Training Loss: 3.406
[Epoch 14] Training Loss: 3.148
[Epoch 15] Training Loss: 2.923
[Epoch 16] Training Loss: 2.997
[Epoch 17] Training Loss: 2.565
[Epoch 18] Training Loss: 2.194
[Epoch 19] Training Loss: 1.987
[Epoch 20] Training Loss: 1.589
[Epoch 21] Training Loss: 1.457
[Epoch 22] Training Loss: 1.367
[Epoch 23] Training Loss: 1.129
[Epoch 24] Training Loss: 1.069
[Epoch 25] Training Loss: 0.957
[Epoch 26] Training Loss: 0.920
[Epoch 27] Training Loss: 1.003
[Epoch 28] Training Loss: 0.845
[Epoch 29] Training Loss: 0.826
[Epoc

#spectreg = 0.0003 + weight decay = 0.003

In [11]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.utils.data import DataLoader, Subset
import torchvision.models as models

# Transformations for the dataset
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))  # CIFAR-100 mean and std for each channel
])

# Load CIFAR-100 training and testing datasets
trainset = datasets.CIFAR100(root='./data', train=True, download=True, transform=transform)
testset = datasets.CIFAR100(root='./data', train=False, download=True, transform=transform)

# Function to get indices of limited samples per class
def get_limited_indices(dataset, limit_per_class):
    indices = []
    count_per_class = [0] * 100  # Adjusted for CIFAR-100 which has 100 classes
    for i in range(len(dataset)):
        _, label = dataset[i]
        if count_per_class[label] < limit_per_class:
            indices.append(i)
            count_per_class[label] += 1
        if all(count >= limit_per_class for count in count_per_class):
            break
    return indices

# Choose only 20 samples per class for training
train_indices = get_limited_indices(trainset, 20)
trainset_limited = Subset(trainset, train_indices)

# Create data loaders for training and testing sets
trainloader = DataLoader(trainset_limited, batch_size=64, shuffle=True)

# Choose only 1000 samples for testing
test_indices = range(1000)
testset_limited = Subset(testset, test_indices)
testloader = DataLoader(testset_limited, batch_size=64, shuffle=False)

# Define ResNet-18 model
class ResNet18(nn.Module):
    def __init__(self):
        super(ResNet18, self).__init__()
        self.resnet = models.resnet18(pretrained=False, num_classes=100)  # CIFAR-100 has 100 classes

    def forward(self, x):
        return self.resnet(x)

# Initialize ResNet-18 model
model = ResNet18()

# Define loss function
criterion = nn.CrossEntropyLoss()

# Define SGD optimizer with weight decay
optimizer = optim.SGD(model.parameters(), lr=0.1, momentum=0.9, weight_decay=0.003)

# Learning rate scheduler
scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=[150, 250], gamma=0.1)

# Function to calculate spectral norm of weights
def calculate_spectral_norm(model):
    spectral_norm = 0
    for param in model.parameters():
        if param.dim() > 1:  # Exclude biases
            spectral_norm += torch.norm(param, p='fro')
    return spectral_norm

# Training loop
for epoch in range(33):  # Adjust the number of epochs as needed
    model.train()
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        inputs, labels = data

        # Zero the parameter gradients
        optimizer.zero_grad()

        # Forward pass
        outputs = model(inputs)
        loss = criterion(outputs, labels)

        # Add spectral regularization
        spectral_weight = 0.0003
        spectral_norm = calculate_spectral_norm(model)
        loss += spectral_weight * spectral_norm

        # Backward pass
        loss.backward()
        optimizer.step()

        # Print statistics
        running_loss += loss.item()
    scheduler.step()

    # Print epoch and training loss
    print('[Epoch %d] Training Loss: %.3f' % (epoch + 1, running_loss / len(trainloader)))

print('Finished Training')

# Evaluate on test set after completing training
correct = 0
total = 0
model.eval()
with torch.no_grad():
    for data in testloader:
        images, labels = data
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy on Test Set: %.2f %%' % (100 * correct / total))


Files already downloaded and verified
Files already downloaded and verified
[Epoch 1] Training Loss: 6.016
[Epoch 2] Training Loss: 4.937
[Epoch 3] Training Loss: 4.446
[Epoch 4] Training Loss: 4.270
[Epoch 5] Training Loss: 4.080
[Epoch 6] Training Loss: 3.947
[Epoch 7] Training Loss: 3.819
[Epoch 8] Training Loss: 3.703
[Epoch 9] Training Loss: 3.582
[Epoch 10] Training Loss: 3.397
[Epoch 11] Training Loss: 3.263
[Epoch 12] Training Loss: 3.049
[Epoch 13] Training Loss: 2.882
[Epoch 14] Training Loss: 2.776
[Epoch 15] Training Loss: 2.688
[Epoch 16] Training Loss: 2.430
[Epoch 17] Training Loss: 2.243
[Epoch 18] Training Loss: 2.237
[Epoch 19] Training Loss: 2.075
[Epoch 20] Training Loss: 1.891
[Epoch 21] Training Loss: 1.646
[Epoch 22] Training Loss: 1.761
[Epoch 23] Training Loss: 1.620
[Epoch 24] Training Loss: 1.512
[Epoch 25] Training Loss: 1.266
[Epoch 26] Training Loss: 1.307
[Epoch 27] Training Loss: 1.266
[Epoch 28] Training Loss: 1.117
[Epoch 29] Training Loss: 0.972
[Epoc