In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision.transforms import ToTensor
import torchvision.transforms as transforms
import torch.optim.lr_scheduler as lr_scheduler
import torch.nn.functional as F
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np

In [3]:
print(dir(datasets))

['CIFAR10', 'CIFAR100', 'CLEVRClassification', 'CREStereo', 'Caltech101', 'Caltech256', 'CarlaStereo', 'CelebA', 'Cityscapes', 'CocoCaptions', 'CocoDetection', 'Country211', 'DTD', 'DatasetFolder', 'EMNIST', 'ETH3DStereo', 'EuroSAT', 'FER2013', 'FGVCAircraft', 'FakeData', 'FallingThingsStereo', 'FashionMNIST', 'Flickr30k', 'Flickr8k', 'Flowers102', 'FlyingChairs', 'FlyingThings3D', 'Food101', 'GTSRB', 'HD1K', 'HMDB51', 'INaturalist', 'ImageFolder', 'ImageNet', 'Imagenette', 'InStereo2k', 'KMNIST', 'Kinetics', 'Kitti', 'Kitti2012Stereo', 'Kitti2015Stereo', 'KittiFlow', 'LFWPairs', 'LFWPeople', 'LSUN', 'LSUNClass', 'MNIST', 'Middlebury2014Stereo', 'MovingMNIST', 'Omniglot', 'OxfordIIITPet', 'PCAM', 'PhotoTour', 'Places365', 'QMNIST', 'RenderedSST2', 'SBDataset', 'SBU', 'SEMEION', 'STL10', 'SUN397', 'SVHN', 'SceneFlowStereo', 'Sintel', 'SintelStereo', 'StanfordCars', 'UCF101', 'USPS', 'VOCDetection', 'VOCSegmentation', 'VisionDataset', 'WIDERFace', '__all__', '__builtins__', '__cached__',

In [4]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [5]:
device

device(type='cpu')

In [6]:
# Define data augmentation transformations
transform_train = transforms.Compose([
    transforms.RandomHorizontalFlip(p=0.5),  # 50% chance to flip
    transforms.RandomAffine(degrees=0, translate=(0.1, 0.1)),  # Width/height shift by 10%
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
])

transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
])

dtrain = datasets.CIFAR10(root='data',download=True,train=True,transform=transform_train)
dtest = datasets.CIFAR10(root='data',download=True,train=False,transform=transform_test)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to data/cifar-10-python.tar.gz


 36%|███▌      | 61505536/170498071 [01:38<02:55, 622786.84it/s] 


KeyboardInterrupt: 

In [7]:
# View dataset properties
print(f"Train Dataset Length: {len(dtrain)}")
print(f"Test Dataset Length: {len(dtest)}")
print(f"Sample Data Type: {type(dtrain[0])}")

# Access first sample
image, label = dtrain[0]
print(f"Image Shape: {image.shape}")  # (C, H, W) format
print(f"Label: {label}")

# Check class labels
print(f"Classes: {dtrain.classes if hasattr(dtrain, 'classes') else 'No class labels available'}")

Train Dataset Length: 50000
Test Dataset Length: 10000
Sample Data Type: <class 'tuple'>
Image Shape: torch.Size([3, 32, 32])
Label: 6
Classes: ['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']


In [8]:
batch_size = 64

# Create data loaders.
train_dataloader = DataLoader(dtrain, batch_size=batch_size)
test_dataloader = DataLoader(dtest, batch_size=batch_size)

for X, y in test_dataloader:
    print(f"Shape of X [N, C, H, W]: {X.shape}")
    print(f"Shape of y: {y.shape} {y.dtype}")
    break

Shape of X [N, C, H, W]: torch.Size([64, 3, 32, 32])
Shape of y: torch.Size([64]) torch.int64


In [9]:
class CNN8(nn.Module):
    def __init__(self):
        super(CNN8, self).__init__()

        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(128, 256, kernel_size=3, padding=1)
        self.conv4 = nn.Conv2d(256, 256, kernel_size=3, padding=1)
        self.conv5 = nn.Conv2d(256, 512, kernel_size=3, padding=1)
        self.conv6 = nn.Conv2d(512, 512, kernel_size=3, padding=1)
        self.fc1 = nn.Linear(512 * 4 * 4, 512)
        self.fc2 = nn.Linear(512, 10)

        self.relu = nn.ReLU()
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.dropout = nn.Dropout(0.5)

    def forward(self, x):
        x = self.relu(self.conv1(x))
        x = self.pool(self.relu(self.conv2(x)))

        x = self.relu(self.conv3(x))
        x = self.pool(self.relu(self.conv4(x)))

        x = self.relu(self.conv5(x))
        x = self.pool(self.relu(self.conv6(x)))

        x = x.view(x.size(0), -1)  # Flatten
        x = self.dropout(self.relu(self.fc1(x)))
        x = self.fc2(x)

        return x

In [10]:
class CrossEntropy(nn.Module):
    def __init__(self, num_classes=10):
        super(CrossEntropy, self).__init__()
        self.ce_loss = nn.CrossEntropyLoss()

    def forward(self, pred, labels):
        return self.ce_loss(pred, labels)


class ReverseCrossEntropy(nn.Module):
    def __init__(self, num_classes=10, scale=1.0):
        super(ReverseCrossEntropy, self).__init__()
        self.num_classes = num_classes
        self.scale = scale

    def forward(self, pred, labels):
        pred = F.softmax(pred, dim=1)
        pred = torch.clamp(pred, min=1e-7, max=1.0)
        label_one_hot = torch.nn.functional.one_hot(labels, self.num_classes).float().to(self.device)
        label_one_hot = torch.clamp(label_one_hot, min=1e-4, max=1.0)
        rce = (-1*torch.sum(pred * torch.log(label_one_hot), dim=1))
        return self.scale * rce.mean()


class NormalizedReverseCrossEntropy(nn.Module):
    def __init__(self, num_classes=10, scale=1.0):
        super(NormalizedReverseCrossEntropy, self).__init__()
        self.device = device
        self.num_classes = num_classes
        self.scale = scale

    def forward(self, pred, labels):
        pred = F.softmax(pred, dim=1)
        pred = torch.clamp(pred, min=1e-7, max=1.0)
        label_one_hot = torch.nn.functional.one_hot(labels, self.num_classes).float().to(self.device)
        label_one_hot = torch.clamp(label_one_hot, min=1e-4, max=1.0)
        normalizor = 1 / 4 * (self.num_classes - 1)
        rce = (-1*torch.sum(pred * torch.log(label_one_hot), dim=1))
        return self.scale * normalizor * rce.mean()


class NormalizedCrossEntropy(nn.Module):
    def __init__(self, num_classes=10, scale=1.0):
        super(NormalizedCrossEntropy, self).__init__()
        self.device = device
        self.num_classes = num_classes
        self.scale = scale

    def forward(self, pred, labels):
        pred = F.log_softmax(pred, dim=1)
        label_one_hot = torch.nn.functional.one_hot(labels, self.num_classes).float().to(self.device)
        nce = -1 * torch.sum(label_one_hot * pred, dim=1) / (- pred.sum(dim=1))
        return self.scale * nce.mean()


class MeanAbsoluteError(nn.Module):
    def __init__(self, num_classes=10, scale=1.0):
        super(MeanAbsoluteError, self).__init__()
        self.device = device
        self.num_classes = num_classes
        self.scale = scale
        return

    def forward(self, pred, labels):
        pred = F.softmax(pred, dim=1)
        label_one_hot = torch.nn.functional.one_hot(labels, self.num_classes).float().to(self.device)
        mae = 1. - torch.sum(label_one_hot * pred, dim=1)
        return self.scale * mae.mean()


class NormalizedMeanAbsoluteError(nn.Module):
    def __init__(self, num_classes=10, scale=1.0):
        super(NormalizedMeanAbsoluteError, self).__init__()
        self.device = device
        self.num_classes = num_classes
        self.scale = scale
        return

    def forward(self, pred, labels):
        pred = F.softmax(pred, dim=1)
        label_one_hot = torch.nn.functional.one_hot(labels, self.num_classes).float().to(self.device)
        normalizor = 1 / (2 * (self.num_classes - 1))
        mae = 1. - torch.sum(label_one_hot * pred, dim=1)
        return self.scale * normalizor * mae.mean()


class FocalLoss(nn.Module):
    '''
        https://github.com/clcarwin/focal_loss_pytorch/blob/master/focalloss.py
    '''

    def __init__(self, gamma=0.5, alpha=None, size_average=True):
        super(FocalLoss, self).__init__()
        self.gamma = gamma
        self.alpha = alpha
        if isinstance(alpha, (float, int)):
            self.alpha = torch.Tensor([alpha, 1-alpha])
        if isinstance(alpha, list):
            self.alpha = torch.Tensor(alpha)
        self.size_average = size_average

    def forward(self, input, target):
        if input.dim() > 2:
            input = input.view(input.size(0), input.size(1), -1)  # N,C,H,W => N,C,H*W
            input = input.transpose(1, 2)                         # N,C,H*W => N,H*W,C
            input = input.contiguous().view(-1, input.size(2))    # N,H*W,C => N*H*W,C
        target = target.view(-1, 1)

        logpt = F.log_softmax(input, dim=1)
        logpt = logpt.gather(1, target)
        logpt = logpt.view(-1)
        pt = torch.autograd.Variable(logpt.data.exp())

        if self.alpha is not None:
            if self.alpha.type() != input.data.type():
                self.alpha = self.alpha.type_as(input.data)
            at = self.alpha.gather(0, target.data.view(-1))
            logpt = logpt * torch.autograd.Variable(at)

        loss = -1 * (1-pt)**self.gamma * logpt
        if self.size_average:
            return loss.mean()
        else:
            return loss.sum()


class NormalizedFocalLoss(nn.Module):
    def __init__(self, scale=1.0, gamma=0, num_classes=10, alpha=None, size_average=True):
        super(NormalizedFocalLoss, self).__init__()
        self.gamma = gamma
        self.size_average = size_average
        self.num_classes = num_classes
        self.scale = scale

    def forward(self, input, target):
        target = target.view(-1, 1)
        logpt = F.log_softmax(input, dim=1)
        normalizor = torch.sum(-1 * (1 - logpt.data.exp()) ** self.gamma * logpt, dim=1)
        logpt = logpt.gather(1, target)
        logpt = logpt.view(-1)
        pt = torch.autograd.Variable(logpt.data.exp())
        loss = -1 * (1-pt)**self.gamma * logpt
        loss = self.scale * loss / normalizor

        if self.size_average:
            return loss.mean()
        else:
            return loss.sum()


# losses done - 4 possible combinations below

class NCEandRCE(torch.nn.Module):
    def __init__(self, alpha, beta, num_classes=10):
        super(NCEandRCE, self).__init__()
        self.num_classes = num_classes
        self.nce = NormalizedCrossEntropy(scale=alpha, num_classes=num_classes)
        self.rce = ReverseCrossEntropy(scale=beta, num_classes=num_classes)

    def forward(self, pred, labels):
        return self.nce(pred, labels) + self.rce(pred, labels)


class NCEandMAE(torch.nn.Module):
    def __init__(self, alpha, beta, num_classes=10):
        super(NCEandMAE, self).__init__()
        self.num_classes = num_classes
        self.nce = NormalizedCrossEntropy(scale=alpha, num_classes=num_classes)
        self.mae = MeanAbsoluteError(scale=beta, num_classes=num_classes)

    def forward(self, pred, labels):
        return self.nce(pred, labels) + self.mae(pred, labels)


class NFLandMAE(torch.nn.Module):
    def __init__(self, alpha, beta, num_classes=10, gamma=0.5):
        super(NFLandMAE, self).__init__()
        self.num_classes = num_classes
        self.nfl = NormalizedFocalLoss(scale=alpha, gamma=gamma, num_classes=num_classes)
        self.mae = MeanAbsoluteError(scale=beta, num_classes=num_classes)

    def forward(self, pred, labels):
        return self.nfl(pred, labels) + self.mae(pred, labels)


class NFLandRCE(torch.nn.Module):
    def __init__(self, alpha, beta, num_classes=10, gamma=0.5):
        super(NFLandRCE, self).__init__()
        self.num_classes = num_classes
        self.nfl = NormalizedFocalLoss(scale=alpha, gamma=gamma, num_classes=num_classes)
        self.rce = ReverseCrossEntropy(scale=beta, num_classes=num_classes)

    def forward(self, pred, labels):
        return self.nfl(pred, labels) + self.rce(pred, labels)


In [11]:
# robust loss - MAE, RCE
# normalized - NMAE, NRCE
# non robust - CE, FL
# made robust - NCE, NFL
# active loss - NCE, NFL
# passive loss - MAE, RCE
# 4 possible combinations - NCE + MAE, NCE + RCE, NFL + MAE, NFL + RCE

# Experiment 1 - robust(normalized) vs non robust test error
# Experiment 2 - best alpha, beta for each combination
# Experiment 3 - comparison across 4 possible combinations (best params of each)


In [12]:
def add_symmetric_noise(labels, noise_ratio):
    """Flip labels randomly to incorrect ones for symmetric noise."""
    noisy_labels = labels.clone()
    num_classes = len(np.unique(labels))
    num_samples = len(labels)

    # Randomly flip labels
    for i in range(num_samples):
        if np.random.rand() < noise_ratio:
            # Randomly choose a new label that is different from the original
            new_label = np.random.randint(0, num_classes)
            while new_label == labels[i]:
                new_label = np.random.randint(0, num_classes)
            noisy_labels[i] = new_label

    return noisy_labels


def add_asymmetric_noise(labels, noise_ratio):
    """Flip labels within a specific set of classes for asymmetric noise."""
    noisy_labels = labels.clone()

    # Noise map: Flip classes as defined by the user (example for CIFAR-10)
    noise_map = {
        0: [1, 2],  # Flip class 0 to class 1 or class 2
        1: [2, 3],  # Flip class 1 to class 2 or class 3
        2: [3, 4],  # Flip class 2 to class 3 or class 4
        3: [4, 5],  # Flip class 3 to class 4 or class 5
        4: [5, 6],  # Flip class 4 to class 5 or class 6
        5: [6, 7],  # Flip class 5 to class 6 or class 7
        6: [7, 8],  # Flip class 6 to class 7 or class 8
        7: [8, 9],  # Flip class 7 to class 8 or class 9
        8: [9, 0],  # Flip class 8 to class 9 or class 0
        9: [0, 1],   # Flip class 9 to class 0 or class 1
        10:[0,1]
    }
    num_samples = len(labels)

    for i in range(num_samples):
        if np.random.rand() < noise_ratio:
            class_label = labels[i].item()
            if class_label in noise_map:
                new_label = np.random.choice(noise_map[class_label])
                noisy_labels[i] = new_label

    return noisy_labels

In [13]:
def train(dataloader, model, loss_fn, optimizer, scheduler, noise_type, noise_ratio):
    size = len(dataloader.dataset)
    model.train()
    for batch, (X, y) in enumerate(dataloader):
        if noise_type == 'symmetric':
            y = add_symmetric_noise(y, noise_ratio)
        elif noise_type == 'asymmetric':
            y = add_asymmetric_noise(y, noise_ratio)
        else:
            pass
        X, y = X.to(device), y.to(device)

        # Compute prediction error
        pred = model(X)
        loss = loss_fn(pred, y)

        # Backpropagation
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

        if batch % 100 == 0:
            loss, current = loss.item(), (batch + 1) * len(X)
            print(f"loss: {loss:>7f}  [{current:>5d}/{size:>5d}]  lr: {scheduler.get_last_lr()[0]:.6f}")


def test(dataloader, model, loss_fn):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    model.eval()
    test_loss, correct = 0, 0
    with torch.no_grad():
        for X, y in dataloader:
            X, y = X.to(device), y.to(device)
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()
    test_loss /= num_batches
    correct /= size
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")
    return correct*100


In [14]:
num_classes = 10
gamma = 0.5
alpha = 1
beta = 1
loss_functions = [
    CrossEntropy(num_classes=num_classes),
    ReverseCrossEntropy(num_classes=num_classes, scale=1.0),
    NormalizedReverseCrossEntropy(num_classes=num_classes, scale=1.0),
    NormalizedCrossEntropy(num_classes=num_classes, scale=1.0),
    MeanAbsoluteError(num_classes=num_classes, scale=1.0),
    NormalizedMeanAbsoluteError(num_classes=num_classes, scale=1.0),
    FocalLoss(gamma=gamma, alpha=None, size_average=True),
    NormalizedFocalLoss(scale=1.0, gamma=gamma, num_classes=num_classes, alpha=None, size_average=True)
]

apl_loss_functions = [
    NCEandRCE(alpha=alpha, beta=beta, num_classes=num_classes),
    NCEandMAE(alpha=alpha, beta=beta, num_classes=num_classes),
    NFLandMAE(alpha=alpha, beta=beta, num_classes=num_classes, gamma=gamma),
    NFLandRCE(alpha=alpha, beta=beta, num_classes=num_classes, gamma=gamma)
]

results = []

In [None]:
# Initialize model, optimizer, and loss function

model = CNN8().to(device)
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9, weight_decay=1e-4)
scheduler = lr_scheduler.CosineAnnealingLR(optimizer, T_max=10)

loss_fn = CrossEntropy(num_classes=num_classes)

noise_type = "symmetric"
noise_ratio = 0.6
epochs = 120
print(f"Loss Function: {loss_fn}")
R = []
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train(train_dataloader, model, loss_fn, optimizer, scheduler, noise_type, noise_ratio)
    test_accuracy = test(test_dataloader, model, loss_fn)
    R.append(test_accuracy)
    scheduler.step()
results.append(R)
print("Done!")

Loss Function: CrossEntropy(
  (ce_loss): CrossEntropyLoss()
)
Epoch 1
-------------------------------
loss: 2.303738  [   64/50000]  lr: 0.010000
loss: 2.300054  [ 6464/50000]  lr: 0.010000
loss: 2.301407  [12864/50000]  lr: 0.010000
loss: 2.302445  [19264/50000]  lr: 0.010000
loss: 2.306779  [25664/50000]  lr: 0.010000
loss: 2.304056  [32064/50000]  lr: 0.010000
loss: 2.303227  [38464/50000]  lr: 0.010000
loss: 2.302618  [44864/50000]  lr: 0.010000
Test Error: 
 Accuracy: 10.2%, Avg loss: 2.302203 

Epoch 2
-------------------------------
loss: 2.302208  [   64/50000]  lr: 0.009755
loss: 2.304506  [ 6464/50000]  lr: 0.009755
loss: 2.305957  [12864/50000]  lr: 0.009755
loss: 2.309018  [19264/50000]  lr: 0.009755
loss: 2.296581  [25664/50000]  lr: 0.009755
loss: 2.291424  [32064/50000]  lr: 0.009755
loss: 2.323213  [38464/50000]  lr: 0.009755
loss: 2.304954  [44864/50000]  lr: 0.009755
Test Error: 
 Accuracy: 23.0%, Avg loss: 2.193240 

Epoch 3
-------------------------------
loss: 2.2

In [None]:
# Initialize model, optimizer, and loss function

model = CNN8().to(device)
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9, weight_decay=1e-4)
scheduler = lr_scheduler.CosineAnnealingLR(optimizer, T_max=10)

loss_fn = NormalizedCrossEntropy(num_classes=num_classes, scale=1.0)

noise_type = "symmetric"
noise_ratio = 0.6
epochs = 120
print(f"Loss Function: {loss_fn}")
R = []
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train(train_dataloader, model, loss_fn, optimizer, scheduler, noise_type, noise_ratio)
    test_accuracy = test(test_dataloader, model, loss_fn)
    R.append(test_accuracy)
    scheduler.step()
results.append(R)
print("Done!")

In [None]:
model = CNN8().to(device)
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9, weight_decay=1e-4)
scheduler = lr_scheduler.CosineAnnealingLR(optimizer, T_max=10)

loss_fn = ReverseCrossEntropy(num_classes=num_classes, scale=1.0)

noise_type = "symmetric"
noise_ratio = 0.6
epochs = 120
print(f"Loss Function: {loss_fn}")
R = []
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train(train_dataloader, model, loss_fn, optimizer, scheduler, noise_type, noise_ratio)
    test_accuracy = test(test_dataloader, model, loss_fn)
    R.append(test_accuracy)
    scheduler.step()
results.append(R)
print("Done!")

In [None]:
model = CNN8().to(device)
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9, weight_decay=1e-4)
scheduler = lr_scheduler.CosineAnnealingLR(optimizer, T_max=10)

loss_fn = NormalizedReverseCrossEntropy(num_classes=num_classes, scale=1.0)

noise_type = "symmetric"
noise_ratio = 0.6
epochs = 120
print(f"Loss Function: {loss_fn}")
R = []
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train(train_dataloader, model, loss_fn, optimizer, scheduler, noise_type, noise_ratio)
    test_accuracy = test(test_dataloader, model, loss_fn)
    R.append(test_accuracy)
    scheduler.step()
results.append(R)
print("Done!")

In [None]:
model = CNN8().to(device)
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9, weight_decay=1e-4)
scheduler = lr_scheduler.CosineAnnealingLR(optimizer, T_max=10)

loss_fn = MeanAbsoluteError(num_classes=num_classes, scale=1.0)

noise_type = "symmetric"
noise_ratio = 0.6
epochs = 120
print(f"Loss Function: {loss_fn}")
R = []
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train(train_dataloader, model, loss_fn, optimizer, scheduler, noise_type, noise_ratio)
    test_accuracy = test(test_dataloader, model, loss_fn)
    R.append(test_accuracy)
    scheduler.step()
results.append(R)
print("Done!")

In [None]:
model = CNN8().to(device)
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9, weight_decay=1e-4)
scheduler = lr_scheduler.CosineAnnealingLR(optimizer, T_max=10)

loss_fn = NormalizedMeanAbsoluteError(num_classes=num_classes, scale=1.0)

noise_type = "symmetric"
noise_ratio = 0.6
epochs = 120
print(f"Loss Function: {loss_fn}")
R = []
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train(train_dataloader, model, loss_fn, optimizer, scheduler, noise_type, noise_ratio)
    test_accuracy = test(test_dataloader, model, loss_fn)
    R.append(test_accuracy)
    scheduler.step()
results.append(R)
print("Done!")

In [None]:
model = CNN8().to(device)
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9, weight_decay=1e-4)
scheduler = lr_scheduler.CosineAnnealingLR(optimizer, T_max=10)

loss_fn = FocalLoss(gamma=gamma, alpha=None, size_average=True)

noise_type = "symmetric"
noise_ratio = 0.6
epochs = 120
print(f"Loss Function: {loss_fn}")
R = []
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train(train_dataloader, model, loss_fn, optimizer, scheduler, noise_type, noise_ratio)
    test_accuracy = test(test_dataloader, model, loss_fn)
    R.append(test_accuracy)
    scheduler.step()
results.append(R)
print("Done!")

In [None]:
model = CNN8().to(device)
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9, weight_decay=1e-4)
scheduler = lr_scheduler.CosineAnnealingLR(optimizer, T_max=10)

loss_fn = NormalizedFocalLoss(scale=1.0, gamma=gamma, num_classes=num_classes, alpha=None, size_average=True)

noise_type = "symmetric"
noise_ratio = 0.6
epochs = 120
print(f"Loss Function: {loss_fn}")
R = []
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train(train_dataloader, model, loss_fn, optimizer, scheduler, noise_type, noise_ratio)
    test_accuracy = test(test_dataloader, model, loss_fn)
    R.append(test_accuracy)
    scheduler.step()
results.append(R)
print("Done!")