<a href="https://colab.research.google.com/github/suinkangme/comp433_project/blob/main/COMP433_Project_DK.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Developing a robust CNN model to address the challenge of learning with label noise in  CIFAR10 dataset

- CIFAR10 Label : ‘airplane’, ‘automobile’, ‘bird’, ‘cat’, ‘deer’, ‘dog’, ‘frog’, ‘horse’, ‘ship’, ‘truck’.

- image size : 3x32x32




In [1]:
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import numpy as np
from torch.utils.data import DataLoader, random_split
import os
import matplotlib.pyplot as plt
from IPython.display import clear_output

In [2]:
# Training on GPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

## Load and normalize CIFAR10

In [3]:
transform_train = transforms.Compose([
    transforms.RandomResizedCrop(32),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
])

transform_val = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])



In [4]:
# download dataset for training
cifar_dataset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=None)

# 데이터를 train과 validation으로 나누기 위해 인덱스 생성
dataset_size = len(cifar_dataset)
validation_split = 0.2
val_size = int(validation_split * dataset_size)
train_size = dataset_size - val_size

# 데이터를 나누기
train_dataset, val_dataset = random_split(cifar_dataset, [train_size, val_size])

# 적절한 transform 적용
train_dataset.dataset.transform = transform_train
val_dataset.dataset.transform = transform_val

# 데이터 로더 설정
batch_size = 64
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)


Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


100%|██████████| 170498071/170498071 [00:11<00:00, 15161843.29it/s]


Extracting ./data/cifar-10-python.tar.gz to ./data


In [5]:

# test dataset
test_dataset = torchvision.datasets.CIFAR10(root='./data',
                                          train=False,
                                          download=True,
                                          transform = transform_test)

test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

Files already downloaded and verified


## Define & train with a  baseline CNN model

In [6]:
class BaselineModel(nn.Module):
  def __init__(self):
    super(BaselineModel,self).__init__()
    self.features = nn.Sequential(
      nn.Conv2d(3, 8, kernel_size=3, padding = 1),  # (input channel, output channels, kernel size, padding)  32*32*8
      nn.ReLU(inplace=True), # activation function modifies the input tensor directly
      nn.Conv2d(8, 16, kernel_size=3, padding=1),
      nn.ReLU(inplace=True),
      nn.MaxPool2d(kernel_size=2,stride=2), # 16*16*16

      nn.Conv2d(16, 32, kernel_size=3, padding=1),
      nn.ReLU(inplace=True),
      nn.Conv2d(32, 64, kernel_size=3, padding=1),
      nn.ReLU(inplace=True),
      nn.Conv2d(64, 128, kernel_size=3, padding=1),
      nn.MaxPool2d(kernel_size=2,stride=2) # 8*8*128
    )

    # fully connected layers
    self.fc_layers = nn.Sequential(
      nn.Linear(128*8*8, 120),
      nn.ReLU(inplace=True),
      nn.Linear(120,84),
      nn.ReLU(inplace=True),
      nn.Linear(84,10)
    )


  def forward(self, x):
    x = self.features(x)
    x = torch.flatten(x,1)
    x = self.fc_layers(x)
    return x



In [7]:
base_model = BaselineModel().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(base_model.parameters(), lr=0.01, momentum=0.9)


In [8]:
base_model.train()
num_epochs = 10

for epoch in range(num_epochs):
    # Training
    base_model.train()
    total_train_loss = 0.0
    correct_train = 0
    total_train = 0

    for inputs, labels in train_loader:
        # Move data to GPU
        inputs, labels = inputs.to(device), labels.to(device)

        # Zero the gradients
        optimizer.zero_grad()

        # Forward pass
        outputs = base_model(inputs)
        loss = criterion(outputs, labels)

        # Backward pass and optimization
        loss.backward()
        optimizer.step()

        total_train_loss += loss.item()
        _, predicted = torch.max(outputs.data, 1)
        total_train += labels.size(0)
        correct_train += (predicted == labels).sum().item()

    average_train_loss = total_train_loss / len(train_loader)
    train_acc = 100 * correct_train / total_train

    # Validation
    base_model.eval()
    total_val_loss = 0.0
    correct_val = 0
    total_val = 0

    with torch.no_grad():
        for inputs, labels in val_loader:
            # Move data to GPU
            inputs, labels = inputs.to(device), labels.to(device)

            # Forward pass
            outputs = base_model(inputs)
            loss = criterion(outputs, labels)

            total_val_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            total_val += labels.size(0)
            correct_val += (predicted == labels).sum().item()

    average_val_loss = total_val_loss / len(val_loader)
    val_acc = 100 * correct_val / total_val

    print(f'Epoch {epoch + 1}/{num_epochs}, '
          f'Train Loss: {average_train_loss:.4f}, Train Accuracy: {train_acc:.2f}%, '
          f'Validation Loss: {average_val_loss:.4f}, Validation Accuracy: {val_acc:.2f}')


Epoch 1/10, Train Loss: 2.2501, Train Accuracy: 13.57%, Validation Loss: 1.9609, Validation Accuracy: 27.48
Epoch 2/10, Train Loss: 1.6230, Train Accuracy: 40.83%, Validation Loss: 1.4169, Validation Accuracy: 49.18
Epoch 3/10, Train Loss: 1.3080, Train Accuracy: 52.53%, Validation Loss: 1.2174, Validation Accuracy: 56.83
Epoch 4/10, Train Loss: 1.0915, Train Accuracy: 61.21%, Validation Loss: 1.0687, Validation Accuracy: 62.21
Epoch 5/10, Train Loss: 0.9044, Train Accuracy: 68.29%, Validation Loss: 0.9325, Validation Accuracy: 67.33
Epoch 6/10, Train Loss: 0.7715, Train Accuracy: 72.79%, Validation Loss: 0.8748, Validation Accuracy: 69.96
Epoch 7/10, Train Loss: 0.6470, Train Accuracy: 77.31%, Validation Loss: 0.8478, Validation Accuracy: 71.18
Epoch 8/10, Train Loss: 0.5414, Train Accuracy: 80.71%, Validation Loss: 0.8848, Validation Accuracy: 70.91
Epoch 9/10, Train Loss: 0.4312, Train Accuracy: 84.79%, Validation Loss: 0.9436, Validation Accuracy: 71.29
Epoch 10/10, Train Loss: 0.3

In [9]:
# testing
base_model.eval()

# Variables to store predictions and ground truth labels
num_correct_predictions = 0
total_num_predictions = 0
test_loss = 0.0

# Iterate over the test dataset
with torch.no_grad():  # temporarily set all requires_grad flags to False
    for inputs, labels in test_loader:
        # Move data to GPU
        inputs, labels = inputs.to(device), labels.to(device)

        # Forward pass
        outputs = base_model(inputs)
        loss = criterion(outputs, labels)

        # Compute loss and number of accurate predictions
        test_loss += loss.item()
        preds = outputs.argmax(dim=1)
        num_correct_predictions += (preds == labels).sum().item()
        total_num_predictions += labels.size(0)

# Compute average test loss
average_test_loss = test_loss / len(test_loader.dataset)

# Compute accuracy percentage
accuracy = (num_correct_predictions / total_num_predictions) * 100

print(f"Test Accuracy: {accuracy:.2f}%, Average Test Loss: {average_test_loss:.4f}")


Test Accuracy: 71.48%, Average Test Loss: 0.0155


## Noise Labeling
- 5 different noise levels (10%,
30%, 50%, 80%, 90%)

### Symmetric label noise

In [10]:
def replace_symmetric_noise(labels, epsilon):
    num_labels = len(labels)
    num_flips = int(epsilon * num_labels)

    # choose the label to be flipped
    flip_indices = np.random.choice(num_labels, num_flips, replace=True)

    # filp the label
    labels[flip_indices] = np.random.randint(0, 10, num_flips)

    return labels

### Asymmetric label noise

In [11]:
def flip_labels_asymmetrically(labels, epsilon):
    flip_rules = {
        9: 1,   # Truck to Automobile
        2: 0,   # Bird to Airplane
        4: 7,   # Deer to Horse
        3: 5,   # Cat to Dog
        5: 3,   # Dog to Cat
    }

    flipped_labels = []
    for label in labels:
        # Check if label flipping should occur based on epsilon
        if np.random.random() < epsilon:
            # Flip the label based on the flip_rules dictionary
            flipped_label = flip_rules.get(label, label)
            flipped_labels.append(flipped_label)
        else:
            # If no flipping, keep the original label
            flipped_labels.append(label)

    return np.array(flipped_labels)


## Train the model with the noise labeling

### Train with the symmetric noise labeling

In [17]:
# noise_levels
noise_levels = [0.1, 0.3, 0.5, 0.8, 0.9]

# Create a dictionary with keys in the format 'noise_level_{100 * value}'
symmetric_model_dict = {f'noise_level_{int(100 * level)}_sy': None for level in noise_levels}

for epsilon in noise_levels:

    net = BaselineModel()
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(net.parameters(), lr=0.01, momentum=0.9)

    net = net.to(device)
    net.train()
    num_epochs = 5

    print(f"Symmetric Training with noise level: {epsilon}")

    for epoch in range(num_epochs):
        # Training
        net.train()
        total_train_loss = 0.0
        correct_train = 0
        total_train = 0

        for inputs, labels in train_loader:

            labels_noisy = torch.from_numpy(replace_symmetric_noise(labels.numpy(), epsilon))

            # Move data to GPU
            inputs, labels_noisy = inputs.to(device), labels_noisy.to(device)

            # Zero the gradients
            optimizer.zero_grad()

            # Forward pass
            outputs = net(inputs)
            loss = criterion(outputs, labels_noisy)

            # Backward pass and optimization
            loss.backward()
            optimizer.step()

            total_train_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            total_train += labels_noisy.size(0)
            correct_train += (predicted == labels_noisy).sum().item()

        average_train_loss = total_train_loss / len(train_loader)
        train_acc = 100 * correct_train / total_train

        # Validation
        net.eval()
        total_val_loss = 0.0
        correct_val = 0
        total_val = 0

        with torch.no_grad():
            for inputs, labels in val_loader:
                # Move data to GPU
                inputs, labels = inputs.to(device), labels.to(device)

                # Forward pass
                outputs = net(inputs)
                loss = criterion(outputs, labels)

                total_val_loss += loss.item()
                _, predicted = torch.max(outputs.data, 1)
                total_val += labels.size(0)
                correct_val += (predicted == labels).sum().item()

        average_val_loss = total_val_loss / len(val_loader)
        val_acc = 100 * correct_val / total_val

        print(f'Epoch {epoch + 1}/{num_epochs}, '
              f'Train Loss: {average_train_loss:.4f}, Train Accuracy: {train_acc:.2f}%, '
              f'Validation Loss: {average_val_loss:.4f}, Validation Accuracy: {val_acc:.2f}')

    # save model to dictionary
    symmetric_model_dict[f'noise_level_{int(100 * epsilon)}_sy'] = net.state_dict()


Symmetric Training with noise level: 0.1
Epoch 1/5, Train Loss: 2.2945, Train Accuracy: 11.25%, Validation Loss: 2.1260, Validation Accuracy: 19.02
Epoch 2/5, Train Loss: 1.9610, Train Accuracy: 29.36%, Validation Loss: 1.6650, Validation Accuracy: 40.47
Epoch 3/5, Train Loss: 1.6835, Train Accuracy: 41.77%, Validation Loss: 1.4254, Validation Accuracy: 49.27
Epoch 4/5, Train Loss: 1.4591, Train Accuracy: 51.70%, Validation Loss: 1.1841, Validation Accuracy: 58.77
Epoch 5/5, Train Loss: 1.3067, Train Accuracy: 58.09%, Validation Loss: 1.1220, Validation Accuracy: 62.66
Symmetric Training with noise level: 0.3
Epoch 1/5, Train Loss: 2.3023, Train Accuracy: 10.61%, Validation Loss: 2.2906, Validation Accuracy: 13.43
Epoch 2/5, Train Loss: 2.1362, Train Accuracy: 22.50%, Validation Loss: 1.8132, Validation Accuracy: 35.86
Epoch 3/5, Train Loss: 1.9476, Train Accuracy: 33.48%, Validation Loss: 1.6303, Validation Accuracy: 45.66
Epoch 4/5, Train Loss: 1.8383, Train Accuracy: 38.94%, Validat

### Testing

In [18]:
for key, net in symmetric_model_dict.items():
    net.eval()
    net.to(device)

    # Variables to store predictions and ground truth labels
    num_correct_predictions = 0
    total_num_predictions = 0
    loss = 0.0

    # Iterate over the test dataset
    with torch.no_grad():  # temporarily set all requires_grad flags to False
        for i, (data, label) in enumerate(test_loader):
            # move inputs to desired device and dtype
            data = data.to(device, dtype=torch.float32)
            label = label.to(device, dtype=torch.long)

            # forward pass
            logit = net(data)

            # compute loss and number of accurate predictions
            loss += torch.nn.functional.cross_entropy(logit, label, reduction='sum').item()
            preds = logit.max(dim=1)[1]
            num_correct_predictions += (preds == label).sum().item()
            total_num_predictions += len(preds)

        # compute average loss
        loss /= total_num_predictions

        # compute accuracy percentage
        accuracy = (float(num_correct_predictions) / total_num_predictions) * 100

        print(f"Symmetric {key} Accuracy: {accuracy:.2f}%, Loss: {loss:.4f}")


Symmetric noise_level_10_sy Accuracy: 62.65%, Loss: 1.1171
Symmetric noise_level_30_sy Accuracy: 56.49%, Loss: 1.3367
Symmetric noise_level_50_sy Accuracy: 53.56%, Loss: 1.5025
Symmetric noise_level_80_sy Accuracy: 45.13%, Loss: 1.7819
Symmetric noise_level_90_sy Accuracy: 37.29%, Loss: 1.9551


### Train with the asymmetric noise labeling

In [19]:
# noise_levels
noise_levels = [0.1, 0.3, 0.5, 0.8, 0.9]
# Create a dictionary with keys in the format 'noise_level_{100 * value}'
asymmetric_model_dict = {f'noise_level_{int(100 * level)}_asy': None for level in noise_levels}



# Training on GPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

for epsilon in noise_levels:


    net = BaselineModel()
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(net.parameters(), lr=0.01, momentum = 0.9)


    net = net.to(device)
    net.train()
    num_epochs = 5


    print(f"Asymmetric Training with noise level: {epsilon}")

    for epoch in range(num_epochs):
      # Training
      net.train()
      total_train_loss = 0.0
      correct_train = 0
      total_train = 0

      for inputs, labels in train_loader:
          labels_noisy = torch.from_numpy(flip_labels_asymmetrically(labels.numpy(), epsilon))
          # Move data to GPU
          inputs, labels_noisy = inputs.to(device), labels_noisy.to(device)

          # Zero the gradients
          optimizer.zero_grad()

          # Forward pass
          outputs = net(inputs)
          loss = criterion(outputs, labels_noisy)

          # Backward pass and optimization
          loss.backward()
          optimizer.step()

          total_train_loss += loss.item()
          _, predicted = torch.max(outputs.data, 1)
          total_train += labels_noisy.size(0)
          correct_train += (predicted == labels_noisy).sum().item()

      average_train_loss = total_train_loss / len(train_loader)
      train_acc = 100 * correct_train / total_train

      # Validation
      net.eval()
      total_val_loss = 0.0
      correct_val = 0
      total_val = 0

      with torch.no_grad():
          for inputs, labels in val_loader:
              # Move data to GPU
              inputs, labels = inputs.to(device), labels.to(device)

              # Forward pass
              outputs = net(inputs)
              loss = criterion(outputs, labels)

              total_val_loss += loss.item()
              _, predicted = torch.max(outputs.data, 1)
              total_val += labels.size(0)
              correct_val += (predicted == labels).sum().item()

      average_val_loss = total_val_loss / len(val_loader)
      val_acc = 100 * correct_val / total_val

      print(f'Epoch {epoch + 1}/{num_epochs}, '
            f'Train Loss: {average_train_loss:.4f}, Train Accuracy: {train_acc:.2f}%, '
            f'Validation Loss: {average_val_loss:.4f}, Validation Accuracy: {val_acc:.2f}')
    # save model to dictionary
    asymmetric_model_dict[f'noise_level_{int(100 * epsilon)}_asy'] = net


Asymmetric Training with noise level: 0.1
Epoch 1/5, Train Loss: 2.0232, Train Accuracy: 25.34%, Validation Loss: 1.6327, Validation Accuracy: 40.01
Epoch 2/5, Train Loss: 1.4802, Train Accuracy: 46.39%, Validation Loss: 1.2802, Validation Accuracy: 54.41
Epoch 3/5, Train Loss: 1.2199, Train Accuracy: 56.32%, Validation Loss: 1.1469, Validation Accuracy: 59.44
Epoch 4/5, Train Loss: 1.0553, Train Accuracy: 62.59%, Validation Loss: 1.0076, Validation Accuracy: 64.83
Epoch 5/5, Train Loss: 0.9254, Train Accuracy: 67.21%, Validation Loss: 0.9439, Validation Accuracy: 67.37
Asymmetric Training with noise level: 0.3
Epoch 1/5, Train Loss: 2.2781, Train Accuracy: 12.77%, Validation Loss: 2.3278, Validation Accuracy: 9.69
Epoch 2/5, Train Loss: 2.2353, Train Accuracy: 16.44%, Validation Loss: 2.0827, Validation Accuracy: 23.93
Epoch 3/5, Train Loss: 1.7016, Train Accuracy: 37.41%, Validation Loss: 1.5893, Validation Accuracy: 40.34
Epoch 4/5, Train Loss: 1.4609, Train Accuracy: 45.48%, Valida

### Testing

In [20]:
for key, net in asymmetric_model_dict.items():
    net.eval()
    net.to(device)

    # Variables to store predictions and ground truth labels
    num_correct_predictions = 0
    total_num_predictions = 0
    loss = 0.0

    # Iterate over the test dataset
    with torch.no_grad():  # temporarily set all requires_grad flags to False
        for i, (data, label) in enumerate(test_loader):
            # move inputs to desired device and dtype
            data = data.to(device, dtype=torch.float32)
            label = label.to(device, dtype=torch.long)

            # forward pass
            logit = net(data)

            # compute loss and number of accurate predictions
            loss += torch.nn.functional.cross_entropy(logit, label, reduction='sum').item()
            preds = logit.max(dim=1)[1]
            num_correct_predictions += (preds == label).sum().item()
            total_num_predictions += len(preds)

        # compute average loss
        loss /= total_num_predictions

        # compute accuracy percentage
        accuracy = (float(num_correct_predictions) / total_num_predictions) * 100

        print(f"Asymmetric {key} Accuracy: {accuracy:.2f}%, Loss: {loss:.4f}")

Asymmetric noise_level_10_asy Accuracy: 66.53%
Asymmetric noise_level_30_asy Accuracy: 57.70%
Asymmetric noise_level_50_asy Accuracy: 56.23%
Asymmetric noise_level_80_asy Accuracy: 44.73%
Asymmetric noise_level_90_asy Accuracy: 42.86%


# Active Passive Loss (APL)

**1. Active Loss Functions**:
<br> Cross Entropy (CE)
<br> Normalized Cross Entropy (NCE)
<br> Focal Loss (FL)
<br> Normalized Focal Loss (NFL)
<br>


<br> **2. Passive Loss Functions**:
<br> Mean Absolute Error (MAE)
<br> Normalized Mean Absolute Error (NMAE)
<br> Reverse Cross Entropy (RCE)
<br> Normalized Reverse Cross Entropy (NRCE)
<br>



In [21]:
class APLLoss(nn.Module):
    def __init__(self, alpha, beta):
        super(APLLoss, self).__init__()
        self.alpha = alpha
        self.beta = beta
        self.active_loss = nn.CrossEntropyLoss()  # CE
        self.passive_loss = nn.L1Loss()  # MAE

    def forward(self, outputs, labels):
        loss_active = self.active_loss(outputs, labels)

        label_one_hot = torch.nn.functional.one_hot(labels.clone().detach(), 10)
        # loss_passive = self.passive_loss(outputs, label_one_hot.view(-1, 1))
        loss_passive = self.passive_loss(outputs, label_one_hot)
        apl_loss = self.alpha * loss_active + self.beta * loss_passive
        return apl_loss


In [22]:
alpha = 1.0  # 이 값은 조절 가능
beta = 10.0   # 이 값은 조절 가능
apl_criterion = APLLoss(alpha, beta)

APL_model = BaselineModel().to(device)
optimizer = optim.SGD(APL_model.parameters(), lr=0.01, momentum=0.9)


In [23]:
APL_model.train()
num_epochs = 5

for epoch in range(num_epochs):
    # Training
    APL_model.train()
    total_train_loss = 0.0
    correct_train = 0
    total_train = 0

    for inputs, labels in train_loader:
        # Move data to GPU
        inputs, labels = inputs.to(device), labels.to(device)

        # Zero the gradients
        optimizer.zero_grad()

        # Forward pass
        outputs = APL_model(inputs)
        loss = apl_criterion(outputs, labels)

        # Backward pass and optimization
        loss.backward()
        optimizer.step()

        total_train_loss += loss.item()
        _, predicted = torch.max(outputs.data, 1)
        total_train += labels.size(0)
        correct_train += (predicted == labels).sum().item()

    average_train_loss = total_train_loss / len(train_loader)
    train_acc = 100 * correct_train / total_train

    # Validation
    APL_model.eval()
    total_val_loss = 0.0
    correct_val = 0
    total_val = 0

    with torch.no_grad():
        for inputs, labels in val_loader:
            # Move data to GPU
            inputs, labels = inputs.to(device), labels.to(device)

            # Forward pass
            outputs = APL_model(inputs)
            loss = apl_criterion(outputs, labels)

            total_val_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            total_val += labels.size(0)
            correct_val += (predicted == labels).sum().item()

    average_val_loss = total_val_loss / len(val_loader)
    val_acc = 100 * correct_val / total_val

    print(f'Epoch {epoch + 1}/{num_epochs}, '
          f'Train Loss: {average_train_loss:.4f}, Train Accuracy: {train_acc:.2f}%, '
          f'Validation Loss: {average_val_loss:.4f}, Validation Accuracy: {val_acc:.2f}')


Epoch 1/5, Train Loss: 2.6138, Train Accuracy: 11.32%, Validation Loss: 2.6071, Validation Accuracy: 10.01
Epoch 2/5, Train Loss: 2.6077, Train Accuracy: 15.52%, Validation Loss: 2.6072, Validation Accuracy: 24.65
Epoch 3/5, Train Loss: 2.6044, Train Accuracy: 21.83%, Validation Loss: 2.5906, Validation Accuracy: 23.69
Epoch 4/5, Train Loss: 2.5700, Train Accuracy: 24.79%, Validation Loss: 2.5534, Validation Accuracy: 26.49
Epoch 5/5, Train Loss: 2.5125, Train Accuracy: 28.92%, Validation Loss: 2.4724, Validation Accuracy: 33.20


## APL with noisy label 90

In [24]:
alpha = 1.0  # 이 값은 조절 가능
beta = 3.0   # 이 값은 조절 가능
apl_criterion = APLLoss(alpha, beta)

APL_model = BaselineModel().to(device)
optimizer = optim.SGD(APL_model.parameters(), lr=0.001, momentum=0.9)


In [25]:
APL_model.train()
num_epochs = 5

for epoch in range(num_epochs):
    # Training
    APL_model.train()
    total_train_loss = 0.0
    correct_train = 0
    total_train = 0

    for inputs, labels in train_loader:

        labels_noisy = torch.tensor(flip_labels_asymmetrically(labels.cpu().numpy(), 0.9)).to(device)
        # Move data to GPU
        inputs, labels_noisy = inputs.to(device), labels_noisy.to(device)


        # Zero the gradients
        optimizer.zero_grad()

        # Forward pass
        outputs = APL_model(inputs)
        loss = apl_criterion(outputs, labels_noisy)

        # Backward pass and optimization
        loss.backward()
        optimizer.step()

        total_train_loss += loss.item()
        _, predicted = torch.max(outputs.data, 1)
        total_train += labels_noisy.size(0)
        correct_train += (predicted == labels_noisy).sum().item()

    average_train_loss = total_train_loss / len(train_loader)
    train_acc = 100 * correct_train / total_train

    # Validation
    APL_model.eval()
    total_val_loss = 0.0
    correct_val = 0
    total_val = 0

    with torch.no_grad():
        for inputs, labels in val_loader:
            # Move data to GPU
            inputs, labels = inputs.to(device), labels.to(device)

            # Forward pass
            outputs = APL_model(inputs)
            loss = apl_criterion(outputs, labels)

            total_val_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            total_val += labels.size(0)
            correct_val += (predicted == labels).sum().item()

    average_val_loss = total_val_loss / len(val_loader)
    val_acc = 100 * correct_val / total_val

    print(f'Epoch {epoch + 1}/{num_epochs}, '
          f'Train Loss: {average_train_loss:.4f}, Train Accuracy: {train_acc:.2f}%, '
          f'Validation Loss: {average_val_loss:.4f}, Validation Accuracy: {val_acc:.2f}')


Epoch 1/5, Train Loss: 2.6078, Train Accuracy: 21.94%, Validation Loss: 2.6035, Validation Accuracy: 12.36
Epoch 2/5, Train Loss: 2.6031, Train Accuracy: 25.46%, Validation Loss: 2.6033, Validation Accuracy: 13.92
Epoch 3/5, Train Loss: 2.6030, Train Accuracy: 27.14%, Validation Loss: 2.6034, Validation Accuracy: 15.39
Epoch 4/5, Train Loss: 2.6029, Train Accuracy: 27.62%, Validation Loss: 2.6036, Validation Accuracy: 10.61
Epoch 5/5, Train Loss: 2.6028, Train Accuracy: 28.69%, Validation Loss: 2.6035, Validation Accuracy: 14.10



## ScanMix