In [1]:
import os

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from scipy.stats import mode
from sklearn.metrics import accuracy_score
from torchvision import models
from tqdm import tqdm

from models_used import CIFAR10_Net
from noise_added import partition_imbalance, flip_labels_imbalance

In [2]:
# Root folder of the CINIC-10 data and the per-channel mean / std passed to
# transforms.Normalize for every dataset below.
cinic_directory = 'cinic_10_data'
cinic_mean = [0.47889522, 0.47227842, 0.43047404]
cinic_std = [0.24205776, 0.23828046, 0.25874835]

# NOTE(review): batch_size is not passed to anything in this cell — confirm
# whether the loader-building helpers are meant to receive it.
batch_size = 512


def _cinic_folder(split, source):
    """Build the ImageFolder dataset stored at `<cinic_directory>/<split>/<source>`.

    Each image is converted to a tensor and normalized with cinic_mean / cinic_std.
    A fresh transform pipeline is created per dataset.
    """
    preprocessing = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean=cinic_mean, std=cinic_std)
    ])
    return torchvision.datasets.ImageFolder(
        f'{cinic_directory}/{split}/{source}',
        transform=preprocessing
    )


# Train data
cifar_train = _cinic_folder('train2', 'cifar')
# `imagenet` is a second name for the same dataset object, kept for compatibility.
imagenet_train = imagenet = _cinic_folder('train2', 'imagenet')


# Validation data
cifar_valid = _cinic_folder('valid2', 'cifar')
imagenet_valid = _cinic_folder('valid2', 'imagenet')


# Test data
cifar_test = _cinic_folder('test2', 'cifar')
imagenet_test = _cinic_folder('test2', 'imagenet')

In [3]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(f'Using device: {device}')

Using device: cuda


## Flipped labels

In [4]:
# Build the flipped-label splits for both training sets, CIFAR first and then
# ImageNet, with the same settings: flip_percentages [0, 0.5, 0.7] and divs=3.
# Each call gets its own freshly built percentage list.
# NOTE(review): presumably returns one loader per split — the training cell
# indexes the result by split and iterates it for (inputs, labels) batches.
fl_cifar_loaders, fl_imagenet_loaders = (
    flip_labels_imbalance(dataset=train_set, flip_percentages=[0, 0.5, 0.7], divs=3)
    for train_set in (cifar_train, imagenet_train)
)

In [5]:
# Hyperparameters for the flipped-label training runs.
learning_rate = 0.001   # step size handed to the optimizers
num_epochs = 10         # passes over each split
divs = 3                # number of splits / models; same value as divs=3 in the flip_labels_imbalance calls
# NOTE(review): batch_size is not used by the visible training code — confirm
# the loaders were built with this value.
batch_size = 512

In [6]:
# Train one model per flipped-label split (`divs` of them) for each
# (architecture, data source) pair and save every trained state_dict:
#   1. pretrained VGG16 on the CIFAR splits     -> vgg_cifar_fl_{i}.pt
#   2. pretrained VGG16 on the ImageNet splits  -> vgg_imagenet_fl_{i}.pt
#   3. CIFAR10_Net on the CIFAR splits          -> cifar10_net_cifar_fl_{i}.pt
#   4. CIFAR10_Net on the ImageNet splits       -> cifar10_net_imagenet_fl_{i}.pt

MODEL_DIR = 'cinic_various_models'
NUM_CLASSES = 10

# One loss object shared by all runs; kept at cell level so it stays available
# in the notebook namespace after this cell.
criterion = nn.CrossEntropyLoss()


def build_vgg16():
    """Return torchvision's pretrained VGG16 with its last classifier layer resized to NUM_CLASSES outputs."""
    # NOTE(review): newer torchvision releases deprecate `pretrained=` in favour
    # of `weights=`; left unchanged because the installed version is unknown.
    net = models.vgg16(pretrained = True)
    net.classifier[6] = nn.Linear(net.classifier[6].in_features, NUM_CLASSES)
    return net


def sgd_with_momentum(params):
    """Optimizer for the VGG16 / CIFAR runs."""
    return torch.optim.SGD(params, lr = learning_rate, momentum=0.9, weight_decay=5e-4)


def adam_with_weight_decay(params):
    """Optimizer for the VGG16 / ImageNet runs."""
    # NOTE(review): the CIFAR VGG16 runs use SGD while these use Adam — confirm intended.
    return torch.optim.Adam(params, lr = learning_rate, weight_decay=5e-4)


def plain_adam(params):
    """Optimizer for the CIFAR10_Net runs (previously a hard-coded lr of 0.001, the value of learning_rate)."""
    return optim.Adam(params, lr=learning_rate)


def train_and_save(build_model, build_optimizer, loaders, save_stem, log_every):
    """Train one fresh model per split and save each model's state_dict.

    Reads `divs`, `num_epochs`, `device` and `criterion` from the notebook namespace.

    Args:
        build_model: zero-argument callable returning a new model.
        build_optimizer: callable mapping `model.parameters()` to an optimizer.
        loaders: indexable collection; `loaders[i]` is iterated once per epoch
            and yields (inputs, labels) batches.
        save_stem: file-name prefix; model i is written to
            f'{MODEL_DIR}/{save_stem}_{i}.pt'.
        log_every: print the mean loss of every `log_every` consecutive batches
            (1 prints each batch's loss). Batches left over at the end of an
            epoch are reported as well, so short loaders still produce output.

    Returns:
        The model trained on the last split, or None when `divs` is 0.
    """
    # Create the output folder up front so a missing directory fails (or is
    # fixed) before any training time is spent.
    os.makedirs(MODEL_DIR, exist_ok=True)

    last_model = None
    for i in range(divs):
        print(f"Training model {i+1}")
        model = build_model().to(device)
        optimizer = build_optimizer(model.parameters())
        for epoch in range(num_epochs):
            model.train()
            running_loss = 0.0
            pending = 0  # batches accumulated since the last printed line
            for j, (inputs, labels) in enumerate(loaders[i], 1):
                inputs = inputs.to(device)
                labels = labels.to(device)
                optimizer.zero_grad()
                loss = criterion(model(inputs), labels)
                loss.backward()
                optimizer.step()
                running_loss += loss.item()
                pending += 1
                if pending == log_every:
                    print('[%d, %5d] loss: %.3f' % (epoch + 1, j, running_loss / pending))
                    running_loss, pending = 0.0, 0
            if pending:
                # Fewer than `log_every` batches remained: report them instead of dropping them.
                print('[%d, %5d] loss: %.3f' % (epoch + 1, j, running_loss / pending))
        torch.save(model.state_dict(), f'{MODEL_DIR}/{save_stem}_{i}.pt')
        last_model = model
    return last_model


# `vgg` and `cifar10_net` are bound to the last model trained of each
# architecture, in case later cells rely on those names.
train_and_save(build_vgg16, sgd_with_momentum, fl_cifar_loaders, 'vgg_cifar_fl', log_every=1)
vgg = train_and_save(build_vgg16, adam_with_weight_decay, fl_imagenet_loaders, 'vgg_imagenet_fl', log_every=100)
train_and_save(CIFAR10_Net, plain_adam, fl_cifar_loaders, 'cifar10_net_cifar_fl', log_every=1)
cifar10_net = train_and_save(CIFAR10_Net, plain_adam, fl_imagenet_loaders, 'cifar10_net_imagenet_fl', log_every=100)

Training model 1




[1,     1] loss: 2.728
[1,     2] loss: 2.719
[1,     3] loss: 2.601
[1,     4] loss: 2.480
[1,     5] loss: 2.309
[1,     6] loss: 2.164
[1,     7] loss: 2.137
[1,     8] loss: 2.069
[1,     9] loss: 2.004
[1,    10] loss: 1.944
[1,    11] loss: 1.854
[1,    12] loss: 1.817
[1,    13] loss: 1.763
[1,    14] loss: 2.270
[2,     1] loss: 1.610
[2,     2] loss: 1.519
[2,     3] loss: 1.436
[2,     4] loss: 1.557
[2,     5] loss: 1.447
[2,     6] loss: 1.483
[2,     7] loss: 1.482
[2,     8] loss: 1.331
[2,     9] loss: 1.432
[2,    10] loss: 1.308
[2,    11] loss: 1.201
[2,    12] loss: 1.239
[2,    13] loss: 1.258
[2,    14] loss: 1.820
[3,     1] loss: 1.219
[3,     2] loss: 1.199
[3,     3] loss: 1.208
[3,     4] loss: 1.247
[3,     5] loss: 1.072
[3,     6] loss: 1.088
[3,     7] loss: 1.033
[3,     8] loss: 1.135
[3,     9] loss: 0.984
[3,    10] loss: 1.122
[3,    11] loss: 1.064
[3,    12] loss: 1.057
[3,    13] loss: 1.037
[3,    14] loss: 1.249
[4,     1] loss: 0.936
[4,     2] 

## Data imbalance noise

In [7]:
# Build the data-imbalance partitions for both training sets, CIFAR first and
# then ImageNet, each with the same second argument (5).
# NOTE(review): presumably 5 is the number of partitions and the result holds
# one loader per partition — verify against partition_imbalance.
di_cifar_loaders, di_imagenet_loaders = (
    partition_imbalance(train_set, 5)
    for train_set in (cifar_train, imagenet_train)
)

In [8]:
# Hyperparameters for the data-imbalance training runs.
learning_rate = 0.001   # step size handed to the optimizers
num_epochs = 10         # passes over each partition
# NOTE(review): partition_imbalance is called with 5 above, but only `divs` = 3
# models are trained below — confirm whether the remaining partitions are
# skipped on purpose.
divs = 3
# NOTE(review): batch_size is not used by the visible training code — confirm
# the loaders were built with this value.
batch_size = 512

In [9]:
# Train one model per data-imbalance partition (the first `divs` of them) for
# each (architecture, data source) pair and save every trained state_dict:
#   1. pretrained VGG16 on the CIFAR partitions     -> vgg_cifar_di_{i}.pt
#   2. pretrained VGG16 on the ImageNet partitions  -> vgg_imagenet_di_{i}.pt
#   3. CIFAR10_Net on the CIFAR partitions          -> cifar10_net_cifar_di_{i}.pt
#   4. CIFAR10_Net on the ImageNet partitions       -> cifar10_net_imagenet_di_{i}.pt

MODEL_DIR = 'cinic_various_models'
NUM_CLASSES = 10

# One loss object shared by all runs; kept at cell level so it stays available
# in the notebook namespace after this cell.
criterion = nn.CrossEntropyLoss()


def build_vgg16():
    """Return torchvision's pretrained VGG16 with its last classifier layer resized to NUM_CLASSES outputs."""
    # NOTE(review): newer torchvision releases deprecate `pretrained=` in favour
    # of `weights=`; left unchanged because the installed version is unknown.
    net = models.vgg16(pretrained = True)
    net.classifier[6] = nn.Linear(net.classifier[6].in_features, NUM_CLASSES)
    return net


def sgd_with_momentum(params):
    """Optimizer for the VGG16 / CIFAR runs."""
    return torch.optim.SGD(params, lr = learning_rate, momentum=0.9, weight_decay=5e-4)


def adam_with_weight_decay(params):
    """Optimizer for the VGG16 / ImageNet runs."""
    # NOTE(review): the CIFAR VGG16 runs use SGD while these use Adam — confirm intended.
    return torch.optim.Adam(params, lr = learning_rate, weight_decay=5e-4)


def plain_adam(params):
    """Optimizer for the CIFAR10_Net runs (previously a hard-coded lr of 0.001, the value of learning_rate)."""
    return optim.Adam(params, lr=learning_rate)


def train_and_save(build_model, build_optimizer, loaders, save_stem, log_every):
    """Train one fresh model per partition and save each model's state_dict.

    Reads `divs`, `num_epochs`, `device` and `criterion` from the notebook namespace.

    Args:
        build_model: zero-argument callable returning a new model.
        build_optimizer: callable mapping `model.parameters()` to an optimizer.
        loaders: indexable collection; `loaders[i]` is iterated once per epoch
            and yields (inputs, labels) batches.
        save_stem: file-name prefix; model i is written to
            f'{MODEL_DIR}/{save_stem}_{i}.pt'.
        log_every: print the mean loss of every `log_every` consecutive batches
            (1 prints each batch's loss). Batches left over at the end of an
            epoch are reported as well, so short loaders still produce output.

    Returns:
        The model trained on the last partition, or None when `divs` is 0.
    """
    # Create the output folder up front so a missing directory fails (or is
    # fixed) before any training time is spent.
    os.makedirs(MODEL_DIR, exist_ok=True)

    last_model = None
    for i in range(divs):
        print(f"Training model {i+1}")
        model = build_model().to(device)
        optimizer = build_optimizer(model.parameters())
        for epoch in range(num_epochs):
            model.train()
            running_loss = 0.0
            pending = 0  # batches accumulated since the last printed line
            for j, (inputs, labels) in enumerate(loaders[i], 1):
                inputs = inputs.to(device)
                labels = labels.to(device)
                optimizer.zero_grad()
                loss = criterion(model(inputs), labels)
                loss.backward()
                optimizer.step()
                running_loss += loss.item()
                pending += 1
                if pending == log_every:
                    print('[%d, %5d] loss: %.3f' % (epoch + 1, j, running_loss / pending))
                    running_loss, pending = 0.0, 0
            if pending:
                # Fewer than `log_every` batches remained: report them instead of dropping them.
                print('[%d, %5d] loss: %.3f' % (epoch + 1, j, running_loss / pending))
        torch.save(model.state_dict(), f'{MODEL_DIR}/{save_stem}_{i}.pt')
        last_model = model
    return last_model


# `vgg` and `cifar10_net` are bound to the last model trained of each
# architecture, in case later cells rely on those names.
train_and_save(build_vgg16, sgd_with_momentum, di_cifar_loaders, 'vgg_cifar_di', log_every=1)
vgg = train_and_save(build_vgg16, adam_with_weight_decay, di_imagenet_loaders, 'vgg_imagenet_di', log_every=100)
train_and_save(CIFAR10_Net, plain_adam, di_cifar_loaders, 'cifar10_net_cifar_di', log_every=1)
cifar10_net = train_and_save(CIFAR10_Net, plain_adam, di_imagenet_loaders, 'cifar10_net_imagenet_di', log_every=100)

Training model 1
[1,     1] loss: 2.699
[1,     2] loss: 2.667
[1,     3] loss: 2.502
[1,     4] loss: 2.492
[1,     5] loss: 2.299
[1,     6] loss: 2.260
[1,     7] loss: 2.134
[1,     8] loss: 2.135
[1,     9] loss: 1.974
[1,    10] loss: 1.864
[1,    11] loss: 1.864
[1,    12] loss: 1.799
[1,    13] loss: 1.741
[1,    14] loss: 1.688
[1,    15] loss: 1.703
[1,    16] loss: 1.636
[1,    17] loss: 1.483
[1,    18] loss: 1.492
[1,    19] loss: 1.550
[1,    20] loss: 1.362
[1,    21] loss: 1.464
[1,    22] loss: 1.340
[1,    23] loss: 1.364
[1,    24] loss: 1.336
[1,    25] loss: 1.307
[1,    26] loss: 1.325
[1,    27] loss: 1.197
[1,    28] loss: 1.269
[1,    29] loss: 1.228
[1,    30] loss: 1.220
[1,    31] loss: 1.224
[1,    32] loss: 1.127
[1,    33] loss: 1.003
[1,    34] loss: 1.068
[1,    35] loss: 1.201
[1,    36] loss: 1.038
[1,    37] loss: 1.069
[1,    38] loss: 1.075
[1,    39] loss: 1.057
[1,    40] loss: 1.117
[2,     1] loss: 0.955
[2,     2] loss: 0.984
[2,     3] loss: 