In [10]:
import torchvision
import torchvision.transforms as transforms
import torch
from tqdm import tqdm
from scipy.stats import mode
from sklearn.metrics import accuracy_score
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from models_used import CIFAR10_Net
from torchvision import models

from noise_added import partition_imbalance, flip_labels_imbalance

In [11]:
cinic_directory = 'cinic_10_data'
cinic_mean = [0.47889522, 0.47227842, 0.43047404]
cinic_std = [0.24205776, 0.23828046, 0.25874835]

batch_size = 512


# Train data
cifar_train = torchvision.datasets.ImageFolder(
    cinic_directory + '/train2/cifar',
    transform=transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean=cinic_mean, std=cinic_std)
    ])
)

imagenet_train = imagenet = torchvision.datasets.ImageFolder(
    cinic_directory + '/train2/imagenet',
    transform=transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean=cinic_mean, std=cinic_std)
    ])
)


# Validation data
cifar_valid = torchvision.datasets.ImageFolder(
    cinic_directory + '/valid2/cifar',
    transform=transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean=cinic_mean, std=cinic_std)
    ])
)

imagenet_valid = torchvision.datasets.ImageFolder(
    cinic_directory + '/valid2/imagenet',
    transform=transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean=cinic_mean, std=cinic_std)
    ])
)


# Test data
cifar_test = torchvision.datasets.ImageFolder(
    cinic_directory + '/test2/cifar',
    transform=transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean=cinic_mean, std=cinic_std)
    ])
)

imagenet_test = torchvision.datasets.ImageFolder(
    cinic_directory + '/test2/imagenet',
    transform=transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean=cinic_mean, std=cinic_std)
    ])
)

In [12]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f'Using device: {device}')

Using device: cuda


## Flipped labels

In [23]:
fl_cifar_loaders = flip_labels_imbalance(dataset =  cifar_train, flip_percentages = [0,0.3,0.5,0.7], divs = 4)
fl_imagenet_loaders = flip_labels_imbalance(dataset =  imagenet_train, flip_percentages = [0,0.3,0.5,0.7], divs = 4)

In [24]:
num_epochs = 10
batch_size = 512
learning_rate = 0.001
divs = 4

In [25]:
# Train 5 different cifar10_net models on the 5 splits of CIFAR-10 and save the models

for i in range(divs):
    print(f"Training model {i+1}")
    vgg = models.vgg16(pretrained = True)
    input_lastLayer = vgg.classifier[6].in_features
    vgg.classifier[6] = nn.Linear(input_lastLayer,10)
    vgg = vgg.to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(vgg.parameters(), lr = learning_rate, momentum=0.9,weight_decay=5e-4)
    for epoch in range(num_epochs):
        vgg.train()
        running_loss = 0.0
        for j, data in enumerate(fl_cifar_loaders[i], 0):
            inputs, labels = data
            inputs = inputs.to(device)
            labels = labels.to(device)
            optimizer.zero_grad()
            outputs = vgg(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            print('[%d, %5d] loss: %.3f' % (epoch + 1, j + 1, loss.item()))
    torch.save(vgg.state_dict(), f'cinic_various_models_2/vgg_cifar_fl_{i}.pt')

# Train 5 different imagenet_net models on the 5 splits of ImageNet and save the models

for i in range(divs):
    print(f"Training model {i+1}")
    vgg = models.vgg16(pretrained = True)
    input_lastLayer = vgg.classifier[6].in_features
    vgg.classifier[6] = nn.Linear(input_lastLayer,10)
    vgg = vgg.to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(vgg.parameters(), lr = learning_rate,weight_decay=5e-4)
    for epoch in range(num_epochs):
        vgg.train()
        running_loss = 0.0
        for j, data in enumerate(fl_imagenet_loaders[i], 0):
            inputs, labels = data
            inputs = inputs.to(device)
            labels = labels.to(device)
            optimizer.zero_grad()
            outputs = vgg(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            if j % 100 == 99:
                print('[%d, %5d] loss: %.3f' % (epoch + 1, j + 1, running_loss / 100))
                running_loss = 0.0
    torch.save(vgg.state_dict(), f'cinic_various_models_2/vgg_imagenet_fl_{i}.pt')

# Train 5 different cifar10_net models on the 5 splits of CIFAR-10 and save the models

for i in range(divs):
    print(f"Training model {i+1}")
    cifar10_net = CIFAR10_Net().to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(cifar10_net.parameters(), lr=0.001)
    for epoch in range(num_epochs):
        cifar10_net.train()
        running_loss = 0.0
        for j, data in enumerate(fl_cifar_loaders[i], 0):
            inputs, labels = data
            inputs = inputs.to(device)
            labels = labels.to(device)
            optimizer.zero_grad()
            outputs = cifar10_net(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            print('[%d, %5d] loss: %.3f' % (epoch + 1, j + 1, loss.item()))
    torch.save(cifar10_net.state_dict(), f'cinic_various_models_2/cifar10_net_cifar_fl_{i}.pt')

# Train 5 different imagenet_net models on the 5 splits of ImageNet and save the models

for i in range(divs):
    print(f"Training model {i+1}")
    cifar10_net = CIFAR10_Net().to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(cifar10_net.parameters(), lr=0.001)
    for epoch in range(num_epochs):
        cifar10_net.train()
        running_loss = 0.0
        for j, data in enumerate(fl_imagenet_loaders[i], 0):
            inputs, labels = data
            inputs = inputs.to(device)
            labels = labels.to(device)
            optimizer.zero_grad()
            outputs = cifar10_net(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            if j % 100 == 99:
                print('[%d, %5d] loss: %.3f' % (epoch + 1, j + 1, running_loss / 100))
                running_loss = 0.0
    torch.save(cifar10_net.state_dict(), f'cinic_various_models_2/cifar10_net_imagenet_fl_{i}.pt')

Training model 1
[1,     1] loss: 2.739
[1,     2] loss: 2.673
[1,     3] loss: 2.611
[1,     4] loss: 2.417
[1,     5] loss: 2.322
[1,     6] loss: 2.202
[1,     7] loss: 2.112
[1,     8] loss: 2.050
[1,     9] loss: 1.955
[1,    10] loss: 1.913
[2,     1] loss: 1.875
[2,     2] loss: 1.760
[2,     3] loss: 1.708
[2,     4] loss: 1.678
[2,     5] loss: 1.611
[2,     6] loss: 1.614
[2,     7] loss: 1.532
[2,     8] loss: 1.444
[2,     9] loss: 1.419
[2,    10] loss: 1.445
[3,     1] loss: 1.301
[3,     2] loss: 1.273
[3,     3] loss: 1.315
[3,     4] loss: 1.300
[3,     5] loss: 1.248
[3,     6] loss: 1.371
[3,     7] loss: 1.141
[3,     8] loss: 1.191
[3,     9] loss: 1.170
[3,    10] loss: 1.061
[4,     1] loss: 1.141
[4,     2] loss: 1.144
[4,     3] loss: 1.130
[4,     4] loss: 1.041
[4,     5] loss: 0.985
[4,     6] loss: 1.057
[4,     7] loss: 1.067
[4,     8] loss: 0.952
[4,     9] loss: 0.959
[4,    10] loss: 0.957
[5,     1] loss: 1.003
[5,     2] loss: 0.955
[5,     3] loss: 

## data imbalance noise

In [18]:
di_cifar_loaders = partition_imbalance(cifar_train, 4)
di_imagenet_loaders = partition_imbalance(imagenet_train, 4)

In [19]:
num_epochs = 10
batch_size = 512
learning_rate = 0.001
divs = 4

In [20]:
# Train 5 different cifar10_net models on the 5 splits of CIFAR-10 and save the models

for i in range(divs):
    print(f"Training model {i+1}")
    vgg = models.vgg16(pretrained = True)
    input_lastLayer = vgg.classifier[6].in_features
    vgg.classifier[6] = nn.Linear(input_lastLayer,10)
    vgg = vgg.to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(vgg.parameters(), lr = learning_rate, momentum=0.9,weight_decay=5e-4)
    for epoch in range(num_epochs):
        vgg.train()
        running_loss = 0.0
        for j, data in enumerate(di_cifar_loaders[i], 0):
            inputs, labels = data
            inputs = inputs.to(device)
            labels = labels.to(device)
            optimizer.zero_grad()
            outputs = vgg(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            print('[%d, %5d] loss: %.3f' % (epoch + 1, j + 1, loss.item()))
    torch.save(vgg.state_dict(), f'cinic_various_models_2/vgg_cifar_di_{i}.pt')

# Train 5 different imagenet_net models on the 5 splits of ImageNet and save the models

for i in range(divs):
    print(f"Training model {i+1}")
    vgg = models.vgg16(pretrained = True)
    input_lastLayer = vgg.classifier[6].in_features
    vgg.classifier[6] = nn.Linear(input_lastLayer,10)
    vgg = vgg.to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(vgg.parameters(), lr = learning_rate,weight_decay=5e-4)
    for epoch in range(num_epochs):
        vgg.train()
        running_loss = 0.0
        for j, data in enumerate(di_imagenet_loaders[i], 0):
            inputs, labels = data
            inputs = inputs.to(device)
            labels = labels.to(device)
            optimizer.zero_grad()
            outputs = vgg(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            if j % 100 == 99:
                print('[%d, %5d] loss: %.3f' % (epoch + 1, j + 1, running_loss / 100))
                running_loss = 0.0
    torch.save(vgg.state_dict(), f'cinic_various_models_2/vgg_imagenet_di_{i}.pt')

# Train 5 different cifar10_net models on the 5 splits of CIFAR-10 and save the models

for i in range(divs):
    print(f"Training model {i+1}")
    cifar10_net = CIFAR10_Net().to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(cifar10_net.parameters(), lr=0.001)
    for epoch in range(num_epochs):
        cifar10_net.train()
        running_loss = 0.0
        for j, data in enumerate(di_cifar_loaders[i], 0):
            inputs, labels = data
            inputs = inputs.to(device)
            labels = labels.to(device)
            optimizer.zero_grad()
            outputs = cifar10_net(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            print('[%d, %5d] loss: %.3f' % (epoch + 1, j + 1, loss.item()))
    torch.save(cifar10_net.state_dict(), f'cinic_various_models_2/cifar10_net_cifar_di_{i}.pt')

# Train 5 different imagenet_net models on the 5 splits of ImageNet and save the models

for i in range(divs):
    print(f"Training model {i+1}")
    cifar10_net = CIFAR10_Net().to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(cifar10_net.parameters(), lr=0.001)
    for epoch in range(num_epochs):
        cifar10_net.train()
        running_loss = 0.0
        for j, data in enumerate(di_imagenet_loaders[i], 0):
            inputs, labels = data
            inputs = inputs.to(device)
            labels = labels.to(device)
            optimizer.zero_grad()
            outputs = cifar10_net(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            if j % 100 == 99:
                print('[%d, %5d] loss: %.3f' % (epoch + 1, j + 1, running_loss / 100))
                running_loss = 0.0
    torch.save(cifar10_net.state_dict(), f'cinic_various_models_2/cifar10_net_imagenet_di_{i}.pt')

Training model 1
[1,     1] loss: 2.586
[1,     2] loss: 2.588
[1,     3] loss: 2.485
[1,     4] loss: 2.399
[1,     5] loss: 2.267
[1,     6] loss: 2.096
[1,     7] loss: 2.098
[1,     8] loss: 2.008
[1,     9] loss: 1.942
[1,    10] loss: 1.912
[1,    11] loss: 1.839
[1,    12] loss: 1.771
[1,    13] loss: 1.678
[1,    14] loss: 1.615
[1,    15] loss: 1.601
[1,    16] loss: 1.579
[1,    17] loss: 1.476
[1,    18] loss: 1.468
[1,    19] loss: 1.304
[1,    20] loss: 1.295
[1,    21] loss: 1.296
[1,    22] loss: 1.273
[1,    23] loss: 1.237
[1,    24] loss: 1.310
[1,    25] loss: 1.229
[1,    26] loss: 1.293
[1,    27] loss: 1.227
[1,    28] loss: 1.132
[1,    29] loss: 1.192
[1,    30] loss: 1.194
[1,    31] loss: 1.166
[1,    32] loss: 1.064
[1,    33] loss: 1.152
[1,    34] loss: 1.079
[1,    35] loss: 1.162
[1,    36] loss: 1.140
[1,    37] loss: 1.053
[1,    38] loss: 1.081
[1,    39] loss: 1.076
[1,    40] loss: 1.146
[2,     1] loss: 1.046
[2,     2] loss: 1.082
[2,     3] loss: 