In [1]:
import os

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from scipy.stats import mode
from sklearn.metrics import accuracy_score
from torchvision import models
from tqdm import tqdm

from models_used import CIFAR10_Net
from noise_added import partition_imbalance, flip_labels_imbalance

In [2]:
# Root folder of the CINIC-10 data and the per-channel statistics used for normalization.
cinic_directory = 'cinic_10_data'
cinic_mean = [0.47889522, 0.47227842, 0.43047404]
cinic_std = [0.24205776, 0.23828046, 0.25874835]

batch_size = 512


def load_cinic_subset(split, source):
    """Build the ImageFolder dataset stored at ``<cinic_directory>/<split>/<source>``.

    Args:
        split: Name of the split folder, e.g. ``'train2'``, ``'valid2'`` or ``'test2'``.
        source: Name of the sub-folder inside the split, ``'cifar'`` or ``'imagenet'``.

    Returns:
        A ``torchvision.datasets.ImageFolder`` whose images are converted with
        ``ToTensor`` and then normalized with ``cinic_mean`` / ``cinic_std``.
        A fresh transform pipeline is created on every call, so datasets do
        not share transform objects.
    """
    return torchvision.datasets.ImageFolder(
        f'{cinic_directory}/{split}/{source}',
        transform=transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize(mean=cinic_mean, std=cinic_std),
        ]),
    )


# Train data
cifar_train = load_cinic_subset('train2', 'cifar')
# `imagenet` is a second name bound to the same dataset object as `imagenet_train`;
# it is kept in case other cells still refer to it.
imagenet_train = imagenet = load_cinic_subset('train2', 'imagenet')

# Validation data
cifar_valid = load_cinic_subset('valid2', 'cifar')
imagenet_valid = load_cinic_subset('valid2', 'imagenet')

# Test data
cifar_test = load_cinic_subset('test2', 'cifar')
imagenet_test = load_cinic_subset('test2', 'imagenet')

In [3]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(f'Using device: {device}')

Using device: cuda


## Flipped labels

In [4]:
# Build the flipped-label loaders for both CINIC-10 sources with identical settings.
# The training cell indexes each result with range(divs) and iterates it as
# (inputs, labels) batches, so each is presumably a sequence of `divs` DataLoaders,
# one per entry of flip_percentages -- TODO confirm against noise_added.flip_labels_imbalance.
fl_cifar_loaders = flip_labels_imbalance(
    dataset=cifar_train,
    flip_percentages=[0, 0.5],
    divs=2,
)
fl_imagenet_loaders = flip_labels_imbalance(
    dataset=imagenet_train,
    flip_percentages=[0, 0.5],
    divs=2,
)

In [5]:
# Hyperparameters for the flipped-label training runs.
divs = 2              # number of loaders to train on; the loaders above were built with divs=2
num_epochs = 10       # full passes over each loader
learning_rate = 1e-3  # step size handed to the optimizers
batch_size = 512      # not read by the training cell shown in this section

In [6]:
# Train one model per flipped-label loader (`divs` loaders per dataset) and save each
# state_dict. Four runs: {VGG16, CIFAR10_Net} x {CIFAR loaders, ImageNet loaders}.

save_dir = 'cinic_various_models_3'
# torch.save does not create missing parent directories; create the folder up front
# so a run cannot fail at the final save after all epochs have finished.
os.makedirs(save_dir, exist_ok=True)

criterion = nn.CrossEntropyLoss()


def build_vgg16():
    """Return a torchvision VGG16 with pretrained weights and a 10-output final layer.

    The last classifier layer (``classifier[6]``) is replaced by a newly
    initialised ``nn.Linear`` with the same input width and 10 outputs.
    """
    # NOTE(review): `pretrained=` is deprecated in torchvision >= 0.13 in favour of
    # `weights=`; kept as-is because the installed torchvision version is not visible here.
    net = models.vgg16(pretrained = True)
    net.classifier[6] = nn.Linear(net.classifier[6].in_features, 10)
    return net


def train_and_save(model, criterion, optimizer, loader, save_path, log_every):
    """Train ``model`` on ``loader`` for ``num_epochs`` epochs, then save its state_dict.

    Args:
        model: Module already moved to ``device``; it is updated in place.
        criterion: Loss callable applied as ``criterion(outputs, labels)``.
        optimizer: Optimizer constructed over ``model.parameters()``.
        loader: Iterable yielding ``(inputs, labels)`` batches.
        save_path: Destination file for ``model.state_dict()``.
        log_every: Print the mean loss of the last ``log_every`` batches each time
            that many batches have been processed within an epoch; ``1`` prints
            every batch's loss.

    Reads the notebook-level ``num_epochs`` and ``device``.
    """
    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        for j, (inputs, labels) in enumerate(loader):
            inputs = inputs.to(device)
            labels = labels.to(device)
            optimizer.zero_grad()
            loss = criterion(model(inputs), labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            if j % log_every == log_every - 1:
                print('[%d, %5d] loss: %.3f' % (epoch + 1, j + 1, running_loss / log_every))
                running_loss = 0.0
    torch.save(model.state_dict(), save_path)


# VGG16, SGD with momentum, flipped-label CIFAR loaders; loss printed every batch.
for i in range(divs):
    print(f"Training model {i+1}")
    vgg = build_vgg16().to(device)
    optimizer = torch.optim.SGD(vgg.parameters(), lr = learning_rate, momentum=0.9, weight_decay=5e-4)
    train_and_save(vgg, criterion, optimizer, fl_cifar_loaders[i],
                   f'{save_dir}/vgg_cifar_fl_{i}.pt', log_every=1)

# VGG16, Adam, flipped-label ImageNet loaders; mean loss printed every 100 batches.
# NOTE(review): with log_every=100 nothing is printed for a loader shorter than 100
# batches (the CIFAR loaders in this notebook's saved output have 20 batches per epoch)
# -- confirm the ImageNet loader length if progress output is expected.
for i in range(divs):
    print(f"Training model {i+1}")
    vgg = build_vgg16().to(device)
    optimizer = torch.optim.Adam(vgg.parameters(), lr = learning_rate, weight_decay=5e-4)
    train_and_save(vgg, criterion, optimizer, fl_imagenet_loaders[i],
                   f'{save_dir}/vgg_imagenet_fl_{i}.pt', log_every=100)

# CIFAR10_Net, Adam, flipped-label CIFAR loaders; loss printed every batch.
# `learning_rate` replaces the previously hard-coded 0.001 (same value).
for i in range(divs):
    print(f"Training model {i+1}")
    cifar10_net = CIFAR10_Net().to(device)
    optimizer = optim.Adam(cifar10_net.parameters(), lr=learning_rate)
    train_and_save(cifar10_net, criterion, optimizer, fl_cifar_loaders[i],
                   f'{save_dir}/cifar10_net_cifar_fl_{i}.pt', log_every=1)

# CIFAR10_Net, Adam, flipped-label ImageNet loaders; mean loss printed every 100 batches.
for i in range(divs):
    print(f"Training model {i+1}")
    cifar10_net = CIFAR10_Net().to(device)
    optimizer = optim.Adam(cifar10_net.parameters(), lr=learning_rate)
    train_and_save(cifar10_net, criterion, optimizer, fl_imagenet_loaders[i],
                   f'{save_dir}/cifar10_net_imagenet_fl_{i}.pt', log_every=100)

Training model 1




[1,     1] loss: 2.759
[1,     2] loss: 2.692
[1,     3] loss: 2.488
[1,     4] loss: 2.455
[1,     5] loss: 2.361
[1,     6] loss: 2.212
[1,     7] loss: 2.143
[1,     8] loss: 2.109
[1,     9] loss: 1.952
[1,    10] loss: 1.945
[1,    11] loss: 1.908
[1,    12] loss: 1.842
[1,    13] loss: 1.753
[1,    14] loss: 1.706
[1,    15] loss: 1.649
[1,    16] loss: 1.592
[1,    17] loss: 1.588
[1,    18] loss: 1.520
[1,    19] loss: 1.446
[1,    20] loss: 1.466
[2,     1] loss: 1.401
[2,     2] loss: 1.393
[2,     3] loss: 1.313
[2,     4] loss: 1.280
[2,     5] loss: 1.309
[2,     6] loss: 1.265
[2,     7] loss: 1.312
[2,     8] loss: 1.285
[2,     9] loss: 1.247
[2,    10] loss: 1.202
[2,    11] loss: 1.236
[2,    12] loss: 1.120
[2,    13] loss: 1.190
[2,    14] loss: 1.115
[2,    15] loss: 1.177
[2,    16] loss: 1.104
[2,    17] loss: 1.125
[2,    18] loss: 1.160
[2,    19] loss: 1.119
[2,    20] loss: 1.045
[3,     1] loss: 0.955
[3,     2] loss: 0.997
[3,     3] loss: 0.977
[3,     4] 

## Data imbalance noise

In [7]:
# Build the data-imbalance loaders for both CINIC-10 sources.
# The value 2 is partition_imbalance's second positional argument -- presumably the
# number of partitions, since the training cell indexes each result with range(divs)
# where divs = 2. TODO confirm against noise_added.partition_imbalance.
di_partitions = 2
di_cifar_loaders = partition_imbalance(cifar_train, di_partitions)
di_imagenet_loaders = partition_imbalance(imagenet_train, di_partitions)

In [8]:
# Hyperparameters for the data-imbalance training runs. They are re-declared here with
# the same values as in the flipped-label section.
num_epochs, batch_size = 10, 512
learning_rate = 1e-3
divs = 2  # number of loaders to train on; the loaders above were built with 2 partitions

In [9]:
# Train one model per data-imbalance loader (`divs` loaders per dataset) and save each
# state_dict. Four runs: {VGG16, CIFAR10_Net} x {CIFAR loaders, ImageNet loaders}.

save_dir = 'cinic_various_models_3'
# torch.save does not create missing parent directories; create the folder up front
# so a run cannot fail at the final save after all epochs have finished.
os.makedirs(save_dir, exist_ok=True)

criterion = nn.CrossEntropyLoss()


def build_vgg16():
    """Return a torchvision VGG16 with pretrained weights and a 10-output final layer.

    The last classifier layer (``classifier[6]``) is replaced by a newly
    initialised ``nn.Linear`` with the same input width and 10 outputs.
    """
    # NOTE(review): `pretrained=` is deprecated in torchvision >= 0.13 in favour of
    # `weights=`; kept as-is because the installed torchvision version is not visible here.
    net = models.vgg16(pretrained = True)
    net.classifier[6] = nn.Linear(net.classifier[6].in_features, 10)
    return net


def train_and_save(model, criterion, optimizer, loader, save_path, log_every):
    """Train ``model`` on ``loader`` for ``num_epochs`` epochs, then save its state_dict.

    Args:
        model: Module already moved to ``device``; it is updated in place.
        criterion: Loss callable applied as ``criterion(outputs, labels)``.
        optimizer: Optimizer constructed over ``model.parameters()``.
        loader: Iterable yielding ``(inputs, labels)`` batches.
        save_path: Destination file for ``model.state_dict()``.
        log_every: Print the mean loss of the last ``log_every`` batches each time
            that many batches have been processed within an epoch; ``1`` prints
            every batch's loss.

    Reads the notebook-level ``num_epochs`` and ``device``.
    """
    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        for j, (inputs, labels) in enumerate(loader):
            inputs = inputs.to(device)
            labels = labels.to(device)
            optimizer.zero_grad()
            loss = criterion(model(inputs), labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            if j % log_every == log_every - 1:
                print('[%d, %5d] loss: %.3f' % (epoch + 1, j + 1, running_loss / log_every))
                running_loss = 0.0
    torch.save(model.state_dict(), save_path)


# VGG16, SGD with momentum, data-imbalance CIFAR loaders; loss printed every batch.
for i in range(divs):
    print(f"Training model {i+1}")
    vgg = build_vgg16().to(device)
    optimizer = torch.optim.SGD(vgg.parameters(), lr = learning_rate, momentum=0.9, weight_decay=5e-4)
    train_and_save(vgg, criterion, optimizer, di_cifar_loaders[i],
                   f'{save_dir}/vgg_cifar_di_{i}.pt', log_every=1)

# VGG16, Adam, data-imbalance ImageNet loaders; mean loss printed every 100 batches.
# NOTE(review): with log_every=100 nothing is printed for a loader shorter than 100
# batches (the CIFAR loader in this notebook's saved output has 40 batches per epoch)
# -- confirm the ImageNet loader length if progress output is expected.
for i in range(divs):
    print(f"Training model {i+1}")
    vgg = build_vgg16().to(device)
    optimizer = torch.optim.Adam(vgg.parameters(), lr = learning_rate, weight_decay=5e-4)
    train_and_save(vgg, criterion, optimizer, di_imagenet_loaders[i],
                   f'{save_dir}/vgg_imagenet_di_{i}.pt', log_every=100)

# CIFAR10_Net, Adam, data-imbalance CIFAR loaders; loss printed every batch.
# `learning_rate` replaces the previously hard-coded 0.001 (same value).
for i in range(divs):
    print(f"Training model {i+1}")
    cifar10_net = CIFAR10_Net().to(device)
    optimizer = optim.Adam(cifar10_net.parameters(), lr=learning_rate)
    train_and_save(cifar10_net, criterion, optimizer, di_cifar_loaders[i],
                   f'{save_dir}/cifar10_net_cifar_di_{i}.pt', log_every=1)

# CIFAR10_Net, Adam, data-imbalance ImageNet loaders; mean loss printed every 100 batches.
for i in range(divs):
    print(f"Training model {i+1}")
    cifar10_net = CIFAR10_Net().to(device)
    optimizer = optim.Adam(cifar10_net.parameters(), lr=learning_rate)
    train_and_save(cifar10_net, criterion, optimizer, di_imagenet_loaders[i],
                   f'{save_dir}/cifar10_net_imagenet_di_{i}.pt', log_every=100)

Training model 1
[1,     1] loss: 2.788
[1,     2] loss: 2.795
[1,     3] loss: 2.644
[1,     4] loss: 2.511
[1,     5] loss: 2.407
[1,     6] loss: 2.298
[1,     7] loss: 2.155
[1,     8] loss: 2.101
[1,     9] loss: 1.998
[1,    10] loss: 1.995
[1,    11] loss: 1.917
[1,    12] loss: 1.818
[1,    13] loss: 1.827
[1,    14] loss: 1.714
[1,    15] loss: 1.721
[1,    16] loss: 1.653
[1,    17] loss: 1.615
[1,    18] loss: 1.558
[1,    19] loss: 1.546
[1,    20] loss: 1.496
[1,    21] loss: 1.436
[1,    22] loss: 1.445
[1,    23] loss: 1.315
[1,    24] loss: 1.353
[1,    25] loss: 1.378
[1,    26] loss: 1.336
[1,    27] loss: 1.342
[1,    28] loss: 1.201
[1,    29] loss: 1.169
[1,    30] loss: 1.175
[1,    31] loss: 1.194
[1,    32] loss: 1.182
[1,    33] loss: 1.249
[1,    34] loss: 1.087
[1,    35] loss: 1.184
[1,    36] loss: 1.156
[1,    37] loss: 1.117
[1,    38] loss: 1.044
[1,    39] loss: 1.075
[1,    40] loss: 0.819
[2,     1] loss: 1.067
[2,     2] loss: 1.008
[2,     3] loss: 