## Data label poisoning

In [5]:
import torch
from PIL import Image
import torchvision
import torchvision.transforms as transforms
import numpy as np
import json
import requests
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')
%matplotlib inline

device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(f'Using {device} for inference')

Using cuda for inference


In [6]:
!pip install torchviz
!pip install torchsummary
!pip install efficientnet_pytorch



In [20]:
model = torch.hub.load('NVIDIA/DeepLearningExamples:torchhub', 'nvidia_efficientnet_widese_b4', pretrained=False)
model.eval().to(device)
import torch.optim as optim
import torch.nn as nn
num_epochs = 100
learning_rate = 0.0005
weight_decay = 0.0001
momentum = 0.01
optimizer = optim.SGD(model.parameters(), lr=learning_rate, weight_decay=weight_decay, momentum=momentum)
scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[20, 40])
criterion = nn.CrossEntropyLoss()
checkpoint = torch.load("/kaggle/input/ef-net/ef_net_78.pth")
model.load_state_dict(checkpoint['model_state_dict'])
optimizer.load_state_dict(checkpoint['optimizer_state_dict'])

epoch = checkpoint['epoch']
loss = checkpoint['loss']
accuracy = checkpoint['accuracy']
average_loss_test = checkpoint['test_accuracy']
average_loss = checkpoint['test_loss']
learning_rate = checkpoint['learning_rate']

classes = ('plane', 'car', 'bird', 'cat',
           'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

import torchvision.datasets as datasets
normalize= transforms.Normalize(mean=[0.49186882, 0.48265398, 0.44717732], std=[0.24697122, 0.24338895, 0.2615926 ])
transform = transforms.Compose([
        transforms.Resize(64),
        transforms.RandomRotation(10),
        transforms.ColorJitter(brightness=0.05, contrast=0.05, saturation=0.05, hue=0.05),
        # transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.49186882, 0.48265398, 0.44717732],std=[0.24697122, 0.24338895, 0.2615926 ])
    ])

batch_size = 64
trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
                                        download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(
        datasets.CIFAR10(root='./data', train=True, transform=transforms.Compose([
            transforms.RandomHorizontalFlip(),
            transforms.RandomCrop(32, 4),
            transforms.ToTensor(),
            normalize,
        ]), download=True),
        batch_size=batch_size, shuffle=True, pin_memory=True)

testset = torchvision.datasets.CIFAR10(root='./data', train=False,
                                       download=True, transform=transform)
testloader = torch.utils.data.DataLoader(
        datasets.CIFAR10(root='./data', train=False, transform=transforms.Compose([
            transforms.ToTensor(),
            normalize,
        ]), download=True),
        batch_size=batch_size, shuffle=True, pin_memory=True)

classes = ('plane', 'car', 'bird', 'cat',
           'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

Using cache found in /root/.cache/torch/hub/NVIDIA_DeepLearningExamples_torchhub


Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified


Accuracy before label poisoning

In [8]:
import numpy as np

model.eval().to(device)
correct_predictions = np.zeros(len(classes))
total_samples = np.zeros(len(classes))

with torch.no_grad():
    for (inputs, targets) in testloader:
        inputs, targets = inputs.to(device), targets.to(device)
        outputs = model(inputs)
        _, predicted = outputs.max(1)

        for label, prediction in zip(targets, predicted):
            if label == prediction:
                correct_predictions[label] += 1
            total_samples[label] += 1

for i, classname in enumerate(classes):
    accuracy = 100 * correct_predictions[i] / total_samples[i]
    print(f'Accuracy on test data {classname:5s} : {accuracy:.2f}%')

Accuracy on test data plane : 84.50%
Accuracy on test data car   : 90.90%
Accuracy on test data bird  : 74.20%
Accuracy on test data cat   : 68.50%
Accuracy on test data deer  : 81.10%
Accuracy on test data dog   : 70.90%
Accuracy on test data frog  : 87.40%
Accuracy on test data horse : 85.90%
Accuracy on test data ship  : 89.80%
Accuracy on test data truck : 89.40%


## Labet poisoning

In [21]:
import torchvision.datasets as datasets

train_labels = np.array(trainset.targets)

target_class = 3
percentage_to_change = 50
#new_label = 5

all_labels = set(range(10))
exclude_label = {target_class}
possible_new_labels = list(all_labels - exclude_label)

num_to_change = int((percentage_to_change / 100) * np.sum(train_labels == target_class))
indices_to_change = np.random.choice(np.where(train_labels == target_class)[0], num_to_change, replace=False)


print("Index\tOriginal Label\tNew Label")
for index in indices_to_change:
    new_label = np.random.choice(possible_new_labels)
    print(f"{index}\t{train_labels[index]}\t{new_label}")
    train_labels[index] = new_label

train_labels[indices_to_change] = new_label
trainset.targets = list(train_labels)

trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size, shuffle=False)

Index	Original Label	New Label
21277	3	9
31978	3	2
47801	3	4
11736	3	7
35836	3	0
47067	3	2
24842	3	2
36145	3	7
45310	3	7
3087	3	6
7789	3	9
15545	3	2
40788	3	5
36159	3	2
33280	3	4
14230	3	7
37804	3	2
25934	3	2
12821	3	8
27540	3	0
25380	3	1
2930	3	0
43204	3	1
23417	3	8
25563	3	5
18006	3	4
27702	3	4
34014	3	8
48134	3	0
6758	3	6
28942	3	5
21400	3	6
46880	3	2
21991	3	6
41271	3	8
27069	3	8
25782	3	8
43779	3	5
6205	3	6
39681	3	8
47823	3	4
5534	3	8
36946	3	8
6574	3	6
41511	3	9
46157	3	5
44677	3	2
2562	3	2
47518	3	6
2723	3	8
38649	3	6
34496	3	1
15562	3	1
20067	3	7
22071	3	1
28631	3	5
30571	3	2
7004	3	7
18015	3	6
42740	3	8
22923	3	9
39194	3	8
11492	3	7
18442	3	0
6883	3	7
35130	3	5
44456	3	6
29855	3	4
34647	3	8
29567	3	8
46653	3	1
25307	3	8
16130	3	9
3109	3	8
27115	3	5
13221	3	6
47915	3	9
11324	3	7
18474	3	8
5260	3	9
37769	3	5
48331	3	0
34858	3	8
31402	3	2
47683	3	8
41992	3	8
48592	3	2
42633	3	7
42206	3	6
46882	3	7
20371	3	6
6724	3	5
46825	3	0
14861	3	7
42131	3	1
28786	3	8
34159	3	8
22905	3	7
354

## TEST

In [24]:
####TEST ONLY
test_labels = np.array(testset.targets)

# Assuming you want to poison the same target class with the same percentage
num_to_change_test = int((percentage_to_change / 100) * np.sum(test_labels == target_class))
indices_to_change_test = np.random.choice(np.where(test_labels == target_class)[0], num_to_change_test, replace=False)

print("Index\tOriginal Label\tNew Label")
for index in indices_to_change_test:
    new_label = np.random.choice(possible_new_labels)
    print(f"{index}\t{test_labels[index]}\t{new_label}")
    test_labels[index] = new_label

testset.targets = list(test_labels)

testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size, shuffle=False)


Index	Original Label	New Label
2752	3	8
2747	3	8
9994	3	8
6647	3	4
7313	3	0
3463	3	0
9422	3	5
8754	3	0
3456	3	4
8966	3	6
4147	3	1
3539	3	2
320	3	8
1787	3	6
3390	3	1
2157	3	9
4342	3	0
3049	3	9
6744	3	6
790	3	9
9246	3	1
5413	3	7
9141	3	7
1219	3	7
3259	3	6
9198	3	0
7813	3	7
9201	3	9
9087	3	8
8484	3	9
3065	3	8
1597	3	5
986	3	7
957	3	9
294	3	2
7496	3	0
5656	3	4
2815	3	7
9109	3	8
3353	3	7
8338	3	7
7644	3	8
3743	3	0
637	3	9
1813	3	5
8665	3	2
7598	3	7
6825	3	1
8331	3	7
7002	3	1
2144	3	0
2529	3	7
5198	3	6
9498	3	2
2049	3	1
4785	3	7
8778	3	8
1074	3	6
695	3	9
1775	3	6
1936	3	5
7228	3	8
2660	3	9
6340	3	9
911	3	6
4938	3	8
9338	3	4
46	3	2
5597	3	7
7310	3	4
8196	3	5
8299	3	6
2922	3	4
5424	3	2
4862	3	6
9748	3	7
7255	3	0
9292	3	8
6774	3	1
3479	3	2
1053	3	2
2720	3	1
6514	3	9
2173	3	4
9926	3	5
2422	3	1
9578	3	8
6406	3	6
3067	3	7
1030	3	8
8114	3	2
1871	3	0
2081	3	2
9404	3	9
1704	3	2
8181	3	8
9454	3	8
5361	3	9
9043	3	1
4718	3	9
8224	3	5
9308	3	7
6174	3	8
6976	3	6
5490	3	6
5272	3	1
7268	3	2
7211	3	0
1957	3	

Accuracy after data poisoning

In [25]:
import numpy as np

model.eval().to(device)
correct_predictions = np.zeros(len(classes))
total_samples = np.zeros(len(classes))

with torch.no_grad():
    for (inputs, targets) in testloader:
        inputs, targets = inputs.to(device), targets.to(device)
        outputs = model(inputs)
        _, predicted = outputs.max(1)

        for label, prediction in zip(targets, predicted):
            if label == prediction:
                correct_predictions[label] += 1
            total_samples[label] += 1

for i, classname in enumerate(classes):
    accuracy = 100 * correct_predictions[i] / total_samples[i]
    print(f'Accuracy on test data {classname:5s} : {accuracy:.2f}%')

Accuracy on test data plane : 0.38%
Accuracy on test data car   : 0.00%
Accuracy on test data bird  : 30.21%
Accuracy on test data cat   : 57.80%
Accuracy on test data deer  : 0.29%
Accuracy on test data dog   : 10.82%
Accuracy on test data frog  : 0.00%
Accuracy on test data horse : 0.00%
Accuracy on test data ship  : 0.00%
Accuracy on test data truck : 0.66%


## Implementing SimCLR

In [None]:
import torch.nn as nn


class SimCLR(nn.Module):
    def __init__(self, base_encoder, projection_dim=128):
        super().__init__()
        self.enc = base_encoder(pretrained=False)  # load model from torchvision.models without pretrained weights.
        self.feature_dim = self.enc.fc.in_features

        # Customize for CIFAR10. Replace conv 7x7 with conv 3x3, and remove first max pooling.
        # See Section B.9 of SimCLR paper.
        self.enc.conv1 = nn.Conv2d(3, 64, 3, 1, 1, bias=False)
        self.enc.maxpool = nn.Identity()
        self.enc.fc = nn.Identity()  # remove final fully connected layer.

        # Add MLP projection.
        self.projection_dim = projection_dim
        self.projector = nn.Sequential(nn.Linear(self.feature_dim, 2048),
                                       nn.ReLU(),
                                       nn.Linear(2048, projection_dim))

    def forward(self, x):
        feature = self.enc(x)
        projection = self.projector(feature)
        return feature, projection

## SimCLR training

Epoch [1/50]:   0%|          | 0/782 [00:00<?, ?it/s]


AttributeError: 'dict' object has no attribute 'size'