In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_recall_fscore_support 

import warnings

warnings.filterwarnings("ignore")

from manipulate_MNIST import *
from Simple_CNN import *

In [2]:
# hyperparameters for the model
hidden_neurons = 128
drop_rate = 0.5
batch_size_train = 128
learning_rate = 0.001
epochs = 3

# parameters for the attack
mal_percentage = 0.5
target_honest = 3
target_malicious = 8

In [3]:
# Define the training data pipeline
transform = transforms.Compose([transforms.ToTensor(),
                                transforms.Normalize((0.5,), (0.5,))])

trainset = torchvision.datasets.MNIST(root='./data', train=True,
                                      download=True, transform=transform)

# perform the label-flipping
trainset = generate_malicious_dataset(trainset, mal_percentage, target_honest, target_malicious)

trainloader = torch.utils.data.DataLoader(trainset, batch_size=32,
                                          shuffle=True, num_workers=2)

0%|          | 0/9912422 [00:00<?, ?it/s]Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./data/MNIST/raw/train-images-idx3-ubyte.gz
100%|██████████| 9912422/9912422 [00:00<00:00, 11184576.72it/s]
Extracting ./data/MNIST/raw/train-images-idx3-ubyte.gz to ./data/MNIST/raw
100%|██████████| 28881/28881 [00:00<00:00, 14400344.01it/s]
  0%|          | 0/1648877 [00:00<?, ?it/s]
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./data/MNIST/raw/train-labels-idx1-ubyte.gz
Extracting ./data/MNIST/raw/train-labels-idx1-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw/t10k-images-idx3-ubyte.gz
100%|██████████| 1648877/1648877 [00:00<00:00, 11086268.42it/s]
100%|█

In [4]:
# Instantiate the network and the optimizer
model = SimpleCNN(hidden_neurons, drop_rate)
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=learning_rate, momentum=0.9)

In [5]:
def train_epoch(optimizer, model, data):
    running_loss = 0.0
    for i, data in enumerate(tqdm(trainloader, 0)):
        # get the inputs; data is a list of [inputs, labels]
        inputs, labels = data

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward 
        outputs = model(inputs)
        # compute loss
        loss = criterion(outputs, labels)
        # pass gradients back
        loss.backward()
        # update parameters
        optimizer.step()
        preds = torch.argmax(outputs, axis=1)

        metrics_list = precision_recall_fscore_support(preds, labels, average='macro')

    return loss, metrics_list[0], metrics_list[1], metrics_list[2]


In [6]:
# Train the network
loss = []
for epoch in range(epochs):  # loop over the dataset multiple times
    epoch_loss, precision, recall, f1_score = train_epoch(optimizer, model, trainloader)
    print("Epoch: ", epoch, " loss: ", round(epoch_loss.item(),4), " precision: ", round(precision, 4), " recall: ", round(recall, 4), " f1_score ", round(f1_score, 4))
    loss.append(epoch_loss.item())

100%|██████████| 1875/1875 [00:57<00:00, 32.35it/s]
  0%|          | 0/1875 [00:00<?, ?it/s]Epoch:  0  loss:  0.3544  precision:  0.83  recall:  0.8333  f1_score  0.8056
100%|██████████| 1875/1875 [00:57<00:00, 32.87it/s]
  0%|          | 0/1875 [00:00<?, ?it/s]Epoch:  1  loss:  0.1966  precision:  0.9167  recall:  0.963  f1_score  0.9269
100%|██████████| 1875/1875 [01:08<00:00, 27.46it/s]Epoch:  2  loss:  0.2373  precision:  0.8633  recall:  0.835  f1_score  0.8405



In [7]:
# Define the testing data pipeline
testset = torchvision.datasets.MNIST(root='./data', train=False,
                                     download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=128,
                                         shuffle=False, num_workers=2)

# Test the network and print the classification report
true_labels = []
pred_labels = []

with torch.no_grad():
    for data in testloader:
        images, labels = data
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        true_labels += labels.tolist()
        pred_labels += predicted.tolist()

print("Accuracy score: {}".format(round(accuracy_score(pred_labels,true_labels),3)))
print(classification_report(true_labels, pred_labels))

Accuracy score: 0.886
              precision    recall  f1-score   support

           0       0.96      0.98      0.97       980
           1       0.96      0.99      0.97      1135
           2       0.96      0.93      0.95      1032
           3       0.98      0.27      0.43      1010
           4       0.96      0.96      0.96       982
           5       0.93      0.95      0.94       892
           6       0.96      0.97      0.96       958
           7       0.95      0.94      0.94      1028
           8       0.55      0.93      0.69       974
           9       0.92      0.94      0.93      1009

    accuracy                           0.89     10000
   macro avg       0.91      0.89      0.88     10000
weighted avg       0.92      0.89      0.88     10000

