# Targeted poisoning attack on MNIST dataset

## Import Libraries

In [1]:
import numpy as np
import tensorflow as tf
import tensorflow_federated as tff
from matplotlib import pyplot as plt
import collections

import random
from tqdm import tqdm
import copy

# ignore info and warnings form tf
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)

2023-06-06 22:33:37.544307: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


## Parameters declaration

In [2]:
n_clients = 40
n_dataset_epochs = 5
n_train_epochs = 30
batch_size = 128

client_learning_rate = 0.02
server_learning_rate = 0.1

## Dataset Loading and manipulation

In [3]:
emnist_train, emnist_test = tff.simulation.datasets.emnist.load_data()


2023-06-06 22:33:43.595626: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1635] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 10362 MB memory:  -> device: 0, name: NVIDIA GeForce GTX 1080 Ti, pci bus id: 0000:01:00.0, compute capability: 6.1
2023-06-06 22:33:43.596057: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1635] Created device /job:localhost/replica:0/task:0/device:GPU:1 with 11390 MB memory:  -> device: 1, name: NVIDIA TITAN X (Pascal), pci bus id: 0000:02:00.0, compute capability: 6.1


### Preprocessing and organizing dataset

In [4]:
shuffle_buffer = 100
# todo change??
prefetch_buffer = 100

In [7]:
def batch_format(element):
    # flatten the images
    return collections.OrderedDict(
        x = tf.reshape(element['pixels'], [-1, 784]),
        y = tf.reshape(element['label'], [-1, 1]))

def preprocess(dataset):
    dataset = dataset.repeat(n_dataset_epochs)
    dataset = dataset.shuffle(shuffle_buffer, seed = 1)
    dataset = dataset.batch(batch_size)
    dataset = dataset.map(batch_format)
    dataset = dataset.prefetch(prefetch_buffer)

    return dataset

In [8]:
## testing the preprocessing function

example_dataset = emnist_train.create_tf_dataset_for_client(
    emnist_train.client_ids[0])
     
preprocessed_example_dataset = preprocess(example_dataset)

sample_batch = tf.nest.map_structure(lambda x: x.numpy(),
                                     next(iter(preprocessed_example_dataset)))



In [9]:
def make_federated_data(client_data, client_ids):

    preprocessed_data = [preprocess(client_data.create_tf_dataset_for_client(x))
                            for x in client_ids]
    return preprocessed_data

## Model creation

In [13]:
def create_keras_model():
    return tf.keras.models.Sequential([
        tf.keras.layers.InputLayer(input_shape = (784,)),
        tf.keras.layers.Dense(10, kernel_initializer = 'zeros'),
        tf.keras.layers.Softmax(),
    ])


In [14]:
def mnist_model():
    keras_model = create_keras_model()
    return tff.learning.models.from_keras_model(
        keras_model,
        input_spec = preprocessed_example_dataset.element_spec,
        loss = tf.keras.losses.SparseCategoricalCrossentropy(),
        metrics = [tf.keras.metrics.SparseCategoricalAccuracy()])

## Training

In [15]:
training_process = tff.learning.algorithms.build_weighted_fed_avg(
    mnist_model,
    client_optimizer_fn = lambda: tf.keras.optimizers.SGD(learning_rate = client_learning_rate),
    server_optimizer_fn=lambda: tf.keras.optimizers.SGD(learning_rate = server_learning_rate))

2023-06-06 22:34:35.882523: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'args_2' with dtype int32 and shape [?,1]
	 [[{{node args_2}}]]


In [16]:
train_state = training_process.initialize()


2023-06-06 22:34:38.804019: I tensorflow/core/grappler/devices.cc:66] Number of eligible GPUs (core count >= 8, compute capability >= 0.0): 2
2023-06-06 22:34:38.804139: I tensorflow/core/grappler/clusters/single_machine.cc:358] Starting new session
2023-06-06 22:34:38.806713: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1635] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 10362 MB memory:  -> device: 0, name: NVIDIA GeForce GTX 1080 Ti, pci bus id: 0000:01:00.0, compute capability: 6.1
2023-06-06 22:34:38.807063: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1635] Created device /job:localhost/replica:0/task:0/device:GPU:1 with 11390 MB memory:  -> device: 1, name: NVIDIA TITAN X (Pascal), pci bus id: 0000:02:00.0, compute capability: 6.1
2023-06-06 22:34:38.823905: I tensorflow/core/grappler/devices.cc:66] Number of eligible GPUs (core count >= 8, compute capability >= 0.0): 2
2023-06-06 22:34:38.823974: I tensorflow/core/grappler/clusters/single_machine.

In [17]:
for epoch in range(1, n_train_epochs):
    #client selection, random, chosen from the first 100 clients
    clients = random.sample(emnist_train.client_ids[0:100], n_clients)
    
    #note: slow to converge with random clients, makes sense
    federated_train_data = make_federated_data(emnist_train, clients)

    result = training_process.next(train_state, federated_train_data)

    train_state = result.state
    train_metrics = result.metrics

    print('round {:2d}, metrics={}'.format(epoch, train_metrics))

reated device /job:localhost/replica:0/task:0/device:GPU:0 with 10362 MB memory:  -> device: 0, name: NVIDIA GeForce GTX 1080 Ti, pci bus id: 0000:01:00.0, compute capability: 6.1
2023-06-06 22:34:58.363435: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1635] Created device /job:localhost/replica:0/task:0/device:GPU:1 with 11390 MB memory:  -> device: 1, name: NVIDIA TITAN X (Pascal), pci bus id: 0000:02:00.0, compute capability: 6.1
2023-06-06 22:34:58.370497: I tensorflow/core/grappler/devices.cc:66] Number of eligible GPUs (core count >= 8, compute capability >= 0.0): 2
2023-06-06 22:34:58.370550: I tensorflow/core/grappler/clusters/single_machine.cc:358] Starting new session
2023-06-06 22:34:58.372544: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1635] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 10362 MB memory:  -> device: 0, name: NVIDIA GeForce GTX 1080 Ti, pci bus id: 0000:01:00.0, compute capability: 6.1
2023-06-06 22:34:58.372850: I tensorflow/

## Evaluation

In [21]:
# build the process to have the model's architecture
evaluation_process = tff.learning.algorithms.build_fed_eval(mnist_model)

# initialize the state of the evaluation
evaluation_state = evaluation_process.initialize()

# get weights from the trainged model
model_weights = training_process.get_model_weights(train_state)

# update the evaluation state with them
evaluation_state = evaluation_process.set_model_weights(evaluation_state, model_weights)

sample_clients = emnist_train.client_ids[:n_clients]

# test the model with the test data
# question: selection of clients during training??
federated_test_data = make_federated_data(emnist_test, sample_clients)

# run a next() to evaluate the model
evaluation_output = evaluation_process.next(evaluation_state, federated_test_data)

str(evaluation_output.metrics)



2023-06-06 22:40:29.528100: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'args_2' with dtype int32 and shape [?,1]
	 [[{{node args_2}}]]
2023-06-06 22:40:30.768388: I tensorflow/core/grappler/devices.cc:66] Number of eligible GPUs (core count >= 8, compute capability >= 0.0): 2
2023-06-06 22:40:30.768464: I tensorflow/core/grappler/clusters/single_machine.cc:358] Starting new session
2023-06-06 22:40:30.770619: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1635] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 10362 MB memory:  -> device: 0, name: NVIDIA GeForce GTX 1080 Ti, pci bus id: 0000:01:00.0, compute capability: 6.1
2023-06-06 22:40:30.770917: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1635] Created device /job:localhost/replica:0/task:0/device:GPU:1 with 11390 MB

"OrderedDict([('distributor', ()), ('client_work', OrderedDict([('eval', OrderedDict([('current_round_metrics', OrderedDict([('sparse_categorical_accuracy', 0.0864745), ('loss', 2.2693663), ('num_examples', 2255), ('num_batches', 40)])), ('total_rounds_metrics', OrderedDict([('sparse_categorical_accuracy', 0.0864745), ('loss', 2.2693663), ('num_examples', 2255), ('num_batches', 40)]))]))])), ('aggregator', OrderedDict([('mean_value', ()), ('mean_weight', ())])), ('finalizer', ())])"

In [23]:
print(evaluation_output.metrics['loss'])

KeyError: 'loss'