In [1]:
import os
from operator import itemgetter
import collections

import tensorflow as tf
import tensorflow_federated as tff
import numpy as np
import nest_asyncio
# Patch asyncio so that an already-running event loop can be re-entered.
# NOTE(review): presumably required because the notebook kernel already runs a
# loop that TFF also needs to drive — confirm.
nest_asyncio.apply()
# Explicitly request TF 2.x behavior through the v1 compatibility API.
tf.compat.v1.enable_v2_behavior()

# Record the library versions this notebook was executed with.
print(f'Tensorflow version: {tf.__version__}')
print(f'Tensorflow Federated version: {tff.__version__}')

Tensorflow version: 2.1.0
Tensorflow Federated version: 0.12.0


In [2]:
# Notebook-wide configuration (shared by the cells that follow).

# Data locations, relative to the working directory.
IMG_DATA = './federated_data_5_balanced_iid'  # root scanned for per-client sub-directories
VAL_DATA = './sampled_data_1184'              # not referenced by the cells visible here

# Image (height, width) handed to the directory loader as target_size and
# reused for the model's input shape.
IMG_SHAPE = (39, 39)

MAX_STEPS = 1000  # not referenced by the cells visible here

# Class names, passed to the directory loader as `classes`; the model emits
# one output unit per entry.
CLASSES = [
    'aim',
    'email',
    'facebook',
    'ftps',
    'gmail',
    'hangout',
    'icqchat',
    'netflix',
    'scp',
    'sftp',
    'skype',
    'spotify',
    'torrent',
    'vimeo',
    'voipbuster',
    'youtube',
]

In [3]:
%%time
# prepare dataset
# Resolve the configured data directory: expand '~' first, then make it absolute.
dataset_root = os.path.expanduser(IMG_DATA)
dataset_root = os.path.abspath(dataset_root)
print(f'Dataset root: {dataset_root}')

# Keyword arguments forwarded to flow_from_directory for every client directory.
img_gen_op = dict(classes=CLASSES, target_size=IMG_SHAPE)
# One generator shared by all clients, configured with rescale=1/255
# (presumably to map 8-bit pixel values into [0, 1] — confirm).
image_generator = tf.keras.preprocessing.image.ImageDataGenerator(
    rescale=1/255,
)

def gen_fn(args):
    """Return the image-batch iterator for one client directory.

    Args:
        args: Directory path as UTF-8 encoded bytes (it is decoded with
            ``.decode('utf-8')``). NOTE(review): presumably this is the form
            in which ``tf.data.Dataset.from_generator`` hands over its
            ``args`` entry — confirm.

    Returns:
        Whatever ``image_generator.flow_from_directory`` returns for that
        directory when called with the shared ``img_gen_op`` options.
    """
    client_dir = args.decode('utf-8')
    batches = image_generator.flow_from_directory(client_dir, **img_gen_op)
    return batches

# Static shapes of one generator batch, derived from the notebook configuration
# so they cannot drift from `target_size` / `classes` given to the loader or
# from the model's input shape. The leading None is the batch dimension; the
# trailing 3 is the channel count the model's input layer also assumes.
batch_shapes = (tf.TensorShape([None, *IMG_SHAPE, 3]),
                tf.TensorShape([None, len(CLASSES)]))

# Map each client name (sub-directory name) to a lazily evaluated dataset.
dataset_dict = dict()
with os.scandir(dataset_root) as it:
    # os.scandir yields entries in arbitrary order; sorting by name makes the
    # client order (and therefore client_ids[0]) reproducible across runs.
    for entry in sorted(it, key=lambda e: e.name):
        if entry.is_dir():
            name = entry.name
            # The generator is only invoked when the dataset is iterated, so
            # no images are read at this point.
            ds = tf.data.Dataset.from_generator(gen_fn,
                                                args=[entry.path],
                                                output_types=(tf.float32, tf.float32),
                                                output_shapes=batch_shapes)
            dataset_dict[name] = ds

Dataset root: /home/harny/Github/tff-app/federated_data_5_balanced_iid
CPU times: user 131 ms, sys: 5.05 ms, total: 136 ms
Wall time: 152 ms


In [4]:
def client_fn(client_id):
    """Look up the dataset registered for ``client_id``.

    Args:
        client_id: Key into the module-level ``dataset_dict``.

    Returns:
        The dataset stored under that key.

    Raises:
        KeyError: If ``client_id`` is not a key of ``dataset_dict``.
    """
    client_dataset = dataset_dict[client_id]
    return client_dataset

# Expose the per-directory datasets through a TFF ClientData, keyed by client name.
client_data = tff.simulation.ClientData.from_clients_and_fn(
    client_ids=list(dataset_dict),
    create_tf_dataset_for_client_fn=client_fn,
)

# Every client except '0' takes part in training; the evaluation cell of this
# notebook reads client '0' instead.
train_ids = [*dataset_dict]
train_ids.remove('0')  # raises ValueError when there is no client named '0'
dataset = list(map(client_data.create_tf_dataset_for_client, train_ids))

In [5]:
# Build the dataset of the first listed client and print its repr to inspect
# the element shapes and dtypes.
example_dataset = client_data.create_tf_dataset_for_client(client_data.client_ids[0])
print(example_dataset)

<FlatMapDataset shapes: ((None, 39, 39, 3), (None, 16)), types: (tf.float32, tf.float32)>


In [6]:
def preprocess(dataset, num_batches=32*10, num_epochs=1):
    """Bound, cache and repeat a client dataset.

    Args:
        dataset: Object exposing the ``take`` / ``cache`` / ``repeat`` chain
            (a ``tf.data.Dataset`` in this notebook).
        num_batches: Number of leading elements to keep; defaults to 320
            (``32*10``). NOTE(review): the sample element printed by this
            notebook has shape (32, 39, 39, 3), i.e. each element is already
            a 32-image batch, so the default keeps 320 batches rather than
            320 images — confirm that this is intended.
        num_epochs: Count passed to ``repeat``; defaults to 1.

    Returns:
        The result of ``dataset.take(num_batches).cache().repeat(num_epochs)``.
    """
    # take() comes first so that only the bounded prefix is cached.
    # NOTE(review): the underlying Keras directory iterator presumably never
    # terminates on its own, making take() what bounds the dataset — confirm.
    bounded = dataset.take(num_batches)
    return bounded.cache().repeat(num_epochs)
preprocessed_example_dataset = preprocess(example_dataset)
# Fetch the first element through the standard iterator protocol (built-in
# next()) and convert every tensor in it to a NumPy array.
sample_batch = tf.nest.map_structure(lambda x: x.numpy(),
                                     next(iter(preprocessed_example_dataset)))
# Index 0 holds the image batch, index 1 the label batch.
print(sample_batch[0].shape, sample_batch[1].shape)

Found 11257 images belonging to 16 classes.
(32, 39, 39, 3) (32, 16)


In [7]:
# One preprocessed dataset per training client, in the same order as `dataset`.
federated_dataset = list(map(preprocess, dataset))

In [8]:
def create_compiled_keras_model(base_learning_rate=0.001):
    """Build and compile the CNN classifier used in this notebook.

    The network is three 3x3 convolutions (32, 64, 64 filters) with 2x2
    max-pooling after the first two, followed by a 64-unit dense layer and a
    final dense layer with one unit per entry of ``CLASSES``. The final layer
    has no activation, so the model outputs logits; the loss is configured
    with ``from_logits=True`` to match.

    Args:
        base_learning_rate: Learning rate given to the Adam optimizer.
            Defaults to 0.001.

    Returns:
        A compiled ``tf.keras`` Sequential model (Adam optimizer, categorical
        cross-entropy loss, categorical accuracy metric).
    """
    num_classes = len(CLASSES)

    model = tf.keras.models.Sequential([
        # Input is IMG_SHAPE (height, width) plus 3 channels.
        tf.keras.layers.Conv2D(32, (3, 3), activation='relu',
                               input_shape=IMG_SHAPE + (3, )),
        tf.keras.layers.MaxPooling2D((2, 2)),
        tf.keras.layers.Conv2D(64, (3, 3), activation='relu'),
        tf.keras.layers.MaxPooling2D((2, 2)),
        tf.keras.layers.Conv2D(64, (3, 3), activation='relu'),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(64, activation='relu'),
        # No activation: raw logits, paired with from_logits=True in the loss.
        tf.keras.layers.Dense(num_classes),
    ])

    # `learning_rate` is the optimizer's documented argument name; `lr` is
    # only accepted as a backward-compatibility alias.
    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=base_learning_rate),
        loss=tf.keras.losses.CategoricalCrossentropy(from_logits=True),
        metrics=[tf.keras.metrics.CategoricalAccuracy()])

    return model

In [9]:
def model_fn():
    """Create a fresh TFF model wrapping a newly compiled Keras model.

    A new Keras model is built on every call and wrapped together with the
    module-level ``sample_batch``. NOTE(review): ``sample_batch`` is
    presumably used by TFF only to infer the input structure — confirm.

    Returns:
        The object returned by ``tff.learning.from_compiled_keras_model``.
    """
    return tff.learning.from_compiled_keras_model(
        create_compiled_keras_model(), sample_batch)

In [10]:
# Build the federated averaging process from model_fn; it exposes the
# `initialize` and `next` computations used by the cells that follow.
iterative_process = tff.learning.build_federated_averaging_process(model_fn)

Instructions for updating:
If using Keras pass *_constraint arguments to layers.


  This is separate from the ipykernel package so we can avoid doing imports until
  This is separate from the ipykernel package so we can avoid doing imports until


In [11]:
# Display the type signature of `initialize` as a string (cell output).
str(iterative_process.initialize.type_signature)

'( -> <model=<trainable=<float32[3,3,3,32],float32[32],float32[3,3,32,64],float32[64],float32[3,3,64,64],float32[64],float32[2304,64],float32[64],float32[64,16],float32[16]>,non_trainable=<>>,optimizer_state=<int64>,delta_aggregate_state=<>,model_broadcast_state=<>>@SERVER)'

In [12]:
# Create the initial server state that the training rounds start from.
state = iterative_process.initialize()

In [13]:
# Run the first training round on its own; `state` is replaced by the updated
# state and `metrics` holds that round's training metrics.
state, metrics = iterative_process.next(state, federated_dataset)
print('round  1, metrics={}'.format(metrics))

Found 11257 images belonging to 16 classes.
Found 11256 images belonging to 16 classes.
Found 11245 images belonging to 16 classes.Found 11248 images belonging to 16 classes.

Found 11258 images belonging to 16 classes.
round  1, metrics=<categorical_accuracy=0.6935351490974426,loss=1.2649835348129272,keras_training_time_client_sum_sec=0.0061228275299072266>


In [14]:
# NUM_ROUNDS is an exclusive upper bound: range(2, NUM_ROUNDS) runs rounds
# 2 through 10, which together with the separately executed round 1 makes
# 10 rounds in total.
NUM_ROUNDS = 11
for round_num in range(2, NUM_ROUNDS):
    # Each call consumes the previous state and returns the updated one.
    state, metrics = iterative_process.next(state, federated_dataset)
    print('round {:2d}, metrics={}'.format(round_num, metrics))

round  2, metrics=<categorical_accuracy=0.768359363079071,loss=1.0161627531051636,keras_training_time_client_sum_sec=0.007855892181396484>
round  3, metrics=<categorical_accuracy=0.7726171612739563,loss=0.9662094116210938,keras_training_time_client_sum_sec=0.00530242919921875>
round  4, metrics=<categorical_accuracy=0.7770702838897705,loss=0.9449036121368408,keras_training_time_client_sum_sec=0.0047206878662109375>
round  5, metrics=<categorical_accuracy=0.7808789014816284,loss=0.9135715365409851,keras_training_time_client_sum_sec=0.005410909652709961>
round  6, metrics=<categorical_accuracy=0.824999988079071,loss=0.6124383211135864,keras_training_time_client_sum_sec=0.005804538726806641>
round  7, metrics=<categorical_accuracy=0.8448241949081421,loss=0.5104010701179504,keras_training_time_client_sum_sec=0.004790306091308594>
round  8, metrics=<categorical_accuracy=0.8627734184265137,loss=0.4376815855503082,keras_training_time_client_sum_sec=0.004714012145996094>
round  9, metrics=<cat

In [18]:
# Evaluation data: client '0', the client that was removed from the training
# ids, preprocessed the same way as the training clients and wrapped in a
# single-element list.
test_dataset = client_data.create_tf_dataset_for_client('0')
federated_test_data = [preprocess(test_dataset)]

In [19]:
# Build the federated evaluation computation from the same model_fn that was
# used to build the training process.
evaluation = tff.learning.build_federated_evaluation(model_fn)

  This is separate from the ipykernel package so we can avoid doing imports until


In [20]:
# Evaluate the model held in the final training state on the held-out data.
test_metrics = evaluation(state.model, federated_test_data)
print(test_metrics)

Found 11258 images belonging to 16 classes.
<categorical_accuracy=0.9083007574081421,loss=0.29094889760017395,keras_training_time_client_sum_sec=0.0>
