In [1]:
import logging
logging.getLogger("tensorflow").setLevel(logging.ERROR)

import os
os.environ['CUDA_VISIBLE_DEVICES'] = ''
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

import tensorflow as tf

import numpy as np

import tensorflow_datasets as tfds

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Sanity check: CUDA_VISIBLE_DEVICES was set to '' in the first cell, so TensorFlow
# is expected to report no GPUs here.
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))

Num GPUs Available:  0


In [3]:
# Load the MNIST train/test splits through Keras.
(X_train, y_train), (X_test, y_test) = tf.keras.datasets.mnist.load_data()

# Rescale the images by 1/255 (true division, so the results are floating point).
X_train = X_train / 255.0
X_test = X_test / 255.0

In [5]:
def prepare_federated_data(federated_dataset, batch_size, num_steps_until_rtc_check, seed=None):
    """Shuffle and batch every client dataset of a federated dataset.

    Args:
        federated_dataset: Iterable of per-client datasets. Each element must
            provide ``cardinality()``, ``shuffle()`` and ``batch()``
            (e.g. a ``tf.data.Dataset``).
        batch_size: Batch size passed to ``batch()`` for every client.
        num_steps_until_rtc_check: Accepted for interface compatibility but
            currently not used by this function.
        seed: Optional shuffle seed; the same value is used for every client.

    Returns:
        A list with one shuffled-then-batched dataset per client, in the same
        order as ``federated_dataset``.
    """
    prepared_clients = []
    for client_ds in federated_dataset:
        # Use the dataset's own cardinality as the shuffle buffer size so the
        # shuffle is uniform over the whole client dataset.
        buffer_size = client_ds.cardinality()
        shuffled_ds = client_ds.shuffle(buffer_size, seed=seed)
        prepared_clients.append(shuffled_ds.batch(batch_size))
    return prepared_clients


def create_unbiased_federated_data(X_train, y_train, num_clients):
    """Split the training data across clients using ``Dataset.shard``.

    A single dataset is built from ``(X_train, y_train)`` and client ``i``
    receives ``dataset.shard(num_clients, i)``.

    Args:
        X_train: Training features, sliceable along the first axis.
        y_train: Training labels aligned with ``X_train``.
        num_clients: Number of client shards to create.

    Returns:
        A list of ``num_clients`` datasets, one per client.
    """
    full_dataset = tf.data.Dataset.from_tensor_slices((X_train, y_train))

    # Client-level sharding: every client gets its own shard of the same source.
    client_datasets = []
    for client_idx in range(num_clients):
        client_datasets.append(full_dataset.shard(num_clients, client_idx))
    return client_datasets


def create_unbiased_federated_data2(X_train, y_train, num_clients):
    """Split the training data across clients using ``np.array_split``.

    The arrays are cut into ``num_clients`` consecutive chunks up front and a
    separate dataset is built from each (features, labels) chunk pair.

    Args:
        X_train: Training features (array-like).
        y_train: Training labels aligned with ``X_train``.
        num_clients: Number of chunks / client datasets to create.

    Returns:
        A list of ``num_clients`` datasets, one per client.
    """
    feature_chunks = np.array_split(X_train, num_clients)
    label_chunks = np.array_split(y_train, num_clients)

    return [
        tf.data.Dataset.from_tensor_slices((features, labels))
        for features, labels in zip(feature_chunks, label_chunks)
    ]

In [6]:
# Number of simulated clients; passed as `num_clients` to both sharding helpers below.
n = 20

In [7]:
# Variant 1: per-client datasets built with Dataset.shard on one shared dataset.
unbiased_ds = create_unbiased_federated_data(X_train, y_train, n)

In [8]:
# Variant 2: per-client datasets built from arrays pre-split with np.array_split.
unbiased_ds2 = create_unbiased_federated_data2(X_train, y_train, n)

In [9]:
# Shuffle + batch each client of variant 1 with batch size 32.
# The third argument (num_steps_until_rtc_check=1) is currently ignored by
# prepare_federated_data, and no seed is given, so the shuffle is unseeded.
fed_unbiased = prepare_federated_data(unbiased_ds, 32, 1)

In [10]:
# Same preparation (batch size 32, unseeded shuffle) for variant 2.
fed_unbiased2 = prepare_federated_data(unbiased_ds2, 32, 1)

In [11]:
# Back-of-the-envelope batches per client: samples / (clients * batch size).
# 20 and 32 mirror `n` and the batch size used above; 60_000 is presumably the
# MNIST training-set size. The fractional result rounds up to the 94 batches
# seen in the benchmarks below.
60_000 / (20 * 32)

93.75

In [12]:
# First client's pipeline from each variant (shard-based vs. array_split-based),
# picked out for a side-by-side comparison.
worker1_ds, worker2_ds = fed_unbiased[0], fed_unbiased2[0]

In [15]:
# Benchmark iterating over the first client's pipeline of the shard-based variant.
tfds.benchmark(worker1_ds)


************ Summary ************



100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 94/94 [00:00<00:00, 783.40it/s]

Examples/sec (First included) 772.89 ex/sec (total: 95 ex, 0.12 sec)
Examples/sec (First only) 10.06 ex/sec (total: 1 ex, 0.10 sec)
Examples/sec (First excluded) 3993.98 ex/sec (total: 94 ex, 0.02 sec)





Unnamed: 0,duration,num_examples,avg
first+lasts,0.122916,95,772.886868
first,0.09938,1,10.062347
lasts,0.023535,94,3993.984889


In [16]:
# Benchmark iterating over the first client's pipeline of the array_split-based variant.
tfds.benchmark(worker2_ds)


************ Summary ************



100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 94/94 [00:00<00:00, 3508.22it/s]

Examples/sec (First included) 3251.90 ex/sec (total: 95 ex, 0.03 sec)
Examples/sec (First only) 104.67 ex/sec (total: 1 ex, 0.01 sec)
Examples/sec (First excluded) 4781.23 ex/sec (total: 94 ex, 0.02 sec)





Unnamed: 0,duration,num_examples,avg
first+lasts,0.029214,95,3251.903724
first,0.009553,1,104.674118
lasts,0.01966,94,4781.233881


In [None]:
def mnist_dataset_local_file(n, i, total_num_of_samples):
    """Build the unbatched training dataset for worker ``i`` out of ``n`` workers.

    The MNIST training split is loaded through Keras, restricted to the first
    ``total_num_of_samples`` examples, scaled by 1/255 and cut into ``n``
    contiguous, equally sized shards. Samples left over after the integer
    division are not assigned to any worker.

    Args:
        n: Total number of workers (number of shards).
        i: Zero-based index of the worker whose shard is returned.
        total_num_of_samples: Number of leading training examples to use.

    Returns:
        A ``tf.data.Dataset`` of ``(image, label)`` pairs for worker ``i``, with
        float32 images of shape (28, 28) and int64 labels. The dataset is neither
        shuffled nor batched.
    """
    # load_data() returns NumPy arrays, so they are used directly here.
    # The previous code indexed a tuple with ``.iloc`` and always raised.
    # NOTE: the whole training split is loaded at once; this must change if the
    # data should be read in chunks.
    (X_train, y_train), _ = tf.keras.datasets.mnist.load_data()

    x_train = X_train[:total_num_of_samples].astype(np.float32).reshape(-1, 28, 28)
    x_train = x_train / np.float32(255)
    y_train = y_train[:total_num_of_samples].astype(np.int64)

    shard_size = x_train.shape[0] // n
    print(f"data for worker:{i}")

    # Contiguous slice [shard_start, shard_end) belongs to worker i.
    shard_start = shard_size * i
    shard_end = shard_size * (i + 1)
    x_train = x_train[shard_start:shard_end]
    y_train = y_train[shard_start:shard_end]

    return tf.data.Dataset.from_tensor_slices((x_train, y_train))

def dataset_fn(global_batch_size, n, i, batch_size, total_num_of_samples=60000):
    """Return the batched training dataset for worker ``i`` out of ``n`` workers.

    Args:
        global_batch_size: Accepted for interface compatibility; not used here.
        n: Total number of workers, forwarded to ``mnist_dataset_local_file``.
        i: Zero-based worker index, forwarded to ``mnist_dataset_local_file``.
        batch_size: Per-worker batch size applied to the shard.
        total_num_of_samples: Number of training examples to shard; defaults to
            the previously hard-coded 60000.

    Returns:
        The worker's dataset with ``batch(batch_size)`` applied.
    """
    # The previous call passed an undefined `input_context` and only two
    # arguments; the loader takes (n, i, total_num_of_samples).
    dataset = mnist_dataset_local_file(n, i, total_num_of_samples)
    return dataset.batch(batch_size)

In [1]:
# Two equal but independent lists used for the zip experiment below.
l1 = [1, 2, 4, 5]
l2 = list(l1)

In [2]:
# zip() returns a lazy, single-pass iterator over pairs, not a list.
x = zip(l1, l2)

In [3]:
# Displaying the iterator shows only its repr; no pairs are consumed.
x

<zip at 0x7f53a3e4e940>

In [6]:
# Each call consumes one pair from the iterator, so the result depends on how often
# this cell has been run. NOTE(review): the stored output (4, 4) is the third pair,
# so the cell was presumably executed several times — it is not reproducible as-is.
next(x)

(4, 4)