In [5]:
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

import tensorflow as tf
import numpy as np

In [6]:
def load_mnist_from_local_npz(path="../../FdAvg/data/data/mnist/mnist.npz"):
    """
    Load the train/test arrays stored in a local ``.npz`` archive.

    Args:
        path (str or os.PathLike): Location of the archive. Defaults to the
            path this notebook has always used, so existing zero-argument
            calls behave exactly as before.

    Returns:
        tuple: ``((X_train, y_train), (X_test, y_test))`` read from the archive
        entries named 'X_train', 'y_train', 'X_test' and 'y_test'.

    Raises:
        KeyError: If one of the four expected entries is missing from the archive.
    """
    # The context manager closes the archive's file handle; the four arrays are
    # read into memory while the archive is still open.
    with np.load(path) as data:
        return (data['X_train'], data['y_train']), (data['X_test'], data['y_test'])

In [7]:
# Unpack the (features, labels) pairs for the train and test splits returned by the loader.
(X_train, y_train), (X_test, y_test) = load_mnist_from_local_npz()

In [138]:
# Pull out the training samples (and their labels) whose label equals 0
X_train_zeros, y_train_zeros = X_train[y_train == 0], y_train[y_train == 0]

In [139]:
# Number of training examples that carry label 0.
len(y_train_zeros)

5923

In [140]:
# Everything that is not a 0: features and labels are filtered with the same condition
X_train_rest, y_train_rest = (arr[y_train != 0] for arr in (X_train, y_train))

In [141]:
# Place the label-0 block in front of the remaining examples (same order for features and labels)
X_train_non_uniform_biased = np.concatenate([X_train_zeros, X_train_rest], axis=0)
y_train_non_uniform_biased = np.concatenate([y_train_zeros, y_train_rest], axis=0)

In [142]:
# Number of client shards the reordered training set is split into.
num_clients = 60

In [143]:
# Cut features and labels into num_clients consecutive chunks
X_train_non_uniform_biased_lst, y_train_non_uniform_biased_lst = (
    np.array_split(stacked, num_clients)
    for stacked in (X_train_non_uniform_biased, y_train_non_uniform_biased)
)

In [144]:
# Per-client tf.data.Dataset shards; starts empty and is filled by the following cell.
non_uniform_biased_federated_dataset = []

In [145]:
# Build one tf.data.Dataset per client shard.
# The list is rebuilt in a single expression so that re-running this cell cannot
# append duplicate shards, and the loop names are chosen so they do not overwrite
# the notebook-level X_train / y_train arrays (the original loop left them bound
# to the last shard).
non_uniform_biased_federated_dataset = [
    tf.data.Dataset.from_tensor_slices((X_shard, y_shard))
    for X_shard, y_shard in zip(X_train_non_uniform_biased_lst, y_train_non_uniform_biased_lst)
]

In [146]:
# Sanity check: total number of examples summed over all client datasets.
sum(ds.cardinality() for ds in non_uniform_biased_federated_dataset)

<tf.Tensor: shape=(), dtype=int64, numpy=60000>

In [2]:
# Scratch: draw 4 values from tf.random.normal; no seed is passed in this call.
tf.random.normal(shape=[4])

<tf.Tensor: shape=(4,), dtype=float32, numpy=array([0.0268528 , 0.42395547, 1.6263357 , 0.8324397 ], dtype=float32)>

In [3]:
# Scratch: the same call repeated; the values shown differ from the previous cell's output.
tf.random.normal(shape=[4])

<tf.Tensor: shape=(4,), dtype=float32, numpy=array([-1.2719796 ,  0.5056329 , -0.18683623, -0.3944708 ], dtype=float32)>

In [10]:
# Scratch: stateless normal sampling with an explicit seed pair.
tf.random.stateless_normal(shape=[6000], seed=[1, 2])

<tf.Tensor: shape=(6000,), dtype=float32, numpy=
array([ 0.5441101 ,  0.20738031,  0.07356432, ..., -0.5228442 ,
        2.8992932 ,  0.07690824], dtype=float32)>

In [2]:
# Scratch: repeated with the identical seed; the printed values match the earlier stateless call.
tf.random.stateless_normal(shape=[6000], seed=[1, 2])

<tf.Tensor: shape=(6000,), dtype=float32, numpy=
array([ 0.5441101 ,  0.20738031,  0.07356432, ..., -0.5228442 ,
        2.8992932 ,  0.07690824], dtype=float32)>

In [6]:
# Scratch: equality of two scalars, reduced with tf.reduce_all to a single boolean tensor.
tf.reduce_all(tf.equal(1, 1))

<tf.Tensor: shape=(), dtype=bool, numpy=True>

In [3]:
def create_one_label_biased_federated_data(X_train, y_train, num_clients):
    """
    Create label-skewed (non-iid) federated data in which every example with label 0 is
    placed at the front of the dataset before it is cut into contiguous client shards.
    The 0-labelled examples therefore end up concentrated on the first client(s) only.

    The remaining examples keep their original relative order. This function does not
    shuffle, so the non-zero part is only as well mixed as the input already is.

    Shards are produced with ``np.array_split``, so their sizes differ by at most one
    example and ``num_clients`` does not have to divide the dataset size.

    Args:
        X_train (numpy.ndarray): The training data features.
        y_train (numpy.ndarray): The training data labels, aligned with ``X_train``
            along the first axis.
        num_clients (int): The number of clients among which the data should be distributed.

    Returns:
        list of tf.data.Dataset: A list of TensorFlow Dataset objects. Each dataset in the list corresponds to
        the data shard for a client. The order of the datasets in the list corresponds to the order of the clients.
    """

    # Compute the label mask once and reuse it for features and labels alike.
    zero_mask = y_train == 0

    # Zeros first, everything else afterwards; each group keeps its input order.
    X_ordered = np.concatenate((X_train[zero_mask], X_train[~zero_mask]))
    y_ordered = np.concatenate((y_train[zero_mask], y_train[~zero_mask]))

    # Contiguous, near-equal chunks: the leading chunk(s) hold the 0-labelled examples.
    X_shards = np.array_split(X_ordered, num_clients)
    y_shards = np.array_split(y_ordered, num_clients)

    # Distinct loop names keep the X_train / y_train parameters from being shadowed.
    return [
        tf.data.Dataset.from_tensor_slices((X_shard, y_shard))
        for X_shard, y_shard in zip(X_shards, y_shards)
    ]

In [9]:
# Build the label-0-skewed federated split for 50 clients.
one_label_biased_federated_dataset = create_one_label_biased_federated_data(X_train, y_train, 50)

In [10]:
# Sanity check: total number of examples summed over all client datasets.
sum(ds.cardinality() for ds in one_label_biased_federated_dataset)

<tf.Tensor: shape=(), dtype=int64, numpy=60000>

In [14]:
# Label (second element of the (features, label) pair) of the first example in the first client's dataset.
next(iter(one_label_biased_federated_dataset[0]))[1]

<tf.Tensor: shape=(), dtype=uint8, numpy=0>

In [33]:
# Scratch: string form of -1 after conversion to float.
str(float(-1))

'-1.0'

In [34]:
# Scratch: equality comparison between a float and an int of the same value.
-1.0 == -1

True