In [1]:
import nest_asyncio
nest_asyncio.apply()

In [2]:

import collections

import numpy as np
import tensorflow as tf
import tensorflow_federated as tff

tff.backends.test.set_test_execution_context()  # for secure aggregation

np.random.seed(0)

print(f'TF version: {tf.__version__}\nTFF version: {tff.__version__}')

tff.federated_computation(lambda: 'Hello, World!')()

TF version: 2.7.0-dev20210829
TFF version: 0.19.0


b'Hello, World!'

In [3]:
from hierarchical_histogram.hierarchical_histogram import build_hierarchical_histogram_computation
from hierarchical_histogram.hierarchical_histogram_decoder import HierarchicalHistogramDecoder

# Goal
Construct a hierarchical histogram from synthetic data and compute its median (0.5-quantile). 

For simplicity, I will start with 'no-noise' DP and add noise later once I get the semantics of these methods right. 

In [42]:
hihi_computation = build_hierarchical_histogram_computation(
    lower_bound=0, upper_bound=1.0, num_bins=10,
    dp_mechanism='no-noise',
    # dp_mechanism='distributed-discrete-gaussian', noise_multiplier=0.1, 
    expected_clients_per_round=100)

client_losses = np.linspace(0, 1, 100)[:, None].astype(np.float32).tolist()
# `client_losses` is a list of 100 entries, each of which is a single-element list
print('client losses sample:', client_losses[:5])  

# Create a tf Dataset for each client. Each client has only one single scalar. 
# `hihi_computation` expects a `tf.data.Dataset` at each client.
client_losses_ds = [tf.data.Dataset.from_tensor_slices(l) for l in client_losses]

hist = hihi_computation(client_losses_ds)  # compute the histogram from the data
decoder = HierarchicalHistogramDecoder(hist)  # build the decoder object
decoder.enforce_consistency()  # required before running quantile query

bin_id = decoder.quantile_query(0.5)  # quantile query returns the index of leaf
layer_of_leaf = decoder._num_layers - 1  # layer id of the leaf
# Compute the value of the node. Does this give us the quantile estimate?
quantile_estimate = decoder.node_query(layer_of_leaf, bin_id) 

print(f'bin_id: {bin_id}\tlayer_id: {layer_of_leaf}\nQuantile estimate: {quantile_estimate}')
print(f'Type of histogram: {hist.dtype}\tType of quantile estimate: {type(quantile_estimate)}')



client losses sample: [[0.0], [0.010101010091602802], [0.020202020183205605], [0.03030303120613098], [0.04040404036641121]]
bin_id: 4	layer_id: 4
Quantile estimate: 10
Type of histogram: <dtype: 'int32'>	Type of quantile estimate: <class 'int'>


# Questions:
1. Why is `hist.dtype` int32? I expect it to be float32, same as the data. 
2. As per the type signatures, `decoder.node_query`'s return type is float. Why does it return an integer?
3. My data is uniformly spaced between 0 and 1. Therefore, the 0.5-quantile should be ~0.5. In this sense, a `bin_id` of 4 is reasonable to me. How do I obtain the actual float32 value of the quantile from this?