In [1]:
import pandas as pd
import numpy as np

import tensorflow as tf
import tensorflow_probability as tfp
import tensorflow_federated as tff

2022-09-24 13:01:59.585146: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F AVX512_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-09-24 13:01:59.685529: I tensorflow/core/util/util.cc:169] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2022-09-24 13:01:59.689059: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2022-09-24 13:01:59.689074: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if yo

In [2]:
import nest_asyncio
nest_asyncio.apply()

In [3]:
%load_ext tensorboard

## Federated Core Test

In [4]:
seed = 2022 # RNG seed; applied below so the synthetic data is identical on every run

n_samples = 150 # number of samples
k = 1 # number of x variables/cols
j = 3 # number of servers

# Seed NumPy's global generator before drawing any data. Without this the
# `seed` constant had no effect and a Restart & Run All produced different
# x / y (and therefore different outputs) each time.
np.random.seed(seed)

# x = [np.random.uniform(size = (n_samples, k)).tolist() for _ in range(j)]
# One list of n_samples uniform draws per server.
x = [np.random.uniform(size = n_samples).tolist() for _ in range(j)]
# Square (n_samples x n_samples) matrix of uniform draws.
y = np.random.uniform(size = (n_samples, n_samples))

# Server whose data is used in the comparisons below.
j_index = 0
x_j = x[j_index]

## Compare tf.Dataset and TFF

### 1. Convert to format and get average

#### Dataset

In [5]:
def dataset_get_local_average(local_vals):
    """Return the mean of the elements of ``local_vals``.

    A single ``reduce`` pass carries a ``(running_sum, element_count)`` pair
    starting from ``(0.0, 0)``; the final sum is divided by the count cast to
    ``tf.float32``.
    """
    def _accumulate(state, value):
        # state is the (sum, count) pair threaded through the reduction.
        running_sum, seen = state
        return running_sum + value, seen + 1

    total, count = local_vals.reduce((0.0, 0), _accumulate)
    return total / tf.cast(count, tf.float32)

In [6]:
# Wrap the selected server's samples (x_j) in a tf.data.Dataset, one element per sample.
dataset = tf.data.Dataset.from_tensor_slices(x_j) # convert to tf dataset
# Mean of the samples, pulled out of the returned tensor via .numpy().
x_avg = dataset_get_local_average(dataset).numpy()
x_avg

2022-09-24 13:02:01.335459: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory
2022-09-24 13:02:01.335476: W tensorflow/stream_executor/cuda/cuda_driver.cc:263] failed call to cuInit: UNKNOWN ERROR (303)
2022-09-24 13:02:01.335487: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (rgus-Latitude-7520): /proc/driver/nvidia/version does not exist
2022-09-24 13:02:01.335697: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F AVX512_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


0.439388

#### TFF

In [7]:
# get averages!
@tff.tf_computation(tff.SequenceType(tf.float32))
def get_local_average(local_vals):
    """TFF TensorFlow computation: mean of a float32 sequence.

    Delegates to ``dataset_get_local_average`` (defined in the Dataset section
    above) instead of repeating its body, so the plain tf.data path and the
    federated path are guaranteed to run the same reduction.
    """
    return dataset_get_local_average(local_vals)

@tff.federated_computation(tff.type_at_clients(tff.SequenceType(tf.float32)))
def avg_map(vals):
    """Federated computation: map ``get_local_average`` over client-placed sequences.

    ``vals`` is declared as float32 sequences placed at the clients; the
    result of ``tff.federated_map`` is returned unchanged.
    """
    per_client_means = tff.federated_map(get_local_average, vals)
    return per_client_means

In [8]:
# Run the federated average with a single client (x_j) and keep that client's result.
fed_x_avg = avg_map([x_j])[0]
fed_x_avg

0.439388

### 2. Get covariance

#### Dataset

In [9]:
def get_element(dataset):
    """Print every element of ``dataset`` in iteration order; returns nothing."""
    for item in iter(dataset):
        print(item)

In [10]:
# Turn the sample vector into a matrix: every element is repeated n_samples
# times to form a row, then rows are stacked in batches of n_samples.
dataset_repeated = (
    dataset
    .map(lambda value: tf.repeat(value, n_samples, axis = 0))
    .batch(n_samples, drop_remainder=False)
)

get_element(dataset_repeated)

tf.Tensor(
[[0.5745409  0.5745409  0.5745409  ... 0.5745409  0.5745409  0.5745409 ]
 [0.66319996 0.66319996 0.66319996 ... 0.66319996 0.66319996 0.66319996]
 [0.5159908  0.5159908  0.5159908  ... 0.5159908  0.5159908  0.5159908 ]
 ...
 [0.4446365  0.4446365  0.4446365  ... 0.4446365  0.4446365  0.4446365 ]
 [0.77188975 0.77188975 0.77188975 ... 0.77188975 0.77188975 0.77188975]
 [0.17519735 0.17519735 0.17519735 ... 0.17519735 0.17519735 0.17519735]], shape=(150, 150), dtype=float32)


In [11]:
# Covariance of each batched x matrix with y, taken along the sample axis (axis 0).
cov_vec = dataset_repeated.map(
    lambda batch: tfp.stats.covariance(batch, y, sample_axis=0, event_axis=None)
)
# Materialise the first batch's covariance vector as a NumPy array.
cov_vec_np = next(cov_vec.as_numpy_iterator())
cov_vec_np

array([ 2.87773856e-03,  4.05198755e-03, -7.10635027e-03, -1.00596622e-03,
        3.51213268e-03, -8.66604038e-03,  8.57600570e-03,  6.65412867e-04,
       -4.20591095e-03, -6.74881926e-03,  6.12326548e-04,  1.43226795e-02,
       -1.32999234e-02,  1.68486610e-02,  1.02900749e-03,  4.77856351e-03,
       -6.03951630e-04,  7.83179700e-03, -7.03747105e-03, -5.22249797e-03,
        4.91724117e-03, -2.94933716e-06, -2.57727923e-03,  3.72699648e-03,
       -6.13499607e-04,  4.13555565e-04,  8.56180268e-04, -6.12876285e-03,
       -3.92249133e-03, -8.56847037e-03,  4.37103631e-03, -7.72171188e-03,
       -7.54889892e-03,  7.12684821e-04,  1.48570742e-02, -4.48371749e-03,
       -9.06115957e-03, -3.59643344e-03,  2.18514819e-03,  1.33445151e-02,
        1.08226184e-02,  5.09223901e-03, -6.91226823e-03, -5.47358580e-03,
        4.01620427e-03, -1.13173760e-03,  4.80033970e-03, -3.47859133e-03,
       -9.78258671e-04,  6.99048769e-03, -6.02292875e-03,  5.65557228e-03,
        9.78637673e-03, -

#### TFF

In [12]:
def get_y():
    """Return the module-level matrix ``y`` (called from inside ``local_covariance``)."""
    return y

# Per-client covariance of the tiled x matrix with y.
@tff.tf_computation(tff.SequenceType(tf.float32))
def local_covariance(vec):
    """TFF TensorFlow computation: covariance of a float32 sequence with ``y``.

    Mirrors the plain tf.data pipeline: each element is repeated ``n_samples``
    times, the rows are batched ``n_samples`` at a time, and every batch is
    passed to ``tfp.stats.covariance`` together with ``get_y()``. The mapped
    dataset itself is returned.
    """
    def _tile(value):
        return tf.repeat(value, n_samples, axis = 0)

    def _cov_with_y(batch):
        # this function computes the covariance
        return tfp.stats.covariance(batch, get_y(), sample_axis=0, event_axis=None)

    return vec.map(_tile).batch(n_samples, drop_remainder=False).map(_cov_with_y)

str(local_covariance.type_signature)

'(float32* -> float32[150]*)'

In [13]:
# Apply the local covariance computation on every client.
@tff.federated_computation(tff.type_at_clients(tff.SequenceType(tf.float32)))
def covariance_map(vecs):
    """Federated computation: map ``local_covariance`` over client-placed sequences."""
    per_client_cov = tff.federated_map(local_covariance, vecs)
    return per_client_cov

In [14]:
# Run the federated covariance with a single client (x_j).
fed_cov_vec = covariance_map([x_j])
# Take the first client's result and materialise its first batch as a NumPy array.
fed_cov_vec_np = next(fed_cov_vec[0].as_numpy_iterator())
fed_cov_vec_np

array([ 2.87773856e-03,  4.05198755e-03, -7.10635027e-03, -1.00596622e-03,
        3.51213268e-03, -8.66604038e-03,  8.57600570e-03,  6.65412867e-04,
       -4.20591095e-03, -6.74881926e-03,  6.12326548e-04,  1.43226795e-02,
       -1.32999234e-02,  1.68486610e-02,  1.02900749e-03,  4.77856351e-03,
       -6.03951630e-04,  7.83179700e-03, -7.03747105e-03, -5.22249797e-03,
        4.91724117e-03, -2.94933716e-06, -2.57727923e-03,  3.72699648e-03,
       -6.13499607e-04,  4.13555565e-04,  8.56180268e-04, -6.12876285e-03,
       -3.92249133e-03, -8.56847037e-03,  4.37103631e-03, -7.72171188e-03,
       -7.54889892e-03,  7.12684821e-04,  1.48570742e-02, -4.48371749e-03,
       -9.06115957e-03, -3.59643344e-03,  2.18514819e-03,  1.33445151e-02,
        1.08226184e-02,  5.09223901e-03, -6.91226823e-03, -5.47358580e-03,
        4.01620427e-03, -1.13173760e-03,  4.80033970e-03, -3.47859133e-03,
       -9.78258671e-04,  6.99048769e-03, -6.02292875e-03,  5.65557228e-03,
        9.78637673e-03, -

In [15]:
# check covariances are the same
# Fail loudly if the tf.data and TFF paths disagree instead of relying on a
# visual scan of the printed differences.
np.testing.assert_allclose(fed_cov_vec_np, cov_vec_np, rtol=1e-5, atol=1e-8)
fed_cov_vec_np - cov_vec_np

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
      dtype=float32)

### 3. Combine and extract X

In [16]:
# Quantities derived from y that the reconstruction below relies on.
n_samples_recreated = y.shape[0]  # number of rows of y
y_means = y.mean(axis = 0)        # column means of y
y_T_inv = np.linalg.inv(y.T)      # inverse of y transposed

In [17]:
def reconstruct_x(cov_vec, x_avg, n_samples = n_samples_recreated):
    """Rebuild the x vector from its covariance with ``y`` and its mean.

    Computes ``n_samples * (y_T_inv @ cov_vec)`` plus
    ``n_samples * x_avg * (y_T_inv @ y_means)``, using the module-level
    ``y_T_inv`` and ``y_means``.

    Parameters
    ----------
    cov_vec : covariance vector between x and the columns of y.
    x_avg : mean of x.
    n_samples : scaling factor; defaults to ``n_samples_recreated``.
    """
    covariance_part = n_samples * (y_T_inv @ cov_vec)
    mean_part = n_samples * x_avg * (y_T_inv @ y_means)
    return covariance_part + mean_part

#### Dataset

In [18]:
# Reconstruct x_j from the tf.data covariance vector and mean.
reconstructed_x_j = reconstruct_x(cov_vec_np, x_avg)
# Element-wise reconstruction error against the original samples.
reconstructed_x_j - x_j

array([-4.04690353e-07, -1.33005077e-07, -6.81612637e-08, -1.28894311e-07,
        2.51956893e-07,  3.90105574e-07, -1.58739580e-07,  3.08833803e-07,
       -1.62144744e-08, -1.12471542e-07, -2.27422114e-07, -1.85757721e-07,
       -6.98745645e-08, -2.16315885e-07,  6.22957272e-08, -6.01033062e-07,
       -1.59351843e-07, -1.55117937e-07,  6.52265111e-08, -3.77229099e-07,
       -5.01229411e-07,  3.49923259e-07, -3.07670225e-07, -3.18018761e-07,
       -6.57088625e-08,  2.63337802e-07,  2.23782203e-07, -2.07052363e-08,
        3.13867754e-07, -2.15922130e-08,  4.63354699e-07, -1.37070956e-07,
        2.21829780e-07,  6.89353685e-08, -1.54716058e-07,  4.40296919e-08,
       -7.98527999e-09, -6.32684455e-08, -6.11317493e-08, -1.11249211e-07,
        1.86179086e-07, -3.55312906e-07, -3.52414886e-07, -1.54001049e-07,
        3.51779067e-07, -2.39574288e-07, -2.10808986e-07, -1.89074061e-08,
       -1.41257564e-07, -1.41425075e-07,  7.23467120e-09, -2.67777711e-07,
        7.66458291e-08, -

#### TFF

In [19]:
# Reconstruct x_j from the TFF covariance vector and mean.
# NOTE: this overwrites the `reconstructed_x_j` computed in the Dataset cell above.
reconstructed_x_j = reconstruct_x(fed_cov_vec_np, fed_x_avg)
# Element-wise reconstruction error against the original samples.
reconstructed_x_j - x_j

array([-4.04690353e-07, -1.33005077e-07, -6.81612637e-08, -1.28894311e-07,
        2.51956893e-07,  3.90105574e-07, -1.58739580e-07,  3.08833803e-07,
       -1.62144744e-08, -1.12471542e-07, -2.27422114e-07, -1.85757721e-07,
       -6.98745645e-08, -2.16315885e-07,  6.22957272e-08, -6.01033062e-07,
       -1.59351843e-07, -1.55117937e-07,  6.52265111e-08, -3.77229099e-07,
       -5.01229411e-07,  3.49923259e-07, -3.07670225e-07, -3.18018761e-07,
       -6.57088625e-08,  2.63337802e-07,  2.23782203e-07, -2.07052363e-08,
        3.13867754e-07, -2.15922130e-08,  4.63354699e-07, -1.37070956e-07,
        2.21829780e-07,  6.89353685e-08, -1.54716058e-07,  4.40296919e-08,
       -7.98527999e-09, -6.32684455e-08, -6.11317493e-08, -1.11249211e-07,
        1.86179086e-07, -3.55312906e-07, -3.52414886e-07, -1.54001049e-07,
        3.51779067e-07, -2.39574288e-07, -2.10808986e-07, -1.89074061e-08,
       -1.41257564e-07, -1.41425075e-07,  7.23467120e-09, -2.67777711e-07,
        7.66458291e-08, -

#### Test breakage

In [20]:
# NOTE(review): duplicate of the get_y defined in the covariance section; it is
# not called in this cell (the only call site below is commented out).
def get_y():
    """Return the module-level matrix ``y``."""
    return y

# merge to tensor
@tff.tf_computation(tff.SequenceType(tf.float32))
def local_merge2tensor(vec):
    """TFF TensorFlow computation: batch a float32 sequence into tensors.

    Same shape of pipeline as ``local_covariance`` but with the repeat and
    covariance steps left out: the sequence is only grouped into batches of
    ``n_samples`` elements (incomplete trailing batches are dropped).
    """
    return vec.batch(n_samples, drop_remainder=True)

str(local_merge2tensor.type_signature)

'(float32* -> float32[150]*)'

In [21]:
# Apply the batching-only computation on every client.
@tff.federated_computation(tff.type_at_clients(tff.SequenceType(tf.float32)))
def global_merge(vals):
    """Federated computation: map ``local_merge2tensor`` over client-placed sequences."""
    per_client_batches = tff.federated_map(local_merge2tensor, vals)
    return per_client_batches

In [22]:
# NOTE: `fed_cov_vec` is reused here, but it now holds the output of
# global_merge (batched input values), not a covariance vector.
fed_cov_vec = global_merge([x_j])
# First batch returned for the first (only) client.
next(fed_cov_vec[0].as_numpy_iterator())

array([0.5745409 , 0.66319996, 0.5159908 , 0.23711313, 0.78899425,
       0.49931702, 0.2555557 , 0.46419847, 0.01365231, 0.9790286 ,
       0.03407234, 0.11002341, 0.549638  , 0.74536407, 0.01489864,
       0.26108602, 0.41571823, 0.15941693, 0.39767984, 0.817516  ,
       0.4835058 , 0.14407529, 0.5692794 , 0.5268348 , 0.52152336,
       0.319054  , 0.0396032 , 0.35738993, 0.71448916, 0.7128584 ,
       0.29400608, 0.26830477, 0.8534324 , 0.7212698 , 0.44379622,
       0.74784213, 0.8377984 , 0.47412047, 0.7792083 , 0.01029759,
       0.1399361 , 0.2610262 , 0.42820555, 0.44912806, 0.20981039,
       0.17931947, 0.28420016, 0.15882327, 0.23933445, 0.49106392,
       0.8219152 , 0.8265646 , 0.589897  , 0.8466324 , 0.00973011,
       0.08570742, 0.93142587, 0.5551172 , 0.08517147, 0.7998447 ,
       0.37038222, 0.307473  , 0.9474603 , 0.23590392, 0.20809764,
       0.10118823, 0.43282503, 0.7063    , 0.93756235, 0.62510633,
       0.49257335, 0.39122316, 0.80741656, 0.08891166, 0.42354

In [23]:
# Display the original client samples for comparison with the batch returned above.
x_j

[0.5745408899144413,
 0.6631999671243912,
 0.5159908168589863,
 0.23711313617972984,
 0.7889942405159143,
 0.49931701764598513,
 0.2555556866511003,
 0.46419848326185775,
 0.013652306585427598,
 0.9790285580713632,
 0.03407233869463566,
 0.11002341248819847,
 0.5496379742094668,
 0.7453640451293252,
 0.01489863860277052,
 0.26108600815362515,
 0.41571823236659666,
 0.15941693206273622,
 0.3976798250232644,
 0.8175160571747737,
 0.4835057772577368,
 0.1440752847619211,
 0.5692793785143307,
 0.5268348106839115,
 0.5215233278525534,
 0.31905401934663025,
 0.0396032037580033,
 0.3573899351895661,
 0.714489141996391,
 0.7128584013926123,
 0.29400607530487266,
 0.26830477476830417,
 0.8534324193614823,
 0.7212698159844527,
 0.4437962091274841,
 0.7478421369300969,
 0.8377984009448465,
 0.4741204603074416,
 0.7792082843786391,
 0.010297590029973214,
 0.1399361003492411,
 0.2610261965171333,
 0.428205541063146,
 0.44912806130787974,
 0.20981038551443099,
 0.17931947528880388,
 0.28420014939307