In [24]:
import functools
from typing import Callable, Optional

import collections

from typing import Any, Callable, Dict, List, Optional


from absl import logging
import tensorflow as tf
import tensorflow_federated as tff

from utils import training_loop
from utils import training_loop_importance
from utils import training_utils
from utils.datasets import emnist_dataset
from utils.models import emnist_models


from optimization.shared import importance_gradient_schedule as importance_schedule
from optimization.shared import importance_aggregation_factory

import nest_asyncio
nest_asyncio.apply()

In [14]:
#
client_batch_size = 20
client_epochs_per_round=1
max_batches_per_client=100
clients_per_round = 10

In [4]:
emnist_train, _ = emnist_dataset.get_emnist_datasets(
  client_batch_size,
  client_epochs_per_round,
  max_batches_per_client=max_batches_per_client,
  only_digits=False)

input_spec = emnist_train.create_tf_dataset_for_client(
  emnist_train.client_ids[0]).element_spec

model_builder = functools.partial(
        emnist_models.create_two_hidden_layer_model, only_digits=False)

loss_builder = tf.keras.losses.SparseCategoricalCrossentropy
metrics_builder = lambda: [tf.keras.metrics.SparseCategoricalAccuracy()]

def tff_model_fn() -> tff.learning.Model:
    return tff.learning.from_keras_model(
        keras_model=model_builder(),
        input_spec=input_spec,
        loss=loss_builder(),
        metrics=metrics_builder())

In [6]:
client_optimizer_fn = tf.keras.optimizers.SGD
server_optimizer_fn = tf.keras.optimizers.SGD

client_lr_schedule = lambda _: 0.1
server_lr_schedule = lambda _: 0.1

In [10]:
client_lr_schedule = lambda _: 0.1
server_lr_schedule = lambda _: 0.1
def iterative_process_builder(
    model_fn: Callable[[], tff.learning.Model],
    client_weight_fn: Optional[Callable[[Any], tf.Tensor]] = None,
) -> tff.templates.IterativeProcess:

    factory = importance_aggregation_factory.ImportanceSamplingFactory(clients_per_round)
    weights_type = importance_aggregation_factory.weights_type_from_model_fn(model_fn)
    importance_aggregation_process = factory.create(
    value_type = weights_type,
    weight_type = tff.TensorType(tf.float32))

    return importance_schedule.build_fed_avg_process(
      model_fn=model_fn,
      client_optimizer_fn=client_optimizer_fn,
      client_lr=client_lr_schedule,
      server_optimizer_fn=server_optimizer_fn,
      server_lr=server_lr_schedule,
      aggregation_process = importance_aggregation_process)
 

In [15]:
training_process = iterative_process_builder(model_fn = tff_model_fn)

In [16]:
client_datasets_fn = training_utils.build_availability_client_datasets_fn(
      train_dataset = emnist_train, 
      train_clients_per_round = clients_per_round, 
      beta = 0.1)

In [17]:
round_num=0

In [19]:
federated_train_data, federated_weights,r_vec,idx_ids,avail = client_datasets_fn(round_num)

In [25]:
initial_state = training_process.initialize()

In [27]:
state2, round_metrics = training_process.next(initial_state,
                                              federated_train_data, federated_weights.numpy().tolist())

In [30]:
initial_state.model.trainable[0][0,0]

-0.047363598

In [31]:
state1.model.trainable[0][0,0]

-0.04736358

In [33]:
federated_weights, r_vec

(<tf.Tensor: shape=(10,), dtype=float32, numpy=
 array([0.00481723, 0.00312181, 0.00397017, 0.00509929, 0.00538122,
        0.00509929, 0.00397017, 0.00481723, 0.00340474, 0.00425267],
       dtype=float32)>,
 <tf.Variable 'Variable:0' shape=(3400,) dtype=float32, numpy=
 array([0.00046105, 0.00048667, 0.00040982, ..., 0.0001793 , 0.0001793 ,
        0.00020491], dtype=float32)>)

In [34]:
0.004/10

0.0004

In [36]:
0.1/max(federated_weights/10)

<tf.Tensor: shape=(), dtype=float32, numpy=185.83156>

In [37]:
0.00312181*50

0.1560905

In [39]:
0.00481723*20

0.09634459999999999

In [40]:
6/(0.99*6+0.01)

1.008403361344538

In [41]:
r_vec

<tf.Variable 'Variable:0' shape=(3400,) dtype=float32, numpy=
array([0.00046105, 0.00048667, 0.00040982, ..., 0.0001793 , 0.0001793 ,
       0.00020491], dtype=float32)>

In [42]:
(0.00046105-0.01)/0.99

-0.00963530303030303

In [44]:
p_vector = [ ]
for client_id in emnist_train.client_ids:
    dataset = emnist_train.create_tf_dataset_for_client(client_id)
    p_vector.append(len(list(dataset)))
p_vector = np.array(p_vector)/sum(p_vector)

  input_tree = dict(_yield_sorted_items(input_tree))
  input_tree = dict(_yield_sorted_items(input_tree))


NameError: name 'np' is not defined

In [46]:
import numpy as np

In [48]:
p_vector = np.array(p_vector)/sum(p_vector)

In [49]:
p_vector

array([0.00051228, 0.00054074, 0.00045536, ..., 0.00019922, 0.00019922,
       0.00022768])

In [50]:
0.00051228/(0.99*0.00051228+0.01)

0.048755337932890164

In [51]:
federated_weights

<tf.Tensor: shape=(10,), dtype=float32, numpy=
array([0.00481723, 0.00312181, 0.00397017, 0.00509929, 0.00538122,
       0.00509929, 0.00397017, 0.00481723, 0.00340474, 0.00425267],
      dtype=float32)>

In [52]:
idx_ids

<tf.Tensor: shape=(10,), dtype=int32, numpy=array([ 47,  81,  84,  90,  92, 145, 255, 328, 401, 406], dtype=int32)>

In [55]:
p_vector[47]/(0.9*p_vector[47]+0.1)

0.004817228676678945

In [64]:
beta = 0.002
rk = p_vector[47]

for i in range(100):
    rk = ((1-beta)*rk+beta)
    print(f'r_k: {rk}  -  pk/rk = {p_vector[47]/rk}')

r_k: 0.0024828528331957765  -  pk/rk = 0.19486474094452086
r_k: 0.0044778871275293845  -  pk/rk = 0.10804659884560473
r_k: 0.006468931353274325  -  pk/rk = 0.07479140645064555
r_k: 0.008455993490567776  -  pk/rk = 0.05721627797889644
r_k: 0.01043908150358664  -  pk/rk = 0.04634703483997462
r_k: 0.012418203340579467  -  pk/rk = 0.03896058559156177
r_k: 0.014393366933898307  -  pk/rk = 0.033614127699656056
r_k: 0.01636458020003051  -  pk/rk = 0.029565101470990538
r_k: 0.018331851039630448  -  pk/rk = 0.02639234156431472
r_k: 0.020295187337551188  -  pk/rk = 0.02383917261255704
r_k: 0.022254596962876086  -  pk/rk = 0.021740248765284215
r_k: 0.024210087768950332  -  pk/rk = 0.019984251141979294
r_k: 0.02616166759341243  -  pk/rk = 0.018493487558334843
r_k: 0.028109344258225602  -  pk/rk = 0.017212086831320687
r_k: 0.030053125569709148  -  pk/rk = 0.016098840469082932
r_k: 0.03199301931856973  -  pk/rk = 0.01512268877552424
r_k: 0.033929033279932595  -  pk/rk = 0.014259777758838221
r_k: 0.0

In [61]:
1001/2

500.5

In [62]:
2/1001

0.001998001998001998

In [74]:
NUM_USER=5
samples_per_user = 100*np.random.lognormal(4, 2, (NUM_USER)).astype(int) + 50

In [75]:
samples_per_user

array([ 4850,  3650,  4650,  2150, 11950])

In [76]:
 np.random.normal(0, 1, 10)

array([ 1.63119485, -0.1506957 , -0.61193213,  2.1335591 , -0.2654838 ,
       -0.45610663, -0.78061412, -0.4595196 ,  0.13269612,  1.48245633])

In [77]:
diction = {'a':[1,2,3], 'b' : [4,5],'train user' : np.array([1,2,3]) }

In [78]:
np.save('dictio.npy', diction)

In [80]:
holita = np.load('dictio.npy', allow_pickle=True)

In [84]:
holita['a']

IndexError: only integers, slices (`:`), ellipsis (`...`), numpy.newaxis (`None`) and integer or boolean arrays are valid indices

In [86]:
xx = np.random.multivariate_normal(mean = np.zeros(10), cov = np.eye(10),size = 100)

In [89]:
xx[:75].shape

(75, 10)

# TEST FUNCTION

In [1]:
import functools
from typing import Callable, Optional

from absl import logging
import tensorflow as tf
import tensorflow_federated as tff

from utils import test
from utils.datasets import synthetic_dataset
from utils import training_utils

In [2]:
train_data, test_data, fed_test_data = synthetic_dataset.generate_federated_softmax_data(
    batch_size = 20, 
    client_epochs_per_round= 1,
    test_batch_size = 100,
    alpha=1,
    beta=1,
    iid = True, 
    num_users=100)


In [3]:
def create_logistic_regression_model():
  """Logistic regression model.

  Returns:
    An uncompiled `tf.keras.Model`.
  """
  model = tf.keras.models.Sequential([
      tf.keras.layers.Dense(
        10, 
        input_shape = (60,), 
        ),
      ])
  return model


In [4]:

input_spec = train_data.create_tf_dataset_for_client(
  train_data.client_ids[0]).element_spec


model_builder = functools.partial(
    create_logistic_regression_model)

In [5]:
loss_builder = tf.keras.losses.SparseCategoricalCrossentropy
metrics_builder = lambda: [tf.keras.metrics.SparseCategoricalAccuracy()]

def tff_model_fn() -> tff.learning.Model:
    return tff.learning.from_keras_model(
        keras_model=model_builder(),
        input_spec=input_spec,
        loss=loss_builder(),
        metrics=metrics_builder())


In [6]:
evaluate_fn = training_utils.build_evaluate_fn(
      eval_dataset=test_data,
      model_builder=model_builder,
      loss_builder=loss_builder,
      metrics_builder=metrics_builder)
test_fn = training_utils.build_unweighted_test_fn(
      federated_eval_dataset=fed_test_data,
      model_builder=model_builder,
      loss_builder=loss_builder,
      metrics_builder=metrics_builder)


In [7]:
def _setup_outputs(root_output_dir,
                   experiment_name,
                   hparam_dict,
                   write_metrics_with_bz2=True,
                   rounds_per_profile=0):
  """Set up directories for experiment loops, write hyperparameters to disk."""

  if not experiment_name:
    raise ValueError('experiment_name must be specified.')

  create_if_not_exists(root_output_dir)

  checkpoint_dir = os.path.join(root_output_dir, 'checkpoints', experiment_name)
  create_if_not_exists(checkpoint_dir)
  checkpoint_mngr = checkpoint_manager.FileCheckpointManager(checkpoint_dir)

  results_dir = os.path.join(root_output_dir, 'results', experiment_name)
  create_if_not_exists(results_dir)
  metrics_mngr = metrics_manager.ScalarMetricsManager(
      results_dir, use_bz2=write_metrics_with_bz2)

  summary_logdir = os.path.join(root_output_dir, 'logdir', experiment_name)
  create_if_not_exists(summary_logdir)
  summary_writer = tf.summary.create_file_writer(summary_logdir)

  if hparam_dict:
    hparam_dict['metrics_file'] = metrics_mngr.metrics_filename
    hparams_file = os.path.join(results_dir, 'hparams.csv')
    utils_impl.atomic_write_to_csv(pd.Series(hparam_dict), hparams_file)

  logging.info('Writing...')
  logging.info('    checkpoints to: %s', checkpoint_dir)
  logging.info('    metrics csv to: %s', metrics_mngr.metrics_filename)
  logging.info('    summaries to: %s', summary_logdir)

  @contextlib.contextmanager
  def profiler(round_num):
    if (rounds_per_profile > 0 and round_num % rounds_per_profile == 0):
      with tf.profiler.experimental.Profile(summary_logdir):
        yield
    else:
      yield

  return checkpoint_mngr, metrics_mngr, summary_writer, profiler


def _write_metrics(metrics_mngr, summary_writer, metrics, round_num):
  """Atomic metrics writer which inlines logic from MetricsHook class."""
  if not isinstance(metrics, dict):
    raise TypeError('metrics should be type `dict`.')
  if not isinstance(round_num, int):
    raise TypeError('round_num should be type `int`.')

  flat_metrics = metrics_mngr.update_metrics(round_num, metrics)
  logging.info('Evaluation at round {:d}:\n{!s}'.format(
      round_num, pprint.pformat(flat_metrics)))

  # Also write metrics to a tf.summary logdir
  with summary_writer.as_default():
    for name, val in flat_metrics.items():
      tf.summary.scalar(name, val, step=round_num)
