In [15]:
import os
import sys
PYTHONPATH = os.path.abspath('../')
sys.path.append(PYTHONPATH)

import numpy as np
import tensorflow_federated as tff

from idputils.sample import get_weights
from helpers import create_budgets

In [31]:
# Count the clients of the federated EMNIST dataset: take element 0 of what
# load_data() returns (presumably the training split — confirm against the
# TFF documentation) and measure the length of its client_ids.
num_clients = len(tff.simulation.datasets.emnist.load_data()[0].client_ids)
# Bare last expression so the notebook displays the count.
num_clients

3383

In [32]:
def create_client_budgets(num_clients: int, possible_budgets: np.array, budget_ratios: np.array, seed: int| None = None):
    if seed:
        np.random.seed(seed)
    return np.random.choice(possible_budgets, size=num_clients, replace=True, p=budget_ratios)


In [39]:
def get_expected_clients(client_budgets: np.array, group_sample_rates: np.array, available_budgets: np.array):
    """Return, per budget group, the sample rate times the group's size.

    Args:
        client_budgets: One budget value per client.
        group_sample_rates: One sample rate per budget group, paired
            positionally with ``available_budgets``.
        available_budgets: The distinct budget values that define the groups.

    Returns:
        A list with one ``rate * group_size`` product per (rate, budget) pair;
        pairing stops at the shorter of the two inputs.
    """
    # Size of each group = number of clients whose budget equals that value.
    members_per_group = [
        sum(1 for value in client_budgets if value == budget)
        for budget in available_budgets
    ]

    expected = []
    for rate, size in zip(group_sample_rates, members_per_group):
        expected.append(rate * size)
    return expected

## EMNIST Setup

In [42]:
# Budget levels and the share of clients assigned to each level.
available_budgets = np.array([1.0, 2.0, 3.0])
budget_distribution = np.array([0.34, 0.43, 0.23])

# Same client-count expression as the earlier cell, evaluated again here.
num_clients = len(tff.simulation.datasets.emnist.load_data()[0].client_ids)
client_budgets = create_client_budgets(
    num_clients,
    possible_budgets=available_budgets,
    budget_ratios=budget_distribution,
    seed=42,
)

# Baseline settings reused by the variation cells below.
target_delta = 1e-5
clients_per_round = 50
rounds = 100
default_sample_rate = clients_per_round / num_clients

noise_multiplier, sampling_rates = get_weights(
    client_budgets,
    target_delta=target_delta,
    default_sample_rate=default_sample_rate,
    steps=rounds,
)

# Display: noise multiplier, summed per-group (rate * group size), and the rates.
(
    noise_multiplier,
    sum(get_expected_clients(client_budgets, sampling_rates, available_budgets)),
    sampling_rates,
)

(0.9207538146972657,
 50.07829634979559,
 [0.0032123941990686573, 0.016581548898403648, 0.028790681186796018])

In [45]:
# modify budgets
# Relabel every baseline budget with a ten times larger value.
mod_budgets = {1.0: 10.0, 2.0: 20.0, 3.0: 30.0}
modified_client_budgets = np.array([mod_budgets[budget] for budget in client_budgets])

noise_multiplier, sampling_rates = get_weights(
    modified_client_budgets,
    target_delta=target_delta,
    default_sample_rate=default_sample_rate,
    steps=rounds,
)

# Group sizes are counted on the unmodified budgets: mod_budgets is a
# one-to-one relabelling, so each group keeps the same members.
# NOTE(review): assumes sampling_rates is ordered like available_budgets — confirm.
(
    noise_multiplier,
    sum(get_expected_clients(client_budgets, sampling_rates, available_budgets)),
    sampling_rates,
)

[20. 30. 20. 20. 10. 10. 10. 30. 20. 20.]


(0.38702806091308595,
 50.084949736611094,
 [0.003042049052245129, 0.015270585260132297, 0.03151993539971974])

In [49]:
# modify distribution
# Same budget levels and seed as the baseline, but a different share per level.
modified_client_budgets = create_client_budgets(
    num_clients,
    possible_budgets=available_budgets,
    budget_ratios=np.array([0.54, 0.37, 0.09]),
    seed=42,
)

noise_multiplier, sampling_rates = get_weights(
    modified_client_budgets,
    target_delta=target_delta,
    default_sample_rate=default_sample_rate,
    steps=rounds,
)

(
    noise_multiplier,
    sum(get_expected_clients(modified_client_budgets, sampling_rates, available_budgets)),
    sampling_rates,
)

(0.9988749084472656,
 50.072573970618585,
 [0.00621956771063088, 0.022212166959965706, 0.03606461879507059])

In [50]:
# modify number of clients
modified_num_clients = 1_000

# Pass the same seed as the other budget draws in this notebook. Without it
# the assignment depends on whatever state NumPy's global RNG was left in, so
# re-running this cell on its own would produce different numbers.
modified_client_budgets = create_client_budgets(
    modified_num_clients,
    possible_budgets=available_budgets,
    budget_ratios=budget_distribution,
    seed=42,
)

# The default sample rate is recomputed for the smaller population so the
# per-round target stays at clients_per_round.
noise_multiplier, sampling_rates = get_weights(
    modified_client_budgets,
    target_delta=target_delta,
    default_sample_rate=clients_per_round / modified_num_clients,
    steps=rounds,
)

(
    noise_multiplier,
    sum(get_expected_clients(modified_client_budgets, sampling_rates, available_budgets)),
    sampling_rates,
)

(1.5072723388671876,
 49.9949233910807,
 [0.027532694671359104, 0.0533675350948839, 0.07819891564507606])

In [51]:
# modify rounds
# Baseline budgets and sample rate, but 1_000 steps instead of `rounds`.
noise_multiplier, sampling_rates = get_weights(
    client_budgets,
    target_delta=target_delta,
    default_sample_rate=default_sample_rate,
    steps=1_000,
)

(
    noise_multiplier,
    sum(get_expected_clients(client_budgets, sampling_rates, available_budgets)),
    sampling_rates,
)

(1.3266173095703127,
 49.94680993380449,
 [0.00824321621357987, 0.015718189086484986, 0.02272109322012547])

In [52]:
# modify sampling rate
# Baseline budgets and rounds, but a default rate of 200 / num_clients
# instead of clients_per_round / num_clients.
noise_multiplier, sampling_rates = get_weights(
    client_budgets,
    target_delta=target_delta,
    default_sample_rate=200 / num_clients,
    steps=rounds,
)

(
    noise_multiplier,
    sum(get_expected_clients(client_budgets, sampling_rates, available_budgets)),
    sampling_rates,
)

(1.6830447998046876,
 199.80638159984233,
 [0.03277757031864229, 0.06270887666535079, 0.09150944843476301])