In [1]:
import numpy as np
import pandas as pd
from scipy import stats

In the paper, we assume each player draws their true parameters i.i.d. (independent and identically distributed) $(\theta_j, \epsilon^2_j) \sim \Theta$, for some joint distribution $\Theta$. $\epsilon^2_j$ represents the amount of noise present in the sampling process for a given player. For mean estimation, $\theta_j$ is a scalar representing the true mean of player $j$, which is what they wish to estimate. 

The true expected MSE each player experiences depends only on the number of samples $n_i$ held by each player, on $\sigma^2 = Var(\theta)$, and on $\mu_e = \mathbb{E}[\epsilon^2]$. 

For this code, we need a $\Theta$ to draw from. For simplicity, we assume $\theta, \epsilon^2$ are independent of each other. In the paper, we use $\sigma^2 =1$ and $\mu_e = 10$ as common parameters, which are the values below. Note that the results do not depend on the distributions themselves or other values besides $\sigma^2, \mu_e$.

In [2]:
# Distribution of the true means theta: standard normal, so sigma^2 = Var(theta) = 1.
means_dist = stats.norm(loc=0, scale=1)
# Distribution of the noise levels epsilon^2: Beta(8, 2) has mean 0.8, and
# stretching it by 12.5 gives mu_e = E[epsilon^2] = 10.
variance_dist = stats.beta(a=8, b=2, scale=12.5)

# Sanity check: show sigma^2 followed by mu_e.
for summary_value in (means_dist.var(), variance_dist.mean()):
    print(summary_value)

1.0
10.0


calculate_means() calculates the exact expected MSE a set of players experiences. For reference, the equations it is using are provided below: 

Local learning: $$\frac{\mu_e}{n}$$

Uniform federation: 
$$\frac{\mu_e}{N} + \frac{ \sum_{i\ne j}n_i^2  +(N - n_j)^2}{N^2}\sigma^2$$
where $N = \sum_{i=1}^{M}n_i$ and $M$ is the number of players.  
Coarse-grained federation: 
$$\mu_e \left(\frac{w^2}{n_j} + \frac{1-w^2}{N}\right) + \frac{ \sum_{i\ne j}n_i^2  +(N - n_j)^2}{N^2} \cdot (1-w)^2\sigma^2$$
where $w$ is a parameter. 
Fine-grained federation: 
$$\mu_e\sum_{i=1}^{M}v_{ji}^2\cdot \frac{1}{n_i} + \left(\sum_{i\ne j}v_{ji}^2 + \left(\sum_{i\ne j}v_{ji}\right)^2\right)\cdot \sigma^2$$

In [3]:
def calculate_means(var = means_dist.var(), mue = variance_dist.mean(),
                    n_list = [10, 20, 30], w_best = False, w_list = [0.2, 0.4, 0.6], v_best = False,
                    v_mat = [[0.1, 0.6, 0.3], [0.2, 0.8, 0.0], [0.3, 0.5, 0.2]]):
    '''
    Calculate the exact expected MSE of mean estimation for every player.

    Args:
        var: variance of the distribution the true means are drawn from (sigma^2).
        mue: mean of the distribution the noise parameters are drawn from (mu_e).
        n_list: a list of length M (number of players) with the number of samples each has.
        w_best: boolean, if true, the coarse-grained error is computed for the optimal w of each
                player and w_list is ignored.
        w_list: if w_best is false, a list of w-weights (in [0, 1]) for coarse-grained federation,
                one per player.
        v_best: boolean, if true, the fine-grained error is computed for the optimal v weights of
                each player and v_mat is ignored.
        v_mat: if v_best is false, a matrix (list of lists) of weights for fine-grained federation:
               row j holds the weights player j puts on every player, and each row sums up to 1.
    Returns:
        dataframe with one column per player and the rows 'local', 'uniform', 'coarse' and 'fine',
        holding the expected error of that player under each strategy.
    '''
    n_players = len(n_list)

    # dataframe for storing error
    player_error = pd.DataFrame(data = 0.0, index = ['local', 'uniform', 'coarse', 'fine'],
                                columns = range(n_players))
    N = sum(n_list)

    # quantities shared by every player, computed once outside the loop
    total_squares = sum(nval**2 for nval in n_list)
    inv_n = np.array([1/nval for nval in n_list])

    # for each player, calculate their true error
    for j, n in enumerate(n_list):

        # Results are written with a single .loc[row, column] call: chained indexing such as
        # .loc[row][column] = value assigns to a temporary object under pandas Copy-on-Write
        # and would leave the table at its initial 0.0.

        # local
        local_err = mue/n
        player_error.loc['local', j] = local_err

        # sum_{i != j} n_i^2 + (N - n_j)^2
        sumsquares = total_squares - n**2 + (N-n)**2

        # uniform
        player_error.loc['uniform', j] = mue/N + sumsquares * var/(N**2)

        # coarse-grained
        if w_best:
            if n_players == 1: # division by 0 issue if length 1 list - equivalent to local
                w_err = local_err
            else:
                w_err = (mue * (N-n) + var * sumsquares)/((N-n)*N + n*var*sumsquares/mue)
        else:
            w = w_list[j]
            w_err = mue * (w**2/n + (1-w**2)/N) + ((1-w)**2/(N**2)) * sumsquares * var
        player_error.loc['coarse', j] = w_err

        # fine-grained
        if v_best:
            # calculate optimal v weights
            V_list = [var + mue/ni for ni in n_list]
            sum_inv = sum(1/Vi for Vi in V_list) - 1/V_list[j]
            normaliser = 1 + V_list[j] * sum_inv
            weights = [(V_list[j]-var)/(Vk * normaliser) for Vk in V_list]
            # the weight a player puts on its own estimate has a separate closed form
            weights[j] = (1 + var * sum_inv)/normaliser
            v_vec = np.array(weights, dtype = float)
        else:
            v_vec = np.array(v_mat[j], dtype = float)

        # noise term: mu_e * sum_i v_ji^2 / n_i
        # bias term:  sigma^2 * (sum_{i != j} v_ji^2 + (1 - v_jj)^2), using that the row sums to 1
        v_sq = v_vec**2
        player_error.loc['fine', j] = (mue * v_sq.dot(inv_n)
                                       + var * (v_sq.sum() - v_sq[j] + (1 - v_vec[j])**2))

    return player_error

This returns a dataframe: each column represents a different player and each row represents the error that player experiences under different local or federation strategies. 

In [4]:
# Exact expected error per player using the default n_list, w_list and v_mat.
calculate_means()

Unnamed: 0,0,1,2
local,1.0,0.5,0.333333
uniform,1.222222,0.888889,0.555556
coarse,0.875556,0.48,0.288889
fine,1.48,0.44,1.208333


Note that calculate_means(), by default, uses w and v parameters that may not be optimal. It is possible to use calculate_means() with optimal parameters. In this case, coarse-grained federation will always be individually stable (no player wishes to unilaterally deviate) and fine-grained federation will always be core stable (no group of players wishes to deviate). Note that because this table only contains local and grand coalition error rates, it is not possible to verify the core stability property from the results given, but other notebooks in this repo will examine this further.

In [5]:
# Same players, but with the optimal w (coarse-grained) and v (fine-grained) weights.
calculate_means(w_best = True, v_best = True)

Unnamed: 0,0,1,2
local,1.0,0.5,0.333333
uniform,1.222222,0.888889,0.555556
coarse,0.632353,0.394737,0.283333
fine,0.630435,0.391304,0.282609


It is also possible to set the $w$ and $v$ parameters so as to match either local learning or uniform federation. 

In [6]:
# setting values to mimic local learning: w = 1 and v = identity matrix, so every player
# puts all of its weight on its own estimate
calculate_means(w_list = [1.0, 1.0, 1.0], 
                v_mat = [[1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 1.0]])

Unnamed: 0,0,1,2
local,1.0,0.5,0.333333
uniform,1.222222,0.888889,0.555556
coarse,1.0,0.5,0.333333
fine,1.0,0.5,0.333333


In [7]:
# setting values to mimic uniform federation: w = 0 and v_ji = n_i / N
# (10/60, 20/60, 30/60 for the default n_list) in every row
calculate_means(w_list = [0.0, 0.0, 0.0], 
                v_mat = [[1.0/6, 2.0/6, 0.5], [1.0/6, 2.0/6, 0.5], [1.0/6, 2.0/6, 0.5]])

Unnamed: 0,0,1,2
local,1.0,0.5,0.333333
uniform,1.222222,0.888889,0.555556
coarse,1.222222,0.888889,0.555556
fine,1.222222,0.888889,0.555556


simulate_means() simulates the process described above: players draw true mean and variance parameters $\theta, \epsilon^2$, then draw data points $Y \sim D_j(\theta, \epsilon^2)$ and calculate empirical means using local learning or some variant of federation. Results should match calculate_means() closely, if the same parameters are used and if sufficient simulations are used. 

In [8]:
def simulate_means(mean_dist = means_dist, err_dist = variance_dist, draws_dist = stats.norm,
                   n_list = [10, 20, 30], w_list = [0.2, 0.4, 0.6],
                   v_mat = [[0.1, 0.6, 0.3], [0.2, 0.8, 0.0], [0.3, 0.5, 0.2]],
                   world_nrun = 100, sample_nrun = 1, random_state = None):
    '''
    Simulate mean estimation and return the average squared error of every player.

    Args:
        mean_dist: distribution to draw true means from (mean = theta)
        err_dist: distribution to draw true error parameters from (err = epsilon^2)
        draws_dist: distribution each player draws from; it is called as
                    draws_dist(theta, sqrt(epsilon^2)), i.e. with a location and a scale.
        n_list: a list of length M (number of players) with the number of samples each has.
        w_list: a list of w-weights each player uses for coarse-grained federation.
        v_mat: a matrix (list of lists) of weights each player uses in fine-grained federation: the
               rows sum up to 1.
        world_nrun: number of times where means and errors are re-drawn
        sample_nrun: for each worldrun, number of times samples are re-drawn
        random_state: optional seed (int) or numpy Generator. When given, every draw uses one
                      shared generator so the simulation is reproducible. When None (default),
                      rvs() is called without a random_state argument, exactly as before.
    Returns:
        dataframe with average error for each player, for local, uniform, coarse-grained, and
        fine-grained federation.
    Raises:
        ValueError: if w_list does not hold M weights or v_mat is not an M x M matrix.
    '''
    # The number of players is defined by n_list; the weights must agree with it.
    M = len(n_list)
    if len(w_list) != M:
        raise ValueError('w_list must contain one weight per player in n_list')
    coarse_weights = np.asarray(w_list, dtype = float)
    fine_weights = np.asarray(v_mat, dtype = float)
    if fine_weights.shape != (M, M):
        raise ValueError('v_mat must be an M x M matrix, where M = len(n_list)')
    sample_counts = np.asarray(n_list, dtype = float)

    # A seed is turned into ONE generator up front; handing the raw seed to every rvs() call
    # would restart the stream each time and repeat the same draws.
    rvs_kwargs = {}
    if random_state is not None:
        rvs_kwargs['random_state'] = np.random.default_rng(random_state)

    # float accumulator of summed squared errors, one row per strategy
    strategies = ['local', 'uniform', 'coarse', 'fine']
    squared_error = np.zeros((len(strategies), M))

    for wn in range(world_nrun):
        # draw means and errors
        means = mean_dist.rvs(M, **rvs_kwargs)
        errors = err_dist.rvs(M, **rvs_kwargs)

        for sn in range(sample_nrun):

            # draw samples for each player and keep their empirical (local) means
            local_est = np.array([
                draws_dist(means[i], np.sqrt(errors[i])).rvs(n_list[i], **rvs_kwargs).mean()
                for i in range(M)])

            # calculate mean estimates
            uniform_est = local_est.dot(sample_counts)/sample_counts.sum()
            coarse_est = coarse_weights * local_est + (1 - coarse_weights) * uniform_est
            fine_est = fine_weights.dot(local_est)

            # accumulate squared errors against the true means
            squared_error[0] += (local_est - means)**2
            squared_error[1] += (uniform_est - means)**2
            squared_error[2] += (coarse_est - means)**2
            squared_error[3] += (fine_est - means)**2

    player_error = pd.DataFrame(squared_error, index = strategies, columns = range(M))
    player_error = player_error/(world_nrun * sample_nrun)

    return player_error

Simulating mean estimation and comparing the simulated errors to the exact expected errors from calculate_means(). 

In [9]:
# Average the squared errors over 1000 independent draws of the true means and noise levels.
simulate_means(world_nrun = 1000)

Unnamed: 0,0,1,2
local,1.001074,0.512947,0.344872
uniform,1.182144,0.909853,0.566677
coarse,0.835722,0.49587,0.295387
fine,1.435403,0.426496,1.239961


In [10]:
# Exact expected errors for the same default parameters, for comparison with the simulation.
calculate_means()

Unnamed: 0,0,1,2
local,1.0,0.5,0.333333
uniform,1.222222,0.888889,0.555556
coarse,0.875556,0.48,0.288889
fine,1.48,0.44,1.208333
