# Federated Learning

In [2]:
import numpy as np
import torch as th
from torch.autograd import Variable

In [3]:
# two servers
data_size = 10
np.random.seed(0)
X_1 = [(np.random.rand(data_size)*10)] # one variable with 100 samples
X_2 = [(np.random.rand(data_size+20)*5)] # one variable with 120 samples
X_servers = [X_1, X_2]
sample_size_max = data_size+20

In [4]:
def sampleCov(x, y):
    """compute sample covariance"""
    return 1. / (x.size - 1) * np.dot(x,y) - x.size / (x.size - 1) * np.mean(x) * np.mean(y)

In [5]:
# build linear model with PyTorch
class LinearRegressionModel(th.nn.Module):
 
    def __init__(self):
        super(LinearRegressionModel, self).__init__()
        self.linear = th.nn.Linear(1, 1)  # One in and one out
 
    def forward(self, x):
        y_pred = self.linear(x)
        return y_pred

In [6]:
def sampleCovTorch(x, y, gradient_decent_steps=10):
    """compute sample covariance with PyTorch"""
    model = LinearRegressionModel()
    criterion = th.nn.MSELoss()
    optimizer = th.optim.SGD(model.parameters(), lr = 0.01)
    
    n_j = x.size
    x_tensor = Variable(th.from_numpy(x.reshape((n_j, 1)).astype(np.float32)))
    y_tensor = Variable(th.from_numpy(y.reshape((n_j, 1)).astype(np.float32)))
    for epoch in range(gradient_decent_steps):

        # Forward pass: Compute predicted y by passing
        # x to the model
        pred_y = model(x_tensor)

        # Compute and print loss
        loss = criterion(pred_y, y_tensor)

        # Zero gradients, perform a backward pass,
        # and update the weights.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        #print('epoch {}, loss {}'.format(epoch, loss.item()))
    return model.linear.weight.detach().numpy()[0][0]

In [7]:
def rmse(x_estimate, x_true, R):
    """
    Args:
        x_estimate: np.array; estimated variable
        x_true: np.array; true variable
        R: int; number of already performed iterations

    Returns: float; residual mean squared error

    """
    error_sum = 1./R * np.sum(x_estimate - x_true)
    nominator = np.linalg.norm(error_sum)**2
    denominator = np.linalg.norm(x_true) ** 2
    rmse = nominator / denominator
    return rmse

In [8]:
def covariance_based_attack_algorithm(X_servers, k, R=1, noise_levels=None, gradient_decent_steps=1):
    """
    Args:
        X_servers: list; list of servers, each server is a list of variables
        k: int; which variable to steal
        R: int; number of iterations
        noise_levels: np.array (optional); noise levels for each server (gamma, epsilon)
        gradient_decent_steps: int; number of gradient decent steps for PyTorch model

    Returns: np.array; errors for each server after every iteration

    """
    if R == 0:
        return np.array([np.Inf])
    n_servers = len(X_servers)
    
    # get maximal length to create numpy array
    n_j_max = 0
    for X_j in X_servers:
        if X_j[k].size > n_j_max:
            n_j_max = X_j[k].size
    
    X_stolen = np.zeros((R, n_servers, n_j_max))
    
    if noise_levels is not None:
        noise_gamma = noise_levels[:, 0]
        noise_epsilon = noise_levels[:, 1]
    
    error = np.zeros((n_servers, R))

    # on client side
    # mean, and covariance performed on server, just return solutions
    for r in range(R):
        for j, X_j in enumerate(X_servers):

            n_j = X_j[k].size # number of samples for this variable

            # create linearly independent vectors
            Y = np.eye(n_j)

            # initialise V, m 
            V = np.zeros(n_j)
            m = np.zeros(n_j)

            # compute sample means and covariances
            for i in range(n_j):
                m[i] = np.mean(Y[i, :]) # on client
                y_var = np.var(Y[i, :]) # on client
                V[i] = sampleCovTorch(X_j[k], Y[i, :], gradient_decent_steps) * y_var # function on server
                #V[i] = sampleCov(X_j[k], Y[i, :]) # function on server

            X_jk_mean = np.mean(X_j[k]) # function on server

            # add noise to data
            if noise_levels is not None:
                X_jk_mean += np.random.normal(0, scale=np.sqrt(noise_gamma[j])) # one dimensional random variable
            if noise_levels is not None:
                V += np.random.multivariate_normal(np.zeros(V.size), cov=np.eye(V.size)*np.sqrt(noise_epsilon[j])) # random vector

            # client side
            # compute inverse of Y
            Y_inv = np.linalg.inv(Y.T)
            # recover data on client side
            X_stolen[r, j, :n_j] = (n_j - 1) * np.matmul(Y_inv, V) + n_j * X_jk_mean * np.matmul(Y_inv, m)

            # average out the noise (if any)
            X_stolen_current = np.sum(X_stolen, axis=0) / (r+1)

            # compute error
            error[j, r] = rmse(X_stolen_current[j, :n_j], X_j[k], r + 1)
    return error
            

In [9]:
k=0
error = covariance_based_attack_algorithm(X_servers, k=k, R=100, noise_levels=np.array([[0.3, 0.3],[0.3, 0.3]]),
                                          gradient_decent_steps=1)

tol=1e-3
for j, X_j in enumerate(X_servers):
    if error[j, -1] == 0:
        print('exact success on server', j+1)
    elif error[j, -1] < tol:
        print('success on server', j+1)
        print('error below tolerance:', error[j, -1].round(5))
    else:
        print('failed on server', j+1)
        print('error:', error[j, -1].round(5))

success on server 1
error below tolerance: 0.0
success on server 2
error below tolerance: 0.00016
