# Paper reproduction: Communication-Efficient Distributed SGD using Preamble-based Random Access by Choi Available at https://arxiv.org/abs/2105.09427

Straighfoward reproduction without classes

## Imports and Definitions

In [37]:
# Import needed libraries
import numpy as np # arrays
import matplotlib.pyplot as plt #ploting
import scipy.optimize as sciopt    # linprog
from scipy.optimize import nnls # NNLS SOLVER

In [38]:
# Definitions
K = 10 # Number of datasets distributed over K devices (i.e., 1 data set per device)
numBlocks = 10 # amount of blocks in a gradient vector
blockLength = 8 # block length
L = blockLength*numBlocks # Lenght of w
u =  0.01 # step size or learning rate
T = 1000 # Number of iterations

## Auxiliary functions

Construct the codebook as a scaled cross polytope:
$$ \cal{C}_{cp} = \{ \pm \textrm{R}e_l : \{1,...,\textrm{L}\}\} $$
with $ R = \sqrt{L} $, with $L$ being the block length.

In [39]:
def construct_codebook(blockLength):
    # Construct the codebook as a scaled cross polytope
    R = np.sqrt(blockLength)
    codebook = np.concatenate(((np.eye(blockLength)*R),(np.eye(blockLength)*(-R))),axis=0) + 0 # add zero to fix -0.0 issue
    return codebook

In [40]:
def resolve_quantized_gradient(codebook,norms,cw_indexs):
    # resolves the quantized subvectors into one gradient vector
    g_resolved = np.array([])
    
    for idx,norm in enumerate(norms):
        
        g_resolved = np.concatenate((g_resolved,norm*codebook[idx]),axis=0)
    
    return g_resolved

In [41]:
def quantize(codebook,numBlocks,blockLength,gradient):
        
        codebook = codebook
        
        norms = np.zeros(numBlocks)
        c_idx = np.zeros(numBlocks, int)
        
        for block in range(numBlocks):
            
            begin = block*blockLength
            end = block*blockLength+blockLength
            
            v_d = gradient[begin:end]
            v_til = (v_d)/np.linalg.norm(v_d)
            norms[block] = np.linalg.norm(v_d)

            # solving the linear system
            M = codebook.shape[0]
            A = np.concatenate((codebook.T,np.ones((1,M))),axis=0)
            b = np.concatenate((v_til,np.ones(1)),axis=0)
            
            
            a_vec = nnls(A, b)[0] # Using Non-negative Least Squares
            ## scipy.optimize.nnls solves the KKT (Karush-Kuhn-Tucker) conditions for the non-negative least squares problem.

            c_idx[block] = np.random.choice(M, p=a_vec) # use a_vec as probabilities for choosing the codeword
            # print(c_idx[block])
            # a_probs[block] = a_vec[c_idx[block]]
            
        return norms, c_idx # returns the norms and the indexs of the codewords

## Testing

In [43]:
true_g = np.zeros(L)
est_g = np.zeros(L)

for k in range(K):
    
    gradient = np.random.normal(size = L)
    codebook = construct_codebook(blockLength)
    norms, qidx = quantize(codebook,numBlocks,blockLength,gradient)
    grad_resolved = resolve_quantized_gradient(codebook,norms,qidx)
    true_g += gradient*(1/K)
    est_g += grad_resolved*(1/K)
    
mse =  np.sum(np.abs(true_g-est_g)**2)
    
print(mse)    

592.0456928981135
