In [7]:
%load_ext autoreload
%autoreload 2

import numpy as np
from admm.agents import EventGlobalConsensusTorch
from admm.models import FCNet
from admm.utils import add_params, average_params, sum_params
import torch
from tqdm import tqdm

%matplotlib inline

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


### Experimenting with SGD and adding parameters

In [2]:
def get_copy(params):
    copy = [torch.zeros(param.shape).copy_(param) for param in params]
    return copy

In [10]:
model1 = FCNet(2,2,1)

copy = get_copy(model1.parameters())

for param in copy:
    param.data = 2*torch.ones(param.shape)
for param in copy:
    print(param)

print('\n----- model 1 -----')
print('before training')
for param in model1.parameters():
    print(param)

# experiment minimizing l(x) = |x - 2|_2
opt = torch.optim.Adam(model1.parameters(), lr=0.01)
loss = torch.Tensor([np.inf])
prev_loss = torch.Tensor([0])
while np.abs(prev_loss.item() - loss.item()) >= 1e-4:
    prev_loss = loss
    opt.zero_grad()
    loss = 0
    for param, copied in zip(model1.parameters(), copy):
        loss += torch.norm(param-copied.data)
    loss.backward()
    opt.step()

print('\nafter training')
for param in model1.parameters():
    print(param)
print('\ncopied')
for param in copy:
    print(param)

tensor([[2., 2.],
        [2., 2.]], grad_fn=<CopyBackwards>)
tensor([2., 2.], grad_fn=<CopyBackwards>)
tensor([[2., 2.]], grad_fn=<CopyBackwards>)
tensor([2.], grad_fn=<CopyBackwards>)

----- model 1 -----
before training
Parameter containing:
tensor([[-0.0287, -0.2323],
        [ 0.2066, -0.4598]], requires_grad=True)
Parameter containing:
tensor([0.4197, 0.3228], requires_grad=True)
Parameter containing:
tensor([[-0.3302,  0.6244]], requires_grad=True)
Parameter containing:
tensor([0.1056], requires_grad=True)

after training
Parameter containing:
tensor([[1.9997, 2.0012],
        [1.9990, 1.9938]], requires_grad=True)
Parameter containing:
tensor([2.0000, 1.9997], requires_grad=True)
Parameter containing:
tensor([[2.0014, 1.9974]], requires_grad=True)
Parameter containing:
tensor([1.9997], requires_grad=True)

copied
tensor([[2., 2.],
        [2., 2.]], grad_fn=<CopyBackwards>)
tensor([2., 2.], grad_fn=<CopyBackwards>)
tensor([[2., 2.]], grad_fn=<CopyBackwards>)
tensor([2.], grad_f

### Event-Based ADMM with Torch

In [8]:
# Initial lambdas must sum to 0!
delta = 0.01
rho = 0.01
N = 4

agents = [
    EventGlobalConsensusTorch(
        N=N, 
        rho=rho, 
        model=FCNet(2,3,1),
        delta=delta
    ) 
    for _ in range(N)
]

for agent in agents:
    agent.primal_avg = average_params([agent.model.parameters() for agent in agents])
    for param in agent.primal_avg:
        print(param)

for agent in agents:
    for param in agent.primal_avg:
        print(param)

print('\nbefore training')
for agent in agents:
    for param in agent.model.parameters(): print(param)

tensor([[-0.1430, -0.1983],
        [-0.1719,  0.3287],
        [-0.0490, -0.1276]], grad_fn=<DivBackward0>)
tensor([-0.1743, -0.0248, -0.0670], grad_fn=<DivBackward0>)
tensor([[ 0.0837, -0.3339,  0.1893]], grad_fn=<DivBackward0>)
tensor([0.0739], grad_fn=<DivBackward0>)
tensor([[-0.1430, -0.1983],
        [-0.1719,  0.3287],
        [-0.0490, -0.1276]], grad_fn=<DivBackward0>)
tensor([-0.1743, -0.0248, -0.0670], grad_fn=<DivBackward0>)
tensor([[ 0.0837, -0.3339,  0.1893]], grad_fn=<DivBackward0>)
tensor([0.0739], grad_fn=<DivBackward0>)
tensor([[-0.1430, -0.1983],
        [-0.1719,  0.3287],
        [-0.0490, -0.1276]], grad_fn=<DivBackward0>)
tensor([-0.1743, -0.0248, -0.0670], grad_fn=<DivBackward0>)
tensor([[ 0.0837, -0.3339,  0.1893]], grad_fn=<DivBackward0>)
tensor([0.0739], grad_fn=<DivBackward0>)
tensor([[-0.1430, -0.1983],
        [-0.1719,  0.3287],
        [-0.0490, -0.1276]], grad_fn=<DivBackward0>)
tensor([-0.1743, -0.0248, -0.0670], grad_fn=<DivBackward0>)
tensor([[ 0.083

### Run simulation

In [9]:
comm = 0

t_max = 200
for t in tqdm(range(t_max)):
    
    # Primal Update
    for agent in agents:
        agent.primal_update()

    # Residual update in the case of communication
    C = []
    for agent in agents:
        if agent.broadcast: 
            comm += 1
            C.append(agent.residual)
    if C:
        # If communicaiton set isn't empty
        residuals = [x for x in sum_params(C)]
        for agent in agents:
            add_params(agent.primal_avg, residuals)

    # Dual update
    for agent in agents:
        agent.dual_update()

load = comm/(t_max*len(agents))
print(f'Communication load = {load}')

print('\nParams')
for agent in agents:
    for param in agent.model.parameters():
        print(param)

print('\nAverage')
for agent in agents:
    for param in agent.primal_avg:
        print(param)

100%|██████████| 200/200 [00:15<00:00, 13.01it/s]

Communication load = 0.04

Params
Parameter containing:
tensor([[0.9995, 1.0000],
        [1.0001, 1.0005],
        [1.0005, 1.0003]], requires_grad=True)
Parameter containing:
tensor([1.0004, 0.9993, 1.0005], requires_grad=True)
Parameter containing:
tensor([[1.0005, 1.0005, 0.9996]], requires_grad=True)
Parameter containing:
tensor([0.9992], requires_grad=True)
Parameter containing:
tensor([[0.9997, 0.9997],
        [1.0003, 0.9994],
        [1.0004, 1.0003]], requires_grad=True)
Parameter containing:
tensor([1.0004, 1.0007, 0.9996], requires_grad=True)
Parameter containing:
tensor([[0.9994, 1.0006, 1.0006]], requires_grad=True)
Parameter containing:
tensor([0.9995], requires_grad=True)
Parameter containing:
tensor([[0.9995, 1.0011],
        [0.9998, 1.0003],
        [1.0005, 1.0007]], requires_grad=True)
Parameter containing:
tensor([1.0008, 1.0007, 0.9992], requires_grad=True)
Parameter containing:
tensor([[1.0003, 0.9999, 1.0001]], requires_grad=True)
Parameter containing:
tensor(


