https://gist.github.com/saranshmanu/3e2807409a2838a3e221186ef5528bc7#file-federated_learning-ipynb

In [None]:
# Colab-only: mount Google Drive at /content/drive so the dataset folder
# referenced by `path` further down is reachable from this runtime.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# PySyft is pinned to 0.2.8: the notebook uses its TorchHook, VirtualWorker and
# fl.utils.federated_avg APIs. Per the install log, this release pulls in torch~=1.4.0.
!pip install syft==0.2.8

Collecting syft==0.2.8
[?25l  Downloading https://files.pythonhosted.org/packages/69/eb/e7ad7f909c53477f9fbe732aaa5cf036122428268e5239b18344604bc8f8/syft-0.2.8-py3-none-any.whl (415kB)
[K     |████████████████████████████████| 419kB 8.8MB/s 
[?25hCollecting websockets~=8.1.0
[?25l  Downloading https://files.pythonhosted.org/packages/bb/d9/856af84843912e2853b1b6e898ac8b802989fcf9ecf8e8445a1da263bf3b/websockets-8.1-cp36-cp36m-manylinux2010_x86_64.whl (78kB)
[K     |████████████████████████████████| 81kB 7.4MB/s 
[?25hCollecting tblib~=1.6.0
  Downloading https://files.pythonhosted.org/packages/0d/de/dca3e651ca62e59c08d324f4a51467fa4b8cbeaafb883b5e83720b4d4a47/tblib-1.6.0-py2.py3-none-any.whl
Collecting torch~=1.4.0
[?25l  Downloading https://files.pythonhosted.org/packages/24/19/4804aea17cd136f1705a5e98a00618cb8f6ccc375ad8bfa437408e09d058/torch-1.4.0-cp36-cp36m-manylinux1_x86_64.whl (753.4MB)
[K     |████████████████████████████████| 753.4MB 22kB/s 
[?25hCollecting psutil==5.7.0

In [None]:
import syft as sy 

import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import seaborn as sns
%matplotlib inline

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader
from syft.frameworks.torch.fl import utils

import random
import time
import json
import copy
import os
import glob

In [None]:
# Hook PySyft into torch, then create one virtual worker per simulated smart meter.
hook = sy.TorchHook(torch)
smart_meter1, smart_meter2 = [
    sy.VirtualWorker(hook, id=worker_id) for worker_id in ("sm1", "sm2")
]
# Order matters: index i here is the index used for the per-worker models,
# optimizers and data buckets later in the notebook.
compute_nodes = [smart_meter1, smart_meter2]

In [None]:
class Parser:
    """Plain container for the run's hyper-parameters and settings."""

    def __init__(self):
        # Reproducibility and hardware switches.
        self.seed = 1
        self.no_cuda = False

        # Optimisation schedule.
        self.epochs = 200
        self.lr = 0.001
        self.log_interval = 10

        # Batch sizes. test_batch_size is "[A]" and must stay equal to "[B]",
        # the output width of Net.fc5, because Net emits one value per sample
        # of the batch it receives.
        self.batch_size = 8
        self.test_batch_size = 8
    
args = Parser()
torch.manual_seed(args.seed)  # seed torch's RNG before any model or loader is created

# CUDA is selected only when the config allows it AND a device is actually present.
use_cuda = not args.no_cuda and torch.cuda.is_available()
if use_cuda:
    device = torch.device("cuda")
    # DataLoader options intended for CUDA runs (not passed to the loaders in the visible cells).
    kwargs = {'num_workers': 1, 'pin_memory': True}
else:
    device = torch.device("cpu")
    kwargs = {}
device

device(type='cuda')

In [None]:
# Dataset folder inside the mounted Google Drive; only valid after drive.mount() has run.
path='/content/drive/My Drive/GP | Smart Meter | CIC/Datasets'

In [None]:
# Load the sample CSV from the dataset folder into a DataFrame.
data = pd.read_csv(f"{path}/sample_data_.csv")

In [None]:
# Remove the two columns that are not used downstream ('Unnamed: 0' is presumably
# the index column written out with the CSV -- confirm).
# errors='ignore' keeps this cell re-runnable: `data` is rebound to the result, so a
# second execution would otherwise raise KeyError because the columns are already gone.
data = data.drop(columns=['Unnamed: 0', 'day'], errors='ignore')
print(data)

# Column layout after the drop (see the printed frame):
#   column 0    -> LCLid
#   column 1    -> energy_sum, the regression target
#   columns 2+  -> the model's input features

           LCLid  energy_sum  temperatureMax  ...  month  season  year
0      MAC001497       4.364           17.77  ...      7       2  2012
1      MAC003176      10.340           20.82  ...     10       0  2013
2      MAC001679       4.236           27.12  ...      9       0  2012
3      MAC001544      13.980           15.87  ...     10       0  2013
4      MAC000655       6.663            8.27  ...      3       1  2013
...          ...         ...             ...  ...    ...     ...   ...
39995  MAC005024       4.794           20.33  ...      6       2  2013
39996  MAC002257       3.277           11.00  ...     11       0  2013
39997  MAC003571       4.108            7.30  ...      3       1  2013
39998  MAC004038       1.269           22.42  ...      8       2  2013
39999  MAC004163       7.357           24.62  ...      7       2  2013

[40000 rows x 34 columns]


In [None]:
# Model inputs: every column except 'LCLid' and the target, as a 2-D numpy array.
features = data.drop(columns=['LCLid', 'energy_sum']).to_numpy()
# Regression target; selecting with a list keeps it 2-D (one column) until it is ravelled later.
target = data[['energy_sum']].to_numpy()

In [None]:
# Sequential hold-out split: the first `split_frac` of the rows train, the rest test.
split_frac = 0.8
split_idx = int(split_frac * len(features))
train_rows, test_rows = slice(None, split_idx), slice(split_idx, None)

# Targets are flattened to 1-D so they line up with the flattened model output.
train_x, train_y = features[train_rows], target[train_rows].ravel()
test_x, test_y = features[test_rows], target[test_rows].ravel()

print("\t\t\t Feature shapes:")
print("Train set: \t\t{}\n".format(train_x.shape), "Test set: \t\t{}\n".format(test_x.shape))

			 Feature shapes:
Train set: 		(32000, 32)
 Test set: 		(8000, 32)



In [None]:
# Wrap the numpy splits as float32 tensor datasets and batch them.
# The datasets are named *_dataset rather than `train` / `test` because functions
# called train() and test() are defined later in this notebook: sharing the names
# means re-running this cell after those definitions would rebind them to datasets
# and break the training loop.
train_dataset = TensorDataset(torch.from_numpy(train_x).float(), torch.from_numpy(train_y).float())
test_dataset = TensorDataset(torch.from_numpy(test_x).float(), torch.from_numpy(test_y).float())
# NOTE(review): Net reshapes every batch with a fixed flattened width, so a dataset whose
# size is not a multiple of the batch size would yield a final batch it cannot reshape.
train_loader = DataLoader(train_dataset, batch_size=args.batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=args.test_batch_size, shuffle=True)

In [None]:
class Net(nn.Module):
    """Fully connected regressor that maps one whole mini-batch to one value per sample.

    ``forward`` flattens an incoming ``(batch_size, num_features)`` batch into a single
    vector of ``batch_size * num_features`` values and returns ``batch_size`` outputs in
    one row. The input width of ``fc1`` and the output width of ``fc5`` are therefore
    derived from the same two numbers instead of being kept in sync by hand.

    NOTE(review): because the batch is flattened, the samples of a batch are mixed in
    ``fc1`` and each prediction depends on the other samples in the batch -- confirm
    this is intended.

    Args:
        batch_size: samples per mini-batch; must equal the loaders' batch size
            (``args.batch_size`` / ``args.test_batch_size``). Defaults to 8.
        num_features: input columns per sample. Defaults to 32.
    """

    def __init__(self, batch_size=8, num_features=32):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(batch_size * num_features, 128, bias=True)
        self.fc2 = nn.Linear(128, 64, bias=True)
        self.fc3 = nn.Linear(64, 32, bias=True)
        self.fc4 = nn.Linear(32, 16, bias=True)
        self.fc5 = nn.Linear(16, batch_size, bias=True)

    def forward(self, x):
        """Return a ``(1, batch_size)`` tensor of predictions for one full batch ``x``."""
        # Flatten to exactly the width fc1 was built for, so the two cannot drift apart.
        x = x.view(-1, self.fc1.in_features)
        x = F.leaky_relu(self.fc1(x))
        x = F.relu_(self.fc2(x))
        x = F.relu6(self.fc3(x))
        x = F.rrelu(self.fc4(x))
        # Linear output head: the energy_sum targets in the data are well above 1, so a
        # tanh head (bounded to [-1, 1]) could never reach them under the MSE loss.
        return self.fc5(x)

In a real federated-learning deployment the data would already reside on each worker's device. Here we simulate that by sending the training batches to the workers, which then train on them locally (on-device).

In [None]:
# One bucket of (data, target) pairs per worker; bucket i belongs to compute_nodes[i].
# Sized from compute_nodes so that adding a worker does not cause an IndexError below.
remote_dataset = tuple([] for _ in compute_nodes)
train_distributed_dataset = []  # not used in the visible cells; kept in case later cells read it

# Deal the training batches out round-robin and send each one to its worker.
# The loop variables are deliberately not named `data` / `target`: those names hold
# the source DataFrame and target array, and overwriting them here would break a
# re-run of the earlier feature-extraction cell.
for batch_idx, (batch_data, batch_target) in enumerate(train_loader):
    node_idx = batch_idx % len(compute_nodes)
    node = compute_nodes[node_idx]
    remote_dataset[node_idx].append((batch_data.send(node), batch_target.send(node)))

In [None]:
# One independent model replica per smart meter, created in worker order.
smart_meter1_model, smart_meter2_model = Net(), Net()

# One plain-SGD optimizer per replica, all with the configured learning rate.
smart_meter1_optimizer, smart_meter2_optimizer = (
    optim.SGD(replica.parameters(), lr=args.lr)
    for replica in (smart_meter1_model, smart_meter2_model)
)

In [None]:
# Parallel lists: position i holds the model and optimizer for compute_nodes[i].
models, optimizers = (
    [smart_meter1_model, smart_meter2_model],
    [smart_meter1_optimizer, smart_meter2_optimizer],
)

In [None]:
# Fresh Net instance whose repr shows the layer layout. It is not trained directly in
# the visible cells; the epoch loop rebinds `model` to the federated average.
model = Net()
model

Net(
  (fc1): Linear(in_features=256, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=64, bias=True)
  (fc3): Linear(in_features=64, out_features=32, bias=True)
  (fc4): Linear(in_features=32, out_features=16, bias=True)
  (fc5): Linear(in_features=16, out_features=8, bias=True)
)

In [None]:
def update(data, target, model, optimizer):
    """Run one optimisation step for ``model`` on the worker that holds ``data``.

    Args:
        data: input batch; its ``location`` attribute identifies the worker to train on.
        target: expected outputs for the batch (presumably held by the same worker).
        model: model to train; it is sent to ``data.location`` before the step.
        optimizer: optimizer bound to ``model``'s parameters.

    Returns:
        The same ``model`` object. It is not retrieved here; ``train()`` calls
        ``.get()`` on it afterwards.
    """
    model.send(data.location)
    optimizer.zero_grad()
    # Flatten the prediction so its shape lines up with the 1-D target.
    batch_loss = F.mse_loss(model(data).view(-1), target)
    batch_loss.backward()
    optimizer.step()
    return model

def train():
    """Train every worker's model for one pass over its batches, then average them.

    For each batch index, each worker's model takes one ``update()`` step on that
    worker's batch and is then pulled back with ``.get()``. Once all batch indices
    have been processed, the local models are combined with
    ``utils.federated_avg``.

    The ``return`` used to sit inside the batch loop, so every call stopped after the
    first batch index and the remaining batches were never used; with an empty range
    the function also fell through and returned ``None``. Averaging now happens once,
    after the loop. A call therefore does far more work than before, so ``args.epochs``
    may need lowering.

    Returns:
        The model produced by ``utils.federated_avg`` for the two workers' models.
    """
    # NOTE(review): the last batch index is skipped; presumably this guards against
    # buckets of unequal length or a short final batch -- confirm before changing it.
    num_rounds = len(remote_dataset[0]) - 1
    for data_index in range(num_rounds):
        # One local step per worker on its own batch.
        for remote_index in range(len(compute_nodes)):
            data, target = remote_dataset[remote_index][data_index]
            models[remote_index] = update(data, target, models[remote_index], optimizers[remote_index])
        # Pull the updated models back; update() sends them out again on the next round.
        for model in models:
            model.get()
    return utils.federated_avg({
        "sm1": models[0],
        "sm2": models[1]
    })

In [None]:
def test(federated_model):
    """Print the model's squared error per test sample, averaged over ``test_loader``.

    Args:
        federated_model: model to evaluate; it is switched to eval mode and called
            directly on each batch from ``test_loader``.

    The squared errors are summed per batch and divided by the number of samples in
    ``test_loader.dataset``. Nothing is returned; the result is printed.
    """
    federated_model.eval()
    test_loss = 0
    # Evaluation needs no gradients, so skip building the autograd graph.
    with torch.no_grad():
        for data, target in test_loader:
            output = federated_model(data)
            test_loss += F.mse_loss(output.view(-1), target, reduction='sum').item()
            # The unused argmax "prediction" from the original was classification
            # boilerplate with no meaning for this regression task; it is removed.

    test_loss /= len(test_loader.dataset)
    print('Test set: Average loss: {:.4f}'.format(test_loss))

In [None]:
# Driver loop: one train() + test() pass per epoch.
# The printed "communication time" is the wall-clock time of the whole pass
# (training plus evaluation), not just the network transfers.
for epoch in range(args.epochs):
    epoch_started = time.time()
    print(f"Epoch Number {epoch + 1}")
    model = federated_model = train()
    test(federated_model)
    elapsed = round(time.time() - epoch_started, 2)
    print('Communication time over the network', elapsed, 's\n')

Epoch Number 1
Test set: Average loss: 175.5453
Communication time over the network 0.75 s

Epoch Number 2
Test set: Average loss: 177.2856
Communication time over the network 0.76 s

Epoch Number 3
Test set: Average loss: 175.5752
Communication time over the network 0.77 s

Epoch Number 4
Test set: Average loss: 176.4132
Communication time over the network 0.73 s

Epoch Number 5
Test set: Average loss: 170.8728
Communication time over the network 0.75 s

Epoch Number 6
Test set: Average loss: 166.7938
Communication time over the network 0.75 s

Epoch Number 7
Test set: Average loss: 163.5324
Communication time over the network 0.77 s

Epoch Number 8
Test set: Average loss: 171.4148
Communication time over the network 0.75 s

Epoch Number 9
Test set: Average loss: 169.4812
Communication time over the network 0.75 s

Epoch Number 10
Test set: Average loss: 177.3963
Communication time over the network 0.75 s

Epoch Number 11
Test set: Average loss: 167.7639
Communication time over the ne