# FEDERATED LEARNING OF A RECURRENT NEURAL NETWORK ON RASPBERRY PIS

https://blog.openmined.org/federated-learning-of-a-rnn-on-raspberry-pis/

In [1]:
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [2]:
!pip install syft==0.2.8

Collecting syft==0.2.8
[?25l  Downloading https://files.pythonhosted.org/packages/69/eb/e7ad7f909c53477f9fbe732aaa5cf036122428268e5239b18344604bc8f8/syft-0.2.8-py3-none-any.whl (415kB)
[K     |████████████████████████████████| 419kB 5.6MB/s 
[?25hCollecting lz4~=3.0.2
[?25l  Downloading https://files.pythonhosted.org/packages/e7/81/011fef8766fb0ef681037ad6fee96168ee03a864464986cbaa23e5357704/lz4-3.0.2-cp36-cp36m-manylinux2010_x86_64.whl (1.8MB)
[K     |████████████████████████████████| 1.8MB 17.9MB/s 
Collecting numpy~=1.18.1
[?25l  Downloading https://files.pythonhosted.org/packages/b3/a9/b1bc4c935ed063766bce7d3e8c7b20bd52e515ff1c732b02caacf7918e5a/numpy-1.18.5-cp36-cp36m-manylinux1_x86_64.whl (20.1MB)
[K     |████████████████████████████████| 20.1MB 1.3MB/s 
[?25hCollecting Pillow~=6.2.2
[?25l  Downloading https://files.pythonhosted.org/packages/8a/fd/bbbc569f98f47813c50a116b539d97b3b17a86ac7a309f83b2022d26caf2/Pillow-6.2.2-cp36-cp36m-manylinux1_x86_64.whl (2.1MB)
[K     |█

In [3]:
import torch
import syft as sy


import torch.nn as nn 
import matplotlib.pyplot as plt 



Hook pytorch using sy.TorchHook, which makes an extended functions on Pytorch tensors available.

In [4]:
hook = sy.TorchHook(torch)

##Creating Workers <br>
2 devices first device is smart_meter_1 and the second is smart_meter_2 <br>
**You can either use remote workers or virtual workers**

In [5]:
smart_meter1 = sy.VirtualWorker(hook, id="sm1")
smart_meter2 = sy.VirtualWorker(hook, id="sm2")


In [6]:
# #Now let's define our workers. You can either use remote workers or virtual workers

# hook = sy.TorchHook(torch)  # <-- NEW: hook PyTorch ie add extra functionalities to support Federated Learning

# #alice = sy.VirtualWorker(hook, id="alice")  
# #bob = sy.VirtualWorker(hook, id="bob")  
# #charlie = sy.VirtualWorker(hook, id="charlie") 

# #workers_virtual = [alice, bob]

# #If you have your workers operating remotely, like on Raspberry PIs
# my_ip="YOUR_IP"
# kwargs_websocket_alice = {"host": my_ip, "hook": hook}
# alice = WebsocketClientWorker(id="alice", port=8777, **kwargs_websocket_alice)
# kwargs_websocket_bob = {"host": my_ip, "hook": hook}
# bob = WebsocketClientWorker(id="bob", port=8778, **kwargs_websocket_bob)
# workers_virtual = [alice, bob]

# Importing Libraries

In [7]:
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import seaborn as sns
%matplotlib inline

import torch
import torch.nn.functional as f
import torch.optim as optim
from torch import nn,optim


import random
import time
import json
import copy
import os
import glob



Check if CUDA is avalible

In [8]:
train_on_gpu = torch.cuda.is_available()  #Returns a bool indicating if CUDA is currently available.

if not train_on_gpu:
  device = torch.device('cuda')
  print('CUDA is not available, training on CPU ..')

else:
  device = torch.device('cpu')
  print('CUDA is avalible, Training on GPU ..')

device

CUDA is not available, training on CPU ..


device(type='cuda')

# Loading For Data

**Load data after *data-Preprocessing* , only 40k datapoint** <br>
Link: https://colab.research.google.com/drive/1JSVlwt4xZG4_MizyzHb3V_WMViWTTwVs?usp=sharing

In [22]:
path='/content/drive/My Drive/GP | Smart Meter | CIC/Datasets'

In [23]:
data = pd.read_csv(path + "/sample_data_.csv")


In [24]:
data = data.drop(['Unnamed: 0','day'], axis = 1)

In [25]:
print(data[:])

           LCLid  energy_sum  temperatureMax  ...  month  season  year
0      MAC001497       4.364           17.77  ...      7       2  2012
1      MAC003176      10.340           20.82  ...     10       0  2013
2      MAC001679       4.236           27.12  ...      9       0  2012
3      MAC001544      13.980           15.87  ...     10       0  2013
4      MAC000655       6.663            8.27  ...      3       1  2013
...          ...         ...             ...  ...    ...     ...   ...
39995  MAC005024       4.794           20.33  ...      6       2  2013
39996  MAC002257       3.277           11.00  ...     11       0  2013
39997  MAC003571       4.108            7.30  ...      3       1  2013
39998  MAC004038       1.269           22.42  ...      8       2  2013
39999  MAC004163       7.357           24.62  ...      7       2  2013

[40000 rows x 34 columns]


In [26]:
# data --> [2:]
# target --> [only second column]

##Create Input and Output data

In [27]:

features = data.drop(['LCLid','energy_sum'], axis = 1)
features =  features.to_numpy()
target = data[['energy_sum']]
target =  target.to_numpy()


# Splitting data
training set 80% <br>
test set 10% <br>
validation set 10%

In [28]:
split_frac = 0.8
split_rem = 0.5
split_idx= int (len(features)*split_frac)
train_x,remaining_x = features[:split_idx], features[split_idx:]
train_y,remaining_y = target[:split_idx], target[split_idx:]
test_idx = int (len(remaining_x)*split_rem)
val_x,test_x = remaining_x[:test_idx],remaining_x[test_idx:]
val_y,test_y = remaining_y[:test_idx],remaining_y[test_idx:]


print("\t\t\t Feature shapes:")
print("Train set: \t\t{}\n".format(train_x.shape),
      "Val set: \t\t{}\n".format(val_x.shape),
      "Test set: \t\t{}\n".format(test_x.shape),
      )

			 Feature shapes:
Train set: 		(32000, 32)
 Val set: 		(4000, 32)
 Test set: 		(4000, 32)



# Distributing the data
Our virtual workers have been created but they dont have any data on them. After loading our data, we distribute the data between Smart Meter 1 and Smart meter 2.
We do this by splitting the training, validation and testing sets into 2 a sending them to the worker using **sy.BaseDataset** class

Note: We splitted the data randomly to 2 devices {only for testing} <br>
**but then i will split it by LCLid**

In [29]:
train_idx = int (train_x.shape[0]/2)
valid_idx = int (val_x.shape[0]/2)
test_idx = int (test_x.shape[0]/2)

#sending data to virual workers

Smart_meter1_train_dataset = sy.BaseDataset(torch.from_numpy(train_x[:train_idx]), 
                                  torch.from_numpy(train_y[:train_idx])).send(smart_meter1)
Smart_meter2_train_dataset = sy.BaseDataset(torch.from_numpy(train_x[train_idx:]), 
                                    torch.from_numpy(train_y[train_idx:])).send(smart_meter2)


Smart_meter1_val_dataset = sy.BaseDataset(torch.from_numpy(val_x[:valid_idx]), 
                                  torch.from_numpy(val_y[:valid_idx])).send(smart_meter1)
Smart_meter2_val_dataset = sy.BaseDataset(torch.from_numpy(val_x[valid_idx:]), 
                                  torch.from_numpy(val_y[valid_idx:])).send(smart_meter2)


Smart_meter1_test_dataset = sy.BaseDataset(torch.from_numpy(test_x[:test_idx]), 
                                  torch.from_numpy(test_y[:test_idx])).send(smart_meter1)
Smart_meter2_test_dataset = sy.BaseDataset(torch.from_numpy(test_x[test_idx:]), 
                                  torch.from_numpy(test_y[test_idx:])).send(smart_meter2)



# Creating Federated DataLoaders
we load datasets using dataloaders. **In FL we load datasets from diffrent devices in Federated manner using Federated DataLoaders**

In [30]:
# Creating federated datasets, an extension of Pytorch TensorDataset class
federated_train_dataset = sy.FederatedDataset([Smart_meter1_train_dataset, Smart_meter2_train_dataset])
federated_valid_dataset = sy.FederatedDataset([Smart_meter1_val_dataset, Smart_meter2_val_dataset])
federated_test_dataset = sy.FederatedDataset([Smart_meter1_test_dataset, Smart_meter2_test_dataset])

BATCH_SIZE = 50

# Creating federated dataloaders, an extension of Pytorch DataLoader class
federated_train_loader = sy.FederatedDataLoader(federated_train_dataset, 
                                                shuffle=True, batch_size=BATCH_SIZE)

federated_valid_loader = sy.FederatedDataLoader(federated_valid_dataset, 
                                                shuffle=True, batch_size=BATCH_SIZE)

federated_test_loader = sy.FederatedDataLoader(federated_test_dataset, 
                                               shuffle=False, batch_size=BATCH_SIZE)

# Build Neural Network

#Send our Model to both devices

# Do Normal Training & get the smarter model back from devices

<torch._C.Generator at 0x7f3f51ec50f0>