<a href="https://colab.research.google.com/github/agatagruza/private-ai/blob/master/SPAIC_Project11.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Project 11: Toy Federated Learning
Train a toy model the centralized way. To do that, you need:
*   a toy dataset
*   a model
*   some basic training logic for training a model to fit the data

In [0]:
pip install syft

In [2]:
import torch as th
import numpy as np
import syft as sy
from torch import nn, optim #optim=optimizer

W0725 03:45:41.055709 140434793203584 secure_random.py:26] Falling back to insecure randomness since the required custom op could not be found for the installed version of TensorFlow. Fix this by compiling custom ops. Missing file was '/usr/local/lib/python3.6/dist-packages/tf_encrypted/operations/secure_random/secure_random_module_tf_1.14.0.so'
W0725 03:45:41.079141 140434793203584 deprecation_wrapper.py:119] From /usr/local/lib/python3.6/dist-packages/tf_encrypted/session.py:26: The name tf.Session is deprecated. Please use tf.compat.v1.Session instead.



In [0]:
# Create the PySyft hook for torch. The same hook object is passed to every
# VirtualWorker created later in this notebook.
# NOTE(review): presumably this is also what gives tensors the .send()/.get()
# methods used below — confirm against the PySyft docs.
hook = sy.TorchHook(th)

In [0]:
# Toy dataset: four samples, each with two inputs and one output.
# All of the data is with us, on the CENTRALIZED SERVER in the cloud,
# so we can access it directly.
data = th.tensor(
    [[1.0, 1.0], [0.0, 1.0], [1.0, 0.0], [0.0, 0.0]],
    requires_grad=True,
)
target = th.tensor(
    [[1.0], [1.0], [0.0], [0.0]],
    requires_grad=True,
)

In [12]:
# Sanity check: show the element type of the inputs and of the targets.
print(data.dtype)
print(target.dtype)

torch.float32
torch.float32


In [0]:
# Toy model: one linear layer mapping the 2 inputs to 1 output.
model = nn.Linear(in_features=2, out_features=1)

In [0]:
# Optimizer: Stochastic Gradient Descent (SGD) over the model's parameters,
# with a learning rate of 0.1.
optimizer = optim.SGD(model.parameters(), lr=0.1)

In [0]:
def train(epochs):
  """Fit the global `model` to the global toy `data` / `target` tensors.

  Runs `epochs` optimisation steps with the global `optimizer`. Each step
  uses the whole dataset, prints its loss, and at the end the average of
  all step losses is printed.

  Args:
    epochs: number of optimisation steps. If no step is run (epochs <= 0),
      a notice is printed instead of an average.

  Returns:
    None. Results are reported through print() only.
  """
  losses = []
  for _ in range(epochs):
    optimizer.zero_grad()  # reset gradients left over from the previous step
    pred = model(data)
    # loss function = sum of squared errors over all samples
    loss = ((pred - target)**2).sum()
    loss.backward()  # back-propagation
    optimizer.step()
    print(loss.data)
    losses.append(loss.data)
  if not losses:
    # Without this guard the average below divides by zero.
    print("Average loss : n/a (no epochs were run)")
    return
  print(f"Average loss : {sum(losses)/len(losses)}")

In [16]:
train(20)

tensor(1.3647)
tensor(0.4921)
tensor(0.3347)
tensor(0.2528)
tensor(0.1931)
tensor(0.1476)
tensor(0.1129)
tensor(0.0863)
tensor(0.0660)
tensor(0.0505)
tensor(0.0386)
tensor(0.0295)
tensor(0.0226)
tensor(0.0173)
tensor(0.0132)
tensor(0.0101)
tensor(0.0077)
tensor(0.0059)
tensor(0.0045)
tensor(0.0035)
Average loss : 0.16267895698547363


## How do we make our model a Federated Model?
**A VirtualWorker is a Python object.<br>
Here the linear model has two tensors: a weight and a bias.**

In [0]:
# We want to move our data to individual machines somewhere else that
# are owned by other people. We can then try to train a model that does
# its training on those remote machines.
data = th.tensor(
    [[1.0, 1.0], [0.0, 1.0], [1.0, 0.0], [0.0, 0.0]],
    requires_grad=True,
)
target = th.tensor(
    [[1.0], [1.0], [0.0], [0.0]],
    requires_grad=True,
)

In [0]:
# Two workers standing in for machines owned by other people.
bob = sy.VirtualWorker(hook, id = "bob")
alice = sy.VirtualWorker(hook, id = "alice")

# Distribute the data remotely: the first two rows are sent to bob,
# the last two rows are sent to alice.
data_bob = data[0:2].send(bob) # first two rows of data are sent to bob
target_bob = target[0:2].send(bob)
data_alice = data[2:4].send(alice) # last two rows of data are sent to alice
target_alice = target[2:4].send(alice)
# One (inputs, labels) tuple per worker; train_model() iterates over this list.
datasets = [(data_bob, target_bob), (data_alice, target_alice)] #tuples

In [0]:
# Inspect the model's tensors (weight and bias).
# `parameters` is a method: it must be called, otherwise list() raises
# "TypeError: 'method' object is not iterable".
list(model.parameters())

In [25]:
# Each pointer has a reference to the virtual worker that holds its data,
# i.e. the worker that exists on the other machine.
# (`_data` is not defined anywhere in this notebook; `data_bob` is the pointer
# created above, and its location is bob.)
data_bob.location

<VirtualWorker id:bob #objects:2>

In [0]:
def train_model(epochs=20):
  """Train a fresh linear model with toy federated learning.

  For every epoch, and for every (inputs, labels) pair in the global
  `datasets` list, the model is sent to the worker that holds that data,
  takes one SGD step there, and is brought back. The loss of every step
  is printed.

  Args:
    epochs: number of passes over `datasets` (default 20).

  Returns:
    The trained nn.Linear model. Previously the model was a local variable
    that was dropped when the function returned, so the result of the
    training could not be used.
  """
  model = nn.Linear(2,1)
  optimizer = optim.SGD(params=model.parameters(), lr=0.1)

  for _ in range(epochs):
    for inputs, labels in datasets:

      # FEDERATED PART: send the model to the location where the data
      # tensors are located. This goes through every tensor inside the model.
      model = model.send(inputs.location)

      # do regular training
      optimizer.zero_grad()
      pred = model(inputs)
      loss = ((pred - labels)**2).sum()
      loss.backward()
      optimizer.step()

      model = model.get()  # bring the model back to us

      print(loss.get())  # fetch the loss value and print it

  return model

In [31]:
train_model()

tensor(1.3982, requires_grad=True)
tensor(0.8651, requires_grad=True)
tensor(0.4284, requires_grad=True)
tensor(0.5273, requires_grad=True)
tensor(0.2354, requires_grad=True)
tensor(0.3073, requires_grad=True)
tensor(0.1356, requires_grad=True)
tensor(0.1786, requires_grad=True)
tensor(0.0784, requires_grad=True)
tensor(0.1038, requires_grad=True)
tensor(0.0453, requires_grad=True)
tensor(0.0604, requires_grad=True)
tensor(0.0262, requires_grad=True)
tensor(0.0352, requires_grad=True)
tensor(0.0151, requires_grad=True)
tensor(0.0205, requires_grad=True)
tensor(0.0088, requires_grad=True)
tensor(0.0120, requires_grad=True)
tensor(0.0051, requires_grad=True)
tensor(0.0070, requires_grad=True)
tensor(0.0029, requires_grad=True)
tensor(0.0041, requires_grad=True)
tensor(0.0017, requires_grad=True)
tensor(0.0024, requires_grad=True)
tensor(0.0010, requires_grad=True)
tensor(0.0014, requires_grad=True)
tensor(0.0006, requires_grad=True)
tensor(0.0008, requires_grad=True)
tensor(0.0003, requi

## How can you further preserve the privacy of the individuals who are involved?
By reverse engineering, we are still able to look at the difference between the model that we sent and the model that we get back.
Two ways to mitigate it:
1.   Train for more than one iteration, as it becomes more difficult to reverse engineer what the gradients were.
2.   Instead of bringing the model directly back to us and then sending it to alice, we train multiple different models in parallel on different workers,
and on different people's datasets. Then we average those models together (the average of multiple people's models).<br>
**When we take sums of information across multiple different people, we begin to be able to create plausible deniability as to who actually modified each weight.**


## Advanced Remote Execution Tools
In the last section we trained a toy model using Federated Learning. We did this by calling **.send()** and **.get()** on our model: sending it to the location of the training data, updating it, and then bringing it back. However, at the end of the example we realized that we needed to go a bit further to protect people's privacy. Namely, we want to average the gradients BEFORE calling .get(). That way, we will never see anyone's exact gradient (thus better protecting their privacy!).

But, in order to do this, we need a few more pieces:
*   use a pointer to send a Tensor directly to another worker<br>

Your job is to figure out:
1.   How to train a model on multiple different workers at the same time.
2.   How we can properly perform the gradient aggregations.

In [33]:
# Clearing objects (deleting all tensors from bob and alice that might be around from a previous exercise)
bob.clear_objects()

<VirtualWorker id:bob #objects:0>

In [34]:
alice.clear_objects()

<VirtualWorker id:alice #objects:0>

In [35]:
x =  th.tensor([1, 2, 3, 4, 5]).send(bob)
x # We can see in next line that we have pointer to bob

(Wrapper)>[PointerTensor | me:97528710837 -> bob:30637848633]

In [36]:
bob._objects

{30637848633: tensor([1, 2, 3, 4, 5])}

In [37]:
x = x.send(alice) # send our pointer to alice and replace x on our machine with a pointer to that pointer
x

(Wrapper)>[PointerTensor | me:96386067524 -> alice:97528710837]

In [38]:
bob._objects # data didn't move

{30637848633: tensor([1, 2, 3, 4, 5])}

In [39]:
alice._objects # alice holds a pointer from alice to bob

{97528710837: (Wrapper)>[PointerTensor | alice:97528710837 -> bob:30637848633]}

In [40]:
x # our pointer no longer points to bob; instead it points to alice.
# A message first goes to alice's machine and is processed there:
# it calls alice's pointer, which forwards the message to bob's machine,
# and it is executed on bob's machine.

(Wrapper)>[PointerTensor | me:96386067524 -> alice:97528710837]

In [41]:
# x is owned by both bob and alice. We, as the central server, cannot reach that tensor directly.
# We cannot do anything with that tensor unless BOTH bob and alice allow us to.
y = x + x
y

(Wrapper)>[PointerTensor | me:24380254960 -> alice:57556849586]

In [0]:
bob._objects

In [0]:
alice._objects

In [0]:
# NOTE(review): this cell looks like a stray copy of the two setup cells further
# down (create jon, clear bob and alice, re-send x). If it runs in order, the
# clear_objects() calls wipe the tensors that the next two cells are meant to
# display (their saved outputs still show the result of `y = x + x`).
# Consider removing it.
jon = sy.VirtualWorker(hook, id="jon")
bob.clear_objects()
alice.clear_objects()

x = th.tensor([1,2,3,4,5]).send(bob).send(alice)

In [42]:
bob._objects # 2 tensors on bob's machine

{30637848633: tensor([1, 2, 3, 4, 5]),
 66687343704: tensor([ 2,  4,  6,  8, 10])}

In [43]:
alice._objects # 2 TENSORS on alice's machine, but both of them are POINTERS

{57556849586: (Wrapper)>[PointerTensor | alice:57556849586 -> bob:66687343704],
 97528710837: (Wrapper)>[PointerTensor | alice:97528710837 -> bob:30637848633]}

**The new pointer (tensor) that was created has the same pointer chain, same dependency chain, same ownership chain as the original tensor that was used to create it.**

In [0]:
jon = sy.VirtualWorker(hook, id="jon")

In [0]:
bob.clear_objects()
alice.clear_objects()

x = th.tensor([1,2,3,4,5]).send(bob).send(alice)

In [50]:
bob._objects

{89150023507: tensor([1, 2, 3, 4, 5])}

In [51]:
# NOTE(review): no visible cell assigns a pointer on jon to `y` (the last visible
# assignment is `y = x + x`, which pointed to alice). The saved output presumably
# comes from a cell that is no longer in the notebook — TODO restore it.
y

(Wrapper)>[PointerTensor | me:77489359810 -> jon:41337769945]

In [52]:
jon._objects

{41337769945: (Wrapper)>[PointerTensor | jon:41337769945 -> bob:30928689799]}

In [53]:
alice._objects

{65423884866: (Wrapper)>[PointerTensor | alice:65423884866 -> bob:89150023507]}

In [54]:
x = x.get()
x

(Wrapper)>[PointerTensor | me:65423884866 -> bob:89150023507]

In [0]:
alice._objects

In [55]:
bob._objects

{89150023507: tensor([1, 2, 3, 4, 5])}

In [56]:
alice._objects

{}

In [57]:
x = x.get()
x

tensor([1, 2, 3, 4, 5])

In [58]:
bob._objects

{}

## Pointer Chain Operations

In [75]:
bob.clear_objects()
alice.clear_objects()

<VirtualWorker id:alice #objects:0>

In [0]:
# Data are sent to bob, pointer is sent to alice.
x = th.tensor([1, 2, 3, 4, 5]).send(bob).send(alice)

In [77]:
bob._objects

{41751440282: tensor([1, 2, 3, 4, 5])}

In [78]:
alice._objects

{42009744974: (Wrapper)>[PointerTensor | alice:42009744974 -> bob:41751440282]}

In [79]:
x

(Wrapper)>[PointerTensor | me:13663893969 -> alice:42009744974]

In [80]:
# Pull the data from bob to alice.
# It's like telling alice: "call .get() on your pointer and pull the data that bob has onto alice's machine".
x.remote_get() # key call of this section

(Wrapper)>[PointerTensor | me:13663893969 -> alice:42009744974]

**We were able to orchestrate moving data from bob's machine to alice's machine without it ever touching our machine.**

In [81]:
bob._objects

{}

In [82]:
alice._objects # data moved to alice

{42009744974: tensor([1, 2, 3, 4, 5])}

In [0]:
# our pointer becomes a pointer to bob, instead of a pointer to alice
x.move(bob) # key call of this section

In [70]:
x

(Wrapper)>[PointerTensor | me:67090780786 -> bob:67090780786]

In [71]:
bob._objects

{67090780786: tensor([1, 2, 3, 4, 5])}

In [72]:
alice._objects

{}

In [0]:
x.move(alice)

In [85]:
bob._objects

{13663893969: tensor([1, 2, 3, 4, 5])}

In [86]:
alice._objects

{}