<a href="https://colab.research.google.com/github/agatagruza/private-ai/blob/master/SPAIC_Project15.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Project 15: Encrypted Computation in PySyft
**One-hot vector** - vector of 0s and single 1. For example [0,0,0,1,0,0,0]. The placement of 1 actually indicates what this is trying to encode, 

In [0]:
pip install syft

In [2]:
import torch as th
import syft as sy
hook = sy.TorchHook(th)
from torch import nn, optim

W0729 02:01:28.957506 140644463777664 secure_random.py:26] Falling back to insecure randomness since the required custom op could not be found for the installed version of TensorFlow. Fix this by compiling custom ops. Missing file was '/usr/local/lib/python3.6/dist-packages/tf_encrypted/operations/secure_random/secure_random_module_tf_1.14.0.so'
W0729 02:01:28.977870 140644463777664 deprecation_wrapper.py:119] From /usr/local/lib/python3.6/dist-packages/tf_encrypted/session.py:26: The name tf.Session is deprecated. Please use tf.compat.v1.Session instead.



In [0]:
bob = sy.VirtualWorker(hook, id = "bob").add_worker(sy.local_worker)
alice = sy.VirtualWorker(hook, id = "alice").add_worker(sy.local_worker)
secure_worker = sy.VirtualWorker(hook, id = "secure_worker").add_worker(sy.local_worker)

In [0]:
x = th.tensor([1,2,3,4])
y = th.tensor([2,-1,1,0])

In [0]:
x = x.share(bob, alice, crypto_provider = secure_worker)
y = y.share(bob, alice, crypto_provider = secure_worker)

In [6]:
x

(Wrapper)>[AdditiveSharingTensor]
	-> (Wrapper)>[PointerTensor | me:48808399525 -> bob:83269577641]
	-> (Wrapper)>[PointerTensor | me:2759964832 -> alice:62311018612]
	*crypto provider: secure_worker*

In [7]:
y

(Wrapper)>[AdditiveSharingTensor]
	-> (Wrapper)>[PointerTensor | me:9184329531 -> bob:2489209080]
	-> (Wrapper)>[PointerTensor | me:41842426963 -> alice:79943414084]
	*crypto provider: secure_worker*

In [8]:
bob._objects

{2489209080: tensor([3233921928338619335, 3752955590729021660, 2500543327783326584,
         4330775474662649906]),
 83269577641: tensor([2396882364913519452, 4477732814279387658, 4211266921017015768,
         1843735096073425993])}

In [9]:
alice._objects

{62311018612: tensor([-2396882364913519451, -4477732814279387656, -4211266921017015765,
         -1843735096073425989]),
 79943414084: tensor([-3233921928338619333, -3752955590729021661, -2500543327783326583,
         -4330775474662649906])}

In [10]:
secure_worker._objects

{}

In [11]:
z = x + y
z.get()

tensor([3, 1, 4, 4])

In [12]:
z = x - y
z.get()

tensor([-1,  3,  2,  4])

In [13]:
z = x * y
z.get()

tensor([ 2, -2,  3,  0])

In [14]:
z = x > y
z.get()

tensor([0, 1, 1, 1])

In [17]:
z = x < y
z.get()

tensor([1, 0, 0, 0])

In [15]:
z = x == y
z.get()

tensor([0, 0, 0, 0])

In [0]:
x = th.tensor([1,2,3,4])
y = th.tensor([2,-1,1,0])

x = x.fix_precision().share(bob, alice, crypto_provider=secure_worker)
y = y.fix_precision().share(bob, alice, crypto_provider=secure_worker)

In [20]:
z = x + y
z.get().float_precision()

tensor([3., 1., 4., 4.])

In [21]:
z = x - y
z.get().float_precision()

tensor([-1.,  3.,  2.,  4.])

In [22]:
z = x * y
z.get().float_precision()

tensor([ 2., -2.,  3.,  0.])

In [23]:
z = x > y
z.get().float_precision()

tensor([0., 1., 1., 1.])

In [24]:
z = x < y
z.get().float_precision()

tensor([1., 0., 0., 0.])

In [25]:
z = x == y
z.get().float_precision()

tensor([0., 0., 0., 0.])

In [29]:
x[0:3].get().float_precision()

tensor([1., 2., 3.])

##Build an Encrypted Database

In [0]:
import string

In [0]:
char2index = {} 
index2char = {} 

In [0]:
##' ' + string.ascii_lowercase + '0123456789' + string.punctuation

In [0]:
for i, char in enumerate(' ' + string.ascii_lowercase + '0123456789' + string.punctuation):
  char2index[char] = i
  index2char[i] = char

In [34]:
char2index

{' ': 0,
 '!': 37,
 '"': 38,
 '#': 39,
 '$': 40,
 '%': 41,
 '&': 42,
 "'": 43,
 '(': 44,
 ')': 45,
 '*': 46,
 '+': 47,
 ',': 48,
 '-': 49,
 '.': 50,
 '/': 51,
 '0': 27,
 '1': 28,
 '2': 29,
 '3': 30,
 '4': 31,
 '5': 32,
 '6': 33,
 '7': 34,
 '8': 35,
 '9': 36,
 ':': 52,
 ';': 53,
 '<': 54,
 '=': 55,
 '>': 56,
 '?': 57,
 '@': 58,
 '[': 59,
 '\\': 60,
 ']': 61,
 '^': 62,
 '_': 63,
 '`': 64,
 'a': 1,
 'b': 2,
 'c': 3,
 'd': 4,
 'e': 5,
 'f': 6,
 'g': 7,
 'h': 8,
 'i': 9,
 'j': 10,
 'k': 11,
 'l': 12,
 'm': 13,
 'n': 14,
 'o': 15,
 'p': 16,
 'q': 17,
 'r': 18,
 's': 19,
 't': 20,
 'u': 21,
 'v': 22,
 'w': 23,
 'x': 24,
 'y': 25,
 'z': 26,
 '{': 65,
 '|': 66,
 '}': 67,
 '~': 68}

In [35]:
index2char

{0: ' ',
 1: 'a',
 2: 'b',
 3: 'c',
 4: 'd',
 5: 'e',
 6: 'f',
 7: 'g',
 8: 'h',
 9: 'i',
 10: 'j',
 11: 'k',
 12: 'l',
 13: 'm',
 14: 'n',
 15: 'o',
 16: 'p',
 17: 'q',
 18: 'r',
 19: 's',
 20: 't',
 21: 'u',
 22: 'v',
 23: 'w',
 24: 'x',
 25: 'y',
 26: 'z',
 27: '0',
 28: '1',
 29: '2',
 30: '3',
 31: '4',
 32: '5',
 33: '6',
 34: '7',
 35: '8',
 36: '9',
 37: '!',
 38: '"',
 39: '#',
 40: '$',
 41: '%',
 42: '&',
 43: "'",
 44: '(',
 45: ')',
 46: '*',
 47: '+',
 48: ',',
 49: '-',
 50: '.',
 51: '/',
 52: ':',
 53: ';',
 54: '<',
 55: '=',
 56: '>',
 57: '?',
 58: '@',
 59: '[',
 60: '\\',
 61: ']',
 62: '^',
 63: '_',
 64: '`',
 65: '{',
 66: '|',
 67: '}',
 68: '~'}

In [0]:
str_input = "Hello"
max_len = 8

In [0]:
# def string2values(str_input, max_len=8)

def string2values(str_input, max_len = 8):
  
  str_input = str_input[:max_len].lower() 
  
  # Pad strings shorter than max len 
  if(len(str_input) < max_len):
    str_input = str_input + "." * (max_len - len(str_input))
  
  values = list()
  for char in str_input:
    values.append(char2index[char])
    
  return th.tensor(values).long      

In [37]:
string2values("super awesome agata :)")

<bound method TorchHook.get_hooked_method.<locals>.overloaded_native_method of tensor([19, 21, 16,  5, 18,  0,  1, 23])>

In [0]:
def one_hot_vector(index, length):
  vect = th.zeros(length).long() 
  vect[index] = 1 
  return vect  

In [39]:
one_hot_vector(1, 8)

tensor([0, 1, 0, 0, 0, 0, 0, 0])

In [0]:
def string2one_hot_matrix(str_input, max_length=8):
  str_input = str_input[:max_length].lower()
  
  # Pad strings shorter than max len
  if(len(str_input) < max_length):
    str_input = str_input + "." * (max_length - len(str_input))
  
  char_vectors = list()
  for char in str_input:
    char_v = one_hot_vector(char2index[char], len(char2index)).unsqueeze(0) 
    char_vectors.append(char_v)
  return th.cat(char_vectors, dim = 0)  

In [0]:
matrix = string2one_hot_matrix('agata')

In [42]:
matrix.shape

torch.Size([8, 69])

In [43]:
matrix

tensor([[0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 

In [0]:
def strings_equal(str_a, str_b):
  vect = (str_a * str_b).sum(1) 
  
  x = vect[0] 
  
  for i in range(vect.shape[0] - 1): 
    x *= vect[i + 1] 
    
  key_match = x
  return key_match

In [46]:
strings_equal(string2one_hot_matrix("agat"), string2one_hot_matrix("agata"))

tensor(0)

In [0]:
keys = list()
values = list()

keys.append(string2one_hot_matrix("key1"))
keys.append(string2one_hot_matrix("key2"))

In [48]:
keys

[tensor([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
         [0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
         [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
          0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
         [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
          0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
         [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

In [0]:
values.append(string2values("value1"))
values.append(string2values("value2"))

In [50]:
values

[<bound method TorchHook.get_hooked_method.<locals>.overloaded_native_method of tensor([22,  1, 12, 21,  5, 28, 50, 50])>,
 <bound method TorchHook.get_hooked_method.<locals>.overloaded_native_method of tensor([22,  1, 12, 21,  5, 29, 50, 50])>]

In [0]:
query_str = "key2"
query_matrix = string2one_hot_matrix(query_str)

key_matches = list()
for key in keys:
  key_match = strings_equal(key, query_matrix)
  key_matches.append(key_match)

In [57]:
print("key_matches = ", key_matches)
print("values = ", values)

key_matches =  [tensor(0), tensor(1)]
values =  [<bound method TorchHook.get_hooked_method.<locals>.overloaded_native_method of tensor([22,  1, 12, 21,  5, 28, 50, 50])>, <bound method TorchHook.get_hooked_method.<locals>.overloaded_native_method of tensor([22,  1, 12, 21,  5, 29, 50, 50])>]


In [0]:
def values2string(input_values):
    s = ""
    for value in input_values:
        s += index2char[int(value)]
    return s

In [61]:
p = string2values("agata")
print(p)

<bound method TorchHook.get_hooked_method.<locals>.overloaded_native_method of tensor([ 1,  7,  1, 20,  1, 50, 50, 50])>


In [0]:
def string2values(str_input, max_len=8):

    str_input = str_input[:max_len].lower()

    # pad strings shorter than max len
    if(len(str_input) < max_len):
        str_input = str_input + "." * (max_len - len(str_input))

    values = list()
    for char in str_input:
        values.append(char2index[char])

    return th.tensor(values).long()

def values2string(input_values):
    s = ""
    for value in input_values:
        s += index2char[int(value)]
    return s

def strings_equal(str_a, str_b):

    vect = (str_a * str_b).sum(1)

    x = vect[0]

    for i in range(vect.shape[0] - 1):
        x = x * vect[i + 1]    

    return x

def one_hot(index, length):
    vect = th.zeros(length).long()
    vect[index] = 1
    return vect
def string2one_hot_matrix(str_input, max_len=8):

    str_input = str_input[:max_len].lower()

    # pad strings shorter than max len
    if(len(str_input) < max_len):
        str_input = str_input + "." * (max_len - len(str_input))

    char_vectors = list()
    for char in str_input:
        char_v = one_hot(char2index[char], len(char2index)).unsqueeze(0)
        char_vectors.append(char_v)
        
    return th.cat(char_vectors, dim=0)

In [0]:
class EncryptedDB():
    
    def __init__(self, *owners, max_key_len=8, max_val_len=8):
        self.max_key_len = max_key_len
        self.max_val_len = max_val_len
        
        self.keys = list()
        self.values = list()
        self.owners = owners
        
    def add_entry(self, key, value):
        key = string2one_hot_matrix(key)
        key = key.share(*self.owners)
        self.keys.append(key)
        
        value = string2values(value, max_len=self.max_val_len)
        value = value.share(*self.owners)
        self.values.append(value)
        
    def query(self, query_str):
        query_matrix = string2one_hot_matrix(query_str)
        
        query_matrix = query_matrix.share(*self.owners)

        key_matches = list()
        for key in self.keys:

            key_match = strings_equal(key, query_matrix)
            key_matches.append(key_match)

        result = self.values[0] * key_matches[0]

        for i in range(len(self.values) - 1):
            result += self.values[i+1] * key_matches[i+1]
            
        result = result.get()

        return values2string(result).replace(".","")

In [74]:
db = EncryptedDB(bob, alice, secure_worker, max_val_len=256)

db.add_entry("Bob","(123) 456 7890")
db.add_entry("Bill", "(234) 567 8901")
db.add_entry("Sam","(345) 678 9012")
db.add_entry("Key","really big json value")

db.query("Bob")

'(123) 456 7890'