In [37]:
import collections
import hashlib
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import math
import random
from collections import Counter

In [38]:
class Differential_Privacy_CMS():
    '''Count-mean-sketch frequency estimator with a randomized client step.

    A data element is a pair ``(a, b)``; it is flattened to the integer
    ``a*max_coor + b`` before hashing.  Row ``i`` of the sketch uses the hash
    ``sha256(str(seed[i]) + str(flattened))`` reduced modulo ``width``.

    The pipeline is:
      1. ``CLient_Side`` turns one element into a randomized +/-1 vector.
      2. ``Compute_Sketch_Matrix`` de-biases and sums those vectors per row.
      3. ``Server_Side`` reads a frequency estimate for one element.
    ``Count_Mean_Sketch`` runs all three steps for a whole stream.
    '''

    def __init__(self, width, depth,epsilon,max_coor):
        ''' Method to initialize the data structure
        @param width int: Width of the table (hash values lie in 0..width-1)
        @param depth int: Depth of the table (number of hash functions)
        @param epsilon: privacy parameter, must be > 0
        @param max_coor: multiplier used to flatten a pair (a, b) into a*max_coor+b
        @raises ValueError: if width < 2, depth < 1, depth > width or epsilon <= 0
        '''
        # Server_Side divides by (width - 1).
        if width < 2:
            raise ValueError("width must be at least 2")
        if depth < 1:
            raise ValueError("depth must be at least 1")
        # The seeds below are drawn without replacement from range(width).
        if depth > width:
            raise ValueError("depth must not exceed width")
        # Compute_Sketch_Matrix divides by (exp(epsilon/2) - 1), which is 0 for epsilon == 0.
        if epsilon <= 0:
            raise ValueError("epsilon must be positive")
        self.width = width #hash values range between 0 to width-1
        self.depth = depth #number of hash functions
        self.epsilon = epsilon
        # One distinct seed per sketch row; it is prepended to the element before hashing.
        self.seed = np.random.choice(np.arange(0,self.width),replace=False,size=self.depth)
        self.max = max_coor

    def _encode(self, d):
        '''Flatten the pair d = (a, b) into the single integer a*max_coor + b.'''
        return d[0]*self.max+d[1]

    def _bucket(self, row, encoded):
        '''Return the column (0..width-1) that sketch row `row` assigns to an encoded element.'''
        hex_num = hashlib.sha256((str(self.seed[row])+str(encoded)).encode('utf-8')).hexdigest()
        # Only the first 5 hex digits (20 bits) of the digest are used.
        return int(hex_num[:5],16) % self.width

    def CLient_Side(self,d):
        '''Randomize a single data element on the client.

        Picks a sketch row j with np.random.randint(depth), builds a vector of
        -1 of length width with +1 at the hashed column of d, then multiplies
        it element-wise by a random +/-1 vector whose entries are +1 with
        probability e^(epsilon/2) / (e^(epsilon/2) + 1).

        @param d: data element, a pair (a, b)
        @return: tuple (vector of shape (1, width), j)
        '''
        encoded = self._encode(d)
        j = np.random.randint(self.depth)
        v = np.full((1,self.width), -1.0)
        v[0,self._bucket(j, encoded)]=1
        val = np.exp(self.epsilon/2)
        probability_of_1 = val/(val+1)
        probability_of_neg_1 = 1/(val+1)
        b = np.random.choice([1,-1],self.width,p=[probability_of_1,probability_of_neg_1])
        return (v*b,j)

    # Correctly spelled alias; the original name is kept for existing callers.
    Client_Side = CLient_Side

    def Compute_Sketch_Matrix(self,D):
        '''Aggregate client reports into the depth x width sketch matrix.

        Every report (v_i, j_i) is mapped to
        depth * ((c_epsilon/2) * v_i + 1/2) with
        c_epsilon = (e^(epsilon/2)+1)/(e^(epsilon/2)-1), and added to row j_i.

        @param D: iterable of (v_i, j_i) as returned by CLient_Side
        @return: numpy array of shape (depth, width)
        '''
        val = np.exp(self.epsilon/2)
        c_epsilon = (val+1)/(val-1)
        M = np.zeros((self.depth,self.width))
        for report in D:
            row_vector = np.asarray(report[0]).reshape(self.width)
            # Accumulate whole rows in place; no n x width scratch matrix is needed.
            M[report[1]] += self.depth*((c_epsilon/2)*row_vector + 0.5)
        return M

    def Server_Side(self,Sketch_Matrix,d,length):
        '''Estimate the frequency of one data element from the sketch.

        Computes (width/(width-1)) * (mean_i M[i, h_i(d)] - length/width).

        @param Sketch_Matrix: matrix produced by Compute_Sketch_Matrix
        @param d: data element, a pair (a, b)
        @param length: number of elements in the data stream
        @return: estimated frequency (float; it can be negative)
        '''
        encoded = self._encode(d)
        row_sum = 0
        for i in range(self.depth):
            row_sum = row_sum + Sketch_Matrix[i,self._bucket(i, encoded)]
        avg_row_sum = row_sum/self.depth
        return (self.width/(self.width-1))*(avg_row_sum - length/self.width)

    def Count_Mean_Sketch(self,D_s,D):
        '''Run the full pipeline on a stream and query a set of elements.

        @param D_s: the data stream (sequence of pairs)
        @param D: the elements whose frequencies are requested
        @return: dict mapping each element of D to its estimated frequency
        '''
        reports = [self.CLient_Side(item) for item in D_s]
        Sketch_Matrix = self.Compute_Sketch_Matrix(reports)
        length = len(D_s)
        return {d: self.Server_Side(Sketch_Matrix,d,length) for d in D}

In [52]:
def frequency_counter_on_average(path_list,epsilon,universe_list,runs=500,width=100,depth=100,max_coor=90):
    '''Average the sketch-based frequency estimates over several independent runs.

    A fresh Differential_Privacy_CMS instance is built for every run, so each
    run uses new hash seeds and new client-side randomness.

    @param path_list: the path (sequence of (x, y) pairs) whose histogram is wanted
    @param epsilon: privacy parameter passed to Differential_Privacy_CMS
    @param universe_list: all points for which a frequency is estimated
    @param runs: number of independent repetitions to average (default 500)
    @param width: sketch width passed to Differential_Privacy_CMS (default 100)
    @param depth: sketch depth passed to Differential_Privacy_CMS (default 100)
    @param max_coor: coordinate multiplier passed to Differential_Privacy_CMS (default 90)
    @return: dict mapping every point of universe_list to its averaged estimate
    @raises ValueError: if runs < 1
    '''
    if runs < 1:
        raise ValueError("runs must be at least 1")
    freq_counter_cum = {d:0 for d in universe_list}
    for i in range(runs):
        class_instance = Differential_Privacy_CMS(width,depth,epsilon,max_coor)
        freq_counter_new = class_instance.Count_Mean_Sketch(path_list, universe_list)
        for d, estimate in freq_counter_new.items():
            # Running mean: after run i the value is the average of runs 0..i.
            freq_counter_cum[d]=(i*freq_counter_cum[d]+estimate)/(i+1)
    return freq_counter_cum
    

In [40]:
def dictionary_adder(dict1,dict2):
    '''Return a copy of dict1 in which every value of dict2 has been added key-wise.

    Keys missing from dict1 start from 0. Neither input is modified.
    '''
    merged = dict1.copy()
    for key, amount in dict2.items():
        merged[key] = merged.get(key, 0) + amount
    return merged

In [41]:
# This part generates random-walk paths on a rectangular grid; the same helpers are used for the true path and for the fake paths.
def vertex_position_determiner(vertex):
    '''Classify a vertex as 'corner', 'border-edge' or 'normal'.

    The grid bounds are read from the attributes wrap.x_min, wrap.x_max,
    wrap.y_min and wrap.y_max, so wrap() must have been called beforehand.
    '''
    col, row = vertex[0], vertex[1]
    on_vertical_border = col == wrap.x_min or col == wrap.x_max
    on_horizontal_border = row == wrap.y_min or row == wrap.y_max
    if on_vertical_border and on_horizontal_border:
        return 'corner'
    if on_vertical_border or on_horizontal_border:
        return 'border-edge'
    return 'normal'
def corner_neighbor_gen(vertex):
    '''Return the two in-grid neighbours of a corner vertex (horizontal one first).

    The grid bounds come from the attributes set on wrap. Returns None when
    the vertex is not one of the four corners.
    '''
    col, row = vertex[0], vertex[1]
    # (corner x, corner y, step towards the inside in x, step towards the inside in y)
    corners = (
        (wrap.x_min, wrap.y_min, 1, 1),
        (wrap.x_max, wrap.y_min, -1, 1),
        (wrap.x_max, wrap.y_max, -1, -1),
        (wrap.x_min, wrap.y_max, 1, -1),
    )
    for corner_x, corner_y, step_x, step_y in corners:
        if col == corner_x and row == corner_y:
            return [(col + step_x, row), (col, row + step_y)]
    return None
def side_neighbor_gen(vertex):
    '''Return the three in-grid neighbours of a vertex lying on a grid side.

    The grid bounds come from the attributes set on wrap. The order of the
    returned neighbours depends on which side matched. Returns None when the
    vertex is on no side.
    '''
    col, row = vertex[0], vertex[1]
    left, right = (col - 1, row), (col + 1, row)
    below, above = (col, row - 1), (col, row + 1)
    if col == wrap.x_min:
        neighbours = [below, above, right]
    elif col == wrap.x_max:
        neighbours = [left, above, below]
    elif row == wrap.y_min:
        neighbours = [left, right, above]
    elif row == wrap.y_max:
        neighbours = [below, left, right]
    else:
        neighbours = None
    return neighbours
def normal_neighbor_gen(vertex):
    '''Return the four neighbours of an interior vertex: left, right, up, down.'''
    col, row = vertex[0], vertex[1]
    horizontal = [(col + step, row) for step in (-1, 1)]
    vertical = [(col, row + step) for step in (1, -1)]
    return horizontal + vertical
def path_creator(center_vertex,path_length):
    '''Build a random walk of path_length vertices that starts at center_vertex.

    At every step the current vertex is recorded, then the walk moves to one
    of its neighbours, chosen with np.random.randint among the 4, 2 or 3
    candidates of a normal, corner or border vertex respectively.
    '''
    # vertex class -> (neighbour generator, number of candidate neighbours)
    moves_by_position = {
        'normal': (normal_neighbor_gen, 4),
        'corner': (corner_neighbor_gen, 2),
    }
    border_move = (side_neighbor_gen, 3)

    walk = []
    position = center_vertex
    for _ in range(path_length):
        walk.append(position)
        kind = vertex_position_determiner(position)
        neighbours_of, fan_out = moves_by_position.get(kind, border_move)
        pick = np.random.randint(fan_out)
        position = neighbours_of(position)[pick]
    return walk

In [42]:
def wrap(x_min, x_max, y_min, y_max, center_vertex, path_length):
    '''Fix the rectangular area and create one random path inside it.

    x_min, x_max, y_min and y_max are stored as attributes on this function
    (wrap.x_min, ...), which is where the neighbour helpers read the grid
    bounds from. Returns a single path: the list of path_length vertices
    produced by path_creator starting at center_vertex.
    '''
    bounds = (("x_min", x_min), ("x_max", x_max), ("y_min", y_min), ("y_max", y_max))
    for attribute, value in bounds:
        setattr(wrap, attribute, value)
    return path_creator(center_vertex, path_length)


In [43]:
def path_list_frequency_generator(path_list,epsilon,universe_list):
    '''Compute one averaged frequency histogram per path, plus their key-wise sum.

    path_list is a list of paths (a list of lists). Returns a tuple
    (list of histograms in the same order as path_list, aggregated histogram).
    '''
    histograms = [
        frequency_counter_on_average(single_path, epsilon, universe_list)
        for single_path in path_list
    ]
    total = {}
    for histogram in histograms:
        total = dictionary_adder(total, histogram)
    return histograms, total

In [45]:
class querier():
    '''The party that wants to know where its own path crossed infected paths.'''

    def __init__(self,true_path):
        # Sequence of (x, y) vertices visited by the querier.
        self.true_path = true_path

    def generating_universe_list(self, number_of_fake_paths=8, fake_path_length=200, first_center_index=96):
        '''Return the universe list: the true path mixed with random fake paths.

        The bounding rectangle of the true path is used as the grid. Fake path
        k starts at true_path[(first_center_index + k) % len(true_path)], so
        paths shorter than first_center_index + number_of_fake_paths no longer
        raise IndexError. With the defaults and a path of at least 104
        vertices, the start vertices are true_path[96..103] as before.

        @param number_of_fake_paths: how many fake paths to generate (default 8)
        @param fake_path_length: number of vertices per fake path (default 200)
        @param first_center_index: index in true_path of the first start vertex
        @return: list of distinct vertices; empty list for an empty true path
        '''
        path_size = len(self.true_path)
        if path_size == 0:
            # No vertex to bound the grid or to start a fake path from.
            return []
        # Bounding rectangle of the true path.
        xs = [vertex[0] for vertex in self.true_path]
        ys = [vertex[1] for vertex in self.true_path]
        x_min, x_max = min(xs), max(xs)
        y_min, y_max = min(ys), max(ys)
        fake_vertices = []
        for k in range(number_of_fake_paths):
            center_vertex = self.true_path[(first_center_index + k) % path_size]
            fake_vertices += wrap(x_min, x_max, y_min, y_max, center_vertex, fake_path_length)
        return list(set(list(self.true_path) + fake_vertices))

    def checker(self,big_dict,threshold=1):
        '''Return the distinct true-path vertices whose value in big_dict is >= threshold.

        @param big_dict: dict mapping vertices to (estimated) counts
        @param threshold: minimum count for a vertex to be reported (default 1)
        @return: list of vertices; vertices missing from big_dict count as 0
        '''
        return [vertex for vertex in set(self.true_path) if big_dict.get(vertex,0)>=threshold]

In [50]:
class infected():
    '''An infected patient taking part in the aggregated intersection protocol.'''

    def __init__(self,true_path,id,global_id):
        self.__true_path = true_path
        self.id = id #will be fixed by some controller part
        self.histogram = None #server will give this data afterwards, this is just a list of dictionaries.
        self.global_id = global_id # id of the patient that hands the result to the querier
        self.all_coordinates =  []
        self.filtered_true_path = None
        self.fake_path_lists = []
        self.all_path_list =[]

    def path_finder_and_generator(self,universe_list,number_of_fake_paths,path_length):
        '''Compute the filtered true path and generate fake paths from the universe.

        filtered_true_path is the intersection of universe_list and the true
        path. Each fake path is a random sample (without replacement) of
        universe_list, truncated to the length of filtered_true_path.
        all_path_list is the fake paths followed by the filtered true path
        (the true path is always the LAST entry).

        Every call starts from an empty fake path list, so calling the method
        again replaces the previous fake paths instead of adding to them.

        @param universe_list: list of vertices sent by the querier
        @param number_of_fake_paths: how many fake paths to create
        @param path_length: number of vertices sampled per fake path before
            truncation; capped at len(universe_list)
        '''
        self.filtered_true_path = list(set(universe_list) & set(self.__true_path))
        check_length = len(self.filtered_true_path)
        l = len(universe_list)
        # Sampling without replacement cannot draw more items than the universe holds.
        sample_size = min(path_length, l)
        self.fake_path_lists = []
        for _ in range(number_of_fake_paths):
            fake_path_index_list = np.random.choice(np.arange(0,l),replace=False,size=sample_size).tolist()
            fake_path_list = [universe_list[index] for index in fake_path_index_list][:check_length]
            self.fake_path_lists.append(fake_path_list)
        self.all_path_list = self.fake_path_lists + [self.filtered_true_path]

    def decrementer(self, big_hist, participant_list):
        '''Remove this patient's fake-path histograms from big_hist and pass it on.

        self.histogram is expected in the order of all_path_list: the fake
        path histograms first and the true path histogram last. Every value of
        every fake histogram is subtracted from big_hist (in place). The
        histograms are keyed by the whole universe, so a guard based on key
        membership in the true histogram would never let anything through.

        The result is then handed to the next infected participant, or to the
        querier (the last entry of participant_list) when this patient's id
        equals global_id or no infected participant follows. The next
        participant is located by this object's position in participant_list,
        so it does not matter whether ids start at 0 or at 1.

        @param big_hist: aggregated histogram (dict), modified in place
        @param participant_list: infected patients followed by the querier
        @return: whatever the querier's checker returns
        '''
        fake_histograms = self.histogram[:-1]
        for hist in fake_histograms:
            for elt, value in hist.items():
                if elt in big_hist:
                    big_hist[elt]=big_hist[elt]-value
        try:
            position = participant_list.index(self)
        except ValueError:
            # Not in the list: fall back to treating the id as the position.
            position = self.id
        is_last_infected = position + 1 >= len(participant_list) - 1
        if self.id==self.global_id or is_last_infected:
            return participant_list[-1].checker(big_hist)
        #sent to the next infected
        return participant_list[position+1].decrementer(big_hist, participant_list)


In [47]:
def new_aggregated_mode(infected_patients, querier, epsilon, path_length, number_of_fake_paths=3):
    '''Run the aggregated protocol and return the vertices the querier reports.

    1. The querier generates the universe list.
    2. Every infected patient receives the universe list, computes its
       filtered true path and generates its fake paths.
    3. For every infected patient a frequency histogram is computed for each
       path in its path list, together with the key-wise sum of those
       histograms; the sums of all patients are added into one big histogram.
    4. The participant list is the infected patients followed by the querier.
    5. decrementer is called on the first infected patient, which starts the
       chain that ends in the querier's checker.

    @param infected_patients: list of infected instances
    @param querier: querier instance
    @param epsilon: privacy parameter for the histograms
    @param path_length: fake-path sample size passed to path_finder_and_generator
    @param number_of_fake_paths: fake paths per infected patient (default 3)
    @return: the querier's checker result; with no infected patients the
        checker is applied to an empty histogram
    '''
    universe_list = querier.generating_universe_list()
    big_dict = {}
    if len(infected_patients) == 0:
        # Nobody contributed a histogram, so there is no chain to start.
        return querier.checker(big_dict)
    for infected_patient in infected_patients:
        infected_patient.path_finder_and_generator(universe_list,number_of_fake_paths,path_length)
        dict_list,agg_dict = path_list_frequency_generator(infected_patient.all_path_list,epsilon,universe_list)
        infected_patient.histogram = dict_list
        big_dict = dictionary_adder(big_dict, agg_dict)
    participant_list = list(infected_patients) + [querier]
    return infected_patients[0].decrementer(big_dict, participant_list)

In [48]:
def accuracy_checker(predicted_path, original_path):
    '''Compare a predicted list of intersections with the original one.

    Returns three sets: the vertices present in both, the predicted vertices
    that are not in the original (false signals) and the original vertices
    that were not predicted (missed signals).
    '''
    predicted, actual = set(predicted_path), set(original_path)
    hits = predicted.intersection(actual)
    return hits, predicted.difference(hits), actual.difference(hits)

In [53]:
# Test type 4: multiple intersections, coming from all infected patients. TODO: the original note ("While writing this test case, observe that ...") was left unfinished.

# True paths of the three infected patients: 200-vertex random walks on the grid 0..19 x 0..79.
true_pathi1 = wrap(x_min=0, x_max=19, y_min=0, y_max=79, center_vertex=(11,45), path_length=200)
# NOTE(review): this start vertex has x=56, above the x_max=19 given in the same call -- confirm it is intended.
true_pathi2 = wrap(x_min=0, x_max=19, y_min=0, y_max=79, center_vertex=(56,11), path_length=200)
true_pathi3 = wrap(x_min=0, x_max=19, y_min=0, y_max=79, center_vertex=(7,4), path_length=200)
infected1 = infected(true_path=true_pathi1, id=1, global_id=3)
infected2 = infected(true_path=true_pathi2, id=2, global_id=3)
infected3 = infected(true_path=true_pathi3, id=3, global_id=3)
infected_patients = [infected1, infected2, infected3]
# Querier path (270 vertices), built segment by segment.
path = (
    [(0, y) for y in range(0, 71)]            # (0,0) .. (0,70)
    + [(x, 70) for x in range(1, 71)]         # (1,70) .. (70,70)
    + [(0, y) for y in range(0, 71)]          # (0,0) .. (0,70) again: the first segment is repeated
    + [(70, y) for y in range(71, 80)]        # (70,71) .. (70,79)
    + [(x, 79) for x in range(71, 80)]        # (71,79) .. (79,79)
    + [(79, y) for y in range(78, 38, -1)]    # (79,78) down to (79,39)
)
client = querier(true_path=path)#creating querier class instance
path_length = 200
epsilon = 5
new_aggregated_mode(infected_patients, client, epsilon, path_length)

[]

In [56]:
# Vertices that lie on at least one infected true path and also on the querier's path.
set([*true_pathi1, *true_pathi2, *true_pathi3]).intersection(set(path))

set()