In [2]:
import numpy as np
from matplotlib import pyplot as plt
import scipy.stats

def get_runs(runs_string, size):
    """Split a log excerpt into a 2D list of runs and rows of data.

    The text is cut at every 'run=' marker and whatever precedes the first
    marker is discarded. Each remaining piece is broken into lines, and its
    first ``size + 1`` lines are dropped before it is added to the result.

    runs_string: text containing zero or more 'run=' markers.
    size: number of lines (plus one) to skip at the start of every run.
    """
    skipped = size + 1
    runs = []
    for chunk in runs_string.split('run=')[1:]:
        runs.append(chunk.split('\n')[skipped:])
    return runs

def read_and_split(filename, sizes=[3, 5, 7, 9, 15, 18, 20, 22, 24, 26, 28, 30], split_size=True):
    """Read a log file and return a 3D list by num_server, run, rows of data.

    The file content is cut at every 'num_servers=' marker (text before the
    first marker is dropped) and each section is handed to get_runs.

    filename: path of the log file to read.
    sizes: list of all possible num_servers, in the same order as the
        sections appear in the file; sizes[i] is passed to get_runs for
        the i-th section.
    split_size: accepted but not used by this function.
    """
    with open(filename, "r") as log_file:
        contents = log_file.read()
    sections = contents.split('num_servers=')[1:]
    runs_array = []
    for index, section in enumerate(sections):
        runs_array.append(get_runs(section, sizes[index]))
    return runs_array

def split_by_num_servers(filename, num_server_list):
    """Return the text of every 'num_servers=' section of a log file.

    The file is read in full and cut at each 'num_servers=' marker; the
    text before the first marker is discarded. Each returned string starts
    right after its marker.

    filename: path of the log file to read.
    num_server_list: accepted but not consulted by this function.
    """
    with open(filename, "r") as log_file:
        contents = log_file.read()
    # str.split always yields at least one piece, so the unpacking is safe.
    _preamble, *sections = contents.split('num_servers=')
    return sections


In [3]:
from enum import Enum

class Setting(Enum):
    """Setting an experiment was run in.

    The loading code in this notebook pairs these members with the
    log-file tags 'local', 'ew' and 'eu' respectively.
    """

    LOCAL = 0
    EAST_WEST = 1
    EAST_EUROPE = 2

class Task(Enum):
    """Kind of computation an experiment measured."""

    FREQUENCY = 0
    MEAN = 1
    INPUT_VALIDATION = 2

class Protocol(Enum):
    """Protocol variant an experiment was run with.

    The loading code in this notebook reads SEMI_HONEST results from the
    'shamir' log directories and MALICIOUS results from 'malicious-shamir'.
    """

    SEMI_HONEST = 0
    MALICIOUS = 1

class Stats:
    """Summary statistics of a one-dimensional numeric sample.

    Attributes set by the constructor:
        mean:  arithmetic mean of the sample.
        lower: lower bound of the two-sided Student-t confidence interval
               of the mean.
        upper: upper bound of that interval.
        std:   standard deviation as computed by np.std with its default
               ddof=0 (the interval itself is built on scipy.stats.sem,
               which uses ddof=1).

    Degenerate samples are handled explicitly instead of relying on NaN
    propagation (and the runtime warnings that come with it):
        * empty sample: every attribute is NaN;
        * one element: mean and std are defined, the interval bounds are
          NaN because the standard error needs at least two observations.
    """

    def __init__(self, data, confidence=0.95):
        """Compute the statistics of `data`.

        data: sequence of numbers.
        confidence: confidence level of the interval, strictly between 0 and 1.
        """
        sample = 1.0 * np.array(data)
        n = len(sample)

        if n == 0:
            # Nothing to summarise; keep the NaN results without emitting warnings.
            self.mean = self.lower = self.upper = self.std = np.nan
            return

        self.mean = np.mean(sample)
        self.std = np.std(sample)

        if n < 2:
            # The standard error (ddof=1) and the t quantile with zero degrees
            # of freedom are both undefined for a single observation.
            half_width = np.nan
        else:
            std_error = scipy.stats.sem(sample)
            half_width = std_error * scipy.stats.t.ppf((1 + confidence) / 2., n - 1)

        self.lower = self.mean - half_width
        self.upper = self.mean + half_width

    def __str__(self):
        """Return 'mean, lower, upper, std', each with six decimals."""
        return "%.6f, %.6f, %.6f, %.6f"%(self.mean,self.lower,self.upper,self.std)

class Experiment:
    """One experiment configuration together with the statistics parsed from its log.

    The constructor only records the configuration. `time_stats` and
    `data_stats` are created by `read_data` / `read_data_from_file`, so the
    `csv_str_*` helpers are only usable after one of those has run.
    """

    def __init__(self, num_servers, setting, num_keys, task, protocol):
        """Record the configuration values as attributes of the same names."""
        self.num_servers, self.setting = num_servers, setting
        self.num_keys, self.task = num_keys, task
        self.protocol = protocol

    def __str__(self):
        """Return a one-line description of the configuration."""
        template = "Experiment: num_servers={} {} num_keys={} {} {}"
        return template.format(
            self.num_servers, self.setting, self.num_keys, self.task, self.protocol
        )

    def csv_str_time(self):
        """Return 'num_servers, <time_stats>' as a CSV row."""
        return "{}, {}".format(self.num_servers, self.time_stats)

    def csv_str_data(self):
        """Return 'num_servers, <data_stats>' as a CSV row."""
        return "{}, {}".format(self.num_servers, self.data_stats)

    def read_data(self, runs_string):
        """Parse log text and store timing and data-sent statistics.

        The text is cut at every 'run=' marker (anything before the first
        marker is ignored) and the first ``num_servers + 1`` lines of each
        run are skipped. Within each run:

        * every line that starts with 'Time = ' and ends with 'seconds '
          contributes one value; the run's time is the mean of those values,
          and runs without any such value are left out;
        * the first line that starts with 'Global data sent = ', ends with
          'MB' and parses as a number gives the run's data-sent value; runs
          without one are left out.

        Lines whose number cannot be parsed are silently ignored.
        Sets `self.time_stats` and `self.data_stats` and returns `self`.
        """
        skipped = self.num_servers + 1
        runs = [chunk.split('\n')[skipped:] for chunk in runs_string.split('run=')[1:]]

        def number_after(line, prefix):
            # The value is the first space-delimited token following the prefix;
            # None signals a line that does not parse.
            try:
                return float(line[len(prefix):].split(' ')[0])
            except ValueError:
                return None

        time_prefix = 'Time = '
        times = []
        for lines in runs:
            parsed = [
                number_after(line, time_prefix)
                for line in lines
                if line.startswith(time_prefix) and line.endswith('seconds ')
            ]
            samples = [value for value in parsed if value is not None]
            if samples:
                times.append(np.mean(samples))

        data_prefix = 'Global data sent = '
        data_sent = []
        for lines in runs:
            candidates = (
                number_after(line, data_prefix)
                for line in lines
                if line.startswith(data_prefix) and line.endswith('MB')
            )
            # Only the first parseable value of a run is used.
            first_value = next((value for value in candidates if value is not None), None)
            if first_value is not None:
                data_sent.append(first_value)

        self.time_stats = Stats(times)
        self.data_stats = Stats(data_sent)

        return self

    def read_data_from_file(self, filename):
        """Read `filename` in full and feed its content to `read_data`; returns `self`."""
        with open(filename, 'r') as log_file:
            contents = log_file.read()
        return self.read_data(contents)



In [4]:
def read_multiple_server_experiments(filename, num_server_list, setting, num_keys, task, protocol):
    """Load one Experiment per entry of `num_server_list` from a single log file.

    The file is cut into 'num_servers=' sections by split_by_num_servers and
    the i-th section is parsed as the data of num_server_list[i]; the match
    is purely positional. All experiments share the given setting, num_keys,
    task and protocol.

    Returns the list of experiments, in the order of `num_server_list`.
    Raises IndexError when the file has fewer sections than
    `num_server_list` has entries.
    """
    sections = split_by_num_servers(filename, num_server_list)

    experiments = []
    for index, num_server in enumerate(num_server_list):
        experiment = Experiment(
            num_servers=num_server,
            setting=setting,
            num_keys=num_keys,
            task=task,
            protocol=protocol,
        )
        experiments.append(experiment.read_data(sections[index]))
    return experiments


In [27]:
# Collect every experiment in one flat list. The list is rebuilt from scratch
# here, so re-running the cell does not duplicate entries.
all_experiments = []

# --- Mean estimation vs. number of servers: malicious protocol, local setting.
all_experiments.extend(read_multiple_server_experiments(
    filename='Result_Logs_size/malicious-shamir/local_mean_est_comp_nodes.txt',
    num_server_list=[3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,24,26,28,30],
    protocol=Protocol.MALICIOUS,
    setting=Setting.LOCAL,
    task=Task.MEAN,
    num_keys=1
))

# Disabled: with this block commented out, the only LOCAL / SEMI_HONEST mean
# experiments come from 'local_mean_est_final.txt' below, whose server list
# contains neither 6 nor 10.
# all_experiments += read_multiple_server_experiments(
#     filename='Result_Logs_size/shamir/local_mean_est_comp_nodes.txt',
#     num_server_list=[3,6,10,20],
#     setting=Setting.LOCAL,
#     num_keys=1,
#     task=Task.MEAN,
#     protocol=Protocol.SEMI_HONEST
# )

# --- Mean estimation vs. number of servers: both protocols, the two non-local settings.
for protocol, protocol_tag in [(Protocol.SEMI_HONEST, 'shamir'), (Protocol.MALICIOUS, 'malicious-shamir')]:
    for setting, setting_tag in [(Setting.EAST_WEST, 'ew'), (Setting.EAST_EUROPE, 'eu')]:
        all_experiments.extend(read_multiple_server_experiments(
            filename='Result_Logs_size/{}/{}_mean_est_comp_nodes.txt'.format(protocol_tag, setting_tag),
            num_server_list=[3,6,10,20],
            protocol=protocol,
            setting=setting,
            task=Task.MEAN,
            num_keys=1
        ))

# --- Semi-honest protocol, local setting: mean estimation, then frequency estimation.
all_experiments.extend(read_multiple_server_experiments(
    filename='Result_Logs_size/shamir/local_mean_est_final.txt',
    num_server_list=[3, 5, 7, 9, 12, 15, 17, 20, 22, 24, 26, 30],
    protocol=Protocol.SEMI_HONEST,
    setting=Setting.LOCAL,
    task=Task.MEAN,
    num_keys=1
))

all_experiments.extend(read_multiple_server_experiments(
    filename='Result_Logs_size/shamir/local_freq_est_final.txt',
    num_server_list=[3, 5, 7, 9, 12, 15, 17, 20, 22, 24, 26, 30],
    protocol=Protocol.SEMI_HONEST,
    setting=Setting.LOCAL,
    task=Task.FREQUENCY,
    num_keys=1
))

# --- Mean estimation vs. number of keys, always with 5 servers. Each log file
# holds a single configuration; the file-name label 1..4 selects the key count.
for protocol, protocol_tag in [(Protocol.SEMI_HONEST, 'shamir'), (Protocol.MALICIOUS, 'malicious-shamir')]:
    for setting, setting_tag in [(Setting.LOCAL, 'local'), (Setting.EAST_WEST, 'ew'), (Setting.EAST_EUROPE, 'eu')]:
        all_experiments.extend([
            Experiment(
                num_servers=5,
                setting=setting,
                num_keys=num_keys,
                task=Task.MEAN,
                protocol=protocol
            ).read_data_from_file(
                'Result_Logs_keys/{}/{}_mean_est_keys_{}.txt'.format(protocol_tag, setting_tag, label)
            )
            for num_keys, label in [(10, '1'), (100, '2'), (1000, '3'), (10000, '4')]
        ])

# --- Input validation (malicious protocol only) for 3, 4 and 5 servers.
for setting, setting_tag in [(Setting.LOCAL, 'local'), (Setting.EAST_WEST, 'ew'), (Setting.EAST_EUROPE, 'eu')]:
    for num_keys, label in [(10, '1'), (100, '2'), (1000, '3'), (10000, '4')]:
        all_experiments.extend(read_multiple_server_experiments(
            filename='Result_Logs_input_validation/malicious-shamir/{}_input_validation_{}.txt'.format(setting_tag, label),
            num_server_list=[3,4,5],
            protocol=Protocol.MALICIOUS,
            setting=setting,
            task=Task.INPUT_VALIDATION,
            num_keys=num_keys
        ))


In [6]:
def find(all_exp, a, b, c, d, e):
    """Return every experiment in `all_exp` that matches the given configuration.

    all_exp: iterable of objects exposing setting, num_servers, num_keys,
        task and protocol attributes.
    a: required `setting`.
    b: required `num_servers`.
    c: required `num_keys`.
    d: required `task`.
    e: required `protocol`.

    The result is a list in the order of `all_exp`; it is empty when nothing
    matches.
    """
    def is_match(exp):
        # Checks run in this order and stop at the first mismatch.
        return (
            exp.setting == a
            and exp.num_servers == b
            and exp.num_keys == c
            and exp.task == d
            and exp.protocol == e
        )

    return [exp for exp in all_exp if is_match(exp)]

In [29]:
# Mean-estimation running time with 5 servers for each key count: one output
# line per protocol, one "mean~(std)" cell per setting, every cell followed
# by " & ".
for nk in [10, 100, 1000, 10000]:
    print("number of keys={}".format(nk))
    for protocol in Protocol:
        for setting in Setting:
            found = find(all_experiments, setting, 5, nk, Task.MEAN, protocol)
            if not found:
                print("Not found!", end=' & ')
            elif len(found) > 1:
                print("Multiple Match!", end=' & ')
            else:
                # Exactly one experiment matches this configuration.
                print("%.2f~(%.2f)"%(found[0].time_stats.mean, found[0].time_stats.std), end=" & ")
        print()
    print()

number of keys=10
0.08~(0.00) & 2.58~(0.09) & 5.02~(0.08) & 
0.15~(0.00) & 3.10~(0.07) & 6.08~(0.43) & 

number of keys=100
0.58~(0.03) & 5.02~(0.25) & 9.43~(0.36) & 
1.34~(0.11) & 5.64~(0.37) & 10.64~(0.27) & 

number of keys=1000
2.38~(0.03) & 32.52~(0.87) & 63.58~(1.14) & 
3.59~(0.06) & 34.28~(0.94) & 65.86~(0.38) & 

number of keys=10000
20.00~(0.07) & 307.98~(8.09) & 602.35~(5.63) & 
26.64~(0.09) & 323.65~(0.23) & 627.30~(0.39) & 



In [30]:
# Mean-estimation running time (num_keys=1) for each server count: one output
# line per protocol, one "mean~(std)" cell per setting, every cell followed
# by " & ". A configuration without exactly one matching experiment is
# reported with a marker instead of numbers.
for num_servers in [3, 6, 10, 20]:
    print("number of servers={}".format(num_servers))
    for protocol in Protocol:
        for setting in Setting:
            found = find(all_experiments, setting, num_servers, 1, Task.MEAN, protocol)
            if not found:
                print("no match!!", end=" & ")
            elif len(found) > 1:
                print("multiple match!!", end=" & ")
            else:
                # Exactly one experiment matches this configuration.
                print("%.2f~(%.2f)"%(found[0].time_stats.mean, found[0].time_stats.std), end=" & ")
        print()
    print()

number of servers=3
0.04~(0.00) & 2.32~(0.02) & 4.50~(0.05) & 
0.19~(0.04) & 2.91~(0.06) & 5.66~(0.12) & 

number of servers=6
no match!! & 2.65~(0.15) & 4.80~(0.07) & 
0.59~(0.03) & 3.55~(0.11) & 6.38~(0.14) & 

number of servers=10
no match!! & 3.01~(0.12) & 5.85~(0.27) & 
1.12~(0.05) & 4.34~(0.18) & 7.53~(0.24) & 

number of servers=20
0.57~(0.06) & 3.18~(0.14) & 5.73~(0.22) & 
2.91~(0.09) & 6.68~(0.27) & 10.17~(0.60) & 



In [32]:
## Input Validation

# Input-validation running time (malicious protocol) with a fixed number of
# servers: one output line per key count, one "mean~(std)" cell per setting,
# every cell followed by " & ". A configuration without exactly one matching
# experiment is reported with a marker instead of numbers.
num_servers=3
for nk in [10, 100, 1000, 10000]:
    print("number of keys={}".format(nk))
    for setting in Setting:
        found = find(all_experiments, setting, num_servers, nk, Task.INPUT_VALIDATION, Protocol.MALICIOUS)
        if not found:
            print("no match!!", end=" & ")
        elif len(found) > 1:
            print("multiple match!!", end=" & ")
        else:
            # Exactly one experiment matches this configuration.
            print("%.2f~(%.2f)"%(found[0].time_stats.mean, found[0].time_stats.std), end=" & ")

    # First print() ends the row of cells, the second leaves a blank line.
    print()
    print()

number of keys=10
0.08~(0.00) & 1.01~(0.07) & 2.02~(0.13) & 

number of keys=100
0.80~(0.03) & 1.31~(0.06) & 2.38~(0.12) & 

number of keys=1000
0.73~(0.02) & 2.02~(0.09) & 3.75~(0.16) & 

number of keys=10000
0.86~(0.05) & 9.47~(0.15) & 18.34~(0.14) & 



In [33]:
# CSV dump of the LOCAL / MEAN / MALICIOUS experiments (num_keys=1): first the
# timing statistics, then the data-sent statistics, one row per server count.
# Every row holds five fields -- the server count followed by the four values
# that Stats.__str__ emits -- so the header names exactly those five columns
# (the rows carry no epsilon column).
for to_csv in (Experiment.csv_str_time, Experiment.csv_str_data):
    print("nodes, mean, lower, upper, std")
    for size in [3, 5, 7, 9, 12, 15, 17, 20, 22, 24, 26, 30]:
        # find() returns a list; [0] raises IndexError if no experiment matches.
        print(to_csv(find(all_experiments, Setting.LOCAL, size, 1, Task.MEAN, Protocol.MALICIOUS)[0]))

nodes, mean, lower, upper, std, epsilon
3, 0.187116, 0.179265, 0.194966, 0.039366
5, 0.561382, 0.556027, 0.566736, 0.026852
7, 0.835645, 0.828526, 0.842763, 0.035696
9, 1.088061, 1.081222, 1.094901, 0.034295
12, 1.395502, 1.381627, 1.409377, 0.069576
15, 1.983288, 1.966095, 2.000480, 0.086213
17, 2.363881, 2.345708, 2.382054, 0.091128
20, 2.906618, 2.887631, 2.925604, 0.094716
22, 3.388896, 3.369975, 3.407816, 0.094876
24, 3.983270, 3.947064, 4.019476, 0.181556
26, 4.658286, 4.633101, 4.683471, 0.126291
30, 6.360727, 6.324785, 6.396668, 0.180231
nodes, mean, lower, upper, std, epsilon
3, 10.577300, 10.577300, 10.577300, 0.000000
5, 36.111900, 36.111900, 36.111900, 0.000000
7, 80.121200, 80.121200, 80.121200, 0.000000
9, 146.384000, 146.384000, 146.384000, 0.000000
12, 261.061000, 261.061000, 261.061000, 0.000000
15, 516.926000, 516.926000, 516.926000, 0.000000
17, 710.490000, 710.490000, 710.490000, 0.000000
20, 996.294000, 996.294000, 996.294000, 0.000000
22, 1285.240000, 1285.240000,