In [1]:
#!/usr/bin/env python
__author__ = "Shweta Patwa, Danyu Sun"

import csv
import copy
import math
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import random
# import seaborn as sns; sns.set_theme(color_codes=True)
import sys
import textwrap
import time

pd.set_option("display.max_columns", None)
# pd.set_option("display.max_rows", None)

# plt.style.use('seaborn-deep')

# Plot histogram on $A_i$ in $D$ and $D_s$ for comparison

In [2]:
def plot_both_hist(df, df_D_s, A_i, dom_A_i):
    # https://stackoverflow.com/questions/6871201/plot-two-histograms-on-single-chart-with-matplotlib
    plt.figure(figsize=(15, 6))
    bins = np.linspace(start = 0, stop = max(max(df[A_i].tolist()), max(df_D_s[A_i].tolist())) + 1)
    plt.hist([df[A_i], df_D_s[A_i]], bins, label=['$D$', '$D_s$'])
    plt.legend(loc='best')
    plt.show()

# (Non-private) Functions to compute:
- Count query q
- Sum query q
- Median query q

In [3]:
# Selection predicate is conjunctive
def helper_apply_pred(data_df, q):
    df_for_q = data_df
    for k, v in q.items():
        for clause in v:
            ineq = clause[0]
            if ineq == '<':
                df_for_q = df_for_q[df_for_q[k] < clause[1]]
            elif ineq == '<=':
                df_for_q = df_for_q[df_for_q[k] <= clause[1]]
            elif ineq == '>':
                df_for_q = df_for_q[df_for_q[k] > clause[1]]
            elif ineq == '>=':
                df_for_q = df_for_q[df_for_q[k] >= clause[1]]
            elif ineq == '==':
                df_for_q = df_for_q[df_for_q[k] == clause[1]]
            elif ineq == '!=':
                df_for_q = df_for_q[df_for_q[k] != clause[1]]
            else:
                print("Check query!!!")
    return df_for_q

# For a counting query at a time (selection predicate is conjunctive)
def get_query_result(data_df, q):
    return helper_apply_pred(data_df, q).shape[0]

# For sum with selection  (selection predicate is conjunctive)
def get_sum(data_df, q, A_i):
    df_for_q = helper_apply_pred(data_df, q)
    return df_for_q[A_i].sum()

# Assume - median is the elem in A_i with rank m
def get_median(data_df, q, A_i):
    df_for_q = helper_apply_pred(data_df, q)
    m = math.ceil(df_for_q.shape[0]/2)
    return sorted(list(df_for_q[A_i]))[m - 1]

---
# Read $D$ and $D_s$
- $D$ is derived from the IPUMS-CPS data
- $D_s$ generated using PrivBayes from SDGym

In [4]:
# https://github.com/yuchaotao/Private-Explanation-System/blob/main/data/ipums/explore.ipynb
code_dict = {'RELATE': {101: 'Head/householder',
    201: 'Spouse',
    202: 'Opposite sex spouse',
    203: 'Same sex spouse',
    301: 'Child',
    303: 'Stepchild',
    501: 'Parent',
    701: 'Sibling',
    901: 'Grandchild',
    1001: 'Other relatives, n.s.',
    1113: 'Partner/roommate',
    1114: 'Unmarried partner',
    1116: 'Opposite sex unmarried partner',
    1117: 'Same sex unmarried partner',
    1115: 'Housemate/roomate',
    1241: 'Roomer/boarder/lodger',
    1242: 'Foster children',
    1260: 'Other nonrelatives',
    9100: 'Armed Forces, relationship unknown',
    9200: 'Age under 14, relationship unknown',
    9900: 'Relationship unknown',
    9999: 'NIU'},
    'SEX': {1: 'Male', 2: 'Female', 9: 'NIU'},
    'RACE': {100: 'White',
    200: 'Black',
    300: 'American Indian/Aleut/Eskimo',
    650: 'Asian or Pacific Islander',
    651: 'Asian only',
    652: 'Hawaiian/Pacific Islander only',
    700: 'Other (single) race, n.e.c.',
    801: 'White-Black',
    802: 'White-American Indian',
    803: 'White-Asian',
    804: 'White-Hawaiian/Pacific Islander',
    805: 'Black-American Indian',
    806: 'Black-Asian',
    807: 'Black-Hawaiian/Pacific Islander',
    808: 'American Indian-Asian',
    809: 'Asian-Hawaiian/Pacific Islander',
    810: 'White-Black-American Indian',
    811: 'White-Black-Asian',
    812: 'White-American Indian-Asian',
    813: 'White-Asian-Hawaiian/Pacific Islander',
    814: 'White-Black-American Indian-Asian',
    815: 'American Indian-Hawaiian/Pacific Islander',
    816: 'White-Black--Hawaiian/Pacific Islander',
    817: 'White-American Indian-Hawaiian/Pacific Islander',
    818: 'Black-American Indian-Asian',
    819: 'White-American Indian-Asian-Hawaiian/Pacific Islander',
    820: 'Two or three races, unspecified',
    830: 'Four or five races, unspecified',
    999: 'Blank'},
    'MARST': {1: 'Married, spouse present',
    2: 'Married, spouse absent',
    3: 'Separated',
    4: 'Divorced',
    5: 'Widowed',
    6: 'Never married/single',
    7: 'Widowed or Divorced',
    9: 'NIU'},
    'CITIZEN': {1: 'Born in U.S',
    2: 'Born in U.S. outlying',
    3: 'Born abroad of American parents',
    4: 'Naturalized citizen',
    5: 'Not a citizen',
    9: 'NIU'},
    'WORKLY': {0: 'NIU',
    1: 'No',
    2: 'Yes'},
    'CLASSWKR': {0: 'NIU',
    10: 'Self-employed',
    13: 'Self-employed, not incorporated',
    14: 'Self-employed, incorporated',
    20: 'Works for wages or salary',
    21: 'Wage/salary, private',
    22: 'Private, for profit',
    23: 'Private, nonprofit',
    24: 'Wage/salary, government',
    25: 'Federal government employee',
    26: 'Armed forces',
    27: 'State government employee',
    28: 'Local government employee',
    29: 'Unpaid family worker',
    99: 'Missing/Unknown'},
    'EDUC': {0: 'NIU or no schooling',
    1: 'NIU or blank',
    2: 'None or preschool',
    10: 'Grades 1, 2, 3, or 4',
    11: 'Grade 1',
    12: 'Grade 2',
    13: 'Grade 3',
    14: 'Grade 4',
    20: 'Grades 5 or 6',
    21: 'Grade 5',
    22: 'Grade 6',
    30: 'Grades 7 or 8',
    31: 'Grade 7',
    32: 'Grade 8',
    40: 'Grade 9',
    50: 'Grade 10',
    60: 'Grade 11',
    70: 'Grade 12',
    71: '12th grade, no diploma',
    72: '12th grade, diploma unclear',
    73: 'High school diploma or equivalent',
    80: '1 year of college',
    81: 'Some college but no degree',
    90: '2 years of college',
    91: "Associate's degree, occupational/vocational program",
    92: "Associate's degree, academic program",
    100: '3 years of college',
    110: '4 years of college',
    111: "Bachelor's degree",
    120: '5+ years of college',
    121: '5 years of college',
    122: '6+ years of college',
    123: "Master's degree",
    124: 'Professional school degree',
    125: 'Doctorate degree',
    999: 'Missing/Unknown'}
}

In [6]:
df = pd.read_csv("./2011_2019_D.csv")
col_names = list(df.columns)
print(col_names)

rel = list(code_dict['RELATE'].values())
age = [i for i in range(0, 80)] + [80, 85] # https://cps.ipums.org/cps-action/variables/AGE#codes_section
sex = list(code_dict['SEX'].values())
rac = list(code_dict['RACE'].values())
mar = list(code_dict['MARST'].values())
cit = list(code_dict['CITIZEN'].values())
wor = list(code_dict['WORKLY'].values())
cla = list(code_dict['CLASSWKR'].values())
edu = list(code_dict['EDUC'].values())
inc = [0, 500000] # https://cps.ipums.org/cps-action/variables/INCTOT#codes_section

df_D_s = pd.read_csv("./2011_2019_D_s.csv")
df_D_s['AGE'] = df_D_s['AGE'].astype(int)
df_D_s['INCTOT'] = df_D_s['INCTOT'].astype(int)

['RELATE', 'AGE', 'SEX', 'RACE', 'MARST', 'CITIZEN', 'CLASSWKR', 'EDUC', 'WORKLY', 'INCTOT']


---
# Variables  $\epsilon, \tau$
---

# Detect FP/FN

In [7]:
def FP_FN(re, q_D, q_D_s, tau):
    if (re == 0 and (q_D_s - tau < q_D and q_D < q_D_s + tau)):
        return 'FN'
    if (re == 1 and (q_D <= q_D_s - tau or q_D >= q_D_s + tau)):
        return 'FP'

# $LM_{count}$

In [8]:
def lmcount(q, df, df_D_s, tau, eps, f_handle):
    re = -1
    
    q_D = get_query_result(df, q)
    q_D_s = get_query_result(df_D_s, q)
    l = q_D_s - tau
    r = q_D_s + tau
    
    f_handle.write("(I = (%s, %s), Truth = %s, eps = %s) Algo returns:\n" 
          %(l, r, "Distance bound satisfied" if (l < q_D and q_D < r) else "Distance bound unmet", eps))
    # ----------------------------------------------------------------------------------------------------------------
    nu_q = np.random.laplace(scale = 1/eps)
    f_handle.write("\tDP estimate = %s + %s = %s\n" %(q_D, nu_q, q_D + nu_q))
    
    if -1*tau < q_D - q_D_s + nu_q and q_D - q_D_s + nu_q < tau:
        f_handle.write("Distance bound satisfied\n")
        re = 1
    else:
        f_handle.write("Distance bound unmet\n")
        re = 0
    return re, q_D, q_D_s

In [9]:
# Empirical error
def err_lmcount(q, df, df_D_s, tau, eps, f_handle):
    start_time = time.time()
    
    FN = 0
    FP = 0
    for i in range(100):
        re, q_D, q_D_s = lmcount(q, df, df_D_s, tau, eps, f_handle)
        tmp = FP_FN(re, q_D, q_D_s, tau)
        if tmp == 'FN':
            FN += 1
        if tmp == 'FP':
            FP += 1
    err = (FN + FP)/100
    f_handle.write("\n")
    
    print("---- %s seconds ----" % (time.time() - start_time))
    
    return err

# $EM_{count}$

In [10]:
def uprime(q_D, l, r, tau):
    q_D_s = (l + r)/2
    
    if (q_D <= l - tau or q_D >= r + tau):
        scr0 = 1
        scr1 = 0
    elif (q_D > l - tau and q_D <= q_D_s):
        scr1 = (q_D - (l - tau)) / (2 * tau)
        scr0 = 1 - scr1
    elif (q_D >= q_D_s and q_D < r + tau):
        scr0 = (q_D - q_D_s) / (2 * tau)
        scr1 = 1 - scr0
    return scr0, scr1

In [41]:
def emcount(q, df, df_D_s, tau, eps, f_handle):
    #print('tau = ', tau)
    re = -1
    
    q_D = get_query_result(df, q)
    q_D_s = get_query_result(df_D_s, q)
    l = q_D_s - tau
    r = q_D_s + tau
    
    f_handle.write("(I = (%s, %s), Truth = %s, eps = %s) Algo returns:\n" 
          %(l, r, "Distance bound satisfied" if (l < q_D and q_D < r) else "Distance bound unmet", eps))
    # ----------------------------------------------------------------------------------------------------------------
    scr0, scr1 = uprime(q_D, l, r, tau)
#     print('scr0', scr0)
#     print('scr1', scr1)
    pr0 = np.exp(eps*scr0*tau)                       # math overflow error possible
    pr1 = np.exp(eps*scr1*tau)                       # math overflow error possible
    #print(scr0, scr1, pr0, pr1, "\n")
    
    # https://stackoverflow.com/a/25512004
    p0 = pr0/(pr0 + pr1)
    if pd.isna(p0):
        p0 = 1
    p1 = pr1/(pr0 + pr1)
    if pd.isna(p1):
        p1 = 1
    
    #print('p0, p1', p0, p1)
    f_handle.write("\tPr[o = 0] = %.5f, Pr[o = 1] = %.5f\n" %(p0, p1))
    
    re = random.choices([0, 1], weights = [p0, p1])[0]
    if re == 0:
        f_handle.write("Distance bound unmet\n")
    else:
        f_handle.write("Distance bound satisfied\n")
    return re, q_D, q_D_s

In [12]:
# Empirical error
def err_emcount(q, df, df_D_s, tau, eps, f_handle):
    start_time = time.time()
    
    FN = 0
    FP = 0
    for i in range(100):
        re, q_D, q_D_s = emcount(q, df, df_D_s, tau, eps, f_handle)
        tmp = FP_FN(re, q_D, q_D_s, tau)
        if tmp == 'FN':
            FN += 1
        if tmp == 'FP':
            FP += 1
    err = (FN + FP)/100
    f_handle.write("\n")
    
    print("---- %s seconds ----" % (time.time() - start_time))
    
    return err

---
---
# 12 queries (3 small, 3 medium, 3 large, 3 very large)

In [13]:
# df3
q1 = {'SEX': [['==', 'Female']], 'RACE': [['==', 'White-American Indian-Asian']], 'WORKLY': [['==', 'Yes']]} # q(D) is 34
q6 = {'SEX': [['==', 'Female']], 'RACE': [['==', 'White-Black']], 'WORKLY': [['==', 'Yes']]} #1893
q7 = {'SEX': [['==', 'Female']], 'RACE': [['==', 'Asian only']], 'WORKLY': [['==', 'No']]}   #18693
q11 = {'SEX': [['==', 'Female']], 'RACE': [['==', 'White']], 'WORKLY': [['==', 'Yes']]} #320934

#df5
q3 = {'SEX': [['==', 'Male']], 'EDUC': [['==', "Doctorate degree"]], 'MARST': [['==', 'Separated']]} #87
q5 = {'SEX': [['==', 'Female']], 'EDUC': [['==', "Grades 5 or 6"]], 'MARST': [['==', 'Never married/single']]} #1560
q8 = {'SEX': [['==', 'Female']], 'EDUC': [['==', "Master's degree"]], 'MARST': [['==', 'Married, spouse present']]} # 36756
q10 = {'SEX': [['==', 'Female']], 'EDUC': [['==', "Bachelor's degree"]], 'MARST': [['==', 'Married, spouse present']]} #79723

# df7
q2 = {'RACE': [['==', 'White-Black']], 'MARST': [['==', "Never married/single"]], 'CITIZEN': [['==', 'Naturalized citizen']]} # 54
q4 = {'RACE': [['==', 'White-American Indian']], 'MARST': [['==', "Widowed"]], 'CITIZEN': [['==', 'Born in U.S']]} # 624
q9 = {'RACE': [['==', 'Black']], 'MARST': [['==', "Married, spouse present"]], 'CITIZEN': [['==', 'Born in U.S']]} # 41691
q12 = {'RACE': [['==', 'White']], 'MARST': [['==', "Married, spouse present"]], 'CITIZEN': [['==', 'Born in U.S']]} # 471994

queries = [q1, q2, q3, q4, q5, q6, q7, q8, q9, q10, q11, q12]

In [14]:
# For debugging only
for i in queries:
    print(get_query_result(df, i))

34
54
87
624
1560
1893
18693
36756
41691
79723
320934
471994


# Experiments

In [17]:
tau_frac = [0.002, 0.008, 0.032, 0.128, 0.512]   
default_eps = 0.25

eps = [0.0625, 0.125, 0.25, 0.5, 1]   
default_tau = 0.032

delta = 0.05

In [21]:
lst_LM_vary_tau = [[], [], [], [], [], [], [], [], [], [], [], []]
lst_LM_vary_eps = [[], [], [], [], [], [], [], [], [], [], [], []]

lst_EM_vary_tau = [[], [], [], [], [], [], [], [], [], [], [], []]
lst_EM_vary_eps = [[], [], [], [], [], [], [], [], [], [], [], []]

# LM---vary tau

In [23]:
f_handle = open('count_LM_vary_tau.txt', 'w')
num = 0
for i in queries:
    f_handle.write("%s\n" %(i))
    q_D_s = get_query_result(df_D_s, i)
    lst_LM_vary_tau[num].append(err_lmcount(i, df, df_D_s, tau_frac[0]*q_D_s, default_eps, f_handle))
    lst_LM_vary_tau[num].append(err_lmcount(i, df, df_D_s, tau_frac[1]*q_D_s, default_eps, f_handle))
    lst_LM_vary_tau[num].append(err_lmcount(i, df, df_D_s, tau_frac[2]*q_D_s, default_eps, f_handle))
    lst_LM_vary_tau[num].append(err_lmcount(i, df, df_D_s, tau_frac[3]*q_D_s, default_eps, f_handle))
    lst_LM_vary_tau[num].append(err_lmcount(i, df, df_D_s, tau_frac[4]*q_D_s, default_eps, f_handle))
    print()
    num = num + 1
f_handle.close()

---- 29.5660138130188 seconds ----
---- 29.55412793159485 seconds ----
---- 29.43956971168518 seconds ----
---- 29.639163970947266 seconds ----
---- 29.573652029037476 seconds ----

---- 11.968151092529297 seconds ----
---- 11.972915172576904 seconds ----
---- 11.97863507270813 seconds ----
---- 12.015078783035278 seconds ----
---- 12.037144184112549 seconds ----

---- 29.29737114906311 seconds ----
---- 29.222767114639282 seconds ----
---- 29.21766185760498 seconds ----
---- 29.283878087997437 seconds ----
---- 29.20104193687439 seconds ----

---- 12.165225982666016 seconds ----
---- 12.261629104614258 seconds ----
---- 12.163660049438477 seconds ----
---- 12.174993991851807 seconds ----
---- 12.187263011932373 seconds ----

---- 30.948325872421265 seconds ----
---- 30.679779052734375 seconds ----
---- 30.594241857528687 seconds ----
---- 30.557883977890015 seconds ----
---- 30.576164960861206 seconds ----

---- 30.026082038879395 seconds ----
---- 30.137118101119995 seconds ----
----

In [25]:
print(lst_LM_vary_tau)

[[0.98, 0.96, 0.74, 0.39, 0.01], [0.99, 0.91, 0.63, 0.2, 0.0], [0.0, 0.08, 0.26, 0.09, 0.0], [0.73, 0.32, 0.0, 0.0, 0.0], [0.0, 0.0, 0.08, 0.0, 0.0], [0.13, 0.09, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0, 0.0]]


# LM---vary eps

In [24]:
f_handle = open('count_LM_vary_eps.txt', 'w')
num = 0
for i in queries:
    f_handle.write("%s\n" %(i))
    q_D_s = get_query_result(df_D_s, i)
    lst_LM_vary_eps[num].append(err_lmcount(i, df, df_D_s, default_tau*q_D_s, eps[0], f_handle))
    lst_LM_vary_eps[num].append(err_lmcount(i, df, df_D_s, default_tau*q_D_s, eps[1], f_handle))
    lst_LM_vary_eps[num].append(err_lmcount(i, df, df_D_s, default_tau*q_D_s, eps[2], f_handle))
    lst_LM_vary_eps[num].append(err_lmcount(i, df, df_D_s, default_tau*q_D_s, eps[3], f_handle))
    lst_LM_vary_eps[num].append(err_lmcount(i, df, df_D_s, default_tau*q_D_s, eps[4], f_handle))
    print()
    num = num + 1
f_handle.close()

---- 30.425354957580566 seconds ----
---- 30.379791736602783 seconds ----
---- 30.9950909614563 seconds ----
---- 30.58956217765808 seconds ----
---- 30.66997504234314 seconds ----

---- 12.30012321472168 seconds ----
---- 12.355924129486084 seconds ----
---- 12.312242984771729 seconds ----
---- 12.312663078308105 seconds ----
---- 12.275341749191284 seconds ----

---- 30.058440923690796 seconds ----
---- 30.067426919937134 seconds ----
---- 29.956346035003662 seconds ----
---- 30.04486608505249 seconds ----
---- 29.908710718154907 seconds ----

---- 12.513329029083252 seconds ----
---- 12.455528974533081 seconds ----
---- 12.582563161849976 seconds ----
---- 12.429698944091797 seconds ----
---- 12.549763202667236 seconds ----

---- 31.276653051376343 seconds ----
---- 31.285094261169434 seconds ----
---- 31.418808221817017 seconds ----
---- 31.424975156784058 seconds ----
---- 31.424768924713135 seconds ----

---- 30.71303105354309 seconds ----
---- 30.75643014907837 seconds ----
----

In [26]:
print(lst_LM_vary_eps)

[[0.92, 0.86, 0.78, 0.53, 0.3], [0.86, 0.71, 0.7, 0.43, 0.23], [0.15, 0.28, 0.29, 0.3, 0.15], [0.25, 0.1, 0.0, 0.0, 0.0], [0.4, 0.3, 0.11, 0.03, 0.0], [0.04, 0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0, 0.0]]


# EM---vary tau

In [27]:
f_handle = open('count_EM_vary_tau.txt', 'w')
num = 0
for i in queries:
    f_handle.write("%s\n" %(i))
    q_D_s = get_query_result(df_D_s, i)
    lst_EM_vary_tau[num].append(err_emcount(i, df, df_D_s, tau_frac[0]*q_D_s, default_eps, f_handle))
    lst_EM_vary_tau[num].append(err_emcount(i, df, df_D_s, tau_frac[1]*q_D_s, default_eps, f_handle))
    lst_EM_vary_tau[num].append(err_emcount(i, df, df_D_s, tau_frac[2]*q_D_s, default_eps, f_handle))
    lst_EM_vary_tau[num].append(err_emcount(i, df, df_D_s, tau_frac[3]*q_D_s, default_eps, f_handle))
    lst_EM_vary_tau[num].append(err_emcount(i, df, df_D_s, tau_frac[4]*q_D_s, default_eps, f_handle))
    print()
    num = num + 1
f_handle.close()
print(lst_EM_vary_tau)

---- 30.413900136947632 seconds ----
---- 30.40568208694458 seconds ----
---- 30.50230884552002 seconds ----
---- 30.613609075546265 seconds ----
---- 30.46838903427124 seconds ----

---- 12.289787292480469 seconds ----
---- 12.261216878890991 seconds ----
---- 12.307394981384277 seconds ----
---- 12.356078147888184 seconds ----
---- 12.309861898422241 seconds ----

---- 30.041593074798584 seconds ----
---- 29.99017596244812 seconds ----
---- 30.017870903015137 seconds ----
---- 29.981750965118408 seconds ----
---- 30.058231115341187 seconds ----

---- 12.561118125915527 seconds ----
---- 12.527657985687256 seconds ----
---- 12.558181285858154 seconds ----
---- 12.503659963607788 seconds ----
---- 12.571141004562378 seconds ----

---- 31.381099224090576 seconds ----
---- 32.59813404083252 seconds ----
---- 33.69029402732849 seconds ----
---- 32.909822940826416 seconds ----
---- 31.949883937835693 seconds ----

---- 30.882697820663452 seconds ----
---- 30.826816082000732 seconds ----
--

  pr1 = np.exp(eps*scr1*tau)                       # math overflow error possible
  p1 = pr1/(pr0 + pr1)


---- 32.45803499221802 seconds ----

---- 33.64285182952881 seconds ----
---- 33.419920682907104 seconds ----
---- 33.51326107978821 seconds ----
---- 33.32285284996033 seconds ----
---- 33.03288292884827 seconds ----

---- 19.49807119369507 seconds ----
---- 19.475538969039917 seconds ----
---- 19.474766969680786 seconds ----
---- 19.473047018051147 seconds ----
---- 19.537797927856445 seconds ----

---- 36.0555100440979 seconds ----
---- 36.04280400276184 seconds ----
---- 36.06537866592407 seconds ----
---- 36.19998598098755 seconds ----
---- 36.07975196838379 seconds ----

---- 49.42963695526123 seconds ----
---- 49.36865997314453 seconds ----
---- 49.333359241485596 seconds ----
---- 49.58110213279724 seconds ----
---- 50.11113405227661 seconds ----

---- 61.202412843704224 seconds ----
---- 60.835232973098755 seconds ----
---- 60.90174984931946 seconds ----
---- 60.53388500213623 seconds ----
---- 59.2073609828949 seconds ----

[[0.51, 0.47, 0.44, 0.18, 0.0], [0.61, 0.47, 0.33, 0

# EM---vary eps

In [28]:
f_handle = open('count_EM_vary_eps.txt', 'w')
num = 0
for i in queries:
    f_handle.write("%s\n" %(i))
    q_D_s = get_query_result(df_D_s, i)
    lst_EM_vary_eps[num].append(err_emcount(i, df, df_D_s, default_tau*q_D_s, eps[0], f_handle))
    lst_EM_vary_eps[num].append(err_emcount(i, df, df_D_s, default_tau*q_D_s, eps[1], f_handle))
    lst_EM_vary_eps[num].append(err_emcount(i, df, df_D_s, default_tau*q_D_s, eps[2], f_handle))
    lst_EM_vary_eps[num].append(err_emcount(i, df, df_D_s, default_tau*q_D_s, eps[3], f_handle))
    lst_EM_vary_eps[num].append(err_emcount(i, df, df_D_s, default_tau*q_D_s, eps[4], f_handle))
    print()
    num = num + 1
f_handle.close()
print(lst_EM_vary_eps)

---- 29.97802972793579 seconds ----
---- 29.934284210205078 seconds ----
---- 29.857176065444946 seconds ----
---- 29.92991805076599 seconds ----
---- 30.12750506401062 seconds ----

---- 12.39773416519165 seconds ----
---- 12.43544602394104 seconds ----
---- 12.194863319396973 seconds ----
---- 12.175012111663818 seconds ----
---- 12.18780779838562 seconds ----

---- 30.83283829689026 seconds ----
---- 31.437389850616455 seconds ----
---- 30.49139714241028 seconds ----
---- 31.200613021850586 seconds ----
---- 31.034878969192505 seconds ----

---- 13.04771375656128 seconds ----
---- 12.976233959197998 seconds ----
---- 12.988255023956299 seconds ----
---- 12.863420248031616 seconds ----
---- 12.84258508682251 seconds ----

---- 32.20544910430908 seconds ----
---- 32.236549854278564 seconds ----
---- 32.333300828933716 seconds ----
---- 32.26023077964783 seconds ----
---- 32.71673798561096 seconds ----

---- 31.778605222702026 seconds ----
---- 32.13096499443054 seconds ----
---- 31.79

  pr1 = np.exp(eps*scr1*tau)                       # math overflow error possible
  p1 = pr1/(pr0 + pr1)


---- 34.33562994003296 seconds ----

---- 20.356059789657593 seconds ----
---- 20.441838026046753 seconds ----
---- 20.135108947753906 seconds ----
---- 20.14837884902954 seconds ----
---- 20.143885850906372 seconds ----

---- 37.230945110321045 seconds ----
---- 37.24524402618408 seconds ----
---- 37.250020027160645 seconds ----
---- 37.23586988449097 seconds ----
---- 37.23193407058716 seconds ----

---- 51.207175731658936 seconds ----
---- 50.99933099746704 seconds ----
---- 51.02075695991516 seconds ----
---- 51.00845694541931 seconds ----
---- 51.02211785316467 seconds ----

---- 62.393113136291504 seconds ----
---- 62.45693373680115 seconds ----
---- 62.22824501991272 seconds ----
---- 62.49173665046692 seconds ----


  pr0 = np.exp(eps*scr0*tau)                       # math overflow error possible
  p0 = pr0/(pr0 + pr1)


---- 63.2917640209198 seconds ----

[[0.52, 0.49, 0.45, 0.42, 0.27], [0.47, 0.45, 0.4, 0.39, 0.14], [0.43, 0.38, 0.51, 0.41, 0.21], [0.2, 0.11, 0.0, 0.0, 0.0], [0.4, 0.28, 0.23, 0.08, 0.01], [0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0, 0.57]]
