In [1]:
#!/usr/bin/env python
__author__ = "Shweta Patwa, Danyu Sun"

import csv
import copy
import math
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import random
# import seaborn as sns; sns.set_theme(color_codes=True)
import sys
import textwrap
import time

pd.set_option("display.max_columns", None)
# pd.set_option("display.max_rows", None)

# plt.style.use('seaborn-deep')

# Plot histogram on $A_i$ in $D$ and $D_s$ for comparison

In [2]:
def plot_both_hist(df, df_D_s, A_i, dom_A_i):
    # https://stackoverflow.com/questions/6871201/plot-two-histograms-on-single-chart-with-matplotlib
    plt.figure(figsize=(15, 6))
    bins = np.linspace(start = 0, stop = max(max(df[A_i].tolist()), max(df_D_s[A_i].tolist())) + 1)
    plt.hist([df[A_i], df_D_s[A_i]], bins, label=['$D$', '$D_s$'])
    plt.legend(loc='best')
    plt.show()

# (Non-private) Functions to compute:
- Count query q
- Sum query q
- Median query q

In [3]:
# Selection predicate is conjunctive
def helper_apply_pred(data_df, q):
    df_for_q = data_df
    for k, v in q.items():
        for clause in v:
            ineq = clause[0]
            if ineq == '<':
                df_for_q = df_for_q[df_for_q[k] < clause[1]]
            elif ineq == '<=':
                df_for_q = df_for_q[df_for_q[k] <= clause[1]]
            elif ineq == '>':
                df_for_q = df_for_q[df_for_q[k] > clause[1]]
            elif ineq == '>=':
                df_for_q = df_for_q[df_for_q[k] >= clause[1]]
            elif ineq == '==':
                df_for_q = df_for_q[df_for_q[k] == clause[1]]
            elif ineq == '!=':
                df_for_q = df_for_q[df_for_q[k] != clause[1]]
            else:
                print("Check query!!!")
    return df_for_q

# For a counting query at a time (selection predicate is conjunctive)
def get_query_result(data_df, q):
    return helper_apply_pred(data_df, q).shape[0]

# For sum with selection  (selection predicate is conjunctive)
def get_sum(data_df, q, A_i):
    df_for_q = helper_apply_pred(data_df, q)
    return df_for_q[A_i].sum()

# Assume - median is the elem in A_i with rank m
def get_median(data_df, q, A_i):
    df_for_q = helper_apply_pred(data_df, q)
    m = math.ceil(df_for_q.shape[0]/2)
    return sorted(list(df_for_q[A_i]))[m - 1]

---
# Read $D$ and $D_s$
- $D$ is derived from the IPUMS-CPS data
- $D_s$ generated using PrivBayes from SDGym

In [4]:
# https://github.com/yuchaotao/Private-Explanation-System/blob/main/data/ipums/explore.ipynb
code_dict = {'RELATE': {101: 'Head/householder',
    201: 'Spouse',
    202: 'Opposite sex spouse',
    203: 'Same sex spouse',
    301: 'Child',
    303: 'Stepchild',
    501: 'Parent',
    701: 'Sibling',
    901: 'Grandchild',
    1001: 'Other relatives, n.s.',
    1113: 'Partner/roommate',
    1114: 'Unmarried partner',
    1116: 'Opposite sex unmarried partner',
    1117: 'Same sex unmarried partner',
    1115: 'Housemate/roomate',
    1241: 'Roomer/boarder/lodger',
    1242: 'Foster children',
    1260: 'Other nonrelatives',
    9100: 'Armed Forces, relationship unknown',
    9200: 'Age under 14, relationship unknown',
    9900: 'Relationship unknown',
    9999: 'NIU'},
    'SEX': {1: 'Male', 2: 'Female', 9: 'NIU'},
    'RACE': {100: 'White',
    200: 'Black',
    300: 'American Indian/Aleut/Eskimo',
    650: 'Asian or Pacific Islander',
    651: 'Asian only',
    652: 'Hawaiian/Pacific Islander only',
    700: 'Other (single) race, n.e.c.',
    801: 'White-Black',
    802: 'White-American Indian',
    803: 'White-Asian',
    804: 'White-Hawaiian/Pacific Islander',
    805: 'Black-American Indian',
    806: 'Black-Asian',
    807: 'Black-Hawaiian/Pacific Islander',
    808: 'American Indian-Asian',
    809: 'Asian-Hawaiian/Pacific Islander',
    810: 'White-Black-American Indian',
    811: 'White-Black-Asian',
    812: 'White-American Indian-Asian',
    813: 'White-Asian-Hawaiian/Pacific Islander',
    814: 'White-Black-American Indian-Asian',
    815: 'American Indian-Hawaiian/Pacific Islander',
    816: 'White-Black--Hawaiian/Pacific Islander',
    817: 'White-American Indian-Hawaiian/Pacific Islander',
    818: 'Black-American Indian-Asian',
    819: 'White-American Indian-Asian-Hawaiian/Pacific Islander',
    820: 'Two or three races, unspecified',
    830: 'Four or five races, unspecified',
    999: 'Blank'},
    'MARST': {1: 'Married, spouse present',
    2: 'Married, spouse absent',
    3: 'Separated',
    4: 'Divorced',
    5: 'Widowed',
    6: 'Never married/single',
    7: 'Widowed or Divorced',
    9: 'NIU'},
    'CITIZEN': {1: 'Born in U.S',
    2: 'Born in U.S. outlying',
    3: 'Born abroad of American parents',
    4: 'Naturalized citizen',
    5: 'Not a citizen',
    9: 'NIU'},
    'WORKLY': {0: 'NIU',
    1: 'No',
    2: 'Yes'},
    'CLASSWKR': {0: 'NIU',
    10: 'Self-employed',
    13: 'Self-employed, not incorporated',
    14: 'Self-employed, incorporated',
    20: 'Works for wages or salary',
    21: 'Wage/salary, private',
    22: 'Private, for profit',
    23: 'Private, nonprofit',
    24: 'Wage/salary, government',
    25: 'Federal government employee',
    26: 'Armed forces',
    27: 'State government employee',
    28: 'Local government employee',
    29: 'Unpaid family worker',
    99: 'Missing/Unknown'},
    'EDUC': {0: 'NIU or no schooling',
    1: 'NIU or blank',
    2: 'None or preschool',
    10: 'Grades 1, 2, 3, or 4',
    11: 'Grade 1',
    12: 'Grade 2',
    13: 'Grade 3',
    14: 'Grade 4',
    20: 'Grades 5 or 6',
    21: 'Grade 5',
    22: 'Grade 6',
    30: 'Grades 7 or 8',
    31: 'Grade 7',
    32: 'Grade 8',
    40: 'Grade 9',
    50: 'Grade 10',
    60: 'Grade 11',
    70: 'Grade 12',
    71: '12th grade, no diploma',
    72: '12th grade, diploma unclear',
    73: 'High school diploma or equivalent',
    80: '1 year of college',
    81: 'Some college but no degree',
    90: '2 years of college',
    91: "Associate's degree, occupational/vocational program",
    92: "Associate's degree, academic program",
    100: '3 years of college',
    110: '4 years of college',
    111: "Bachelor's degree",
    120: '5+ years of college',
    121: '5 years of college',
    122: '6+ years of college',
    123: "Master's degree",
    124: 'Professional school degree',
    125: 'Doctorate degree',
    999: 'Missing/Unknown'}
}

In [5]:
df = pd.read_csv("./2011_2019_D.csv")
col_names = list(df.columns)
print(col_names)

rel = list(code_dict['RELATE'].values())
age = [i for i in range(0, 80)] + [80, 85] # https://cps.ipums.org/cps-action/variables/AGE#codes_section
sex = list(code_dict['SEX'].values())
rac = list(code_dict['RACE'].values())
mar = list(code_dict['MARST'].values())
cit = list(code_dict['CITIZEN'].values())
wor = list(code_dict['WORKLY'].values())
cla = list(code_dict['CLASSWKR'].values())
edu = list(code_dict['EDUC'].values())
inc = [0, 500000] # https://cps.ipums.org/cps-action/variables/INCTOT#codes_section

df_D_s = pd.read_csv("./2011_2019_D_s.csv")
df_D_s['AGE'] = df_D_s['AGE'].astype(int)
df_D_s['INCTOT'] = df_D_s['INCTOT'].astype(int)

['RELATE', 'AGE', 'SEX', 'RACE', 'MARST', 'CITIZEN', 'CLASSWKR', 'EDUC', 'WORKLY', 'INCTOT']


---
# Variables $\epsilon, \tau$
---

# Detect FP/FN

In [6]:
def FP_FN(re, q_D, q_D_s, tau):
    if (re == 0 and (q_D_s - tau < q_D and q_D < q_D_s + tau)):
        return 'FN'
    if (re == 1 and (q_D <= q_D_s - tau or q_D >= q_D_s + tau)):
        return 'FP'

# $EM_{med}$

In [7]:
def helper_EM_pr(q, A_i, dom_A_i, df, eps, f_handle):
    n = get_query_result(df, q)
    
    rank = {}
    score = {}
    prob = {}
    for i in dom_A_i:
        new_conjunct = copy.deepcopy(q)
        if A_i in new_conjunct:
            new_conjunct[A_i].append(['<', i])
        else:
            new_conjunct[A_i] = [['<', i]]
#         print("(EM Pr) %s" %(new_conjunct))
        
        rank[i] = get_query_result(df, new_conjunct)
        score[i] = -1*abs(rank[i] - n/2)
        prob[i] = np.exp(eps*score[i]/2)
#         print("%s: %s\t %s\t %s" %(i, rank[i], score[i], prob[i]))
    
    lst = list(prob.keys())
    pr = list(prob.values())
    tot = sum(pr)
    pr = [pr[i]/tot for i in range(len(pr))]
    
#     for i in range(len(lst)):
#         elem = lst[i]
#         f_handle.write("%s\t\t%s\t\t%s\t\t%.10f\n" %(elem, rank[elem], score[elem], pr[i]))
#     f_handle.write("\n")
    return lst, pr

In [8]:
def emmed(q, A_i, dom_A_i, df, df_D_s, tau, eps, lst, pr, f_handle):
    re = -1
    
    q_D = get_median(df, q, A_i)
    q_D_s = get_median(df_D_s, q, A_i)
    l = q_D_s - tau
    r = q_D_s + tau
    
    f_handle.write("(I = (%s, %s), q(D) = %s, Truth = %s, eps = %s) Algo returns:\n" 
          %(l, r, q_D, "Distance bound satisfied" if (l < q_D and q_D < r) else "Distance bound unmet", eps))
    # ----------------------------------------------------------------------------------------------------------------
    ans = random.choices(lst, weights = pr)[0]
    f_handle.write("\tDP estimate = %s\n" %(ans))
    
    re = (l < ans and ans < r)
    if re == 0:
        f_handle.write("Distance bound unmet\n")
    else:
        f_handle.write("Distance bound satisfied\n")
    return re, q_D, q_D_s

In [9]:
def err_emmed(q, A_i, dom_A_i, df, df_D_s, tau, eps, f_handle):
    start_time = time.time()
    
    lst, pr = helper_EM_pr(q, A_i, dom_A_i, df, eps, f_handle)
    
    FN = 0
    FP = 0
    for i in range(100):
        re, q_D, q_D_s = emmed(q, A_i, dom_A_i, df, df_D_s, tau, eps, lst, pr, f_handle)
        tmp = FP_FN(re, q_D, q_D_s, tau)
        if tmp == 'FN':
            FN += 1
        if tmp == 'FP':
            FP += 1
    err = (FN + FP)/100
    f_handle.write("\n")
    
    print("---- %s seconds ----" % (time.time() - start_time))
    
    return err

# $Hist_{med}$

In [10]:
def histmed(q, A_i, df, df_D_s, tau, eps, f_handle):
    re = -1
    
    q_D = get_median(df, q, A_i)
    q_D_s = get_median(df_D_s, q, A_i)
    l = q_D_s - tau
    r = q_D_s + tau
    
    nu_q = np.random.laplace(scale = 1/(eps/2))
    m = math.ceil((get_query_result(df, q) + nu_q)/2)
    
    f_handle.write("(I = (%s, %s), q(D) = %s, Truth = %s, eps = %s) Algo returns:\n" 
          %(l, r, q_D, "Distance bound satisfied" if (l < q_D and q_D < r) else "Distance bound unmet", eps))
    # ----------------------------------------------------------------------------------------------------------------
    new_conjunct = copy.deepcopy(q)
    if A_i in new_conjunct:
        new_conjunct[A_i].append(['<=', l])
    else:
        new_conjunct[A_i] = [['<=', l]]
#     print("(Hist) %s" %(new_conjunct))
    q1 = get_query_result(df, new_conjunct)
    nu_q1 = np.random.laplace(scale = 1/(eps/2))
    f_handle.write("\tDP estimate q1 = %s + %s = %s\t\t vs %s\n" %(q1, nu_q1, q1 + nu_q1, m))
    
    new_conjunct = {}
    new_conjunct = copy.deepcopy(q)
    if A_i in new_conjunct:
        new_conjunct[A_i].append(['>=', r])
    else:
        new_conjunct[A_i] = [['>=', r]]
#     print("(Hist) %s\n" %(new_conjunct))
    q2 = get_query_result(df, new_conjunct)
    nu_q2 = np.random.laplace(scale = 1/(eps/2))
    f_handle.write("\tDP estimate q2 = %s + %s = %s\t\t vs %s\n" %(q2, nu_q2, q2 + nu_q2, m))

    if q1 + nu_q1 >= m:
        f_handle.write("Distance bound unmet\n")
        re = 0
    elif q2 + nu_q2 >=  m:
        f_handle.write("Distance bound unmet\n")
        re = 0
    else:
        f_handle.write("Distance bound satisfied\n")
        re = 1
    return re, q_D, q_D_s

In [11]:
def err_histmed(q, A_i, df, df_D_s, tau, eps, f_handle):
    start_time = time.time()
    
    FN = 0
    FP = 0
    for i in range(100):
        re, q_D, q_D_s = histmed(q, A_i, df, df_D_s, tau, eps, f_handle)
        tmp = FP_FN(re, q_D, q_D_s, tau)
        if tmp == 'FN':
            FN += 1
        if tmp == 'FP':
            FP += 1
    err = (FN + FP)/100
    f_handle.write("\n")
    
    print("---- %s seconds ----" % (time.time() - start_time))
    
    return err

---
---
# 9 queries

In [12]:
q1 = {'WORKLY': [['==', 'Yes']], 'CLASSWKR': [['==', 'Wage/salary, private']], 'EDUC': [['==', "Bachelor's degree"]]}  #60
q2 = {'SEX': [['==', 'Male']], 'RACE': [['==', 'White-Black']], 'RELATE': [['==', 'Spouse']]}
q3 = {'SEX': [['==', 'Male']], 'RACE': [['==', 'Black']], 'CLASSWKR': [['==', 'Wage/salary, private']]}
q4 = {'SEX': [['==', 'Female']], 'RACE': [['==', 'Black']], 'WORKLY': [['==', 'Yes']]}
q5 = {'SEX': [['==', 'Female']], 'EDUC': [['==', "Some college but no degree"]], 'MARST': [['==', 'Married, spouse absent']]}
q6 = {'SEX': [['==', 'Male']], 'CITIZEN': [['==', "Born in U.S"]], 'WORKLY': [['==', 'Yes']]}
q7 = {'RACE': [['==', 'Asian only']], 'MARST': [['==', "Separated"]], 'CITIZEN': [['==', 'Born in U.S']]}
q8 = {'SEX': [['==', 'Male']], 'RACE': [['==', "White"]], 'CLASSWKR': [['==', "Wage/salary, private"]]}
q9 = {'WORKLY': [['==', 'Yes']], 'CLASSWKR': [['==', "Armed forces"]], 'EDUC': [['==', "Doctorate degree"]]}

queries =  [q1, q2, q3, q4, q5, q6, q7, q8, q9]

In [13]:
tau_frac = [0.002, 0.008, 0.032, 0.128, 0.512]
mydata = []
for i in range(len(queries)):
    tmp = []
    q_D = get_median(df, queries[i], 'AGE')
    q_Ds = get_median(df_D_s, queries[i], 'AGE')
    n_p = get_query_result(df, queries[i])
    tmp.append(q_D)
    tmp.append(q_Ds)
    tmp.append(n_p)
    str_ = ''
    for j in tau_frac:
        str_ = str(j*q_Ds) + '_('+ str(round(q_Ds - j*q_Ds, 3)) + ',' + str(round(q_Ds + j*q_Ds, 3)) + ')'
        tmp.append(str_)
    mydata.append(tmp)

mydata = pd.DataFrame(mydata, index =['q1', 'q2', 'q3', 'q4', 'q5', 'q6', 'q7', 'q8', 'q9'],
                              columns =['q(D)', 'q(D_s)','n_prime', 0.002, 0.008, 0.032, 0.128, 0.512])
mydata

Unnamed: 0,q(D),q(D_s),n_prime,0.002,0.008,0.032,0.128,0.512
q1,41,41,126890,"0.082_(40.918,41.082)","0.328_(40.672,41.328)","1.312_(39.688,42.312)","5.248_(35.752,46.248)","20.992_(20.008,61.992)"
q2,40,40,253,"0.08_(39.92,40.08)","0.32_(39.68,40.32)","1.28_(38.72,41.28)","5.12_(34.88,45.12)","20.48_(19.52,60.48)"
q3,40,40,33239,"0.08_(39.92,40.08)","0.32_(39.68,40.32)","1.28_(38.72,41.28)","5.12_(34.88,45.12)","20.48_(19.52,60.48)"
q4,41,41,51352,"0.082_(40.918,41.082)","0.328_(40.672,41.328)","1.312_(39.688,42.312)","5.248_(35.752,46.248)","20.992_(20.008,61.992)"
q5,40,40,1653,"0.08_(39.92,40.08)","0.32_(39.68,40.32)","1.28_(38.72,41.28)","5.12_(34.88,45.12)","20.48_(19.52,60.48)"
q6,41,41,358766,"0.082_(40.918,41.082)","0.328_(40.672,41.328)","1.312_(39.688,42.312)","5.248_(35.752,46.248)","20.992_(20.008,61.992)"
q7,39,39,195,"0.078_(38.922,39.078)","0.312_(38.688,39.312)","1.248_(37.752,40.248)","4.992_(34.008,43.992)","19.968_(19.032,58.968)"
q8,40,40,267261,"0.08_(39.92,40.08)","0.32_(39.68,40.32)","1.28_(38.72,41.28)","5.12_(34.88,45.12)","20.48_(19.52,60.48)"
q9,40,40,87,"0.08_(39.92,40.08)","0.32_(39.68,40.32)","1.28_(38.72,41.28)","5.12_(34.88,45.12)","20.48_(19.52,60.48)"


# Experiments

In [15]:
tau_frac = [0.002, 0.008, 0.032, 0.128, 0.512]   
default_eps = 0.25

eps = [0.0625, 0.125, 0.25, 0.5, 1]   
default_tau = 0.032

delta = 0.05

In [16]:
lst_EM_vary_tau = [[], [], [], [], [], [], [], [], []]
lst_EM_vary_eps = [[], [], [], [], [], [], [], [], []]

lst_Hist_vary_tau = [[], [], [], [], [], [], [], [], []]
lst_Hist_vary_eps = [[], [], [], [], [], [], [], [], []]

# EM---vary tau

In [17]:
f_handle = open('median_EM_vary_tau.txt', 'w')
num = 0
for i in queries:
    f_handle.write("%s\n" %(i))
    q_D_s = get_median(df_D_s, i, 'AGE')
    lst_EM_vary_tau[num].append(err_emmed(i, 'AGE', age, df, df_D_s, tau_frac[0]*q_D_s, default_eps, f_handle))
    lst_EM_vary_tau[num].append(err_emmed(i, 'AGE', age, df, df_D_s, tau_frac[1]*q_D_s, default_eps, f_handle))
    lst_EM_vary_tau[num].append(err_emmed(i, 'AGE', age, df, df_D_s, tau_frac[2]*q_D_s, default_eps, f_handle))
    lst_EM_vary_tau[num].append(err_emmed(i, 'AGE', age, df, df_D_s, tau_frac[3]*q_D_s, default_eps, f_handle))
    lst_EM_vary_tau[num].append(err_emmed(i, 'AGE', age, df, df_D_s, tau_frac[4]*q_D_s, default_eps, f_handle))
    print()
    num = num + 1
f_handle.close()

print(lst_EM_vary_tau)

---- 76.12823128700256 seconds ----
---- 76.31841397285461 seconds ----
---- 76.14160513877869 seconds ----
---- 76.07816505432129 seconds ----
---- 76.59980082511902 seconds ----

---- 40.52935194969177 seconds ----
---- 40.64575409889221 seconds ----
---- 40.61345410346985 seconds ----
---- 40.641660928726196 seconds ----
---- 41.028985261917114 seconds ----

---- 46.41013193130493 seconds ----
---- 45.98005199432373 seconds ----
---- 46.01666617393494 seconds ----
---- 45.980655670166016 seconds ----
---- 45.9067440032959 seconds ----

---- 49.54567098617554 seconds ----
---- 49.443907022476196 seconds ----
---- 49.55816388130188 seconds ----
---- 49.52587008476257 seconds ----
---- 49.489394187927246 seconds ----

---- 48.314019203186035 seconds ----
---- 48.26707100868225 seconds ----
---- 48.34895205497742 seconds ----
---- 48.32034993171692 seconds ----
---- 48.34060072898865 seconds ----

---- 77.61828303337097 seconds ----
---- 78.03204798698425 seconds ----
---- 77.6605498790

# EM---vary eps

In [18]:
f_handle = open('median_EM_vary_eps.txt', 'w')
num = 0
for i in queries:
    f_handle.write("%s\n" %(i))
    q_D_s = get_median(df_D_s, i, 'AGE')
    lst_EM_vary_eps[num].append(err_emmed(i,'AGE', age, df, df_D_s, default_tau*q_D_s, eps[0], f_handle))
    lst_EM_vary_eps[num].append(err_emmed(i,'AGE', age, df, df_D_s, default_tau*q_D_s, eps[1], f_handle))
    lst_EM_vary_eps[num].append(err_emmed(i,'AGE', age, df, df_D_s, default_tau*q_D_s, eps[2], f_handle))
    lst_EM_vary_eps[num].append(err_emmed(i,'AGE', age, df, df_D_s, default_tau*q_D_s, eps[3], f_handle))
    lst_EM_vary_eps[num].append(err_emmed(i,'AGE', age, df, df_D_s, default_tau*q_D_s, eps[4], f_handle))
    print()
    num = num + 1
f_handle.close()

print(lst_EM_vary_eps)

---- 77.69385814666748 seconds ----
---- 77.45881795883179 seconds ----
---- 77.18001294136047 seconds ----
---- 77.15594387054443 seconds ----
---- 77.30112886428833 seconds ----

---- 41.11814594268799 seconds ----
---- 40.679636001586914 seconds ----
---- 40.72119331359863 seconds ----
---- 40.65622401237488 seconds ----
---- 40.772562980651855 seconds ----

---- 45.989054918289185 seconds ----
---- 45.80917501449585 seconds ----
---- 45.86086392402649 seconds ----
---- 45.89493799209595 seconds ----
---- 45.88972306251526 seconds ----

---- 49.40224885940552 seconds ----
---- 49.42267394065857 seconds ----
---- 49.23490500450134 seconds ----
---- 49.476637840270996 seconds ----
---- 49.51488423347473 seconds ----

---- 48.73547005653381 seconds ----
---- 48.48579287528992 seconds ----
---- 48.71938395500183 seconds ----
---- 48.46637320518494 seconds ----
---- 48.14233589172363 seconds ----

---- 77.83130502700806 seconds ----
---- 77.59530377388 seconds ----
---- 77.2363851070404 

  pr = [pr[i]/tot for i in range(len(pr))]


---- 72.46576070785522 seconds ----
---- 72.40935707092285 seconds ----

---- 47.34933304786682 seconds ----
---- 47.404834032058716 seconds ----
---- 47.367918968200684 seconds ----
---- 47.365243911743164 seconds ----
---- 47.389281034469604 seconds ----

[[0.0, 0.0, 0.0, 0.0, 0.0], [0.72, 0.52, 0.37, 0.1, 0.03], [0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0, 0.0], [0.31, 0.14, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0, 0.0], [0.86, 0.7, 0.51, 0.29, 0.03], [0.0, 0.0, 0.0, 1.0, 1.0], [0.9, 0.78, 0.62, 0.3, 0.11]]


# Hist---vary tau

In [19]:
f_handle = open('median_Hist_vary_tau.txt', 'w')
num = 0
for i in queries:
    f_handle.write("%s\n" %(i))
    q_D_s = get_median(df_D_s, i, 'AGE')
    lst_Hist_vary_tau[num].append(err_histmed(i, 'AGE', df, df_D_s, tau_frac[0]*q_D_s, default_eps, f_handle))
    lst_Hist_vary_tau[num].append(err_histmed(i, 'AGE', df, df_D_s, tau_frac[1]*q_D_s, default_eps, f_handle))
    lst_Hist_vary_tau[num].append(err_histmed(i, 'AGE', df, df_D_s, tau_frac[2]*q_D_s, default_eps, f_handle))
    lst_Hist_vary_tau[num].append(err_histmed(i, 'AGE', df, df_D_s, tau_frac[3]*q_D_s, default_eps, f_handle))
    lst_Hist_vary_tau[num].append(err_histmed(i, 'AGE', df, df_D_s, tau_frac[4]*q_D_s, default_eps, f_handle))
    print()
    num = num + 1
f_handle.close()

print(lst_Hist_vary_tau)

---- 133.28719902038574 seconds ----
---- 133.30722284317017 seconds ----
---- 133.24568009376526 seconds ----
---- 133.07084798812866 seconds ----
---- 132.37571620941162 seconds ----

---- 71.4096291065216 seconds ----
---- 71.3939483165741 seconds ----
---- 71.4008629322052 seconds ----
---- 71.38017296791077 seconds ----
---- 71.42420721054077 seconds ----

---- 80.01199793815613 seconds ----
---- 80.00273513793945 seconds ----
---- 79.99132490158081 seconds ----
---- 79.96221971511841 seconds ----
---- 79.7936429977417 seconds ----

---- 85.88692712783813 seconds ----
---- 85.95404291152954 seconds ----
---- 85.90589213371277 seconds ----
---- 85.85199999809265 seconds ----
---- 85.64142107963562 seconds ----

---- 84.64307689666748 seconds ----
---- 84.64207983016968 seconds ----
---- 84.66582179069519 seconds ----
---- 84.63913607597351 seconds ----
---- 84.65088987350464 seconds ----

---- 130.18317699432373 seconds ----
---- 130.15702414512634 seconds ----
---- 130.22058296203

# Hist---vary eps

In [20]:
f_handle = open('median_Hist_vary_eps.txt', 'w')
num = 0
for i in queries:
    f_handle.write("%s\n" %(i))
    q_D_s = get_median(df_D_s, i, 'AGE')
    lst_Hist_vary_eps[num].append(err_histmed(i,'AGE', df, df_D_s, default_tau*q_D_s, eps[0], f_handle))
    lst_Hist_vary_eps[num].append(err_histmed(i,'AGE', df, df_D_s, default_tau*q_D_s, eps[1], f_handle))
    lst_Hist_vary_eps[num].append(err_histmed(i,'AGE', df, df_D_s, default_tau*q_D_s, eps[2], f_handle))
    lst_Hist_vary_eps[num].append(err_histmed(i,'AGE', df, df_D_s, default_tau*q_D_s, eps[3], f_handle))
    lst_Hist_vary_eps[num].append(err_histmed(i,'AGE', df, df_D_s, default_tau*q_D_s, eps[4], f_handle))
    print()
    num = num + 1
f_handle.close()

print(lst_Hist_vary_eps)s

---- 155.3649880886078 seconds ----
---- 186.554869890213 seconds ----
---- 143.24581813812256 seconds ----
---- 175.7393639087677 seconds ----
---- 187.69583106040955 seconds ----

---- 104.1426990032196 seconds ----
---- 106.35732507705688 seconds ----
---- 100.69592595100403 seconds ----
---- 100.52722001075745 seconds ----
---- 100.91976928710938 seconds ----

---- 114.24548768997192 seconds ----
---- 114.90965509414673 seconds ----
---- 117.06384992599487 seconds ----
---- 123.48565196990967 seconds ----
---- 111.65394806861877 seconds ----

---- 106.51037216186523 seconds ----
---- 106.54294180870056 seconds ----
---- 106.44451785087585 seconds ----
---- 104.06315279006958 seconds ----
---- 103.18718695640564 seconds ----

---- 102.9418432712555 seconds ----
---- 102.649240732193 seconds ----
---- 103.25637912750244 seconds ----
---- 102.13845872879028 seconds ----
---- 100.85428929328918 seconds ----

---- 157.36268210411072 seconds ----
---- 156.56472492218018 seconds ----
----