In [1]:
import numpy as np
import pandas as pd

In [2]:
def generate_data(data_type, R, n, k=0):
    """Draw a synthetic one-dimensional sample of ``n`` values.

    Parameters
    ----------
    data_type : str
        One of ``'gaussian'``, ``'poisson'`` or ``'bimodal'``.
    R : number
        Scale parameter.  The Gaussian sample has mean ``R/4`` and standard
        deviation ``R/sqrt(10)``; the bimodal sample is centred on ``R/2``.
        It is not used by the Poisson sample.
    n : int
        Number of values to draw.
    k : number, optional
        Offset of the two bimodal values from ``R/2`` (they are ``R/2 - k``
        and ``R/2 + k``).  Ignored for the other distributions.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(n,)``.

    Raises
    ------
    ValueError
        If ``data_type`` is not one of the supported names (previously the
        function fell through and returned ``None``).
    """
    if data_type == 'gaussian':
        return np.random.normal(R/4, R/np.sqrt(10), (n,))
    if data_type == 'poisson':
        # The rate is fixed at 50 and does not depend on R.
        return np.random.poisson(50, (n,))
    if data_type == 'bimodal':
        options = [(R/2)-k, (R/2)+k]
        # One vectorised draw instead of n separate np.random.choice calls.
        return np.random.choice(options, size=n)
    raise ValueError(
        "unknown data_type {!r}; expected 'gaussian', 'poisson' or 'bimodal'".format(data_type)
    )

In [113]:
def rank(x, i):
    """Return the 1-based rank of the value ``i`` within the sample ``x``.

    The rank is ``(# values < i) + (# values == i) / 2``, rounded with
    ``np.rint`` (exact halves go to the nearest even integer), plus one.

    Parameters
    ----------
    x : array-like
        Sample values; their order is irrelevant.
    i : number
        Value whose rank is requested.  It does not have to occur in ``x``.

    Returns
    -------
    numpy.float64
        The rank, between ``1`` and ``len(x) + 1``.
    """
    # Both counts are order-independent, so the sample is not sorted first.
    x = np.asarray(x)
    below = np.sum(x < i)
    ties = np.sum(x == i)
    return np.rint(below + (ties / 2)) + 1

In [85]:
# Draw one Gaussian sample with R=100 and n=50.
# NOTE(review): presumably for ad-hoc checks; `d` is not referenced by the other cells visible here.
d = generate_data('gaussian', 100, 50)

In [39]:
def q(x, y):
    """Score candidate ``y`` against the sample ``x``.

    The score is minus the absolute difference between the number of
    entries of ``x`` above ``y`` and the number below it, so it is ``0``
    when ``y`` splits the sample evenly and becomes more negative as the
    split gets more lopsided.
    """
    imbalance = np.sum(np.sign(x - y))
    return np.absolute(imbalance) * -1

In [44]:
def report_noisy_max(x, R, epsilon):
    """Select a candidate in ``1..R`` by report-noisy-max.

    The data are rounded to the nearest integer and clipped to ``[1, R]``.
    Every integer candidate ``y`` in ``1..R`` is scored with ``q(x, y)``
    plus an independent draw from an exponential distribution of scale
    ``2 / epsilon``.  The candidate with the highest noisy score is
    returned; on exact ties the smallest candidate wins.

    Parameters
    ----------
    x : array-like
        Sample values.
    R : int
        Largest candidate; candidates are ``1, 2, ..., R``.
    epsilon : float
        Controls the noise scale (``2 / epsilon``); must be positive.

    Returns
    -------
    int or None
        The selected candidate.  ``None`` only if no candidate produced a
        comparable score (for example when the data contain NaN).

    Raises
    ------
    ValueError
        If ``R < 1`` (there would be no candidates) or ``epsilon <= 0``.
    """
    if R < 1:
        raise ValueError("R must be at least 1, got {!r}".format(R))
    if epsilon <= 0:
        raise ValueError("epsilon must be positive, got {!r}".format(epsilon))

    # Round, then clamp into the candidate range [1, R].
    x = np.clip(np.rint(x), 1, R)
    noise_scale = 2 / epsilon

    best_candidate, best_score = None, float("-inf")
    for y in range(1, R + 1):
        score = q(x, np.array([y])) + np.random.exponential(noise_scale)
        # Strict comparison keeps the earliest candidate on ties.
        if score > best_score:
            best_candidate, best_score = y, score
    return best_candidate

In [8]:
# Experiment grid used by the sweep cells.
sizes = [50,100,500,2000,10000]  # sample sizes, passed as `n`
gaussian_rs = [100,1000,10000]  # values passed as `R` in the Gaussian sweep
poisson_rs = [100,1000,10000]  # values passed as `R` in the Poisson sweep
bimodal_r = 1000  # single `R` value for the bimodal setting
bimodal_ks = [10,100,200]  # values passed as `k` in the bimodal sweep

In [114]:
def rnm_algorithm(data_type, R, n, k=0, epsilon=0.1, n_datasets=50, n_trials=10):
    """Measure the rank error of ``report_noisy_max`` on synthetic data.

    For each of ``n_datasets`` freshly generated samples, the mechanism is
    run ``n_trials`` times.  The error of one run is
    ``|rank(data, output) - n/2|``.

    Parameters
    ----------
    data_type, R, n, k
        Forwarded to ``generate_data``; ``R`` is also the candidate range
        given to ``report_noisy_max``.
    epsilon : float, optional
        Value passed to ``report_noisy_max`` (default ``0.1``).
    n_datasets : int, optional
        Number of independent samples to generate (default ``50``).
    n_trials : int, optional
        Number of mechanism runs per sample (default ``10``).

    Returns
    -------
    tuple
        ``(avg_error, std_error, std_errors_in_rank)``: the mean and the
        standard deviation of the error over all
        ``n_datasets * n_trials`` runs, and the standard deviation of the
        per-sample mean errors.
    """
    errors = []
    errors_in_rank = []
    for _ in range(n_datasets):
        data = generate_data(data_type, R, n, k)
        rank_errors = []
        for _ in range(n_trials):
            rmn = report_noisy_max(data, R, epsilon)
            error = np.absolute(rank(data, rmn) - (n / 2))
            errors.append(error)
            rank_errors.append(error)
        # Mean error over the repeated runs on this one sample.
        errors_in_rank.append(np.mean(rank_errors))
    avg_error = np.mean(errors)
    std_error = np.std(errors)
    std_errors_in_rank = np.std(errors_in_rank)
    return avg_error, std_error, std_errors_in_rank

In [11]:
# Accumulator for result rows; the sweep cells append one tuple per configuration.
results = []

In [12]:
# Header row: these labels are later read back as `results[0]` to name the DataFrame columns.
results.append(('DISTRIBUTION', 'R', 'N', 'EPSILON', 'k', 'AVERAGE ERROR', 'STD OF ERROR', 'STD OF ERROR ON SAMPLE'))

In [13]:
# Show the current contents of the accumulator.
results

[('DISTRIBUTION',
  'R',
  'N',
  'EPSILON',
  'k',
  'AVERAGE ERROR',
  'STD OF ERROR',
  'STD OF ERROR ON SAMPLE')]

In [88]:
# Gaussian sweep: one result row per (sample size, R) pair.
for n in sizes:
    for r in gaussian_rs:
        rnm_results = rnm_algorithm('gaussian', r, n)
        # Row layout follows the header tuple; 'N/A' fills the unused k column.
        results.append(('gaussian', r, n, 0.1, 'N/A', *rnm_results[:3]))

print(results)

[('DISTRIBUTION', 'R', 'N', 'EPSILON', 'k', 'AVERAGE ERROR', 'STD OF ERROR', 'STD OF ERROR ON SAMPLE'), ('gaussian', 100, 50, 0.1, 'N/A', 8.622, 7.383435243841446, 2.4859034574978973), ('gaussian', 1000, 50, 0.1, 'N/A', 9.026, 7.755599525504137, 2.5110802456313497), ('gaussian', 10000, 50, 0.1, 'N/A', 9.122, 7.411013156107605, 2.8532991430973373), ('gaussian', 100, 100, 0.1, 'N/A', 9.654, 9.200776271597958, 3.0710395634051997), ('gaussian', 1000, 100, 0.1, 'N/A', 9.806, 10.226062976532074, 3.558674472328145), ('gaussian', 10000, 100, 0.1, 'N/A', 10.13, 9.791889501010518, 3.508803214772809), ('gaussian', 100, 500, 0.1, 'N/A', 9.096, 9.713844964791233, 3.151378111239589), ('gaussian', 1000, 500, 0.1, 'N/A', 10.98, 10.28686541177632, 3.350701419106155), ('gaussian', 10000, 500, 0.1, 'N/A', 10.176, 9.919325783539929, 3.3903722509482646), ('gaussian', 100, 2000, 0.1, 'N/A', 9.796, 8.17669762190091, 4.295297894209434), ('gaussian', 1000, 2000, 0.1, 'N/A', 9.324, 8.686254889191314, 2.31832353

In [104]:
# Tabulate the rows accumulated so far; the first tuple in `results` supplies the column names.
df1 = pd.DataFrame(results[1:], columns=results[0])
df1

Unnamed: 0,DISTRIBUTION,R,N,EPSILON,k,AVERAGE ERROR,STD OF ERROR,STD OF ERROR ON SAMPLE
0,gaussian,100,50,0.1,,8.622,7.383435,2.485903
1,gaussian,1000,50,0.1,,9.026,7.7556,2.51108
2,gaussian,10000,50,0.1,,9.122,7.411013,2.853299
3,gaussian,100,100,0.1,,9.654,9.200776,3.07104
4,gaussian,1000,100,0.1,,9.806,10.226063,3.558674
5,gaussian,10000,100,0.1,,10.13,9.79189,3.508803
6,gaussian,100,500,0.1,,9.096,9.713845,3.151378
7,gaussian,1000,500,0.1,,10.98,10.286865,3.350701
8,gaussian,10000,500,0.1,,10.176,9.919326,3.390372
9,gaussian,100,2000,0.1,,9.796,8.176698,4.295298


In [105]:
# Poisson sweep: one result row per (sample size, R) pair.
for n in sizes:
    for r in poisson_rs:
        rnm_results = rnm_algorithm('poisson', r, n)
        # Row layout follows the header tuple; 'N/A' fills the unused k column.
        results.append(('poisson', r, n, 0.1, 'N/A', *rnm_results[:3]))

print(results)

[('DISTRIBUTION', 'R', 'N', 'EPSILON', 'k', 'AVERAGE ERROR', 'STD OF ERROR', 'STD OF ERROR ON SAMPLE'), ('gaussian', 100, 50, 0.1, 'N/A', 8.622, 7.383435243841446, 2.4859034574978973), ('gaussian', 1000, 50, 0.1, 'N/A', 9.026, 7.755599525504137, 2.5110802456313497), ('gaussian', 10000, 50, 0.1, 'N/A', 9.122, 7.411013156107605, 2.8532991430973373), ('gaussian', 100, 100, 0.1, 'N/A', 9.654, 9.200776271597958, 3.0710395634051997), ('gaussian', 1000, 100, 0.1, 'N/A', 9.806, 10.226062976532074, 3.558674472328145), ('gaussian', 10000, 100, 0.1, 'N/A', 10.13, 9.791889501010518, 3.508803214772809), ('gaussian', 100, 500, 0.1, 'N/A', 9.096, 9.713844964791233, 3.151378111239589), ('gaussian', 1000, 500, 0.1, 'N/A', 10.98, 10.28686541177632, 3.350701419106155), ('gaussian', 10000, 500, 0.1, 'N/A', 10.176, 9.919325783539929, 3.3903722509482646), ('gaussian', 100, 2000, 0.1, 'N/A', 9.796, 8.17669762190091, 4.295297894209434), ('gaussian', 1000, 2000, 0.1, 'N/A', 9.324, 8.686254889191314, 2.31832353

In [106]:
# Tabulate the rows accumulated so far; the first tuple in `results` supplies the column names.
df2 = pd.DataFrame(results[1:], columns=results[0])
df2

Unnamed: 0,DISTRIBUTION,R,N,EPSILON,k,AVERAGE ERROR,STD OF ERROR,STD OF ERROR ON SAMPLE
0,gaussian,100,50,0.1,,8.622,7.383435,2.485903
1,gaussian,1000,50,0.1,,9.026,7.7556,2.51108
2,gaussian,10000,50,0.1,,9.122,7.411013,2.853299
3,gaussian,100,100,0.1,,9.654,9.200776,3.07104
4,gaussian,1000,100,0.1,,9.806,10.226063,3.558674
5,gaussian,10000,100,0.1,,10.13,9.79189,3.508803
6,gaussian,100,500,0.1,,9.096,9.713845,3.151378
7,gaussian,1000,500,0.1,,10.98,10.286865,3.350701
8,gaussian,10000,500,0.1,,10.176,9.919326,3.390372
9,gaussian,100,2000,0.1,,9.796,8.176698,4.295298


In [125]:
# BIMODAL
# One result row per (sample size, k) pair.  The range is read from the
# `bimodal_r` constant rather than repeating the literal inside the loop.
r = bimodal_r
for n in sizes:
    for k in bimodal_ks:
        rnm_results = rnm_algorithm('bimodal', r, n, k)
        results.append(('bimodal', r, n, 0.1, k, rnm_results[0], rnm_results[1], rnm_results[2]))

In [139]:
# Tabulate every accumulated row; the first tuple in `results` supplies the column names.
df3 = pd.DataFrame(results[1:], columns=results[0])
# NOTE(review): if the to_csv call below is in the same cell, this bare expression is not
# the cell's last expression and will not be displayed - confirm.
df3

# Write the table to CSV.
# NOTE(review): the destination is a user-specific path under the home directory; the call
# presumably fails if that directory does not exist - confirm before running elsewhere.
df3.to_csv('~/Documents/cs7880/dp-report-noisy-max/report-noisy-max-results.csv')