In [1]:
import collections
from collections import Counter
import math
import string
import logging
import numpy as np
import pandas as pd
from itertools import permutations
from sympy.utilities.iterables import partitions
import matplotlib.pyplot as plt 
pd.set_option('precision', 10)
%matplotlib inline

### Test Variables

In [2]:
prior = [1/2, 1/3, 1/6]
n = 10
k = 3

## Functions

---
**k_dictionary**

Input: pi, the probability distribution

Output: a dictionary with letters for keys. The letters represent individual diseases. Can be used to keep track of individual diseases throughout the gain function calculations.

```python
pi = [1/2, 1/3, 1/6]
k_dict = {'a': 0.5, 'b': 0.33333, 'c':0.166666}
```

In [3]:
def k_dictionary(pi):
    pi.sort(reverse=True)
    return dict(zip(string.ascii_lowercase, pi))

In [4]:
k_dict = k_dictionary(prior)
print(k_dict)

{'a': 0.5, 'b': 0.3333333333333333, 'c': 0.16666666666666666}


---
**gen_col** generates a non-unform random sample of size n according to the prior

In [5]:
def gen_col(n, prior):
    letters = ['a', 'b', 'c']
    return np.random.choice(letters, n, p=prior).tolist()

In [6]:
a = gen_col(n, prior)
print(a)

['b', 'b', 'b', 'b', 'c', 'c', 'a', 'c', 'a', 'b']


---
**gen_index_list** provides a list of lists containing indices to the adversary

In [7]:
def gen_index_list(letter_list):
    ind_x = []
    ind_y = []
    ind_z = []
    index_list = []
    i = 0
    for x in letter_list:
        if x=='a':
            ind_x.append(i)
        elif x == 'b':
            ind_y.append(i)
        elif x == 'c':
            ind_z.append(i)
        else:
            raise Exception("x != a, b, c")
        i = i + 1
    index_list.append(ind_x)
    index_list.append(ind_y)
    index_list.append(ind_z)
    return index_list

In [8]:
il = gen_index_list(a)
print(il)

[[6, 8], [0, 1, 2, 3, 9], [4, 5, 7]]


---
**index_list_to_guess** acts as the adversary. It takes as input a list of indices, sorts them by size, and associates 'a', 'b', and 'c' with the lists. Outputs a guess column of letters.

In [9]:
def index_list_to_guess(index_list, n):
    guess_list = []
    index_list.sort(key=len, reverse=True)
    d2b = {'a': index_list[0], 'b': index_list[1], 'c': index_list[2]}
    for i in range(n):
        if i in d2b['a']:
            guess_list.append('a')
        elif i in d2b['b']:
            guess_list.append('b')
        elif i in d2b['c']:
            guess_list.append('c')
        else:
            raise Exception("issue generating guess list")
    return guess_list

In [10]:
i2g = index_list_to_guess(il, n)
print(i2g)

['a', 'a', 'a', 'a', 'b', 'b', 'c', 'b', 'c', 'a']


---
**bayes_test** 
- generates the column
- generates the index partition
- provides the index partition to the adversary
- adversary makes guess
- checks if guess was correct

In [11]:
def bayes_test(n, k, prior):
    col = gen_col(n, prior)## Client has a plaintext column
    logging.info("*** Plaintext Column *** -\n {}".format(col))
    index_list = gen_index_list(col) ## Generate index partition
    guess_list = index_list_to_guess(index_list, n) ## Index partition to adversary, guess
    trust_value = (col == guess_list)
    logging.info("*** True or False? *** -\n {}".format(trust_value))
    return trust_value

In [None]:
bayes_test(n, k, prior)

In [None]:
logging.getLogger().setLevel(logging.INFO)

In [None]:
logging.getLogger().setLevel(logging.WARNING)

---
**run_tests** runs `bayes_test` for a set number of times and outputs the success rate

In [12]:
def run_tests(num_tests, n, k, prior):
    yay = 0
    for i in range(num_tests):
        val = bayes_test(n, k, prior)
        if val == True:
            yay = yay + 1
    return yay/num_tests

# Bayes Tests

At n = 3, posterior Bayes ~.4027

Trials: 10M, success rate: ~.542 

In [None]:
num_tests = 10000000
n = 3
k = 3
prior = (1/2, 1/3, 1/6)
success = run_tests(num_tests, n, k, prior)
print("Success rate: ", success)

Posterior bayes vulnerability for n = 10: approx .51

Adveraging an unusually high success rate. 
- At 1M tests, ~.62. 
- At 10M, ~.62

In [None]:
num_tests = 1000000
n = 10
k = 3
prior = (1/2, 1/3, 1/6)
success = run_tests(num_tests, n, k, prior)
print("Success rate: ", success)

In [None]:
num_tests = 10000000
n = 10
k = 3
prior = (1/2, 1/3, 1/6)
success = run_tests(num_tests, n, k, prior)
print("Success rate: ", success)

Posterior bayes vuln for n = 20 is approx 0.66

At 500 tests, we're successfull ~71%

In [None]:
num_tests = 500
n = 20
k = 3
prior = [1/2, 1/3, 1/6]
success_rate = run_tests(num_tests, n, k, prior)
print("Your success rate: ", success_rate)

## C/P Bayes Functions

In [None]:
from sympy.utilities.iterables import partitions
from sympy.functions.combinatorial.numbers import stirling

In [None]:
def calc_set_partitions(n,p):
    num = math.factorial(n)
    den = 1
    mul = 0
    for i in p.keys():
        mul = p[i]
        den = den * pow(math.factorial(i), mul) * math.factorial(mul)
    frac = num / den
    return frac

In [None]:
def calc_max_prob(p, Pi):
    Pi.sort(reverse=True)
    frac = 1
    ind = 0
    val = -1
    for key in sorted(p.keys(), reverse=True):
        val = p[key]
        for i in range(val):
            frac = frac * pow(Pi[ind], key)
            ind = ind + 1
    return frac

In [None]:
def nonuni_post_bayes_vuln(n, k, Pi):
    pbvsum = 0
    for p in partitions(n, m=k):  
        num = calc_set_partitions(n,p)
        frac = calc_max_prob(p, Pi)
        val = num * frac
        pbvsum = pbvsum + val
    return pbvsum

## Comparing Bayes Posterior Vuln and Experimental Success

In [None]:
def bayes_post_vs_success(n_range, k, probs, num_tests):
    post_bayes_vs = []
    success = []
    for i in range(n_range):
        post_bayes_vs.append(nonuni_post_bayes_vuln(i+1, k, probs))
        success.append(run_tests(num_tests, i+1, k, probs))
    df = pd.DataFrame(
        {'posterior':post_bayes_vs,
         'success':success
        }
    )
    return df

In [None]:
def graph_bayes_exp(n_range, k, probs, df):
    x = [i+1 for i in range(n_range)]
    plt.plot(x, 'posterior', data=df, label="posterior")
    plt.plot(x, 'success', data=df, label="success")
    plt.xlabel("n values")
    plt.ylim(bottom = -.1, top = 1)
    plt.legend()
    plt.show()
    plt.close()

### n = 1 - 100, 100 tests per n

In [None]:
n_range = 100
k = 3
probs = [1/2, 1/3, 1/6]
num_tests = 100

In [None]:
df_vs = bayes_post_vs_success(n_range, k, probs, num_tests)
df_vs

In [None]:
df_vs.to_csv (r'data\df_vs_100_nonuni.csv', index = None, header=True) 

In [None]:
df_vs100_csv = pd.read_csv('data\df_vs_100_nonuni.csv')

In [None]:
graph_bayes_exp(n_range, k, probs, df_vs)

## n = 1 - 200, 200 tests per n

Approx. 13 minutes to run

In [None]:
n_range = 200
k = 3
probs = [1/2, 1/3, 1/6]
num_tests = 200

In [None]:
df_200 = bayes_post_vs_success(n_range, k, probs, num_tests)
df_200

In [None]:
df_200.to_csv (r'data\df_200_nonuni.csv', index = None, header=True) 

In [None]:
graph_bayes_exp(n_range, k, probs, df_200)