In [61]:
import collections
import math
import string
import numpy as np
import pandas as pd
from itertools import permutations, product
from sympy.utilities.iterables import partitions
from sympy.functions.combinatorial.numbers import stirling
import matplotlib.pyplot as plt 
%matplotlib notebook
# Use the fully qualified option key: the bare 'precision' pattern is
# ambiguous/unsupported in newer pandas releases, while 'display.precision'
# is accepted by old and new versions alike.
pd.set_option('display.precision', 7)

**k_dictionary**

Input: pi, the probability distribution

Output: a dictionary with letters for keys. The letters represent individual diseases. Can be used to keep track of individual diseases throughout the gain function calculations.

Note-- Not currently in use

```python
pi = [1/2, 1/3, 1/6]
k_dict = {'a': 0.5, 'b': 0.33333, 'c':0.166666}
```

In [2]:
def k_dictionary(pi):
    '''
    Pair each probability in pi with a lowercase letter key.

    Keys are taken in order from string.ascii_lowercase ('a', 'b', ...), so
    the i-th probability is stored under the i-th letter. zip stops at the
    shorter input, so at most 26 entries are produced.
    '''
    labelled = {}
    for letter, probability in zip(string.ascii_lowercase, pi):
        labelled[letter] = probability
    return labelled

In [3]:
# Example probability distribution with three entries; `pi` is reused by
# later cells in this notebook.
pi = [1/2, 1/3, 1/6]

# Label each probability with a letter key ('a', 'b', 'c').
pi_dictionary = k_dictionary(pi)

print(pi_dictionary)

{'a': 0.5, 'b': 0.3333333333333333, 'c': 0.16666666666666666}


**type_1**

Input: 
- an integer partition p 
- pi, a list of probabilities per disease in k 

```python
p = {5:1, 1:1}
pi = [.5, .333, .166]
```

Output: a dataframe where the first three columns represent the type and the fourth is the type's probability

Notes: 
- hardcoded for k = 3
- uses list_all_ints() as a helper function.

In [4]:
def type_1(p, pi):
    '''
    Tabulate every assignment of probabilities to the parts of an integer
    partition, together with the resulting probability weight.

    Parameters
    ----------
    p : dict
        Integer partition given as {part: multiplicity}, e.g. {3: 1, 1: 2}.
    pi : sequence of float
        Probability of each disease.

    Returns
    -------
    pandas.DataFrame
        One row per ordered selection of `bins` entries of pi (as produced by
        itertools.permutations). The leading columns are labelled with the
        parts of p (a label repeats when a part has multiplicity > 1) and hold
        the probability assigned to that part. The final 'prob' column is the
        product of probability ** part over the leading columns.

    Works for any number of parts; it is no longer restricted to partitions
    with exactly three parts.
    '''
    col_names = list_all_ints(p)
    bins = len(col_names)  # one bin per part, i.e. sum(p.values())
    df = pd.DataFrame(list(permutations(pi, bins)), columns=col_names)

    # Address columns by position rather than by label: labels repeat when a
    # part has multiplicity > 1, and df[label] would then return a DataFrame
    # instead of a single column.
    prob = 1
    for position, part in enumerate(col_names):
        prob = prob * df.iloc[:, position] ** part
    df['prob'] = prob
    return df

**list_all_ints** is a helper function used by type_1 and type_2

Input: the integer partition dictionary

Output: a list of the integers

```python
p = {3:1, 1:2}
col_names = [3, 1, 1]
```

In [5]:
def list_all_ints(p):
    '''
    Expand an integer partition dictionary into a flat list of its parts.

    p maps each part to its multiplicity; every part is repeated as many
    times as its multiplicity, in the dictionary's iteration order.
    Example: {3: 1, 1: 2} -> [3, 1, 1].
    '''
    return [part for part, multiplicity in p.items() for _ in range(multiplicity)]

In [6]:
# Example run of type_1 on the partition 3 + 2 + 1 with the three-entry `pi`
# defined in an earlier cell; `df` is inspected again by later cells.
df = type_1({3:1, 2:1, 1:1}, pi)
print(df)

          3         2         1      prob
0  0.500000  0.333333  0.166667  0.002315
1  0.500000  0.166667  0.333333  0.001157
2  0.333333  0.500000  0.166667  0.001543
3  0.333333  0.166667  0.500000  0.000514
4  0.166667  0.500000  0.333333  0.000386
5  0.166667  0.333333  0.500000  0.000257


In [7]:
def type_2(p, pi, k):
    '''
    Tabulate every assignment of the parts of an integer partition to k
    diseases, together with the resulting probability weight.

    Parameters
    ----------
    p : dict
        Integer partition given as {part: multiplicity}.
    pi : sequence of float
        Probability of each disease; entries 0..k-1 are used.
    k : int
        Number of diseases (columns). Columns are named with the first k
        lowercase letters, so k may not exceed 26.

    Returns
    -------
    (pandas.DataFrame, list)
        The DataFrame has one row per permutation of the zero-padded part
        list, one column per disease holding the part assigned to it, and a
        'prob' column equal to the product of pi[j] ** part_j.
        The list is the zero-padded part list itself.

    Raises
    ------
    ValueError
        If p has more than k parts or k exceeds the available column letters.
    '''
    int_list = list_all_ints(p)
    if len(int_list) > k:
        raise ValueError(
            "partition has %d parts but only k=%d diseases" % (len(int_list), k)
        )
    if k > len(string.ascii_lowercase):
        raise ValueError("k must be at most %d" % len(string.ascii_lowercase))
    int_list.extend([0] * (k - len(int_list)))  # pads list with 0s

    columns = list(string.ascii_lowercase[:k])
    # NOTE(review): itertools.permutations treats equal values at different
    # positions as distinct, so when int_list contains repeats (padding zeros
    # or a part with multiplicity > 1) identical rows appear more than once.
    # Confirm that downstream sums are meant to count them repeatedly.
    df = pd.DataFrame(list(permutations(int_list, k)), columns=columns)

    prob = 1
    for position, column in enumerate(columns):
        prob = prob * pi[position] ** df[column]
    df['prob'] = prob
    return df, int_list

In [82]:
# Test of type_2 with different int partitions
# Candidate integer partitions of 6 for exercising type_2.
# Only p1 is passed to type_2 in this cell; p2 and p3 are defined but unused here.
p1 = {3:1, 2:1, 1:1}
p2 = {3:2}
p3 = {6:1}
k=3  # number of diseases; equals len(pi) from the earlier cell
# df2 and int_list are consumed by the choices_matrix test cell.
df2, int_list = type_2(p1, pi, k)
print(df2)

   a  b  c       prob
0  3  2  1  0.0023148
1  3  1  2  0.0011574
2  2  3  1  0.0015432
3  2  1  3  0.0003858
4  1  3  2  0.0005144
5  1  2  3  0.0002572


In [76]:
def choices_matrix(df, int_list):
    '''
    Aggregate type probabilities per (count, disease) pair.

    Parameters
    ----------
    df : pandas.DataFrame
        One column per disease holding integer counts, plus a 'prob' column
        (the layout returned by type_2).
    int_list : list of int
        Counts to use as the row index of the result.

    Returns
    -------
    pandas.DataFrame
        Indexed by int_list, with one column per disease column of df.
        Entry (i, d) is the sum of 'prob' over the rows of df where
        disease d has count i.
    '''
    # Every column except 'prob' is a disease column, so this works for any
    # number of diseases rather than only 'a', 'b', 'c'.
    disease_cols = [col for col in df.columns if col != 'prob']

    # Start from float zeros: the cells receive float sums, and writing floats
    # into an integer-typed frame is deprecated in recent pandas.
    df_g = pd.DataFrame(0.0, index=int_list, columns=disease_cols)

    # dict.fromkeys keeps first-seen order while skipping repeated counts;
    # .loc[i, col] already fills every row labelled i.
    for count in dict.fromkeys(int_list):
        for col in disease_cols:
            df_g.loc[count, col] = df.loc[df[col] == count, 'prob'].sum()
    return df_g

In [83]:
# Test of choices_matrix()
# Build the per-count / per-disease probability table from the type_2 output
# (df2 and int_list come from the type_2 test cell).
df_guess = choices_matrix(df2, int_list)
print(df_guess)

           a          b          c
3  0.0034722  0.0020576  0.0006430
2  0.0019290  0.0025720  0.0016718
1  0.0007716  0.0015432  0.0038580


In [72]:
df.max() # column-wise maximum: one value per column of df

3       0.500000
2       0.500000
1       0.500000
prob    0.002315
dtype: float64

In [73]:
df.idxmax() # row label at which each column attains its maximum

3       0
2       2
1       3
prob    0
dtype: int64

In [16]:
df.max().max() # maximum over the entire dataframe (max of the column maxima)

0.5

**calc_set_partitions()** calculates the number of index partitions given an integer n and an integer partition p

In [2]:
def calc_set_partitions(n,p):
    '''
    Count the partitions of n labelled items whose block sizes follow the
    integer partition p.

    The count is n! / prod(i!**m * m!) over the (part i, multiplicity m)
    pairs of p.

    Parameters
    ----------
    n : int
        Number of items being partitioned.
    p : dict
        Integer partition of n given as {part: multiplicity}.

    Returns
    -------
    int
        The exact count. Integer division is used because the quotient is
        always whole for a valid partition; a float result would lose
        precision once the count exceeds 2**53.

    Raises
    ------
    ValueError
        If the parts of p do not sum to n.
    '''
    if sum(part * mult for part, mult in p.items()) != n:
        raise ValueError("p is not an integer partition of n=%d" % n)

    den = 1
    for part, mult in p.items():
        # part!**mult removes orderings inside each block; mult! removes
        # orderings among blocks of the same size.
        den *= math.factorial(part) ** mult * math.factorial(mult)
    return math.factorial(n) // den

Tests

In [18]:
# Iterating a dict yields its keys; this lists the letter labels assigned by
# k_dictionary.
for key in pi_dictionary:
    print("key: ", key)

key:  a
key:  b
key:  c


In [5]:
# Enumerate the integer partitions of n with at most k parts (sympy's
# partitions(n, m=k)) and print the calc_set_partitions count for each.
n = 6
k = 3
for p in partitions(n, m = k):
    print("p = ", p)
    print("p.keys= ", p.keys())
    num = calc_set_partitions(n,p)
    print("num: ", num)

p =  {6: 1}
p.keys=  dict_keys([6])
num:  1.0
p =  {5: 1, 1: 1}
p.keys=  dict_keys([5, 1])
num:  6.0
p =  {4: 1, 2: 1}
p.keys=  dict_keys([4, 2])
num:  15.0
p =  {4: 1, 1: 2}
p.keys=  dict_keys([4, 1])
num:  15.0
p =  {3: 2}
p.keys=  dict_keys([3])
num:  10.0
p =  {3: 1, 2: 1, 1: 1}
p.keys=  dict_keys([3, 2, 1])
num:  60.0
p =  {2: 3}
p.keys=  dict_keys([2])
num:  15.0
