# In real life; things depend on other things

Probability Smart / Dumb is 0.5 / 0.5

 - P_S = Professor at Stanford
 - P(P_S) = 0.001
 - P(P_S | S) = 0.002
 - P(P_S | D) = 0.000

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import itertools
%matplotlib inline

def p_(h=None, t=None): 
    #if h is not None:
    #    return 1-h
    #elif t is not None:
    #    return 1-t
    #else:
    #    return 1
    
    return 1-(h or t or 0)

#Returns 1 by default; the totality
assert p_()==1
#Other standard probabilities
assert p_(h=1)==0
assert p_(h=0.5)==0.5
assert p_(t=0.2)==0.8
#Uses first nonzero arg
assert p_(h=0,t=1)==0
assert p_(h=0.2, t=0.5)==0.8



In [2]:
def cancer_example_1(p_c=0.1):
    """
    :returns: P(¬C) if P(C) is 0.1
    Solution: https://www.udacity.com/course/viewer#!/c-st101/l-48729372/e-48729371/m-48673740
    """
    return p_(p_c)

In [4]:
assert cancer_example_1()==0.9, 'If 10% have cancer; 90% do not have cancer'

# Conditional Probability
Probability A given that Z has happened
## P(A|Z)
## p_a_g_z
If there are two events; A,B and X,Z , then 
 - P(A|X) + P(A|Z) == P(A)
 - P(A|X) + P(B|X) == 1
 
  > This is called conditional probability. What's the probability of the thing on the left, *given* the event on the right

The test is *not* deterministic; it makes mistakes!

In [5]:
p_c=0.1
p_n_c = cancer_example_1(p_c)
p_p_g_c = 0.9
p_n_g_c = 0.1

In [7]:
p_p_g_n_c = 0.2
p_n_g_n_c = p_(p_p_g_n_c)

def cancer_example_2(p_p_g_n_c=p_p_g_n_c):
    """
    :returns: P(N|¬C) ; the specificity rate; true negatives
    http://en.wikipedia.org/wiki/Sensitivity_and_specificity
    Solution: https://www.udacity.com/course/viewer#!/c-st101/l-48729372/e-48723639/m-48691628
    """
    p_n_g_n_c = p_(p_p_g_n_c)
    return p_n_g_n_c

In [8]:
assert cancer_example_2()==0.8, 'P(Negative Given No Cancer) should be 0.8; (because P(Positive Given No Cancer) is 0.2)'

In [9]:
import functools
def basic_prob_law_reduction(p_list):
    """
    :returns: joint probability
    of all events in a list happening
    by applying 2nd law
    http://stackoverflow.com/a/13840436/1175496
    """
    assert all([_<=1 for _ in p_list]), 'Should be less than or equal to 1'
    #assert sum(p_list)==1; they are different events, needn't sum to 1
    return functools.reduce(operator.mul, p_list)


In [38]:
import operator
def c_map_assertion(c_map):
    """
    :params c_map: where Key is of form 'D|I' and 
    value is conditional probability
    """
    #Be very careful not to mix up P(N|N) and P(N|C) , they don't sum to 1; they sum to P(N)
    c_map_items = c_map.items()
    print(c_map_items)
    #Same independent event; the second part of the 'P|C'
    sie = lambda s: s[0].split('|')[1]
    
    
    groups = itertools.groupby(sorted(c_map_items, key=sie), sie)
    print(list(itertools.groupby(sorted(c_map_items, key=sie), sie)))
    assert all([sum([_[1] for _ in k])==1 for p,k in groups]), 'All the conditionals should sum to 1'
    

In [58]:

    
def cancer_example_truth_table(p_c=0.1, p_p_g_c=0.9, p_n_g_n_c=0.8):
    """
    :params p_c: P(C)
    :params p_p_g_c: P(P|C), sensitivity
    :params p_n_g_n_c: P(N|¬C), specificity
    :returns: truth table of sequential events
    that depend / influence each other (conditional probability)
    """
    #Scenarios / Binaries
    b_c = ('C','N')
    b_t = ('P','N')
    
    #Conditional map
    c_map = {'P|C':p_p_g_c,
         'N|N':p_n_g_n_c,
         'P|N':p_(p_n_g_n_c),
         'N|C':p_(p_p_g_c) }
    
    c_map_assertion(c_map)
    
    #Test on the left side; given Cancer , on the right sight T|C
    cols = 'c|t'.split('|') #('t','c')
    
    #Start the truth table; "C" column is whether there is cancer "T" if test
    t_t =  pd.DataFrame(list(itertools.product(b_t, b_c)), columns=cols)
    
    
    #The probability of cancer 
    t_t['p_c'] = t_t['c'].map({'C':p_c , 'N':p_(p_c)})
    
    #Map them using the conditional probabilities; need 4 values in map (not just 2)
    t_t['c_e'] = t_t[['c','t']].apply('|'.join, axis=1)
    
    t_t['c_p'] = t_t['c_e'].map(c_map)
#     print(t_t['c_e'])
#     print(t_t['c_p'])
#     print(t_t['c_p'])
    print(t_t)
    #The joint probability
    t_t['j_p'] = t_t[['c_p', 'p_c']].apply(basic_prob_law_reduction,axis=1)
    
    t_t = t_t.sort_values(by='p_c')
    return t_t
    
cancer_example_truth_table()

dict_items([('P|C', 0.9), ('N|N', 0.8), ('P|N', 0.19999999999999996), ('N|C', 0.09999999999999998)])
[('C', <itertools._grouper object at 0x000001A06069BFD0>), ('N', <itertools._grouper object at 0x000001A06069B1D0>)]
   c  t  p_c  c_e  c_p
0  P  C  NaN  P|C  0.9
1  P  N  NaN  P|N  0.2
2  N  C  0.9  N|C  0.1
3  N  N  0.9  N|N  0.8


AssertionError: ('Should be less than or equal to 1', 'occurred at index 0')

In [55]:
'c|t'.split('|')[::-1]

['t', 'c']

In [47]:
list(reversed((1,2)))

[2, 1]

In [22]:
cancer_example_truth_table()

Unnamed: 0,t,c,c_e,p_c,c_p,j_p
0,P,C,P|C,0.1,0.9,0.09
2,N,C,N|C,0.1,0.1,0.01
1,P,N,P|N,0.9,0.2,0.18
3,N,N,N|N,0.9,0.8,0.72


In [107]:
def cancer_example_3(p_c=0.1, p_p_g_c=0.9, p_n_g_n_c=0.8):
    """
    :returns: P(P,C)
    Solution: 
    https://www.udacity.com/course/viewer#!/c-st101/l-48729372/e-48678914/m-48739237
    """
    #t_t = cancer_example_truth_table()
    #t_t['c_p'] = ((t_t['c']=='C') * p_c) * ((t_t['t']=='P') * p_p_g_c)
    #return t_t.ix[0]['c_p']
    return p_c * p_p_g_c

In [108]:
assert np.isclose(cancer_example_3(), 0.09), 'P(C) is 0.1; P(P|C) is 0.9. P(P,C) is 0.09'

In [109]:
def cancer_example_4(p_c=0.1, p_p_g_c=0.9, p_n_g_n_c=0.8):
    """
    :returns: P(N,C)
    Solution:
    https://www.udacity.com/course/viewer#!/c-st101/l-48729372/e-48697751/m-48632850
    """
    p_n_g_c = p_(p_p_g_c)
    return p_c * p_n_g_c

In [110]:
assert np.isclose(cancer_example_4(), 0.01), 'P(C) is 0.1; P(P|C) is 0.9, so P(N|C) is 0.1; P(N,C) is 0.01'

In [111]:
def cancer_example_5(p_c=0.1, p_p_g_c=0.9, p_n_g_n_c=0.8):
    """
    :returns: P(P,N)
    Solution: https://www.udacity.com/course/viewer#!/c-st101/l-48729372/e-48692826/m-48641640
    """
    p_n_c = p_(p_c)
    p_p_g_n_c = p_(p_n_g_n_c)
    return p_n_c * p_p_g_n_c


In [112]:
assert np.isclose(cancer_example_5(), 0.18), 'P(C) is 0.1, so P(N) is 0.9. P(N|N) is 0.8, so P(P|N) is 0.2. P(P,N) =0.9 * 0.2' 

In [113]:
def cancer_example_6(p_c=0.1, p_p_g_c=0.9, p_n_g_n_c=0.8):
    """ 
    :returns: P(N,N)
    Solution:
    https://www.udacity.com/course/viewer#!/c-st101/l-48729372/e-48698740/m-48730183
    """
    p_n_c = p_(p_c)
    return p_n_c * p_n_g_n_c

In [114]:
assert np.isclose(cancer_example_6(), 0.72), 'P(C) is 0.1 so P(N) is 0.9; P(N|N) is 0.8; so P(N,N) is 0.72'

In [115]:
def cancer_example_7(p_c=0.1, p_p_g_c=0.9, p_n_g_n_c=0.8):
    """ 
    :returns: Sum of all joint probabilities, should be 1
    Solution:
    https://www.udacity.com/course/viewer#!/c-st101/l-48729372/e-48721494/m-48712380
    """
    return sum((cancer_example_3(p_c, p_p_g_c, p_n_g_n_c),
                cancer_example_4(p_c, p_p_g_c, p_n_g_n_c),
                cancer_example_5(p_c, p_p_g_c, p_n_g_n_c),
                cancer_example_6(p_c, p_p_g_c, p_n_g_n_c)))

In [116]:
assert cancer_example_7() ==1 , 'Sum of all probabilities should be 1'

In [128]:
def cancer_example_8():
    """
    :returns: P(P)
    Sum of P(P,N) + P(P,C)
    Solution:
    https://www.udacity.com/course/viewer#!/c-st101/l-48729372/e-48698741/m-48739233
    """
    return cancer_example_5() + cancer_example_3()

In [129]:
assert np.isclose(cancer_example_8(), 0.27), 'Sum of P(P,N) and P(P,C) is P(P); '

In [132]:
def cancer_example_8():
    """
    :returns:P(P) using the truth table approach
    """
    test_positive = cancer_example_truth_table()['t']=='P'
    return cancer_example_truth_table()[test_positive]['j_p'].sum()


In [133]:
assert np.isclose(cancer_example_8(), 0.27), 'Sum of P(P,N) and P(P,C) is P(P); '

In [161]:
def two_coins_1(p_h1=0.5, p_h2 = 0.9):
    """
    :returns: P(T) for two coins
    First of which P(H) = 0.5
    Second of which P(H) = 0.9
    Solution: 
    https://www.udacity.com/course/viewer#!/c-st101/l-48729372/e-48723640/m-48652466
    """
    return p_(p_h1), p_(p_h2)
    

In [162]:
assert np.allclose(two_coins_1(), (0.5,0.1)), 'P(T) would be 0.5 then 0.1'

In [164]:
def two_coins_2(p_h1=0.5, p_h2 = 0.9, p_p1 = 0.5):
    """
    :returns: P(H) for randomly picking (equal chance) of two coins,
    One P(H) = 0.5
    The other P(H) = 0.9
    """
    p_p2 = p_(p_p1)
    p_h_g_1 = p_p1 * p_h1
    p_h_g_2 = p_p2 * p_h2
    return p_h_g_1 + p_h_g_2

In [166]:
assert np.isclose(two_coins_2(), 0.7), 'Total P(H) is 0.5 * 0.5 + 0.5 * 0.9' 

In [189]:
def two_coins_truth_table(p_h1=0.5, p_h2 = 0.9, p_p1 = 0.5):
    """
    :returns:
    Two coins truth table
    """
    b_c = (1,2)
    b_h = ('h','t')
    
    c_map = {'h|1':p_h1,
             't|1':p_(p_h1),
             'h|2':p_h2,
             't|2':p_(p_h2)}
    
    c_map_assertion(c_map)
    
    t_t = pd.DataFrame(list(itertools.product(b_h, b_c)), columns=('f', 'c'))

    t_t['c_p'] = (t_t['f'].str.cat(t_t['c'].apply(str), '|')).map(c_map)
    t_t['p_c'] = (t_t['c'].map({1:p_p1, 2:p_(p_p1)}))
    t_t['j_p'] = t_t[['c_p', 'p_c']].apply(basic_prob_law_reduction, axis=1)
    return t_t 
    

In [194]:
two_coins_truth_table()


Unnamed: 0,f,c,c_p,p_c,j_p
0,h,1,0.5,0.5,0.25
1,h,2,0.9,0.5,0.45
2,t,1,0.5,0.5,0.25
3,t,2,0.1,0.5,0.05


In [229]:

two_coins_truth_table()[two_coins_truth_table()['f']=='h'].sum()['j_p']

0.7

In [344]:
def two_coins_3(p_h1=0.5, p_h2 = 0.9, p_p1 = 0.5):
    """
    :returns: P(H,T) regardless of which coin
    Only works because the original truth table doesn't enumerate all possibilities
    Just H,T and H,T, which are here multiplied together with prod()
    Solution: https://www.udacity.com/course/viewer#!/c-st101/l-48729372/e-48695799/m-48730182
    """
    #Above is wrong, it applied the 0.5 of which coin-pick to both results separately;
    #The coin isn't picked twice
    #It only needs to be applied to the joint result; 
    #The coin is picked once
    
    #ht_c1 = basic_prob_law_reduction(t_t[t_t['c']==1]['j_p'])
    #ht_c2 = basic_prob_law_reduction(t_t[t_t['c']==2]['j_p'])
  
    #print(ht_c1, ht_c2)
    #return ht_c1 + ht_c2 #basic_prob_law_reduction(t_t[t_t['c']==1]['j_p'])
    
    t_t =  two_coins_truth_table(p_h1, p_h2, p_p1)
    ht_g_c = t_t.groupby('c').prod()['c_p']
    p_ht = (0.5*ht_g_c).sum() 
    return p_ht

In [326]:
assert np.isclose(two_coins_3(), 0.17), '0.17 is the sum of 0.5(0.25 + 0.09)'

In [348]:
#Better to make another truth table
def two_coins_3(p_h1=0.5, p_h2 = 0.9, p_p1 = 0.5, f_1='h', f_2='t'):
    """
    :returns: P(H,T) regardless of which coin
    """
    coins = (1, 2)
    flip = ('h','t')
    c_map_dict_style = {1:{'h':p_h1,
                           't':p_(p_h1)},
                        2:{'h':p_h2,
                           't':p_(p_h2)}}
    red = lambda x,y: x.get(y)
    app = lambda x: functools.reduce(red, list(x), c_map_dict_style)
    t_t = pd.DataFrame(list(itertools.product(coins, flip, flip)), columns=('c','f1','f2'))
    
    
    t_t['p_f1'] =  t_t[['c','f1']].apply(app, axis=1)
    t_t['p_f2'] =  t_t[['c','f2']].apply(app, axis=1)
    t_t['p_c']  =  t_t['c'].map({1:p_p1, 2:p_(p_p1)})
    #print(t_t)
    t_t['j_p'] = t_t[['p_f1','p_f2', 'p_c']].apply(basic_prob_law_reduction, axis=1)
    #print(t_t)
    #Again this is the wrong result; 0.5 is multiplied by both, when it needn't be.
    result = (t_t[(t_t['f1']==f_1) & (t_t['f2']==f_2)]['j_p']).sum()
    
    return result
    #return t_t

In [349]:
#functools.reduce(lambda x,y: x.get(y),['a','b'],  {'a':{'b':1}})
assert np.isclose(two_coins_3(), 0.17), 'Also got using a dictionary reduction method'

In [352]:
def two_coins_4(p_h1=1, p_h2 = 0.6, p_p1 = 0.5,f_1='t',f_2='t'):
    """
    :params p_h1: P(H|C1)
    :params p_h2: P(H|C2)
    :params p_p1: P(C1)
    :params f_1: First flip result match
    :params f_2: Second flip result match
    :returns: P(T,T)
    Solution:
    https://www.udacity.com/course/viewer#!/c-st101/l-48729372/e-48701556/m-48691629
    """
    return two_coins_3(p_h1, p_h2, p_p1, f_1=f_1, f_2=f_2)

In [353]:
assert np.isclose(two_coins_4(),0.08), 'Should be 0.08, very unlikely as P(H|C1) is 1;  so only possibility is 0.5 * 0.4 * 0.4' 