# Probability is the opposite of statistics

In [1]:
def statistics_vs_probability():
    """Return a small ASCII diagram contrasting statistics with probability.

    The first arrow (stats) points from data to causes; the second (prob)
    points from causes back to data.
    """
    diagram_lines = (
        "",
        "    data ---stats--> causes",
        "    data <--prob---- causes",
        "    ",
    )
    return "\n".join(diagram_lines)
print(statistics_vs_probability())


    data ---stats--> causes
    data <--prob---- causes
    


 - In statistics  ([induction](https://www.quora.com/What-the-difference-between-deduce-and-induce))
    - given data
    - **infer** (general) causes
 - In probability (deduction)
    - given (general) causes
    - **predict** data

In [5]:
def always_heads():
    """
    A fair coin was observed landing heads, heads, heads.
    Does that mean the coin will *always* come up heads?

    :returns: True only when P(Heads) equals 1; False for the fair coin used here
    """
    # What we happened to see; it plays no part in the answer below
    seen_flips = set(['heads'] * 3)
    p_heads = 0.5
    p_tails = 1 - p_heads
    # "Fair coin" is what determines the answer to the question,
    # regardless of observations that may give us a suspicion
    assert p_tails == p_heads, 'Not a fair coin'

    # The coin always lands heads only if heads is certain
    certain_heads = (p_heads == 1)
    return certain_heads
assert always_heads()==False, 'It will not always be heads, despite the observations'
    

# Probability is ...
 > Probability is a way of describing the expected outcome
 
 > P(Heads) = 0.5

In [6]:
#In a fair coin
p_h = 0.5 # 1 of 2 ; 1/2

def fair_coin():
    """
    :returns: prob of tails of a fair coin, read from the module-level ``p_h``
    """
    # For a fair coin P(Tails) equals P(Heads), which is also 1 - p_h
    prob_tails = p_h
    return prob_tails

def also_fair_coin():
    """
    :returns: prob of tails, computed as the complement of the module-level ``p_h``
    """
    prob_tails = 1 - p_h
    return prob_tails

assert fair_coin()==0.5, 'prob tails should be 0.5 not {}'.format(fair_coin())
assert also_fair_coin()==0.5, 'prob tails should be 0.5, not {}'.format(also_fair_coin())
fair_coin_solution = 'https://www.udacity.com/course/viewer#!/c-st101/l-48738235/e-48738233/m-48698595'


In [8]:
def loaded_coin_1():
    """
    A loaded coin that always lands heads.

    :returns: probability of heads, i.e. 1
    """
    certain = 1
    return certain

def simulated_loaded_coin_1():
    """
    Simulate a loaded coin that always lands heads.

    :returns: fraction of heads among 10000 simulated flips, each recorded as 1
    """
    import numpy as np
    flips = np.ones(10000)
    heads_seen = flips.sum()
    return heads_seen / len(flips)

assert loaded_coin_1()==1, 'An "always" probability is 1; same as 100%'
assert simulated_loaded_coin_1()==1, 'An "always" probability is 1; same as 100%'
loaded_coin_1_solution = 'https://www.udacity.com/course/viewer#!/c-st101/l-48738235/e-48754089/m-48721344'

In [11]:
def loaded_coin_2():
    """
    A loaded coin that always lands heads.

    :returns: probability of tails, i.e. 0
    """
    impossible = 0
    return impossible
def loaded_coin_2_based_on_heads():
    """
    :returns: probability of tails, derived as 1 minus ``loaded_coin_1()``
    """
    prob_heads = loaded_coin_1()
    return 1 - prob_heads
assert loaded_coin_2()==0, 'An "never" probability is 0; same as 0%'
assert loaded_coin_2_based_on_heads()==0, 'Should be 0 probability'
# Named after this exercise so it no longer overwrites loaded_coin_1_solution
loaded_coin_2_solution = 'https://www.udacity.com/course/viewer#!/c-st101/l-48738235/e-48699715/m-48693696'

In [13]:
p_t = fair_coin()
assert  p_h==p_t, 'A fair coin has equal probabilities in both outcomes H or T'
assert p_h+p_t==1, 'The sum of all probabilities should be one'


# P(H) + P(T) == 1
P(Heads) + P(Tails) = 1

In [6]:
assert loaded_coin_1()+loaded_coin_2()==1, 'The sum of all probabilities should be one'\
'even in the loaded case; a loaded coin does not defy probabilities'
 

In [16]:
def p_(h=None, t=None):
    """
    Complement of a probability.

    :param h: P(Heads), optional
    :param t: P(Tails), optional
    :returns: 1 minus the first truthy value among ``h`` then ``t``;
        1 when neither is given or both are zero
    """
    for given in (h, t):
        if given:
            return 1 - given
    # Nothing (non-zero) was supplied: the totality
    return 1 - 0

#Returns 1 by default; the totality
assert p_()==1
#Other standard probabilities
assert p_(h=1)==0
assert p_(h=0.5)==0.5
assert p_(t=0.2)==0.8
#Uses first nonzero arg
assert p_(h=0,t=1)==0
assert p_(h=0.2, t=0.5)==0.8

# Note my new way of specifying solution; in the docstring

In [17]:
def loaded_coin_3(h=0.75):
    """
    :param h: P(Heads) of the loaded coin; 0.75 by default
    :returns: P(Tails) if P(Heads) = 0.75, obtained from ``p_`` as the complement of ``h``
    Solution https://www.udacity.com/course/viewer#!/c-st101/l-48738235/e-48750147/m-48698590
    """
    prob_tails = p_(h=h)
    return prob_tails


In [18]:
assert loaded_coin_3()==0.25

[Symbol links](http://www.rapidtables.com/math/symbols/Statistical_Symbols.htm)

# 1st Basic Probability Law
## P(A) = 1 - P(¬A)

In [20]:
#Stated in Python with the p_ function
p_a = 0.25
p_n_a = p_(p_a)
p_a, p_n_a

def complementary_outcomes(p_a, p_n_a):
    """
    Asserts the 1st basic law of probability: P(A) = 1 - P(¬A)
    https://www.udacity.com/course/viewer#!/c-st101/l-48738235/m-48742067

    :param p_a: probability of the event
    :param p_n_a: probability of the complementary event
    :raises AssertionError: if ``p_a`` and ``1 - p_n_a`` differ by more than
        floating-point rounding
    """
    import math
    # An exact == rejects valid pairs such as (0.3, 0.7), because 1 - 0.7 is
    # 0.30000000000000004 in binary floating point; compare with a tolerance.
    assert math.isclose(p_a, 1 - p_n_a, rel_tol=1e-9, abs_tol=1e-12), 'Does not follow basic law in probability'

complementary_outcomes(p_a, p_n_a)

In [31]:
import itertools
import pandas as pd
import numpy as np

# Enumerate the scenarios of two successive coin flips and tabulate them.
h,t='h','t'
print(list(itertools.product(*(2*[(h,t)]))))
# Scenario where it's a fair coin; each element represents a side of the coin
s_fair_coin = [(h,t)]
# Scenario where H first flip; H second flip; each element represents a scenario
s_h_a_h = [(h,h)] # "_a_" stands for AND; the "s_" prefix for scenario
# All scenarios; "Every possible outcome"; each element represents a scenario
s_all = [(h,t),
         (h,h),
         (t,h),
         (t,t)]

# itertools.product can determine all the scenarios
assert set(s_all) == set(itertools.product(*(2*s_fair_coin)))

# Original note: DataFrame doesn't accept list(zip(*s_all));
# from_records is given the scenario tuples directly, one per row
truth_table = pd.DataFrame.from_records(s_all, columns=('f_1', 'f_2'))
truth_table.index.name = 'Truth Table for coin flipped successively'
print(truth_table)
    
def two_flips_1(num_coins=2):
    """
    :param num_coins: number of successive flips of a fair coin; 2 by default
    :returns: P(H, H) Probability of flipping heads on every flip
    Of a fair_coin
    P(H) one head;
    P(H,H) one head followed by another head
    P(H ∩ H) a head AND a head
    Solution: https://www.udacity.com/course/viewer#!/c-st101/l-48738235/e-48754090/m-48697594
    """
    # Every possible outcome of `num_coins` flips; all are equally likely for a fair coin
    all_scenarios = list(itertools.product('ht', repeat=num_coins))
    # Scenarios in which every single flip came up heads
    all_heads = [scenario for scenario in all_scenarios
                 if all(side == 'h' for side in scenario)]
    return len(all_heads) / len(all_scenarios)

[('h', 'h'), ('h', 't'), ('t', 'h'), ('t', 't')]
                                          f_1 f_2
Truth Table for coin flipped successively        
0                                           h   t
1                                           h   h
2                                           t   h
3                                           t   t


In [32]:
list(itertools.product('abcd'))

[('a',), ('b',), ('c',), ('d',)]

In [36]:
# itertools.product does NOT de-duplicate: repeated input elements ('aac')
# yield repeated tuples, whereas distinct inputs ('abc') yield distinct tuples.
# Only the last expression of the cell is displayed as its output.
list(itertools.product(*(['abc']*2)))
list(itertools.product(*(['aac']*2)))

[('a', 'a'),
 ('a', 'a'),
 ('a', 'c'),
 ('a', 'a'),
 ('a', 'a'),
 ('a', 'c'),
 ('c', 'a'),
 ('c', 'a'),
 ('c', 'c')]

In [37]:
two_flips_1()

0.25

In [38]:
def two_flips_2(p_h=0.6):
    """
    :param p_h: P(H) of the biased coin; 0.6 by default
    :returns: P(T) when P(H) is 0.6; a biased coin. Computed by ``p_`` as the complement of ``p_h``
    Solution: https://www.udacity.com/course/viewer#!/c-st101/l-48738235/e-48738234/m-48678786
    """
    prob_tails = p_(p_h)
    return prob_tails

In [39]:
assert two_flips_2() == 0.4, 'P(T) when coin is biased P(H); is 40% or 0.4'

# 2nd Basic Probability Law
## P(H, H) = P(H ∩ H) = P(H) * P(H)

In [40]:
def basic_prob_law(p_1, p_2):
    """
    2nd basic law: multiply the two probabilities.

    :param p_1: probability of the first event
    :param p_2: probability of the second event
    :returns: joint probability of both events happening
    """
    joint = p_1 * p_2
    return joint

assert basic_prob_law(0.5, 0.5) == 0.25, 'Asserting result of two_flips_1'



In [41]:
import operator
import functools

def basic_prob_law_reduction(p_list):
    """
    :param p_list: iterable of probabilities, one per event
    :returns: joint probability
    of all events in a list happening
    by applying 2nd law; 1 for an empty list (the empty product)
    :raises AssertionError: if any probability lies outside [0, 1]
    http://stackoverflow.com/a/13840436/1175496
    """
    # Materialise once so one-shot iterators survive the validation pass
    p_list = list(p_list)
    assert all(0 <= p <= 1 for p in p_list), 'Should be between 0 and 1 (inclusive)'
    #assert sum(p_list)==1; they are different events, needn't sum to 1
    # The initial value 1 is the multiplicative identity: it leaves the product
    # unchanged and makes an empty input return 1 instead of raising TypeError
    return functools.reduce(operator.mul, p_list, 1)

assert np.allclose(basic_prob_law_reduction((0.2,0.2)),0.04), 'Two events each independent and 20% likely to happen, likelihood of both happening is 4%'


In [54]:

def two_flips_3(p_h=0.6):
    """
    :param p_h: P(H) of the biased coin; 0.6 by default
    :returns: the joint probability column of the truth table for all coin flips

    The module-level ``truth_table`` is modified in place: columns ``p_1`` and
    ``p_2`` (probability of each flip) and ``joint_prob`` (their joint
    probability) are written, and the index is relabelled as 'f_1 & f_2'.

    Uses apply
    http://stackoverflow.com/a/16033048/1175496
    Solution:
    https://www.udacity.com/course/viewer#!/c-st101/l-48738235/e-48750148/m-48697599
    """
    # Maps each side of the coin to its probability;
    # keys must correspond to the values found in the flip columns
    side_prob = {'h': p_h, 't': p_(p_h)}
    for prob_col, flip_col in (('p_1', 'f_1'), ('p_2', 'f_2')):
        truth_table[prob_col] = truth_table[flip_col].map(side_prob)
    per_flip = truth_table[['p_1', 'p_2']]
    truth_table['joint_prob'] = per_flip.apply(basic_prob_law_reduction, axis=1)
    truth_table.index = truth_table['f_1'] + ' & ' + truth_table['f_2']
    return truth_table['joint_prob']

In [55]:
# Series.data was removed from pandas (1.0); compare the underlying values instead
assert np.allclose(two_flips_3().values, (0.24,0.36,0.24,0.16)), 'Possibilities are 0.6 * 0.4, 0.6*0.6, 0.4 * 0.6, and 0.4 * 0.4'

In [56]:
two_flips_3()

h & t    0.24
h & h    0.36
t & h    0.24
t & t    0.16
Name: joint_prob, dtype: float64

In [57]:
def two_flips_4():
    """
    :returns: sum of the joint probabilities returned by ``two_flips_3``
    for two successive flips of biased coin
    Solution: 
    https://www.udacity.com/course/viewer#!/c-st101/l-48738235/e-48751112/m-48692689
    """
    joint_probs = two_flips_3()
    return joint_probs.sum()

In [58]:
# A sum of floats only equals 1.0 exactly by luck; compare with a tolerance
assert np.isclose(two_flips_4(), 1.0), 'Sum of probabilities should be equal to 1'

In [59]:
def two_flips_5():
    """
    :returns: P(H,H) of a coin whose P(H) = 1, via ``basic_prob_law``
    Solution: 
    https://www.udacity.com/course/viewer#!/c-st101/l-48738235/e-48752122/m-48721348
    """
    prob_heads = 1
    return basic_prob_law(prob_heads, prob_heads)

In [61]:
def two_flips_5_truth_table():
    """
    :returns: the result of ``two_flips_3`` for a coin whose P(H) = 1
    """
    certain_heads = 1
    return two_flips_3(certain_heads)
print(two_flips_5_truth_table())
two_flips_5_truth_table()['h & h']

h & t    0
h & h    1
t & h    0
t & t    0
Name: joint_prob, dtype: int64


1

In [62]:
assert two_flips_5()==1, 'Joint prob of P(H) twice in a row in biased coin'

In [63]:
truth_table[['f_1','f_2']]

Unnamed: 0,f_1,f_2
h & t,h,t
h & h,h,h
t & h,t,h
t & t,t,t


In [64]:
def one_head_1(exact_num_heads=1,t_t=None):
    """
    :param exact_num_heads: number of heads a scenario must contain to be counted
    :param t_t: truth table with one row per scenario; when omitted, a copy of the
        'f_1' and 'f_2' columns of the module-level ``truth_table`` is used.
        A boolean 'one_h' column is added to it in place.
    :returns: P(Exactly one H) in two coin flips of fair coin; in general the number
        of rows with ``exact_num_heads`` heads divided by the number of rows
    Solution:
    https://www.udacity.com/course/viewer#!/c-st101/l-48738235/e-48700596/m-48729224
    """
    if t_t is None:
        t_t = truth_table[['f_1','f_2']].copy()

    def matches(flip):
        # Count the 'h' entries of one scenario (row)
        return (flip=='h').sum()==exact_num_heads

    t_t['one_h'] = t_t.apply(matches, axis=1)

    matching_rows = t_t['one_h'].sum()
    return matching_rows / len(t_t)

assert one_head_1()==0.5, 'Two scenarios out of a total of four scenarios; 2/4 == 0.5 possibility of exactly one head'
#del truth_table['one_h']

In [68]:
def one_head_2(exact_num_heads=1, t_t=None):
    """
    :param exact_num_heads: passed through to ``one_head_1``
    :param t_t: truth table; when omitted, a copy of the 'f_1' and 'f_2' columns
        of the module-level ``truth_table`` is used
    :returns: Table; 'one_h' column is true when there is just one head;
    Sorted the same way Sebastian has displayed it; first the heads in flip_1 then the heads in flip_2
    'h' is alphabetically before 't', so...
    Solution: https://www.udacity.com/course/viewer#!/c-st101/l-48738235/e-48683881/m-48701431
    """
    if t_t is None:
        t_t = truth_table[['f_1','f_2']].copy()
    # Called for its side effect: it adds the 'one_h' column to t_t
    one_head_1(exact_num_heads, t_t)
    snapshot = t_t.copy()
    return snapshot.sort_values(by=['f_1', 'f_2'])

In [69]:
# The column holds booleans, so compare exactly rather than with a float tolerance
assert list(one_head_2()['one_h']) == [False, True, True, False], 'Scenarios with exactly one head are False, True, True, False when sorted by f_1 then f_2'

In [73]:
def one_of_three_1(t_t=None):
    """
    :param t_t: truth table to evaluate; defaults to every scenario of three flips
    :returns: P(Exactly one H) in truth table with three flips; fair coin
    Solution:  https://www.udacity.com/course/viewer#!/c-st101/l-48738235/e-48684856/m-48697602
    """
    if t_t is None:
        scenarios = list(itertools.product('ht', repeat=3))
        t_t = pd.DataFrame(data=scenarios, columns=('f_1','f_2','f_3'))
    return one_head_1(t_t=t_t)

In [74]:
assert one_of_three_1()==0.375, 'Should have 3 scenarios with one heads of 8 possible scenarios; 3/8 == 0.375'

In [75]:
def one_of_three_2(t_t=None,p_h=0.6):
    """
    :param t_t: truth table of flips; defaults to every scenario of three flips.
        Columns 'p' (probability of the scenario) and 'one_head' are added in place.
    :param p_h: P(H) of the coin; 0.6 by default
    :returns: P(Exactly one H) in truth table with three flips; P(H) = 0.6 coin
    Also prints the annotated truth table.
    Solution: https://www.udacity.com/course/viewer#!/c-st101/l-48738235/e-48752123/m-48697603
    """
    side_prob = {'h': p_h, 't': p_(p_h)}
    if t_t is None:
        scenarios = list(itertools.product('ht', repeat=3))
        t_t = pd.DataFrame(data=scenarios, columns=('f_1','f_2','f_3'))

    def scenario_prob(row):
        # Joint probability of all the flips in one scenario
        return basic_prob_law_reduction(row.map(side_prob))

    def exactly_one_head(row):
        return (row=='h').sum()==1

    t_t['p'] = t_t.apply(scenario_prob, axis=1)
    t_t['one_head'] = t_t.apply(exactly_one_head, axis=1)
    print(t_t)
    return t_t.loc[t_t['one_head'], 'p'].sum()

In [78]:
assert np.allclose(one_of_three_2(),0.288), 'Probability should be 0.096 * 3==0.288; 0.096 comes from P(H) * P(T) * P(T), 0.6 * 0.4 * 0.4'

  f_1 f_2 f_3      p one_head
0   h   h   h  0.216    False
1   h   h   t  0.144    False
2   h   t   h  0.144    False
3   h   t   t  0.096     True
4   t   h   h  0.144    False
5   t   h   t  0.096     True
6   t   t   h  0.096     True
7   t   t   t  0.064    False


In [81]:
0.5**3 * 3

0.375

In [82]:
(0.6*0.4*0.4)*3

0.28800000000000003

In [83]:
def even_roll():
    """
    :returns: probability that the die is even; the share of even faces among 1..6
    Solution: https://www.udacity.com/course/viewer#!/c-st101/l-48738235/e-48751113/m-48697589

    """
    faces = pd.Series(range(1,7))
    is_even = faces % 2 == 0
    return is_even.sum() / len(faces)

In [84]:
assert even_roll()==0.5, 'Half dice scenarios (2, 4, 6) are even; the other half odd. Half probability is 0.5'

In [85]:
def doubles():
    """
    :returns: probability that a throw is a double; i.e. same number on both first and second throws
    Counted over every (first, second) pair of faces 1..6.
    Solution: https://www.udacity.com/course/viewer#!/c-st101/l-48738235/e-48754091/m-48678784
    """
    faces = range(1,7)
    throws = pd.DataFrame(list(itertools.product(faces, repeat=2)), columns=('r1','r2'))
    same_face = throws['r1'] == throws['r2']
    return same_face.sum() / len(throws)

In [93]:
assert np.allclose(doubles(), 1/6.), '6 outcomes have both rolls the same; out of 36 possible outcome. 6/36 = 1/6 = 0.166'

In [100]:
assert np.isclose(doubles(), 1/6.), '6 outcomes have both rolls the same; out of 36 possible outcome. 6/36 = 1/6 = 0.166'

In [102]:
assert np.isclose((doubles(),), 1/6.), '6 outcomes have both rolls the same; out of 36 possible outcome. 6/36 = 1/6 = 0.166'

In [103]:
1/6

0.16666666666666666

# Summary
 - Prob event  P(E)
 - Prob opposite P(¬E) = 1-P(E)
 - Prob of composite event P(E,E) = P(E) * P(E)