In [1]:
import itertools
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import functools

%matplotlib inline


# Bayes Rule
 - Reverend Thomas Bayes
 - To infer the existence of God
 - Holy Grail of Probabilistic thinking
 - Informed Artificial Intelligence


In [2]:
def cancer_test():
    """
    Quiz answer for P(C|P): the probability that you have cancer given
    that the test came back positive.

    :returns: P(C|P)
    Solution: https://www.udacity.com/course/viewer#!/c-st101/l-48703346/e-48759018/m-48480554
    """
    # Tempting wrong answer #1 -- the prior P(C). Returning it would claim
    # that a positive test tells us nothing new about having cancer.
    prior_cancer = 0.01

    # Tempting wrong answer #2 -- P(P|C), the chance of a positive result
    # when cancer is present. That answers a different question from the
    # one asked, which is about the probability of cancer.
    positive_given_cancer = 0.9

    # The quiz answer: the posterior P(C|P).
    cancer_given_positive = 0.08

    # A positive result from a test with high sensitivity has to raise the
    # chance of cancer above the rate in the general population.
    # (No such ordering is expected against P(P|C); different question.)
    assert prior_cancer < cancer_given_positive

    return cancer_given_positive
    

# Sensitivity vs Specificity 

 - **Sensitivity**
  - Chance that the indicator is positive when the outcome is present (i.e. the test says you have the disease when you really do): P(Pos | C)
  - the amount of confidence we can have that this test catches real cases
 - **Specificity**
  - Chance that the indicator is negative when the outcome is absent (i.e. the test says you don't have the disease when you really don't): P(~Pos | ~C)
 
 If we are conservative (in medicine we are), then...
  - we are willing to say we have cancer even if we don't (accept false positives, i.e. give up some specificity)
  - and we really want to be sure that a negative test can be relied on (few missed cases, i.e. keep sensitivity high)
 



In [3]:
class Event(object):
    """
    A named event whose logical structure is encoded in its name string.

    Three shapes of name are understood:
      * single:      'cancer', or its negation 'not(cancer)'
      * conditional: 'cancer given positive'  -- built with ``a | b``
      * joint:       'cancer and positive'    -- built with ``a & b``

    ``~event`` (or ``-event``) builds the negated event. Equality and hashing
    ignore the order of the members of a joint event, so ``a & b == b & a``.
    """

    # Separator between posterior and condition in a conditional event name.
    GIVEN = ' given '
    # Prefix marking a negated single event, written as 'not(<name>)'.
    NOT = 'not'
    # Separator between the members of a joint event name.
    AND = ' and '
    # Not used by any method of this class; kept because they are public attributes.
    NEGATIVE = 'negative'
    POSITIVE = 'positive'

    def __init__(self, name):
        """
        :param name: anything whose ``str()`` is the event name (another
            Event works); surrounding whitespace is dropped.
        """
        self.__name = str(name).strip()

    def __neg__(self):
        """``-event`` is a synonym for ``~event``."""
        return self.__invert__()

    def __invert__(self):
        """
        The idea of a binary event.

        :returns: the 'inverse' of this event; this event not-happening
        """
        # A conditional event; negate only the posterior, keep the condition.
        given_event = self.given()
        if given_event is not None:
            return (~self.posterior()) | given_event

        # A joint event; negate both members and re-join.
        # NOTE: this yields 'not(A) and not(B)', which is NOT the logical
        # complement of 'A and B' (that would have probability
        # 1 - P(A and B)). Kept as-is because no other representation for
        # the complement of a joint event exists in this class.
        members = self.joint()
        if all(members):
            return ~members[0] & ~members[1]

        # A negative single event written as 'not(<name>)'; unwrap exactly one
        # layer. (str.lstrip/str.strip remove *character sets*, not prefixes,
        # which used to mangle names such as 'notes' or 'f(x)'.)
        wrapped_prefix = self.NOT + '('
        if self.__name.startswith(wrapped_prefix) and self.__name.endswith(')'):
            return Event(self.__name[len(wrapped_prefix):-1])

        # A hand-written negative of the form 'not <name>'.
        spaced_prefix = self.NOT + ' '
        if self.__name.startswith(spaced_prefix):
            return Event(self.__name[len(spaced_prefix):])

        # A positive single event; make negative.
        return Event('{0}({1})'.format(self.NOT, self.__name))

    def __or__(self, event_2):
        """
        ``a | b`` builds the conditional event 'a given b'.

        Unlike a logical "or" this is not commutative:
        ``a | b`` and ``b | a`` are different events.
        """
        return self.given(event_2)

    def __and__(self, event_2):
        """``a & b`` builds the joint event 'a and b'."""
        return self.joint(event_2)

    def joint(self, event_2=None):
        """
        Get the members of this joint event, or build a new joint event.

        Course note (Quiz: Prior And Posterior): from 0:45 onward the
        posterior for the cancer case should be written P(C, Pos) instead of
        P(C|Pos), and the non-cancer case P(~C, Pos) instead of P(~C|Pos);
        i.e. ``AND`` plays the role of the *comma*.
        https://classroom.udacity.com/courses/st101/lessons/48703346/concepts/487236370923

        :param event_2: the event to join with; omit to read the members
        :returns: with ``event_2``, a new Event named 'self and event_2'.
            Without it, the list of member Events obtained by splitting the
            name on ``AND``, or ``[None, None]`` if this is not a joint event.
        """
        if event_2 is None:
            if self.AND in self.__name:
                return [Event(_) for _ in self.__name.split(self.AND)]
            return [None, None]

        and_name = '{0}{1}{2}'.format(self, self.AND, event_2)
        return Event(and_name)

    def alt_events(self):
        """
        :returns: a one-element list holding this joint event with its two
            members swapped ('B and A' for 'A and B'). For a non-joint event
            the members are both None, so the result is named 'None and None'.
        """
        first, second = self.joint()
        other_joint = '{0}{1}{2}'.format(second, self.AND, first)
        return [Event(_) for _ in [other_joint]]

    def posterior(self):
        """
        If this is a conditional such as P(Cancer|Pos), read its left side.

        There is no "set" form: ``given`` is what creates a new conditional.

        :returns: the Event left of ``GIVEN`` (Cancer in the example), or
            None if this is not a conditional event
        """
        if self.GIVEN in self.__name:
            return Event(self.__name.split(self.GIVEN)[0])
        return None

    def given(self, event_2=None):
        """
        Read the condition of this conditional event, or build a new one.

        :param event_2: the condition IF you want to create a new conditional
        :returns: with ``event_2``, a new Event named 'self given event_2'.
            Without it, the Event right of ``GIVEN`` (Pos in P(Cancer|Pos)),
            or None if this is not a conditional event.
        """
        # GET form
        if event_2 is None:
            if self.GIVEN in self.__name:
                return Event(self.__name.split(self.GIVEN)[1])
            return None

        # SET form
        given_name = '{0}{1}{2}'.format(self, self.GIVEN, event_2)
        return Event(given_name)

    def __nickname__(self):
        """
        Compact upper-case form of the name, using '|' for "given" and ','
        for "and"; e.g. 'cancer given positive' becomes 'CANCER|POSITIVE'.
        """
        return (self.__name
                .replace(self.GIVEN, '|')
                .replace(self.AND, ',')
                .upper())

    def __repr__(self):
        return 'Event("{}")'.format(self.__name)

    def __str__(self):
        return self.__name

    def _key(self):
        """Order-insensitive identity: the sorted parts of the name split on AND."""
        return tuple(sorted(self.__name.split(self.AND)))

    def __hash__(self):
        return hash(self._key())

    def __eq__(self, other):
        """
        Events are equal when their order-insensitive keys match.

        Non-Event operands are never equal: returning NotImplemented lets
        Python fall back to its default (identity) comparison instead of
        matching on a coincidentally equal hash.
        """
        if not isinstance(other, Event):
            return NotImplemented
        return self._key() == other._key()

In [4]:
class Probability(dict):
    """
    A dict from Event to probability that fills in related probabilities.

    ``P[event] = p`` stores ``p`` for ``event`` and ``1 - p`` for ``~event``,
    and then derives every joint, conditional and single probability that
    follows from what is already known (see ``__set__``). The first value
    stored for an event wins; later values for the same event are ignored.
    Every newly stored value is announced with a printed line.
    """

    def __call__(self, event, *args, **kwargs):
        """Allow ``P(event)`` as a synonym for ``P[event]``; extra arguments are ignored."""
        return self.__getitem__(event)

    def __getitem__(self, key):
        """
        Look up the probability stored for ``key``.

        Not a sequence type; negative integers and ranges are not supported.

        If ``key`` is missing and is a joint event, the same joint with its
        members swapped is tried before giving up. A stored probability of
        0.0 is a valid result and is returned as-is.

        :raises KeyError: if no probability is known for ``key``
        """
        try:
            return super().__getitem__(key)
        except KeyError:
            if isinstance(key, Event) and all(key.joint()):
                for alt_event in key.alt_events():
                    if alt_event in self:
                        return super().__getitem__(alt_event)
            raise

    def __set__(self, event, prob):
        """
        Store ``prob`` for ``event`` (unless already known) and derive
        whatever else follows from it.

        NOTE: despite sharing its name with the descriptor hook, this method
        is only ever called explicitly (``self.__set__(event, prob)``).

        :param event: an Event (or anything ``Event()`` accepts as a name)
        :param prob: a value convertible to float, within [0, 1]
        :raises AssertionError: if ``event`` is None or ``prob`` is out of range
        """
        assert event is not None, 'Event must be not None'

        prob = float(prob)
        event = Event(event)

        assert 0 <= prob <= 1, 'Dont know probability of {0} ' \
            '{1} outside of limits 0 and 1'.format(event, prob)

        # If already set, don't continue. Membership (rather than truthiness
        # of the stored value) so that a stored 0.0 counts as "already set".
        if event in self:
            return

        print('setting event "{}" to prob {}'.format(event, prob))
        super().__setitem__(event, prob)

        # The derivation steps run in this fixed order; each may recurse
        # back into __set__ for the events it derives.
        self._derive_from_conditional(event, prob)
        self._derive_joints_from_single(event, prob)
        self._derive_conditionals_from_single(event, prob)
        self._derive_from_joint(event, prob)

    def _derive_from_conditional(self, event, prob):
        """event = P(A|B) with P(B) known: derive P(A&B) and P(~A|B)."""
        given_event = event.given()
        if given_event is None or given_event not in self:
            return

        given_event_prob = self[given_event]
        posterior_event = event.posterior()

        # P(A&B) = P(A|B) * P(B)
        self.__set__(posterior_event & given_event, prob * given_event_prob)

        # P(~A|B) = 1 - P(A|B)
        self.__set__(~posterior_event | given_event, 1 - prob)

    def _derive_joints_from_single(self, event, prob):
        """event = P(B) with some P(N|B) already known: derive each P(N&B)."""
        # "N" could be anything, and there could be more than one.
        # Snapshot the matches first: __set__ adds keys while we loop.
        conditional_events = [known for known in list(self.events())
                              if known.given() == event]

        for conditional_event in conditional_events:
            conditional_prob = self[conditional_event]
            joint_event = conditional_event.posterior() & conditional_event.given()
            self.__set__(joint_event, conditional_prob * prob)

    def _derive_conditionals_from_single(self, event, prob):
        """event = P(B) with some P(N&B) or P(B&N) already known: derive each P(N|B)."""
        try:
            # Snapshot the matches first: __set__ adds keys while we loop.
            joint_events = [known for known in list(self.events())
                            if event in known.joint()]

            for joint_event in joint_events:
                joint_prob = self[joint_event]
                other_event = [member for member in joint_event.joint()
                               if member != event][0]

                # P(N|B) = P(N&B) / P(B)
                self.__set__(other_event | event, joint_prob / prob)

        except (IndexError, ZeroDivisionError, AssertionError) as e:
            # Best effort: report and stop deriving conditionals when
            #  - both members of a joint equal ``event`` (IndexError),
            #  - P(B) is 0 (ZeroDivisionError), or
            #  - the ratio falls outside [0, 1] (AssertionError from __set__).
            print(e)

    def _derive_from_joint(self, event, prob):
        """event = P(A&B): derive P(B|A), P(A|B), P(A) and P(B) where possible."""
        e_1, e_2 = event.joint()
        if e_1 is None or e_2 is None:
            # Not a joint event; nothing to derive here.
            return

        for original_event, second_event in ((e_1, e_2), (e_2, e_1)):
            # With P(original) known: P(second|original) = P(A&B) / P(original)
            try:
                original_prob = self[original_event]
                self.__set__(second_event | original_event, prob / original_prob)
            except (KeyError, ZeroDivisionError, AssertionError):
                # Best effort: P(original) unknown or 0, or ratio out of range.
                pass

            # With P(original & ~second) known:
            # P(original) = P(original & second) + P(original & ~second).
            # The result goes back through __set__ so that it can trigger
            # further derivations.
            try:
                original_and_not_second_prob = self[original_event & ~second_event]
                self.__set__(original_event, prob + original_and_not_second_prob)
            except (KeyError, AssertionError):
                # Best effort: the complementary joint is unknown, or the sum
                # falls outside [0, 1].
                pass

    def events(self):
        """:returns: a view of the events whose probability is currently stored."""
        return self.keys()

    def __setitem__(self, event, prob):
        """
        ``P[event] = prob``: store P(event) and its complement.

        If we have P(A) we can get P(~A), and both go through ``__set__`` so
        that any further consequences are followed through.
        """
        for each_event, each_prob in ((event, prob), (~event, 1 - prob)):
            self.__set__(each_event, each_prob)
        

In [17]:

# Module-level scratch objects for interactive experiments.
# P, C and Pos are rebound further down by the result of cancer_test().
P = Probability()
C, Pos = Event('cancer'), Event('positive')
Neg = ~Pos
# Equivalent to ``Pos | C``: the conditional event 'positive given cancer'.
p_g_c = Pos.given(C)
# Inspections that were tried while developing Event:
#print(p_g_c)
#print(p_g_c.given())
#print(p_g_c.posterior())
#print(Pos | None)

def cancer_test():
    """
    Set up the cancer-test problem and let Probability derive the rest.

    The question being asked is this: 1% of the population has cancer.
    Given that there is a 90% chance that you will test positive if you
    have cancer and that there is a 90% chance you will test negative
    if you don't have cancer, what is the probability that you have
    cancer if you test positive?

    :returns: the tuple (probability table, cancer event, positive-test event)
    """
    probabilities = Probability()
    cancer = Event('cancer')
    positive = Event('positive')

    # Each assignment also stores the complement and every probability that
    # can be derived from what is known so far.
    probabilities[cancer] = 0.01                # prior P(C)
    probabilities[positive | cancer] = 0.9      # P(Pos|C)
    probabilities[~positive | ~cancer] = 0.9    # P(~Pos|~C)

    return probabilities, cancer, positive
          
P, C, Pos = cancer_test()

# Other look-ups that can be tried once the table is filled:
# print(P(C | Pos))
# print(P(~C | Pos))
# print(P(C & Pos))
# print(P(Pos & C))
# print(P(~C & Pos))
# print(P(Pos & ~C))

# Show the complement of the prior, one given conditional, and its complement.
for queried_event in (~C, ~Pos | ~C, Pos | ~C):
    print(P[queried_event])

setting event "cancer" to prob 0.01
setting event "not(cancer)" to prob 0.99
setting event "positive given cancer" to prob 0.9
setting event "positive and cancer" to prob 0.009000000000000001
setting event "not(positive) given cancer" to prob 0.09999999999999998
setting event "not(positive) and cancer" to prob 0.0009999999999999998
setting event "not(positive) given not(cancer)" to prob 0.9
setting event "not(positive) and not(cancer)" to prob 0.891
setting event "not(positive)" to prob 0.892
setting event "cancer given not(positive)" to prob 0.0011210762331838563
setting event "not(cancer) given not(positive)" to prob 0.9988789237668162
setting event "positive given not(cancer)" to prob 0.09999999999999998
setting event "positive and not(cancer)" to prob 0.09899999999999998
setting event "positive" to prob 0.10799999999999998
setting event "cancer given positive" to prob 0.08333333333333336
setting event "not(cancer) given positive" to prob 0.9166666666666666
0.99
0.9
0.09999999999999

![See the venn diagram that Sebastian drew](img/venn_diagram.png)

In [6]:
# A conditional event is not a joint event, so joint() reports no members.
(C | Pos).joint()

[None, None]

In [7]:
# Split the conditional into its two sides: posterior() is the event left of
# "given", given() is the event right of it.
(C | Pos).posterior() , (C | Pos).given()

(Event("cancer"), Event("positive"))

In [8]:
# A joint event splits back into its member events.
(C & Pos).joint()

[Event("cancer"), Event("positive")]

In [16]:
import pprint

# Dump every (event, probability) pair that was stored or derived,
# followed by the expected and the actual number of entries.
pprint.pprint([*P.items()])
print('Should have 16 long. 2 for the cancer vs ~cancer. 2 for the pos vs ~pos. 4 for the combination of "givens" in one direction, 4 for the combinations of "givens" in the other direction, and  4 for the combination of "ands" in the middle')
pprint.pprint(len(P))

[(Event("cancer"), 0.01),
 (Event("not(cancer)"), 0.99),
 (Event("positive given cancer"), 0.9),
 (Event("positive and cancer"), 0.009000000000000001),
 (Event("not(positive) given cancer"), 0.09999999999999998),
 (Event("not(positive) and cancer"), 0.0009999999999999998),
 (Event("not(positive) given not(cancer)"), 0.9),
 (Event("not(positive) and not(cancer)"), 0.891),
 (Event("not(positive)"), 0.892),
 (Event("cancer given not(positive)"), 0.0011210762331838563),
 (Event("not(cancer) given not(positive)"), 0.9988789237668162),
 (Event("positive given not(cancer)"), 0.09999999999999998),
 (Event("positive and not(cancer)"), 0.09899999999999998),
 (Event("positive"), 0.10799999999999998),
 (Event("cancer given positive"), 0.08333333333333336),
 (Event("not(cancer) given positive"), 0.9166666666666666)]
Should have 16 long. 2 for the cancer vs ~cancer. 2 for the pos vs ~pos. 4 for the combination of "givens" in one direction, 4 for the combinations of "givens" in the other direction, a

In [14]:
# Sanity check: the derived P(not(positive)) and P(positive) should total 1.
0.892 + 0.107999999999999999999


1.0

In [55]:
# Scratch arithmetic (one sixth as a float); the result is not referenced by
# any other visible cell.
1/6.

0.16666666666666666

In [40]:
# List the events whose probabilities are currently stored in P.
# NOTE(review): the saved output of this cell shows 10 events while the
# pprint cell shows 16 -- presumably it was captured from an earlier run;
# re-run the notebook top to bottom to refresh it.
P.keys()

dict_keys([Event("cancer"), Event("not(cancer)"), Event("positive given cancer"), Event("positive and cancer"), Event("not(positive) given cancer"), Event("not(positive) and cancer"), Event("not(positive) given not(cancer)"), Event("not(positive) and not(cancer)"), Event("positive given not(cancer)"), Event("positive and not(cancer)")])

In [236]:
# Scratch check: two identical string literals compare equal.
'Event("cancer", 0.5)' == 'Event("cancer", 0.5)'

True

In [20]:
def normalizing_1():
    """
    The normalizer: total probability of a positive test, obtained by adding
    the two joint probabilities P(C&Pos) and P(~C&Pos).
    """
    positive_with_cancer = P(C & Pos)
    positive_without_cancer = P(~C & Pos)
    return positive_with_cancer + positive_without_cancer
normalizing_1()

0.10799999999999998

In [21]:
# Guard the hand-computed normalizer: the two joints with a positive test
# add up to P(Pos), about 0.108.
assert np.isclose(normalizing_1(), 0.108), 'Normalized sum of P(C&Pos) + P(~C&Pos) should be around 0.108'

In [28]:
def normalizing_2():
    """The normalized posterior P(C|Pos): cancer given a positive test."""
    cancer_given_positive = C | Pos
    return P(cancer_given_positive)
normalizing_2()

0.08333333333333336

In [24]:
# Numerator and denominator of P(C|Pos) = P(C&Pos) / P(Pos).
P(C&Pos),P(Pos)

(0.009000000000000001, 0.10799999999999998)

In [30]:
# Guard the derived posterior against the hand computation 0.009 / 0.108.
assert np.isclose(normalizing_2(), 0.0833333), 'The P(C|Pos) 0.0833, which is P(C&Pos) / P(Pos) = 0.009 / 0.108 = 0.0833 '

In [31]:
# The rounded value that the P(C|Pos) assertion compares against.
0.0833333

0.0833333

In [26]:
 # Hand check of P(C|Pos) = P(C&Pos) / P(Pos) with rounded inputs.
 0.009 / 0.108

0.08333333333333333

In [33]:
def normalizing_3():
    """The normalized posterior P(~C|Pos): no cancer given a positive test."""
    no_cancer_given_positive = ~C | Pos
    return P(no_cancer_given_positive)
normalizing_3()

0.9166666666666666

In [35]:
# Numerator of P(~C|Pos): the joint probability of no cancer and a positive test.
P(~C&Pos)

0.09899999999999998

In [36]:
# Hand check of P(~C|Pos) using a truncated numerator (0.09899 rather than
# 0.099), hence a result slightly below the derived 0.91666...
0.09899 / 0.108

0.916574074074074

In [37]:
# Guard the derived P(~C|Pos); np.isclose's default tolerance absorbs the
# rounding of the expected value.
assert np.isclose(normalizing_3(), 0.916666), 'The P(~C|Pos) 0.9166, which is P(~C&Pos) / P(Pos) = 0.09899 / 0.108 = 0.91657'

In [39]:
def total_probability():
    """
    Sum of the two posteriors for a positive test, P(C|Pos) + P(~C|Pos);
    these cover every case, so the result should be 1.
    """
    with_cancer = P(C | Pos)
    without_cancer = P(~C | Pos)
    return with_cancer + without_cancer
total_probability()

1.0

In [40]:
# The complementary posteriors must add up to 1 (within float tolerance).
assert np.isclose(total_probability(), 1.0), 'total_probability, which can only be a sum of conditional probabilities, should be 1, not {}'.format(total_probability())

![Nate added more stuff](img/diagram%20remade.png)