In [1]:
# Course library; the star import supplies names used unqualified below (e.g. Table).
from datascience import *
import numpy as np
import matplotlib
# IPython magic: draw matplotlib figures inline in the notebook output.
%matplotlib inline
import matplotlib.pyplot as plots
# Use matplotlib's 'fivethirtyeight' style sheet for any figures drawn later.
plots.style.use('fivethirtyeight')

In [2]:
def create_population_table(prior_disease_prob, n, false_positive_rate=0.05):
    """Build a simulated population with one row per person.

    The test is modelled with a 100% true positive rate: every person with
    the disease tests positive. People without the disease test positive at
    ``false_positive_rate``.

    Parameters
    ----------
    prior_disease_prob : float
        Proportion of the population that has the disease; must be in [0, 1].
    n : int
        Population size (number of rows in the returned table); must be >= 0.
    false_positive_rate : float, optional
        Proportion of disease-free people who test positive; must be in
        [0, 1]. Defaults to 0.05, the rate used throughout this notebook.

    Returns
    -------
    Table
        Columns 'Status' ('Disease' / 'No disease') and 'Test Result'
        ('Test Positive' / 'Test Negative'), passed through
        ``Table.sample(with_replacement=False)`` so the rows are not grouped
        by category.

    Raises
    ------
    ValueError
        If a probability is outside [0, 1] or ``n`` is negative.
    """
    if not 0 <= prior_disease_prob <= 1:
        raise ValueError('prior_disease_prob must be between 0 and 1')
    if not 0 <= false_positive_rate <= 1:
        raise ValueError('false_positive_rate must be between 0 and 1')
    if n < 0:
        raise ValueError('n must be non-negative')

    total = round(n)
    disease = round(total * prior_disease_prob)
    # Derive the complementary counts by subtraction rather than rounding each
    # share independently: two separate round() calls can both round the same
    # way (e.g. n=7, p=0.5 -> 4 + 4), giving a population that is not size n.
    no_disease = total - disease
    false_positives = round(no_disease * false_positive_rate)
    true_negatives = no_disease - false_positives

    # Both arrays list the diseased people first, so row i of `status`
    # describes the same person as row i of `result`.
    status = np.array(['Disease'] * disease + ['No disease'] * no_disease)
    result = np.array(
        ['Test Positive'] * (disease + false_positives)
        + ['Test Negative'] * true_negatives
    )

    return Table().with_columns(
        'Status', status,
        'Test Result', result,
    ).sample(with_replacement=False)

# Lecture 38: Updating Probabilities

## Population with the Specified Proportions

Create a population with a specified disease prevalence and population size, assuming that the test specifications are:

- False positive rate: 5%
- True positive rate: 100%

In [3]:
# Simulate 10,000 people with a disease prevalence of 0.1%.
# The table holds one row per person: disease status and test result.

pop_table = create_population_table(prior_disease_prob=0.001, n=10000)
pop_table

Status,Test Result
No disease,Test Negative
No disease,Test Negative
No disease,Test Negative
No disease,Test Negative
No disease,Test Negative
No disease,Test Negative
No disease,Test Negative
No disease,Test Negative
No disease,Test Negative
No disease,Test Negative


In [4]:
# Cross-tabulate the population: one row per Status, one count column per
# Test Result, with the 'No disease' row listed first.
(
    pop_table
    .pivot('Test Result', 'Status')
    .sort('Status', descending=True)
)

Status,Test Negative,Test Positive
No disease,9490,500
Disease,0,10


In [5]:
# Proportion of positive testers who actually have the disease:
# 10 true positives out of the 10 + 500 people who tested positive.

10 / (10 + 500)

0.0196078431372549

Assume a patient is picked at random from the population.

If the patient tested positive, what is the chance they have the disease?

- Answer: the same as the proportion computed above, about 0.0196.

## Calculation by Tree Diagram

In [6]:
# P(Disease | Test Positive) by Bayes' rule, following the tree diagram:
#   numerator   = P(Disease) * P(Test Positive | Disease)          = 0.001 * 1
#   denominator = numerator
#                 + P(No disease) * P(Test Positive | No disease)  = 0.999 * 0.05
# The result (about 0.01963) differs slightly from the count-based 10/510:
# the population table holds whole people, so the expected 499.5 false
# positives show up there as 500.

0.001*1 / (0.001*1 + 0.999*0.05)

0.019627085377821395

## Rare Disease

In [7]:
# Same cross-tabulation of the 0.1%-prevalence population as before,
# repeated here to show how few of the positive results are true cases.
(
    pop_table
    .pivot('Test Result', 'Status')
    .sort('Status', descending=True)
)

Status,Test Negative,Test Positive
No disease,9490,500
Disease,0,10


## Changing the Prior

In [8]:
# Prior probability of disease: 10%

pop_table_1 = create_population_table(prior_disease_prob=0.1, n=10000)
(
    pop_table_1
    .pivot('Test Result', 'Status')
    .sort('Status', descending=True)
)

Status,Test Negative,Test Positive
No disease,8550,450
Disease,0,1000


In [9]:
# P(Disease | Test Positive) when P(Disease) = 0.1, read off the pivot table:
# 1000 true positives out of the 1000 + 450 people who tested positive.

1000 / (1000 + 450)

0.6896551724137931

In [10]:
# P(Disease | Test Positive) by Bayes' rule, if P(Disease) = 0.1:
#   numerator   = P(Disease) * P(Test Positive | Disease)          = 0.1 * 1
#   denominator = numerator
#                 + P(No disease) * P(Test Positive | No disease)  = 0.9 * 0.05

0.1*1 / (0.1*1 + 0.9*0.05)

0.689655172413793

In [11]:
# P(Disease | Test Positive) by Bayes' rule, if P(Disease) = 0.5:
#   numerator   = P(Disease) * P(Test Positive | Disease)          = 0.5 * 1
#   denominator = numerator
#                 + P(No disease) * P(Test Positive | No disease)  = 0.5 * 0.05

0.5*1 / (0.5*1 + 0.5*0.05)

0.9523809523809523

In [12]:
# Prior probability of disease: 50%
# Note: this rebinds pop_table_1, which previously held the 10%-prior table.

pop_table_1 = create_population_table(prior_disease_prob=0.5, n=10000)
(
    pop_table_1
    .pivot('Test Result', 'Status')
    .sort('Status', descending=True)
)

Status,Test Negative,Test Positive
No disease,4750,250
Disease,0,5000


In [13]:
# P(Disease | Test Positive) when P(Disease) = 0.5, read off the pivot table:
# 5000 true positives out of the 5000 + 250 people who tested positive.

5000 / (5000 + 250)

0.9523809523809523