# Sample Space Analysis

In [2]:
# Unweighted sample space: the set of possible outcomes of a single coin flip.
sample_space = {'Heads', 'Tails'}

In [3]:
# Every outcome is treated as equally likely, so a single outcome's
# probability is 1 / (number of outcomes in the sample space).
probability_heads = 1 / len(sample_space)
print(f'Probability of choosing heads is {probability_heads}')

Probability of choosing heads is 0.5


**event**: subset of those elements within a `sample_space` that satisfy some event condition

**event condition**: a simple Boolean function whose input is a single `sample_space` element. The function returns `True` only if the element satisfies our condition constraint.

## defining event conditions

In [4]:
def is_heads_or_tails(outcome):
    """Event condition: True when `outcome` is 'Heads' or 'Tails'."""
    coin_faces = {'Heads', 'Tails'}
    return outcome in coin_faces


def is_neither(outcome):
    """Event condition: True when `outcome` is neither 'Heads' nor 'Tails'."""
    return not is_heads_or_tails(outcome)

In [5]:
def is_heads(outcome):
    """Event condition: True only for the 'Heads' outcome."""
    return outcome == 'Heads'


def is_tails(outcome):
    """Event condition: True only for the 'Tails' outcome."""
    return outcome == 'Tails'

In [6]:
def get_event(event_condition, sample_space):
    """Return the event: the set of outcomes for which `event_condition` is truthy.

    `sample_space` is simply iterated, so passing a dict yields an event made
    of its keys.
    """
    return {candidate for candidate in sample_space
            if event_condition(candidate)}

In [7]:
# Conditions to try against the coin sample space.
event_conditions = [is_heads_or_tails, is_heads, is_tails, is_neither]

# Print the event (the subset of matching outcomes) each condition selects.
for event_condition in event_conditions:
    print(f'Event Condition: {event_condition.__name__}')
    event = get_event(event_condition, sample_space)
    print(f'Event: {event}\n')

Event Condition: is_heads_or_tails
Event: {'Tails', 'Heads'}

Event Condition: is_heads
Event: {'Heads'}

Event Condition: is_tails
Event: {'Tails'}

Event Condition: is_neither
Event: set()



In [8]:
def compute_probability(event_condition, generic_sample_space):
    """Probability of an event in an unweighted sample space.

    The event is the set of outcomes satisfying `event_condition`; its
    probability is the event's size divided by the sample space's size.
    """
    matching_outcomes = get_event(event_condition, generic_sample_space)
    favorable_count = len(matching_outcomes)
    total_count = len(generic_sample_space)
    return favorable_count / total_count

# Report the probability of the event produced by each condition
# for the fair-coin sample space.
for event_condition in event_conditions:
    prob = compute_probability(event_condition, sample_space)
    name = event_condition.__name__
    print(f"Probability of event arising from '{name}' is '{prob}'")

Probability of event arising from 'is_heads_or_tails' is '1.0'
Probability of event arising from 'is_heads' is '0.5'
Probability of event arising from 'is_tails' is '0.5'
Probability of event arising from 'is_neither' is '0.0'


## Analyzing a biased coin

In [9]:
# Weighted sample space: each outcome maps to its weight, so 'Heads'
# carries four times the weight of 'Tails'.
weighted_sample_space = {'Heads': 4, 'Tails': 1}

In [10]:
# The size of a weighted sample space is the sum of its weights (4 + 1).
sample_space_size = sum(weighted_sample_space.values())
assert sample_space_size == 5

In [11]:
# get_event iterates the dict, i.e. its keys, so the event holds outcome names;
# the event's size is the sum of the weights of those outcomes.
event = get_event(is_heads_or_tails, weighted_sample_space)
event_size = sum(weighted_sample_space[outcome] for outcome in event)
assert event_size == 5

In [12]:
def compute_event_probability(event_condition, generic_sample_space):
    """Return the probability of the event selected by `event_condition`.

    Parameters
    ----------
    event_condition : callable
        Called with a single outcome; a truthy result puts that outcome in
        the event.
    generic_sample_space : set, frozenset, or dict-like
        A set or frozenset is treated as unweighted: every outcome counts once.
        Anything else is treated as weighted: it is indexed by outcome and
        must provide `.values()` (e.g. a dict mapping outcome -> weight).

    Returns
    -------
    The event's size divided by the sample space's size, where "size" is the
    element count (unweighted) or the sum of weights (weighted).

    Raises
    ------
    ZeroDivisionError
        If the sample space is empty, or its weights sum to zero.
    """
    event = get_event(event_condition, generic_sample_space)
    # isinstance (instead of an exact type comparison) also sends frozensets
    # and set subclasses down the unweighted branch; previously they fell
    # through to the dict-only code below and failed.
    if isinstance(generic_sample_space, (set, frozenset)):
        return len(event) / len(generic_sample_space)

    event_size = sum(generic_sample_space[outcome] for outcome in event)
    return event_size / sum(generic_sample_space.values())

In [13]:
# Re-evaluate the same conditions against the weighted (biased-coin) sample space.
for event_condition in event_conditions:
    prob = compute_event_probability(event_condition, weighted_sample_space)
    name = event_condition.__name__
    print(f"Probability of event arising from '{name}' is '{prob}'")

Probability of event arising from 'is_heads_or_tails' is '1.0'
Probability of event arising from 'is_heads' is '0.8'
Probability of event arising from 'is_tails' is '0.2'
Probability of event arising from 'is_neither' is '0.0'


## Computing Non-Trivial Probabilities

### Problem: for a family with 4 children, what is the probability that exactly two of the children are boys (assuming each sex is equally probable)?

In [14]:
# Enumerate every ordered (child1, child2, child3, child4) combination.
# Two options per child over four children gives 2**4 = 16 outcomes.
possible_children = ['Boy', 'Girl']
sample_space = set()
for child1 in possible_children:
    for child2 in possible_children:
        for child3 in possible_children:
            for child4 in possible_children:
                outcome = (child1, child2, child3, child4)
                sample_space.add(outcome)

In [15]:
# More compact way to build the same set of outcomes.
from itertools import product
# 4 * [possible_children] is a list of four copies of the options list;
# the leading * passes them to product as four separate arguments.
all_combinations = product(*(4 * [possible_children]))
# product returns an iterator; set() consumes it for the comparison.
assert set(all_combinations) == sample_space

#### note for above code:
The `*` operator unpacks multiple arguments stored within a list.
Thus, calling `product(*(4 * [possible_children]))` is equivalent to calling `product(possible_children, possible_children, possible_children, possible_children)`.

In [16]:
# Shortest variant: product's `repeat` argument replaces the repeated-list unpacking.
sample_space_efficient = set(product(possible_children, repeat=4))
assert sample_space == sample_space_efficient

In [17]:
def has_two_boys(outcome):
    """Event condition: True when exactly two entries of `outcome` equal 'Boy'."""
    boy_count = sum(1 for child in outcome if child == 'Boy')
    return boy_count == 2

# sample_space is a plain set here, so every outcome is counted once.
prob = compute_event_probability(has_two_boys, sample_space)
print(f"Probability of 2 boys is '{prob}'")

Probability of 2 boys is '0.375'


### Problem 2: Analyzing Multiple Dice Rolls: What is the probability that the sum of 6 consecutive rolls on a fair, 6-sided die equals 21?

In [18]:
# Faces of a six-sided die: range(1, 7) covers 1 through 6.
possible_rolls = list(range(1, 7))
print(possible_rolls)

[1, 2, 3, 4, 5, 6]


In [19]:
# Every ordered sequence of six rolls, each stored as a 6-tuple.
sample_space = set(product(possible_rolls, repeat=6))

In [20]:
def has_sum_of_21(outcome):
    """Event condition: True when the rolls in `outcome` add up to 21."""
    return sum(outcome) == 21


# Each element of the unweighted `sample_space` is a tuple of six rolls, so the
# condition must sum the tuple.  Comparing an outcome directly with 21 is only
# meaningful for a sample space keyed by roll totals, which does not exist yet
# at this point in the notebook.
prob = compute_event_probability(has_sum_of_21, sample_space)
print(f"Probability of dice summing to 21 is '{prob}'")

Probability of dice summing to 21 is '0.09284979423868313'


In [21]:
# Same computation with an inline lambda in place of the named condition;
# both must give the same probability.
prob = compute_event_probability(lambda x: sum(x) == 21, sample_space)
assert prob == compute_event_probability(has_sum_of_21, sample_space)

### Problem 3: Computing Dice-Roll Probabilities using Weighted Sample Spaces

In [22]:
# defaultdict(int) is a dictionary whose missing keys start at 0, so a
# counter can be incremented without initialising it first.
from collections import defaultdict
# Map each dice total to the number of six-roll outcomes that produce it.
weighted_sample_space = defaultdict(int)
for outcome in sample_space:
    total = sum(outcome)
    weighted_sample_space[total] += 1

In [23]:
# Sanity check: the smallest and largest totals can each be rolled in exactly one way.
assert weighted_sample_space[6] == 1 # equivalent to six consecutive rolls of '1'
assert weighted_sample_space[36] == 1 # equivalent to six consecutive rolls of '6'

In [24]:
# The weight stored under a total is the number of roll sequences with that sum.
num_combinations = weighted_sample_space[21]
print(f"There are '{num_combinations}' ways for six rolled dice to sum to 21")

There are '4332' ways for six rolled dice to sum to 21


In [25]:
# Two example roll sequences that add up to 21.
assert sum([4, 4, 4, 4, 3, 2]) == 21
assert sum([4, 4, 4, 5, 3, 1]) == 21

In [26]:
# In the weighted sample space the outcomes are the totals themselves, so the
# condition compares the outcome directly with 21.  The result must match the
# probability obtained from the unweighted sample space.
prob = compute_event_probability(lambda x: x == 21,
                                weighted_sample_space)
assert prob == compute_event_probability(has_sum_of_21, sample_space)
print(f"Probability of dice summing to 21 is {prob}")

Probability of dice summing to 21 is 0.09284979423868313


In [27]:
# Show that the weighted sample space is more memory-efficient than the
# unweighted one by comparing how many elements each holds.
print('Number of Elements in Unweighted Sample Space:')
print(len(sample_space))
print('Number of Elements in Weighted Sample Space:')
print(len(weighted_sample_space))

Number of Elements in Unweighted Sample Space:
46656
Number of Elements in Weighted Sample Space:
31


**interval**: set of all numbers that are sandwiched between 2 boundary cutoffs (in the code below, the cutoffs themselves are included)

In [28]:
def is_in_interval(number, minimum, maximum):
    """Return True when `number` lies between `minimum` and `maximum`, bounds included."""
    at_or_above_minimum = minimum <= number
    return at_or_above_minimum and number <= maximum

In [29]:
# Probability that the outcome (a six-dice total) falls between 10 and 21, inclusive.
prob = compute_event_probability(lambda x: is_in_interval(x, 10, 21),
                                weighted_sample_space)
print(f"Probability of interval is {prob}")

Probability of interval is 0.5446244855967078


Interval analysis is critical to solving a whole class of very important problems in probability and statistics. One such problem involves the evaluation of extremes. The problem boils down to whether observed data is too extreme to be believable.

### Problem 4: Computing the sample space for 10 coin-flips to determine if the outcome is too unlikely to be believable

In [30]:
def generate_coin_sample_space(num_flips=10):
    """Build a weighted sample space for `num_flips` coin flips.

    Every sequence of 'Heads'/'Tails' of length `num_flips` is enumerated.
    The returned defaultdict(int) maps a head-count to the number of
    sequences containing exactly that many heads.
    """
    head_count_weights = defaultdict(int)
    for flip_sequence in product(['Heads', 'Tails'], repeat=num_flips):
        head_count_weights[flip_sequence.count('Heads')] += 1

    return head_count_weights

# Default 10-flip sample space: one sequence has 10 heads, ten sequences have 9.
weighted_sample_space = generate_coin_sample_space()
assert weighted_sample_space[10] == 1
assert weighted_sample_space[9] == 10

In [31]:
# Probability of a head-count in the interval 8..10 (inclusive),
# i.e. more than 7 heads.
prob = compute_event_probability(lambda x: is_in_interval(x, 8, 10),
                                weighted_sample_space)
print(f"Probability of observing more than 7 heads is {prob}")

Probability of observing more than 7 heads is 0.0546875


### what is the probability that the coin-flips do NOT produce between 3 and 7 heads?

In [32]:
# Complement of the 3..7 interval: at most 2 heads or at least 8 heads,
# which for 10 flips means more than 7 heads or more than 7 tails.
prob = compute_event_probability(lambda x: not is_in_interval(x, 3, 7),
                                weighted_sample_space)
print(f"Probability of observing more than 7 heads or 7 tails is {prob}")

Probability of observing more than 7 heads or 7 tails is 0.109375


### Analyzing extreme head-counts for 20 fair coin-flips

In [33]:
# 20-flip sample space (built by enumerating all 2**20 flip sequences).
# Head-counts outside 5..15 mean more than 15 heads or more than 15 tails.
weighted_sample_space_20_flips = generate_coin_sample_space(num_flips=20)
prob = compute_event_probability(lambda x: not is_in_interval(x, 5, 15),
                                weighted_sample_space_20_flips)
print(f"Probability of observing more than 15 heads or 15 tails is {prob}")

Probability of observing more than 15 heads or 15 tails is 0.01181793212890625
