# Question -3

Explore various probabilities of success and determine, for each one, the probability that at least 6 of 10 sampled UMD students are pro-boost.

In [4]:
import itertools
import pandas as pd

# Number of students in one sample.
n = 10
# Every possible length-n sequence of 0/1 answers (1 counts as a success),
# enumerated in lexicographic order: 2**n tuples in total.
outcomes = [*itertools.product((0, 1), repeat=n)]

def get_dataframe_of_outcome_probabilities(true_probability_of_success):
    """Tabulate every outcome sequence with its success count and probability.

    Each entry of the module-level ``outcomes`` list is a tuple of 0/1 trial
    results, where 1 counts as a success. Trials are treated as independent
    with a common success probability ``p``, so a sequence with ``k``
    successes out of ``m`` trials has probability ``p**k * (1 - p)**(m - k)``.

    Parameters
    ----------
    true_probability_of_success : float
        Probability that a single trial is a success; must lie in [0, 1].

    Returns
    -------
    pandas.DataFrame
        One row per entry of ``outcomes`` with columns ``outcome``,
        ``num_success`` and ``probability``.

    Raises
    ------
    ValueError
        If ``true_probability_of_success`` is not within [0, 1] (NaN included).
    """
    # Written as a negated chained comparison so that NaN is rejected as well.
    if not 0 <= true_probability_of_success <= 1:
        raise ValueError(
            'true_probability_of_success must be between 0 and 1, '
            f'got {true_probability_of_success!r}'
        )

    num_successes, probabilities = [], []
    for outcome in outcomes:
        num_success = sum(outcome)
        # Take the trial count from the sequence itself rather than from the
        # global ``n``: the result stays correct even when ``outcomes`` and
        # ``n`` fall out of sync after cells are re-run out of order.
        num_failure = len(outcome) - num_success
        probability = true_probability_of_success**num_success*(1-true_probability_of_success)**num_failure
        num_successes.append(num_success)
        probabilities.append(probability)

    df = pd.DataFrame(
        {
            'outcome': outcomes,
            'num_success': num_successes,
            'probability': probabilities
        }
    )
    return df


def get_prob_at_least_6(true_probability_of_success):
    """Return the probability of observing 6 or more successes.

    Builds the per-outcome probability table for the given per-trial success
    probability, totals the probability for each distinct success count, and
    adds up the totals whose success count is 6 or higher.
    """
    outcome_table = get_dataframe_of_outcome_probabilities(true_probability_of_success)
    probability_by_count = outcome_table.groupby('num_success')['probability'].sum()

    tail_probability = 0
    for success_count, count_probability in probability_by_count.items():
        if success_count < 6:
            # Below the threshold: contributes nothing to the tail.
            continue
        tail_probability += count_probability
    return tail_probability

In [7]:
# Report the chance of 6 or more successes for a few candidate per-trial
# success probabilities.
for p in (0.25, 0.4, 0.8):
    probability_at_least_6 = get_prob_at_least_6(p)
    print(f'The probability of at least 6 of 10 sampled students is {probability_at_least_6:.4f} '
          f'when the true probability of success is {p:.4f}')

The probability of at least 6 of 10 sampled students is 0.0197 when the true probability of success is 0.2500
The probability of at least 6 of 10 sampled students is 0.1662 when the true probability of success is 0.4000
The probability of at least 6 of 10 sampled students is 0.9672 when the true probability of success is 0.8000


# Question -2

In [8]:
# Probability of at least 6 successes when the per-trial probability is 0.26.
get_prob_at_least_6(true_probability_of_success=0.26)

0.0239147857755287

In [9]:
# Probability of at least 6 successes when the per-trial probability is 0.27.
get_prob_at_least_6(true_probability_of_success=0.27)

0.028722439060701044

In [10]:
# Probability of at least 6 successes when the per-trial probability is 0.28.
get_prob_at_least_6(true_probability_of_success=0.28)

0.03419943998847976

In [11]:
# Probability of at least 6 successes when the per-trial probability is 0.29.
get_prob_at_least_6(true_probability_of_success=0.29)

0.04039316337748609

In [12]:
# Probability of at least 6 successes when the per-trial probability is 0.30.
get_prob_at_least_6(true_probability_of_success=0.3)

0.04734898739999998

In [13]:
# Probability of at least 6 successes when the per-trial probability is 0.31.
get_prob_at_least_6(true_probability_of_success=0.31)

0.05510969768059453

In [14]:
# Narrow the search between 0.30 and 0.31: try the midpoint 0.305.
get_prob_at_least_6(true_probability_of_success=0.305)

0.05112617505453479

In [15]:
# Narrow the search further: per-trial probability 0.304.
get_prob_at_least_6(true_probability_of_success=0.304)

0.05035438975592263

In [16]:
# Narrow the search further: per-trial probability 0.303.
get_prob_at_least_6(true_probability_of_success=0.303)

0.049590818699766004

So . . . if I were a statistician in a universe where the true proportion of pro-boost students is .303 (of course, I wouldn't know I was in that universe), then less than 5% of the time would more than half of our sample of 10 students be pro-COVID-booster.

# Question -1

In [43]:
# Per-trial success probability used for the simulation cells that follow.
p = 0.303

# Full table of outcome sequences with their probabilities under p.
df = get_dataframe_of_outcome_probabilities(true_probability_of_success=p)

In [44]:
# Peek at the first five rows of the outcome table.
df.head(5)

Unnamed: 0,outcome,num_success,probability
0,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 0)",0,0.02706
1,"(0, 0, 0, 0, 0, 0, 0, 0, 0, 1)",1,0.011764
2,"(0, 0, 0, 0, 0, 0, 0, 0, 1, 0)",1,0.011764
3,"(0, 0, 0, 0, 0, 0, 0, 0, 1, 1)",2,0.005114
4,"(0, 0, 0, 0, 0, 0, 0, 1, 0, 0)",1,0.011764


In [45]:
import random

In [46]:
# Simulate repeated samples: each draw picks one complete outcome sequence
# with its exact probability and records that sequence's success count.
# A dedicated, seeded generator makes the draws (and every number derived
# from them) identical on each Restart & Run All, without touching the
# state of the global `random` module.
SIMULATION_SEED = 0
NUM_SIMULATIONS = 1000
simulation_rng = random.Random(SIMULATION_SEED)

# Plain lists are passed so that positional lookup inside `choices` does not
# depend on the DataFrame's index labels.
results = simulation_rng.choices(
    df.num_success.tolist(),
    weights=df.probability.tolist(),
    k=NUM_SIMULATIONS,
)
results[:5]

[0, 5, 0, 3, 5]

In [50]:
# Empirical fraction of simulated samples with more than 5 successes.
count_number_greater_than_5 = 0
for result in results:
    if result > 5:
        count_number_greater_than_5 += 1
# Divide by the actual number of draws rather than a hard-coded 1000, so the
# fraction stays correct if the simulation size is ever changed.
print(count_number_greater_than_5 / len(results))

0.052


In [52]:
import collections

In [53]:
# Tally how many simulated samples produced each success count.
counter = collections.Counter()
counter.update(results)

In [55]:
# Print the tally for every success count above 5, in the order the counts
# were first seen in the simulation.
for num_success, count in counter.items():
    if num_success <= 5:
        continue
    print(count)

44
7
1
