In [1]:
# pyplot for plotting
import matplotlib.pyplot as plt
# numpy for vectorized array operations
import numpy as np
# pandas for proper tabular manipulation
import pandas as pd
# scipy stats for our subversions
from scipy import stats

## Q1) How likely is it that you roll doubles when rolling two dice?



In [2]:
n_trials = nrows = 10_000
n_dice = ncols = 2

rolls = np.random.choice([1, 2, 3, 4, 5, 6], n_trials * n_dice).reshape(nrows, ncols)
rolls


array([[4, 4],
       [6, 2],
       [3, 3],
       ...,
       [5, 3],
       [4, 2],
       [2, 1]])

## Q2) If you flip 8 coins, what is the probability of getting exactly 3 heads? What is the probability of getting more than 3 heads?



by theoretically,
### Combination Formula:
    -The total number of outcomes is 2^8 because each coin can either be heads or tails, and there are 8 coins.

    -The number of ways to get exactly 3 heads is given by 
     C(8,3), which is the number of combinations of 8 items taken 3 at a time.

    -The probability is then C(8,3).
    
### For more than 3 heads:
    -You'd sum up the probabilities of getting 4, 5, 6, 7, and 8 heads.

    -For each case, use the combination formula to find the number of ways to get that specific number of heads.

    -The probability is then the sum of these individual probabilities.

Here are the steps:

### Exactly 3 heads:
    - C(8,3)=8!/(3!(8−3)!=8×7×6/3×2×1=56

 
    - Probability = 56/256 =7/32

 
### More than 3 heads:

    - P(X>3)=P(X=4)+P(X=5)+P(X=6)+P(X=7)+P(X=8)
            =1/2^8[c(8,4)+c(8,5)+c(8,6)+c(8,7)+c(8,8)]



This provides an empirical estimation based on counting the outcomes that meet the specified conditions. Keep in mind that this is an approximation and becomes more accurate with a larger number of trials or simulations.

In [3]:
# experimental simulation

import random

def simulate_coin_flips(num_flips, num_simulations):
    count_3_heads = 0
    count_more_than_3_heads = 0

    for _ in range(num_simulations):
        # Simulate flipping 8 coins
        flips = [random.choice([1, 0]) for _ in range(num_flips)]  # 1 for heads, 0 for tails

        # Count the number of heads
        num_heads = sum(flips)

        # Check if exactly 3 heads
        if num_heads == 3:
            count_3_heads += 1

        # Check if more than 3 heads
        if num_heads > 3:
            count_more_than_3_heads += 1

    # Calculate probabilities
    prob_3_heads = count_3_heads / num_simulations
    prob_more_than_3_heads = count_more_than_3_heads / num_simulations

    return prob_3_heads, prob_more_than_3_heads

# Number of coin flips
num_flips = 8

# Number of simulations
num_simulations = 100000

# Run simulation
prob_3_heads, prob_more_than_3_heads = simulate_coin_flips(num_flips, num_simulations)

# Print results
print(f"Probability of exactly 3 heads: {prob_3_heads}")
print(f"Probability of more than 3 heads: {prob_more_than_3_heads}")


Probability of exactly 3 heads: 0.21938
Probability of more than 3 heads: 0.63687


## Q3) There are approximitely 3 web development cohorts for every 1 data science cohort at Codeup. Assuming that Codeup randomly selects an alumni to put on a billboard, what are the odds that the two billboards I drive past both have data science students on them?

# Theoretically, 
If there are approximately 3 web development cohorts for every 1 data science cohort, and Codeup randomly selects an alumni to put on a billboard, the probability of seeing a data science student on one billboard can be represented as 

P(Data Science)=1/4, and the probability of seeing a web development student on one billboard is 

P(Web Development)=3/4.

Since these events are independent (assuming one billboard doesn't affect the other), you can multiply the probabilities to find the probability of both events happening (seeing data science students on both billboards):

P(Data Science on both)=P(Data Science)×P(Data Science)

P(Data Science on both)=1/4 × 1/4=1/16

So, the odds that the two billboards you drive past both have data science students on them are 1/16.

In [4]:
import random

def simulate_billboard_selection(num_simulations):
    count_both_data_science = 0

    for _ in range(num_simulations):
        # Simulate selecting alumni for two billboards
        billboard1 = random.choice(['Web Dev', 'Web Dev', 'Web Dev', 'Data Science'])
        billboard2 = random.choice(['Web Dev', 'Web Dev', 'Web Dev', 'Data Science'])

        # Check if both billboards feature data science students
        if billboard1 == 'Data Science' and billboard2 == 'Data Science':
            count_both_data_science += 1

    # Calculate probability
    prob_both_data_science = count_both_data_science / num_simulations

    return prob_both_data_science

# Number of simulations
num_simulations = 100000

# Run simulation
prob_both_data_science = simulate_billboard_selection(num_simulations)

# Print result
print(f"Empirical Probability of both billboards featuring Data Science students: {prob_both_data_science}")


Empirical Probability of both billboards featuring Data Science students: 0.06191


## Q4) Codeup students buy, on average, 3 poptart packages with a standard deviation of 1.5 a day from the snack vending machine. If on monday the machine is restocked with 17 poptart packages, how likely is it that I will be able to buy some poptarts on Friday afternoon?

(Remember, if you have mean and standard deviation, use the np.random.normal) You'll need to make a judgement call on how to handle some of your values

In [5]:


# Parameters
mean_poptarts_per_day = 3
std_dev_poptarts_per_day = 1.5
days_until_friday = 4  # Monday to thursday

# Simulate the number of poptarts bought each day
poptarts_bought_each_day = np.random.normal(mean_poptarts_per_day, std_dev_poptarts_per_day, days_until_friday)

# Calculate the total poptarts remaining after each day
poptarts_remaining = 17 - np.cumsum(poptarts_bought_each_day)

# Check if there are poptarts left on Friday afternoon
poptarts_available_on_friday = poptarts_remaining[-1] > 0

# Calculate the likelihood
likelihood = np.mean(poptarts_available_on_friday) * 100

# Print the results
print(f"The likelihood of being able to buy poptarts on Friday afternoon is approximately: {likelihood:.2f}%")


The likelihood of being able to buy poptarts on Friday afternoon is approximately: 0.00%


## Q5) Compare Heights

- Men have an average height of 178 cm and standard deviation of 8cm.

- Women have a mean of 170, sd = 6cm.

- Since you have means and standard deviations, you can use np.random.normal to generate observations.

- If a man and woman are chosen at random, what is the likelihood the woman is taller than the man?

In [6]:
# Set up parameters
mean_height_men = 178
std_dev_men = 8

mean_height_women = 170
std_dev_women = 6

# Number of simulations
num_simulations = 100000

# Generate random heights for men and women
heights_men = np.random.normal(mean_height_men, std_dev_men, num_simulations)
heights_women = np.random.normal(mean_height_women, std_dev_women, num_simulations)

# Calculate the likelihood that a woman is taller than a man
likelihood_taller_woman = np.mean(heights_women > heights_men) * 100

# Print the result
print(f"The likelihood that a randomly chosen woman is taller than a randomly chosen man is approximately: {likelihood_taller_woman:.2f}%")


The likelihood that a randomly chosen woman is taller than a randomly chosen man is approximately: 21.31%


## Q6) When installing anaconda on a student's computer, there's a 1 in 250 chance that the download is corrupted and the installation fails. 

What are the odds that after having 50 students download anaconda, no one has an installation issue? 100 students?

What is the probability that we observe an installation issue within the first 150 students that download anaconda?

How likely is it that 450 students all download anaconda without an issue?

In [7]:
# no installation issue for 50 students


# Number of students
num_students = 50

# Probability of success (no installation issue)
p_success = 1 - 1/250

# Simulate 50 students and count the number of successes (no installation issues)
simulations = np.random.binomial(n=1, p=p_success, size=num_students)
num_no_issue_50_students = np.sum(simulations == 1)

# Calculate the probability from the simulation
prob_no_issue_50_students_sim = num_no_issue_50_students / num_students

print(f"Simulated probability of no installation issue for 50 students: {prob_no_issue_50_students_sim:.6f}")


Simulated probability of no installation issue for 50 students: 1.000000


In [8]:
# no installation issue for 100 students

# Number of students
num_students = 100

# Simulate 100 students and count the number of successes (no installation issues)
simulations = np.random.binomial(n=1, p=p_success, size=num_students)
num_no_issue_100_students = np.sum(simulations == 1)

# Calculate the probability from the simulation
prob_no_issue_100_students_sim = num_no_issue_100_students / num_students

print(f"Simulated probability of no installation issue for 100 students: {prob_no_issue_100_students_sim:.6f}")


Simulated probability of no installation issue for 100 students: 0.990000


In [9]:
# no installation issue for 150
# Number of students
num_students = 150

# Simulate 150 students and check if there is at least one success (no installation issue)
simulations = np.random.binomial(n=1, p=p_success, size=num_students)
issue_within_150_students_sim = np.any(simulations == 1)

print(f"Simulated probability of at least one installation issue within the first 150 students: {issue_within_150_students_sim}")


Simulated probability of at least one installation issue within the first 150 students: True


In [10]:
# no installation issue for 450
# Number of students
num_students = 450

# Simulate 450 students and count the number of successes (no installation issues)
simulations = np.random.binomial(n=1, p=p_success, size=num_students)
num_no_issue_450_students = np.sum(simulations == 1)

# Calculate the probability from the simulation
prob_no_issue_450_students_sim = num_no_issue_450_students / num_students

print(f"Simulated probability of no installation issue for 450 students: {prob_no_issue_450_students_sim:.6f}")


Simulated probability of no installation issue for 450 students: 0.997778


## Q7) There's a 70% chance on any given day that there will be at least one food truck at Travis Park. 

However, you haven't seen a food truck there in 3 days. How unlikely is this?

How likely is it that a food truck will show up sometime this week?



In [11]:
# Parameters
probability_food_truck_daily = 0.7
days_without_food_truck = 3
num_simulations = 100000

# Simulate the presence of a food truck for each day
simulations = np.random.rand(num_simulations, 7) < probability_food_truck_daily

# Calculate the probability of not seeing a food truck for 3 consecutive days
no_food_truck_3_days_sim = np.sum(np.all(simulations[:, :3] == 0, axis=1)) / num_simulations

# Calculate the probability of seeing a food truck at least once during the week
food_truck_this_week_sim = np.sum(np.any(simulations, axis=1)) / num_simulations

print(f"Simulated probability of not seeing a food truck for 3 consecutive days: {no_food_truck_3_days_sim:.6f}")
print(f"Simulated probability of seeing a food truck at least once during the week: {food_truck_this_week_sim:.6f}")


Simulated probability of not seeing a food truck for 3 consecutive days: 0.026780
Simulated probability of seeing a food truck at least once during the week: 0.999800


## Q8) If 23 people are in the same room, 

what are the odds that two of them share a birthday? 

What if it's 20 people? 40?



In [12]:
def birthday_probability(num_people):
    p_no_shared_birthday = 1.0
    for i in range(num_people):
        p_no_shared_birthday *= (365 - i) / 365
    p_shared_birthday = 1 - p_no_shared_birthday
    return p_shared_birthday



In [16]:
# Calculate the probability for 23 people
prob_23_people = birthday_probability(23)
print(f"Probability of at least two people sharing a birthday (23 people): {prob_23_people:.4f}")


Probability of at least two people sharing a birthday (23 people): 0.5073


In [17]:
# Calculate the probability for 20 people
prob_20_people = birthday_probability(20)
print(f"Probability of at least two people sharing a birthday (20 people): {prob_20_people:.4f}")


Probability of at least two people sharing a birthday (20 people): 0.4114


In [18]:
# Calculate the probability for 40 people
prob_40_people = birthday_probability(20)
print(f"Probability of at least two people sharing a birthday (40 people): {prob_40_people:.4f}")


Probability of at least two people sharing a birthday (40 people): 0.4114
