# Random Numbers & Probability — Solutions
Worked answers for the exercises notebook.

## 0) Setup

In [2]:
# Run this cell first
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.stats import uniform, binom, norm
from webcolors import names

# One seeded generator shared by every simulation cell, so draws are repeatable from a fresh kernel
rng = np.random.default_rng(seed=123)
# Render floats in pandas output with three decimal places
pd.options.display.precision = 3


In [3]:
# Small illustrative sales table: one row per salesperson, used as the population to sample from
sales_counts = pd.DataFrame(
    [
        ('Amir', 178),
        ('Brian', 128),
        ('Claire', 75),
        ('Damian', 69),
    ],
    columns=['name', 'n_sales'],
)
sales_counts


Unnamed: 0,name,n_sales
0,Amir,178
1,Brian,128
2,Claire,75
3,Damian,69


## 1) What are the chances? — Solutions
**A1.** P(heads)=1/2=0.5

**A2.** 4 people → P(Claire)=1/4=0.25

## 2) With or without replacement? — Solutions
**A3.** Without replacement → **dependent** (first pick changes the pool). With replacement → **independent**.

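**A4.** See the code output below: sampling with replacement can return the same row more than once, whereas sampling without replacement never repeats a row.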

In [46]:
# Draw rows from the sales table two ways to contrast the sampling schemes.
# Passing the notebook's seeded generator as `random_state` makes both draws
# repeat exactly under Restart Kernel -> Run All (unseeded, they change on every run).
draw_without = sales_counts.sample(2, replace=False, random_state=rng)  # each row can appear at most once
draw_with = sales_counts.sample(5, replace=True, random_state=rng)  # 5 draws from 4 rows, so at least one repeat is guaranteed
draw_without, draw_with


(     name  n_sales
 0    Amir      178
 2  Claire       75,
      name  n_sales
 0    Amir      178
 1   Brian      128
 3  Damian       69
 2  Claire       75
 3  Damian       69)

## 3) Calculating probabilities via simulation — Solutions

In [61]:
# Estimate P(Amir) by simulation: pick one salesperson at random 10,000 times
# and measure the fraction of picks that are Amir (theory: 1/4 = 0.25).
# NOTE: this rebinds `names`, shadowing the `names` imported from webcolors in the setup cell.
names = sales_counts['name'].to_numpy()
# A single vectorized call draws every sample at once instead of 10,000 Python-level calls
picks = rng.choice(names, size=10_000)
# Mean of a boolean mask = proportion of True values
p_Amir_est = np.mean(picks == 'Amir')
p_Amir_est


np.float64(0.2509)

## 4) Discrete distributions — Solutions

In [None]:
# Fair six-sided die: faces 1..6, each with probability 1/6
die = pd.DataFrame({'number': np.arange(1, 7)}).assign(prob=1/6)
die

In [None]:
# Expected value of a discrete distribution: sum of (outcome x probability)
weighted_faces = die['number'] * die['prob']
ev_die = weighted_faces.sum()
ev_die

In [None]:
# Simulate 50 rolls of the fair die and plot how often each face came up
rolls_50 = rng.choice(die['number'], size=50, replace=True, p=die['prob'])
fig, ax = plt.subplots()
# Bin edges 1..7 give one unit-wide bar per face
ax.hist(rolls_50, bins=np.arange(1, 8))
ax.set(title='50 simulated die rolls', xlabel='Outcome', ylabel='Count')
plt.show()

## 5) Identifying distributions (uneven) — Solutions

In [None]:
# Uneven die: face 2 can never come up, and face 1 is twice as likely as each remaining face
uneven_probs = np.array([2, 0, 1, 1, 1, 1]) / 6
uneven = pd.DataFrame({'number': np.arange(1, 7)}).assign(prob=uneven_probs)
# Expected value = sum of (outcome x probability)
ev_uneven = (uneven['number'] * uneven['prob']).sum()
uneven, ev_uneven

## 6) Expected value vs sample mean — Solutions

In [None]:
def sample_mean_of_rolls(n):
    """Roll the notebook's `die` n times with the shared `rng` and average the faces.

    Parameters
    ----------
    n : int
        Number of rolls to simulate.

    Returns
    -------
    float
        Mean of the n sampled faces, as a built-in Python float.
    """
    rolled = rng.choice(die['number'], size=n, p=die['prob'])
    return float(rolled.mean())
# Sample mean at increasing sample sizes, keyed by the number of rolls
means = dict((size, sample_mean_of_rolls(size)) for size in (50, 500, 5000))
means

## 7) Continuous distributions — uniform — Solutions

In [None]:
# Uniform(a, b): the probability of an interval is its length divided by the total width
a = 0.0
b = 12.0
span = b - a
p_le_7 = (7 - a) / span   # P(X <= 7)
p_ge_7 = 1 - p_le_7       # P(X >= 7), the complement
p_4_7 = (7 - 4) / span    # P(4 <= X <= 7)
p_le_7, p_ge_7, p_4_7

In [None]:
# Draw 1,000 values from Uniform(0, 12) and plot them as a 20-bin histogram
waits_u = rng.uniform(0, 12, size=1000)
fig, ax = plt.subplots()
ax.hist(waits_u, bins=20)
ax.set(
    title='Uniform(0,12) simulated waits (n=1000)',
    xlabel='Minutes',
    ylabel='Count',
)
plt.show()

## 8) Exponential waits — Solutions

In [None]:
# Exponential values with mean 6 (numpy's `scale` is the mean of the distribution)
mean_wait = 6.0
waits_e = rng.exponential(scale=mean_wait, size=1000)
# Simulated P(X <= 4): fraction of the sampled values that are at most 4
within_4 = waits_e <= 4
p_est = float(within_4.mean())
# Closed-form exponential CDF at 4: 1 - exp(-x / mean)
p_theory = 1 - np.exp(-4 / mean_wait)
p_est, p_theory

In [None]:
# Histogram of the simulated exponential values stored in `waits_e`
fig, ax = plt.subplots()
ax.hist(waits_e, bins=30)
ax.set(
    title='Exponential(mean=6) simulated waits (n=1000)',
    xlabel='Minutes',
    ylabel='Count',
)
plt.show()

## 9) Binomial distribution — Solutions

In [None]:
# 20 independent draws from Binomial(n=10, p=0.3): each value is a success count out of 10 trials
sim_wins = rng.binomial(10, 0.3, size=20)
sim_wins

In [51]:
# Binomial(n=10, p=0.5): probability of exactly 7 successes and of at most 7 successes
# NOTE: `p_le_7` rebinds the name already used for the uniform probability in section 7.
ten_trials = binom(n=10, p=0.5)
p_eq_7 = ten_trials.pmf(7)   # P(X == 7)
p_le_7 = ten_trials.cdf(7)   # P(X <= 7)
p_eq_7, p_le_7

(np.float64(0.11718749999999999), np.float64(0.9453125))

In [None]:
# Expected count = number of trials x success probability (n * p)
# NOTE(review): presumably 20 trials at the 0.3 success rate used in the simulation above — confirm against the exercise text
n_trials, p_success = 20, 0.3
expected_wins = n_trials * p_success
expected_wins

## 10) Wrap-up — Notes
- **Expected value** is the long-run average defined by the distribution.
- **Sample mean** is the average of observed outcomes; it approaches the expected value as the sample size increases (Law of Large Numbers).