# Import

In [42]:
import scipy
import functools

import numpy as np
np.random.seed(0)

import random
import math

from scipy.stats import binom
from scipy.stats import chi2_contingency

from tqdm import tqdm

import plotly.express as px

# Puzzles

## 1


She can choose 1, 2, 3, or 4 econ courses. Each will have their own set of possible permutations. Assuming order doesn't matter, for each quantity E of econ courses $\in {1,2,3,4}$ there will be ${4\choose E}$ ways to pick econ courses, and $4-E\choose S$ many ways to choose the number of science courses. Therefore, for each E $\in {1,2,3,4}$, the total number of possible permutations is ${4\choose E} {4-E\choose S}$. To get the total number of permutations, we add each of these together, i.e., 

Total Permuations = $$ \sum_{E \in {1,2,3,4}} {4\choose E} {4-E\choose S} = 425$$

In [2]:
n_choose_k = functools.partial(scipy.special.comb, exact=True) #just sets exact=True

In [3]:
esss = n_choose_k(N=4, k=1) * n_choose_k(N=8, k=3)
eess = n_choose_k(N=4, k=2) * n_choose_k(N=8, k=2)
eees = n_choose_k(N=4, k=3) * n_choose_k(N=8, k=1)
eeee = n_choose_k(N=4, k=4) * n_choose_k(N=8, k=0)

print(f'Total # permutations = {esss + eess + eees + eeee}')

Total # permutations = 425


## 2

In [4]:
((2.25-(.75/2))**2)/(2.25**2)

0.6944444444444444

## 3

## 4


Let $J=$ number of people at CAAI whose names start with J.  
Let $N=$ number of people at CAAI.  

Let $P_1, P_2 = $ the first and second people you meet at CAAI, respectively.  

Then $\Pr\big((P_1 \in J) \land (P_2 \in J)\big) = \frac{1}{2} = \frac{J}{N} * \frac{J-1}{N-1}$

Assuming that $N \leq 50$, the following code indicates that there are two valid possibilities:
* $J=3, N=4$
* $J=15, N=21$

Given the hint, I'd guess that 3 out of 4 people at CAAI have names that start with J. On the other hand, a quick glance at your website shows 11 people in total, and 5 with names that start with "J". With that in mind, I ran an extra simulation to be sure, and my guess appears to be correct.

In [5]:
# for j in range(2,51):
#     for n in range(j+1, 51):
#         if 1/2 == (j/n)*(
#             (j-1)/(n-1)
#             ):
#                 print(f'J={j}, N={n} is a valid possibility')

In [6]:
# def sim_choosing_j_names(n:int, j:int, sample_size:int):
    
#     # create vector of 0 & 1, indicating whether an individual has a name starting with J
#     people=[1]*j + [0]*(n-j) 
#     num_people = len(people)
    
#     # randomly order
#     np.random.shuffle(people)
    
#     met_two_j_names_counter = 0
#     for i in range(sample_size):
#         people_met = random.sample(population=people, k=2)
#         both_j_names = sum(people_met)==2
#         met_two_j_names_counter += both_j_names
        
#     return met_two_j_names_counter / sample_size

In [7]:
# for j in range(2,22):
#     for n in range(j+1, 22):
#         approx_prob_meeting_two_js = sim_choosing_j_names(n=n, j=j, sample_size=100000)
#         if math.isclose(approx_prob_meeting_two_js, .5, rel_tol=0.01): # if approx probability is within 1% of 0.5
#             print(f'J={j}, N={n} is a valid possibility \n Approx prob = {approx_prob_meeting_two_js}')

# Short-response problems

## 1

No, I don't think a fair, random die was used in either case, although it's more likely that the die was fair with 20 than with 100. Rolls of 1,2,3,5 received approximately their share (1/6) of rolls, but 4 and 6 are large outliers. 

Intuitively, fair die were more likely to have been used in the case of 20 rolls because the law of large numbers hasn't "kicked in" as much, i.e., larger outliers are less suprising, since as $n \to \infty, P(x=i) \to \frac{1}{6}, \,\, \forall i \in \{1,2,3,4,5,6\}$, and 100 is "closer" to infinity than 20 is.

In [11]:
binom(n=20, p=1/6).cdf(.296)
# binom.cdf(x=.296, n=20, p=1/6)


0.02608405330458885

## Chi 2 test

Note that it's only appropriate to use with the larger sample size.

In [None]:
obs = [
    [0.148, 0.174, 0.148, 0.296, 0.139, 0.096], 
    [1/6]*6
       ]

In [None]:
chi2_contingency(obs)

(0.060041787911964885,
 0.9999540083553458,
 5,
 array([[0.15741196, 0.17041846, 0.15741196, 0.23144894, 0.15290971,
         0.13139897],
        [0.15725471, 0.17024821, 0.15725471, 0.23121772, 0.15275695,
         0.1312677 ]]))

## 2 (Menu-Logging)

### Analytical

### Numerical

In [24]:
def choose_a_dish(): # simulate uniform RV over [1,30]
    return np.random.randint(low=0, high=30, size=1)[0]

In [28]:
def go_to_restaurant_until_all_tried():
    all_tried = False
    times_visited = 0
    dishes_tried = {i:False for i in range(0,30)}

    while not all_tried:
        dish = choose_a_dish()
        dishes_tried[dish] = True
        times_visited += 1
        if sum(dishes_tried.values()) == 30: # if all dishes tried
            all_tried = True
            
    return times_visited

In [41]:
def simulate_n_visits(n=10000, return_runs=False):
    runs = np.array([go_to_restaurant_until_all_tried() for i in tqdm(range(n))])
    
    expectation = runs.mean()
    
    print(f'Expected # Visits = {expectation}')
    
    if return_runs:
        return runs
    
runs = simulate_n_visits(30000, return_runs=True)

100%|██████████| 30000/30000 [00:58<00:00, 512.46it/s]

Expected # Visits = 120.2755





In [43]:
# plot (for fun)
px.histogram(x=runs, histnorm='probability density')