In [1]:
from scipy import stats
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from env import get_url_for_pancakes

# A bank found that the average number of cars waiting during the noon hour at a drive-up window follows a Poisson distribution with a mean of 2 cars. Make a chart of this distribution and answer these questions concerning the probability of cars waiting at the drive-up window.

In [81]:
# Frozen Poisson model with a mean of 2 cars waiting during the noon hour.
cars = stats.poisson(mu=2)
# 10,000 simulated noon hours, used to cross-check the theoretical answers.
sim_cars = cars.rvs(size=10_000)

## What is the probability that no cars drive up in the noon hour?

In [82]:
# P(X = 0): share of simulated hours with no cars vs. the exact PMF value.
simulated = np.mean(sim_cars == 0)
theoretical = cars.pmf(0)

theoretical, simulated

(0.1353352832366127, 0.1397)

## What is the probability that 3 or more cars come through the drive through?

In [83]:
# P(X >= 3) equals P(X > 2), which is what the survival function at 2 returns.
theoretical = cars.sf(2)
# Counts are integers, so "more than 2" selects the same draws as "3 or more".
simulated = (sim_cars > 2).mean()

theoretical, simulated

(0.32332358381693654, 0.321)

## How likely is it that the drive through gets at least 1 car?

In [84]:
# P(X >= 1) = P(X > 0): survival function evaluated at 0.
theoretical = cars.sf(0)
# Counts are integers, so ">= 1" selects the same draws as "> 0".
simulated = (sim_cars >= 1).mean()

theoretical, simulated

(0.8646647167633873, 0.8603)

# Grades of State University graduates are normally distributed with a mean of 3.0 and a standard deviation of .3.

In [85]:
# Frozen normal model of GPAs: mean 3.0, standard deviation 0.3.
grades = stats.norm(loc=3.0, scale=0.3)
# 10,000 simulated GPAs for the empirical cross-checks.
sim_grades = grades.rvs(size=10_000)

## What grade point average is required to be in the top 5% of the graduating class?

In [87]:
# Cut-off for the top 5%: the simulated 95th percentile vs. the exact
# inverse survival function at 0.05.
simulated = np.percentile(sim_grades, q=95)
theoretical = grades.isf(0.05)

theoretical, simulated

(3.4934560880854417, 3.494850177512144)

## What GPA constitutes the bottom 15% of the class?

In [89]:
# Upper edge of the bottom 15%: the simulated 15th percentile vs. the exact
# percent-point function (inverse CDF) at 0.15.
simulated = np.percentile(sim_grades, q=15)
theoretical = grades.ppf(0.15)

theoretical, simulated

(2.689069983151863, 2.689435404412475)

## An eccentric alumnus left scholarship money for students in the third decile from the bottom of their class. Determine the range of the third decile.  Would a student with a 2.8 grade point average qualify for this scholarship?

In [93]:
gpa = 2.8

# The third decile from the bottom runs from the 20th to the 30th percentile.

# Solving for theoretical
lower_bound = grades.ppf(.2)
upper_bound = grades.ppf(.3)
theoretical = lower_bound < gpa < upper_bound


# Solving for simulated: same 20th/30th percentile cut-offs, estimated from
# the simulated sample, and the same "between the bounds" test as above.
twenty = np.percentile(sim_grades, 20)
thirty = np.percentile(sim_grades, 30)
simulated = twenty < gpa < thirty

theoretical, simulated

(True, True)

### Third decile: does a 2.8 GPA fall inside the range?

In [13]:
# Bounds of the third decile (20th to 30th percentile) from the theoretical
# GPA distribution; defined here so the cell runs on a fresh kernel.
start_of_3 = grades.ppf(.2)
end_of_3 = grades.ppf(.3)
# True when 2.8 lies strictly between the two bounds.
start_of_3 < 2.8 < end_of_3

True

### What percentile is a gpa of 3.5?

In [99]:
# Percentile of a 3.5 GPA: fraction of simulated GPAs at or below 3.5 vs.
# the exact CDF value.
simulated = np.mean(sim_grades <= 3.5)
theoretical = grades.cdf(3.5)

theoretical, simulated

(0.9522096477271853, 0.9524)

# A marketing website has an average click-through rate of 2%. One day they observe 4326 visitors and 97 click-throughs. How likely is it that this many people or more click through?

In [128]:
# Click-throughs among 4326 visitors, each with a 2% click-through chance.
visitors = stats.binom(n=4326, p=0.02)
# 10,000 simulated days of click-through counts.
sim_visitors = visitors.rvs(size=10_000)

In [129]:
# P(X >= 97) = P(X > 96): survival function evaluated at 96.
theoretical = visitors.sf(96)
# Counts are integers, so ">= 97" selects the same draws as "> 96".
simulated = (sim_visitors >= 97).mean()

theoretical, simulated

(0.1397582363130086, 0.1429)

# You are working on some statistics homework consisting of 100 questions where all of the answers are a probability rounded to the hundredths place. Looking to save time, you put down random probabilities as the answer to each question. What is the probability that at least one of your first 60 answers is correct?

In [21]:
# Chance that a single random guess is correct, taken here as 1 in 100.
p = 0.01

In [22]:
# Number of correct answers among the first 60 guesses.
answers = stats.binom(n=60, p=p)

In [112]:
# P(at least one correct) = P(X > 0): survival function evaluated at 0.
theoretical = answers.sf(0)
# Same event on 10,000 simulated sets of 60 guesses (integer counts, so
# "> 0" and ">= 1" select the same draws).
simulated = (answers.rvs(size=10_000) > 0).mean()

theoretical, simulated

(0.4528433576092388, 0.451)

# The codeup staff tends to get upset when the student break area is not cleaned up. Suppose that there's a 3% chance that any one student cleans the break area when they visit it, and, on any given day, about 90% of the 3 active cohorts of 22 students visit the break area. 

In [120]:
# 3 active cohorts of 22 students each.
n_students = 3 * 22
# About 90% of them visit on a given day; int() truncates to whole students.
t_students = int(0.9 * n_students)
# Number of visiting students who clean, each with a 3% chance.
cleaned = stats.binom(n=t_students, p=0.03)

## How likely is it that the break area gets cleaned up each day? 

In [124]:
# Probability that at least one visiting student cleans on a given day:
# P(X >= 1) = P(X > 0), i.e. the survival function evaluated at 0.
daily = cleaned.sf(0)
daily

0.8342199288437355

## How likely is it that it goes two days without getting cleaned up? 

In [125]:
# Complement of the daily cleaning probability: chance a given day goes uncleaned.
not_cleaned = 1 - daily
# Two uncleaned days in a row, using the same miss probability for each day.
not_cleaned ** 2

0.027483031992576113

## All week?

In [126]:
# Five uncleaned days in a row (a five-day week), same miss probability each day.
not_cleaned ** 5

0.0001252165138809122

In [127]:
# Seven uncleaned days in a row (a seven-day week), same miss probability each day.
not_cleaned ** 7

3.441329456987961e-06

## Simulated Test Answers

### Likelihood of getting cleaned each day

In [31]:
# Simulate 10,000 days; each draw is the number of visiting students who
# cleaned that day. Defined here so the cell runs on a fresh kernel.
sim_clean = cleaned.rvs(10_000)
# Share of simulated days on which at least one student cleaned.
sim_clean_daily = (sim_clean >= 1).mean()

In [32]:
# Simulated chance that a given day goes uncleaned (complement of the daily rate).
sim_not_clean_daily = 1 - sim_clean_daily

### Two days without getting cleaned

In [33]:
# Two uncleaned days in a row, from the simulated daily miss rate.
sim_not_clean_daily ** 2

0.027489639999999982

### All week

In [34]:
# Five uncleaned days in a row, from the simulated daily miss rate.
sim_not_clean_daily ** 5

0.0001252917949552475

## You want to get lunch at La Panaderia, but notice that the line is usually very long at lunchtime. After several weeks of careful observation, you notice that the average number of people in line when your lunch break starts is normally distributed with a mean of 15 and standard deviation of 3.

In [35]:
# Line length (mean 15, sd 3 people) scaled by 2 minutes per person gives the
# wait in minutes: mean 30, sd 6.
pan = stats.norm(loc=2 * 15, scale=2 * 3)

# If it takes 2 minutes for each person to order, and 10 minutes from ordering to getting your food, what is the likelihood that you have at least 15 minutes left to eat your food before you have to go back to class? Assume you have one hour for lunch, and ignore travel time to and from La Panaderia.

In [36]:
# Longest acceptable wait in minutes: 60 (lunch hour) - 15 (eating) - 10 (food prep)
# - 2 (presumably your own order; confirm against the problem statement) = 33.
pan.cdf(60 - 15 - 10 - 2)

0.6914624612740131

## Simulated Test Answers

In [138]:
# Fraction of 10,000 simulated waits that are 33 minutes or less.
np.mean(pan.rvs(size=10_000) <= 33)

0.6912

# Connect to the employees database

In [62]:
# Connection URL for the employees database, built by the project's env helper.
url = get_url_for_pancakes('employees')
# Keep only salary rows whose to_date is still in the future (current salaries).
current_salaries_query = 'SELECT * FROM salaries WHERE to_date > NOW()'
curr_sal = pd.read_sql(current_salaries_query, url)
curr_sal.head()

Unnamed: 0,emp_no,salary,from_date,to_date
0,10001,88958,2002-06-22,9999-01-01
1,10002,72527,2001-08-02,9999-01-01
2,10003,43311,2001-12-01,9999-01-01
3,10004,74057,2001-11-27,9999-01-01
4,10005,94692,2001-09-09,9999-01-01


## Find the average salary of current employees, along with the standard deviation.

In [139]:
# Mean and standard deviation (pandas' default .std()) of current salaries.
current_salaries = curr_sal.salary
avg_sal = current_salaries.mean()
std_sal = current_salaries.std()
print(f'The average salary of current employees is {avg_sal}, and the standard deviation is {std_sal}')

The average salary of current employees is 72012.23585730705, and the standard deviation is 17309.99538025198


## Model the distribution of employees salaries with a normal distribution and answer the following questions:

In [76]:
# Normal model of salaries, centred on the observed mean with the observed spread.
sal = stats.norm(loc=avg_sal, scale=std_sal)