# Simulations

In [1]:
import pandas as pd
import numpy as np

## Carnival Dice Rolls
- You are at a carnival and come across a person in a booth offering you a game of "chance" (as people in booths at carnivals tend to do).

- You pay 5 dollars and roll 3 dice. If the sum of the dice rolls is greater than 12, you get 15 dollars. If it's less than or equal to 12, you get nothing.

- Assuming the dice are fair, should you play this game? How would this change if the winning condition was a sum greater than or equal to 12?

In [2]:
ntrials= 10_000
ndice = 3
rolls = np.random.choice([1,2,3,4,5,6], ntrials * ndice).reshape(ntrials,ndice)

In [3]:
rolls

array([[2, 3, 2],
       [4, 1, 5],
       [1, 3, 3],
       ...,
       [5, 5, 2],
       [1, 5, 5],
       [6, 4, 1]])

In [5]:
# we want the sum of every row (as opposed to the sum of all the numbers, 
# or the sum by column) with the axis key word argument.

sums_by_trial = rolls.sum(axis=1)
sums_by_trial

array([ 7, 10,  7, ..., 12, 11, 11])

In [6]:
#We can now convert each value in our array to a boolean value 
#indicating whether or not we won:
wins = sums_by_trial > 12
wins

array([False, False, False, ..., False, False, False])

In [7]:
#To calculate an overall win rate, we can treat each win as a 1 and each loss as 0, 
#then take the average of the array:
win_rate = wins.astype(int).mean()
win_rate

0.2639

In [8]:
expected_winnings = win_rate * 15
cost = 5
expected_profit = expected_winnings - cost
expected_profit

-1.0414999999999996

## No Rest or Relaxation
- There's a 30% chance my son takes a nap on any given weekend day. What is the chance that he takes a nap at least one day this weekend? What is the probability that he doesn't nap at all?

In [9]:
p_nap = .3
ndays = ncols = 2 #number of days in a weekend
n_simulated_weekends = nrows = 10**5 #(10,000) the number of simulations we want to run


In [10]:
#To determine whether or not a nap is taken on a given day, 
#we'll generate a random number between 0 and 1, and say that it is a nap 
#if it is less than our probability of taking a nap.
#np.random.random: generates numbers between 0 and 1
data = np.random.random((nrows, ncols))
data

array([[0.43814348, 0.81104868],
       [0.10142761, 0.2653701 ],
       [0.96883218, 0.2882958 ],
       ...,
       [0.1729312 , 0.16523551],
       [0.85264833, 0.60988442],
       [0.01336071, 0.28796331]])

In [11]:
naps = data < p_nap
naps

array([[False, False],
       [ True,  True],
       [False,  True],
       ...,
       [ True,  True],
       [False, False],
       [ True,  True]])

In [12]:
# Now that we have each day as either true or false, we can take the sum 
#of each row to find the total number of naps for the weekend. 
#When we sum an array of boolean values, numpy will treat True as 1 and 
#False as 0.
naps.sum(axis=1)

array([0, 2, 1, ..., 2, 0, 2])

In [13]:
#We can use this to answer our original questions, 
#what is the probability that at least one nap is taken?
(naps.sum(axis=1) >= 1).mean()

0.50851

In [14]:
#What is the probability no naps are taken?
(naps.sum(axis = 1) == 0).mean()

0.49149

In [15]:
#What is the probability that two naps are taken?
(naps.sum(axis=1)== 2).mean()

0.0899

## One With Dataframes
- What is the probability of getting at least one 3 in 3 dice rolls?

- The numpy.random module provides a number of functions for generating random numbers.

- np.random.choice: selects random options from a list
- np.random.uniform: generates numbers between a given lower and upper bound
- np.random.random: generates numbers between 0 and 1
- np.random.randn: generates numbers from the standard normal distribution
- np.random.normal: generates numbers from a normal distribution with a specified mean and standard deviation

In [19]:
#To simulate this, we'll use a similar strategy to how we modeled the dice 
#rolls in the previous example, but this time, we'll store the results 
#in a pandas dataframe so that we can apply a lambda function 
#that will check to see if one of the rolls was a 3.

n_simulations = nrows = 10**5
n_dice_rolled = ncols = 3
rolls = np.random.choice([1,2,3,4,5,6], nrows * ncols).reshape(nrows,ncols)

In [18]:
(pd.DataFrame(rolls).apply(lambda row: 3 in row.values, axis=1).mean())

0.42212

In [34]:
#How likely is it that you roll doubles when rolling two dice?
outcomes = [1,2,3,4,5,6]
simulations = 10_000
trials = 2
rolls = np.random.choice(outcomes, size = (simulations, trials))
rolls


array([[4, 3],
       [2, 5],
       [4, 6],
       ...,
       [3, 6],
       [5, 1],
       [1, 2]])

In [35]:
#two matching elements
np.unique(rolls[2])

array([4, 6])

In [37]:
#If you flip 8 coins, what is the probability of getting exactly 3 heads? 
#What is the probability of getting more than 3 heads?
outcomes = [1, 0] #1 is heads, 0 is tails
nrows = 1_000_000
ncols = 8
flips = np.random.choice([1,0], size = (nrows, ncols))
flips

array([[1, 0, 1, ..., 1, 0, 0],
       [1, 1, 1, ..., 0, 1, 0],
       [0, 1, 0, ..., 0, 0, 1],
       ...,
       [0, 0, 0, ..., 0, 1, 0],
       [1, 1, 1, ..., 0, 0, 0],
       [1, 0, 0, ..., 1, 0, 0]])

In [38]:
number_of_heads = flips.sum(axis=1)


In [39]:
calculated_probability = (number_of_heads == 3).mean()
calculated_probability

0.21886

In [46]:
#There are approximitely 3 web development cohorts for every 1 data science 
#cohort at Codeup. Assuming that Codeup randomly selects an alumni to put
#on a billboard, what are the odds that the two billboards I drive past
#both have data science students on them?
#true as 1 and false as 0
# outcomes = [0,0,0,1] #[wdev, wdev, wdev, ds]
ds = 1
wdev = 0
nrows = 1_000_000
ncols = 2
billboard = np.random.choice([ds,wdev],size =(nrows, ncols), p = [.25,.75])
billboard
ds_billboard = billboard.sum(axis =1)
calculated_probability = (ds_billboard == 2).mean()
calculated_probability

0.06267

In [47]:
#Codeup students buy, on average, 3 poptart packages (+- 1.5) 
#a day from the snack vending machine. If on monday the machine is 
#restocked with 17 poptart packages, how likely is it that I will be able 
#to buy some poptarts on Friday afternoon?
#np.random.normal
mean_poptarts = 3
sd_poptarts = 1.5
days = 5 #monday - friday = ntrials
simulations = 1_000_000

consumed = np.random.normal(mean_poptarts, sd_poptarts, size =(simulations, days))
consumed



array([[-0.01123513, -0.19027659,  4.73333844,  0.82449569,  1.67431416],
       [ 1.6622903 ,  3.08844349,  3.30053326,  1.81816882,  3.83748504],
       [-0.74129448,  2.23555684,  2.93264115,  2.80742895,  4.2762177 ],
       ...,
       [ 0.20820672,  5.59577687,  5.85637469,  5.54524099,  0.22963049],
       [ 2.77559788,  5.54818364, -0.79475047,  0.98250785,  2.02052411],
       [ 1.12779097,  2.35031642,  3.65668341,  3.92624047,  2.44545478]])

In [49]:
#probability that there will be a poptart on friday
calculated = (consumed.sum(axis=1) >= 17).mean()
calculated

0.275564

In [None]:
#Compare Heights
# Men have an average height of 178 cm and standard deviation of 8cm.
# Women have a mean of 170, sd = 6cm.
# If a man and woman are chosen at random, P(woman taller than man)?

In [53]:
avg_men = 178
sd_men = 8
avg_women = 170
sd_women = 6
simulations = 1_000_000

men = np.random.normal(loc = avg_men, scale = sd_men, size = simulations)
men
women = np.random.normal(loc = avg_women, scale = sd_women, size = simulations)
women

array([176.6391034 , 158.67430711, 176.99484436, ..., 172.50528118,
       169.13856147, 175.91604741])

In [55]:
women_taller = (women > men).mean()
women_taller

0.211925

In [None]:
#6. When installing anaconda on a student's computer, 
# there's a 1 in 250 (.004) chance that the download is corrupted and the installation fails. 
# What are the odds that after having 50 students download anaconda, 
# no one has an installation issue? 100 students?
# What is the probability that we observe an installation issue within the first 150 students 
# that download anaconda?
# How likely is it that 450 students all download anaconda without an issue?
#50

In [66]:
simulations = 1_000_000
trials = 50
failure = 1
success = 0
#What are the odds that after having 50 students download anaconda, no one has an installatino issue?
installs = np.random.choice([failure, success], size = (simulations, trials), p =[1/250,249/250])
installs

array([[0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]])

In [67]:
calculate = (installs.sum(axis=1) > 0).mean()
calculate

0.181794

In [60]:
#What are the odds that after having 100 students download anaconda, 
#no one has an installatino issue?
simulations = 1_000_000
trials = 100
failure = 1
success = 0
installs = np.random.choice([failure, success], size =(simulations, trials), p =[1/250, 249/250])
installs

array([[0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 1, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]])

In [62]:
calculate = (installs.sum(axis=1) == 0).mean()
calculate

0.669208