In [1]:
# Let's do our imports:
# matplotlib inline for notebook visualization display
%matplotlib inline
# numpy for matrix manipulation
import numpy as np
# pandas for dataframe manipulation
import pandas as pd
# curriculum example visualizations
import viz 
# and setting our random seed
np.random.seed(1349)

In [2]:
1/36

0.027777777777777776

In [3]:
n_trials = n_rows = 10 ** 6
n_dice = n_cols = 2

In [4]:
rolls = np.random.choice([1,2,3,4,5,6], n_trials * n_dice).reshape(n_rows, n_cols)

In [5]:
doubles_dice = (rolls.sum(axis=1) ==2).mean()

In [6]:
doubles_dice

0.027844

In [7]:
# --------------------------#

In [8]:
#simulation method: 

# represent our data's possible outcomes: 
outcomes = [1, 2, 3, 4, 5, 6]
#create the data: 
n_rows = 1_000_000
n_cols = 2

In [9]:
# Getting ther rolls: We will make a simulation of 1 million trials or siulated rolls for two dice

rolls = np.random.choice(outcomes, size = (n_rows, n_cols))

In [10]:
rolls[:5]

array([[3, 6],
       [6, 2],
       [4, 3],
       [1, 3],
       [3, 4]])

In [11]:
rolls.shape

(1000000, 2)

In [12]:
# using a sum isn't the best option here, since we are looking for two matching elements, or the number of unique elements
len(np.unique(rolls[4]))

2

In [20]:
#using list comprehension: 
# a list of length of the uniques for each instance for the full number of simulations by index:
# but only if the number of uniques is 1

In [21]:
dubs = [len(np.unique(rolls[n])) for n in range(0, n_rows-1) if len(np.unique(rolls[n])) ==1]

In [22]:
len(dubs)

166186

In [23]:
# The length of the number of times we rolled doubled, and we can divide
# that by the total number of simulations: 
calculated_prob = len(dubs) / len(rolls)

In [24]:
calculated_prob

0.166186

In [26]:
# The length of this is going to be the number of times we rolled doubles,
# and we can divide that by the total number of simulations: 
calculated_prob = len(dubs) / len(rolls)

In [27]:
calculated_prob

0.166186

In [30]:
print(f'The probability that we will flip at least 3 heads over {n_cols} coins is {calculated_prob}')

The probability that we will flip at least 3 heads over 2 coins is 0.166186


In [31]:
# number of ways that we could get three heads out of eight flips, 
# divided by number of possible flip outcomes of eight flips (2 * 2 * 2 * 2 * 2 * 2 * 2 * 2)
56/256

0.21875

In [35]:
# Let's make a million simulated flips of 8 trials, or independent coins.
n_trials = n_rows = 10 ** 6
n_dice = n_cols = 8
heads = 1
tails = 0
flips = np.random.choice([heads, tails], size=(n_rows, n_cols))

In [36]:
flips

array([[0, 0, 0, ..., 0, 1, 0],
       [1, 1, 1, ..., 0, 0, 1],
       [0, 0, 0, ..., 1, 0, 1],
       ...,
       [0, 0, 1, ..., 1, 0, 1],
       [0, 0, 1, ..., 1, 0, 1],
       [0, 1, 0, ..., 0, 1, 0]])

In [37]:
# since we assigned heads as a value of 1, the sum of any given row of 8 trials will be 3 of there were three heads
numheads = flips.sum(axis = 1)

In [38]:
calculated_prob = (numheads == 3).mean()

#And if we take the average number of times where that sum equaled 3: 
print(f'The probability that we will flip exactly 3 heads over {n_cols} coins is {calculated_prob}')

The probability that we will flip exactly 3 heads over 8 coins is 0.218223


In [39]:
#the second part: If the sum is equal or over to 3, we know that we rolled at least 3 heads : 

calculated_prob = (numheads >= 3).mean()
print(f'The propbaility that we will flip at least 3 heads over {n_cols} coins is {calculated_prob}')

The propbaility that we will flip at least 3 heads over 8 coins is 0.855633


In [40]:
# theoretical probability: 

(1/4) * (1/4)

0.0625

In [41]:
n_rows = 1_000_000
n_cols = 2
prob_ds = 0.25

In [42]:
data = np.random.random((n_rows, n_cols))

In [43]:
data

array([[0.7270842 , 0.26526702],
       [0.11222086, 0.4046571 ],
       [0.42903789, 0.97551988],
       ...,
       [0.14722509, 0.41099774],
       [0.06239987, 0.2035151 ],
       [0.81194869, 0.56567916]])

In [44]:
((data < prob_ds).sum(axis=1) == 2).mean()

0.06296

In [45]:
# avg number of poptarts consumed:
pop_avg = 3

# deviation of potars: 1.5 

pop_std = 1.5
n_cols = 5
n_ros = 1_000_000
simulated_comsumed_potars = np.random.normal(pop_avg, pop_std, size=(n_rows, n_cols))


In [47]:
simulated_comsumed_potars

array([[ 3.50649827,  1.89315779,  0.79421366,  4.16595839,  4.87260862],
       [ 2.39909705,  3.12299839,  2.3781295 ,  2.29342393, -0.18630414],
       [ 3.64695621,  4.61487728,  3.73649818,  2.60201289,  0.48986778],
       ...,
       [ 4.4941995 ,  5.65110206,  3.70652458,  0.59841483,  2.52558462],
       [ 3.27056616,  4.48665974,  2.64998609,  2.1394958 ,  3.28661786],
       [ 4.40960137,  3.01889297,  1.48034021,  1.00853759,  6.16588137]])

In [49]:
simulated_comsumed_potars.sum(axis=1)

array([15.23243673, 10.00734473, 15.09021235, ..., 16.97582559,
       15.83332565, 16.08325351])

In [52]:
calculated_prob = (simulated_comsumed_potars.sum(axis=1) <= 16).mean()

In [53]:
print(f'The probability that there will still be poptarts in the vending machine after {n_cols} days is {calculated_prob}')

The probability that there will still be poptarts in the vending machine after 5 days is 0.617287


In [54]:
men_avg = 178
men_std = 8
wmn_avg = 170
wmn_std = 6

In [57]:
# since we have an average and a standard deviation, we can use np.random.normal

In [58]:
s_men = np.random.normal(men_avg, men_std, 1_000_000)

In [59]:
s_men[:5]

array([183.5996343 , 180.09657582, 180.59250856, 166.15554781,
       168.46541026])

In [60]:
s_wmn = np.random.normal(wmn_avg, wmn_std, 1_000_000)

In [62]:
calculated_prob = (s_wmn > s_men).mean()

In [63]:
calculated_prob

0.210853

In [64]:
nrows = 1_000_000

# n_cols in this case is goign to be the number of students installing Anaconda

n_cols = 50
conda_failure = 1
great_success = 0
prob_failure = (1/250)

In [65]:
prob_failure

0.004

In [66]:
installs = np.random.random((nrows, n_cols))

In [68]:
((installs < prob_failure).sum(axis=1) == 0).mean

<function ndarray.mean>

In [69]:
n_rows = 1_000_000

In [70]:
# n_cols in this case is goign to be the number of students installing Anaconda

n_cols = 100
conda_failure = 1
great_success = 0
prob_failure = (1/250)

In [71]:
prob_failure

0.004

In [72]:
installs = np.random.random((nrows, n_cols))

In [73]:
((installs < prob_failure).sum(axis=1) == 0).mean()

0.669433

In [74]:
nrows = 1_000_000

# n_cols in this case is going to be the number of students installing Anaconda

n_col = 450
conda_failue = 1
great_success = 0
prob_failure = (1/250)

In [75]:
prob_failure

0.004

In [76]:
installs = np.random.random((nrows, n_cols))

In [77]:
((installs < prob_failure).sum(axis=1)==0).mean()

0.670128

In [79]:
n_rows = 1_000_000

# n_cols in this case is going to be the number of students installing Anaconda

n_cols = 150
conda_failure = 1
great_success = 0
prob_failure = (1/250)

In [80]:
prob_failure

0.004

In [81]:
installs = np.random.random((nrows, n_cols))

In [82]:
((installs < prob_failure).sum(axis=1) >0).mean()

0.451607

In [83]:
# The one in 250 is going to come up with our probability bias for the two outcomes.  
# 1/250 = 0.004 probability that we will have an anaconda failure.

In [84]:
calculated_prob

0.210853

In [None]:
# We are still looking at these like independent events, so:
# There either will be or will not be a food truck, with a probability of 0.7 in favor of there being a food truck.
# 3 days of the week have passed, with two more left, assuming a regular business week.

In [85]:
n_rows = 1_000_000
n_cols = 3 
food_truck = 1
no_truck = 0
truck_prob = 0.7

In [87]:
data = np.random.random((n_rows, n_cols))

In [88]:
((data < truck_prob).sum(axis=1) ==0).mean()

0.026736

In [89]:
calculated_prob = ((data < truck_prob).sum(axis=1) ==0).mean

In [90]:
calculated_prob

<function ndarray.mean>

In [None]:
# The presence of a food truck is not dependent on whether or not one showed up on the previous day, 
# its independent.  Let's see what its like for the last two days

In [91]:
n_rows = 1_000_000
n_cols = 2
food_truck = 1
no_truck = 0
truck_prob = 0.7

In [92]:
lunch_days = np.random.random((n_rows, n_cols))

In [93]:
calculated_prob = ((lunch_days < truck_prob).sum(axis=1)>0).mean

In [94]:
calculated_prob

<function ndarray.mean>

In [95]:

print(f'The probability that we have seen a food truck over the course of {n_cols} days is {calculated_prob}')

The probability that we have seen a food truck over the course of 2 days is <built-in method mean of numpy.ndarray object at 0x7fb9ed7adda0>


In [96]:
# 365 days in a year (typically)
# 23 people in the room
# we want an instance where both are the same number!

# Hey, this is exactly the same as our first problem with a few extra steps!

In [97]:
# Represent our data's possible outcomes, the number of days in a year
# People born on leap days don't actually exist, so we are going to exclude them here:

outcomes = range(0, 365)
# Create the data!
n_trials = 23
n_simulations = 1_000_000

In [98]:
# Let's get our simulations. We'll make a simulation of 1 million classrooms of 23 students.
#
# outcomes: possible unique days of the year that a person could have.
# n_simulations: the number of simulated classroom trials
# n_trials: the number of student birthdays
#

In [99]:
classrooms = np.random.choice(outcomes, size=(n_simulations, n_trials))

In [100]:
# Let's use a list comprehension: 
# a list of the length of the uniques for each instance for the full number of simulations by index, 
# but only if the number of uniques is less than the number of students in the class

In [101]:
list_of_twin_bdays = [len(np.unique(classrooms[n])) for n in range(0, n_simulations-1) if len(np.unique(classrooms[n])) < 23]


In [102]:
prop_twinsies = len(list_of_twin_bdays) / n_simulations
print(f'The probability that we will have one or more shared birthdays over {n_trials} students is {prop_twinsies}')

The probability that we will have one or more shared birthdays over 23 students is 0.507167


In [104]:
# For 20 people?
# Represent our data's possible outcomes, the number of days in a year
# People born on leap days don't actually exist, so we are going to exclude them here:

outcomes = range(0, 365)
# Create the data!
n_trials = 20
n_simulations = 1_000_000

In [None]:

# Let's get our simulations. We'll make a simulation of 1 million classrooms of 23 students.
#
# outcomes: possible unique days of the year that a person could have.
# n_simulations: the number of simulated classroom trials
# n_trials: the number of student birthdays

In [105]:

classrooms = np.random.choice(outcomes, size=(n_simulations, n_trials))

In [106]:
#using list comprehension

In [107]:

list_of_twin_bdays = [len(np.unique(classrooms[n])) for n in range(0, n_simulations-1) if len(np.unique(classrooms[n])) < 20]

In [108]:
# The length of this is going to be the number of times we had a class with shared birthdays, and we can divide that by the total number of simulations:

In [110]:

prop_twinsies = len(list_of_twin_bdays) / n_simulations
print(f'The probability that we will have one or more shared birthdays over {n_trials} students is {prop_twinsies}')

The probability that we will have one or more shared birthdays over 20 students is 0.411048


In [None]:
#40?

In [111]:
# Represent our data's possible outcomes, the number of days in a year
# People born on leap days don't actually exist, so we are going to exclude them here:

outcomes = range(0, 365)
# Create the data!
n_trials = 40
n_simulations = 1_000_000

In [112]:
# Let's get our simulations. We'll make a simulation of 1 million classrooms of 23 students.
#
# outcomes: possible unique days of the year that a person could have.
# n_simulations: the number of simulated classroom trials
# n_trials: the number of student birthdays

In [113]:

classrooms = np.random.choice(outcomes, size=(n_simulations, n_trials))

In [114]:
list_of_twin_bdays = [len(np.unique(classrooms[n])) for n in range(0, n_simulations-1) if len(np.unique(classrooms[n])) < 40]

In [115]:
prop_twinsies = len(list_of_twin_bdays) / n_simulations
print(f'The probability that we will have one or more shared birthdays over {n_trials} students is {prop_twinsies}')


The probability that we will have one or more shared birthdays over 40 students is 0.891226
