In [1]:
import numpy as np
import pandas as pd
# import viz # curriculum example visualizations


## How to run a simulation with Python/Numpy/Pandas
1. Figure out a way to represent data
2. Create a matrix of random data, rows = simulations and columns = 2 (because we roll 2 dice each time)
3. Apply an aggregate function, row wise to get the results of the simulation
4. Apply a final aggregate to get probablity

In [2]:
# Probability of flipping "Heads" on a coin?

# Flip a coin 100,000 times and calcualte the prob of flipping heads

# Find a way to represent data...Step 1

#Step #2
outcomes = ["Heads", "Tails"]
number_of_simulations = 100_000

#Step 3
flips = np.random.choice(outcomes, size=number_of_simulations)
flips

array(['Heads', 'Tails', 'Heads', ..., 'Tails', 'Tails', 'Tails'],
      dtype='<U5')

In [8]:
(flips == "Heads")

array([False,  True,  True, ...,  True,  True, False])

In [9]:
#Step #4
#After flipping 100_000 coins, our experimental prob of flipping heads is:

(flips == "Heads").mean()

0.5019

In [11]:
#What is the prob of rolling a dice on a six sided dice
#Step 1:
outcomes = [1,2,3,4,5,6]

#Step 2:
n_simulations = 10_000

rolls = np.random.choice(outcomes, size=n_simulations)
rolls

array([3, 4, 6, ..., 1, 2, 4])

In [12]:
#What are the chances of rolling a 5?
(rolls == 5).mean()

0.1689

In [13]:
#What is the prob of rolling a 5 or a 6?
(rolls >= 5).mean()

0.3331

In [14]:
# What the chances we roll something other than 3?
(rolls != 3).mean()

0.8317

## Roll 2 dice at once
- Step 1: How to represent data
- Step 2: Matrix of random data, rows = simulations, columns = trails
- Step 3. Apply an aggregate row-wise to get the result of each simulation
- Step 4. Apply a final aggregate to get the prob

In [16]:
#What are the chances of rolling snake eyes on 2 dice?

#Step 1
outcomes = [1,2,3,4,5,6]

#Step 2
#simulations = the number of times we run the experiment
# Trials = the number of trials in each experiment

n_simulations = 1_000_000
n_trials = 2 #bc we roll 2 dice

#size arguemnt can set our simulation and trial size
rolls = np.random.choice(outcomes, size=(n_simulations, n_trials) )
rolls



array([[4, 4],
       [3, 5],
       [6, 2],
       ...,
       [3, 3],
       [3, 5],
       [5, 6]])

In [18]:
#Step 3: apply an aggregate row-wise
#axis = 1 means sum across the rows....note that axis=0 will sum down the columns. 
sum_of_rolls = rolls.sum(axis=1)
sum_of_rolls

array([ 8,  8,  8, ...,  6,  8, 11])

In [19]:
#Step 4: apply a final aggregate:
#add up all the times that a simulation(experiment) produces the sum of 2
(sum_of_rolls == 2).mean()

0.027946

In [26]:
theoretical = 1/6 * 1/6
print(f"Our theoretical prob of rollinn snake eyes is: {theoretical} ")

Our theoretical prob of rollinn snake eyes is: 0.027777777777777776 


In [27]:
# What is the prob of rolling a 7 on two dice?
# 1+ 6, 2+5, 3+4, 4+3, 5+2, 6+1

outcomes = [1,2,3,4,5,6]

#Step 2:
# size=(simulations, trials)
rolls = np.random.choice(outcomes, size=(10_000, 2))

#Step 3:
sum_of_rolls = rolls.sum(axis=1)

(sum_of_rolls == 7).mean()

0.1683

In [28]:
# possible sum outcomes from rolling two dice

possible_sums = [2,3,4,5,6,7,8,9,10,11,12]

y = [(sum_of_rolls == n).mean() for n in sum_of_rolls]

In [30]:
sum_of_rolls[:10]

array([ 3,  7,  7,  6, 10,  5,  3, 12, 12,  9])

In [31]:
y[:10]

[0.0551,
 0.1683,
 0.1683,
 0.1387,
 0.0818,
 0.1118,
 0.0551,
 0.0301,
 0.0301,
 0.1128]

In [38]:
# What are the experimental probabilities of rolling each sum
df = pd.DataFrame()
# possible sum outcomes from 2 dice
df["outcome"] = [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
# produce the probability of seeing each sum
y = [(sum_of_rolls == n).mean() for n in sum_of_rolls]
# set the probability to its own column
df["probability"] = pd.Series(y)
print("Sum outcome of rolling 2 dice and the probability of seeing that outcome")
df

Sum outcome of rolling 2 dice and the probability of seeing that outcome


Unnamed: 0,outcome,probability
0,2,0.0551
1,3,0.1683
2,4,0.1683
3,5,0.1387
4,6,0.0818
5,7,0.1118
6,8,0.0551
7,9,0.0301
8,10,0.0301
9,11,0.1128


## Setting our own probabilities

In [39]:
outcomes = ["Heads","Tails"]
flips = np.random.choice(outcomes, size=(10_000), p=[0.55, 0.45])

(flips == "Heads").mean()

0.5478

In [40]:
#what are the chances of flipping two heads in a row?
flips = np.random.choice(outcomes, size=(10_000),p=[0.55, 0.45])
flips

array(['Tails', 'Tails', 'Tails', ..., 'Tails', 'Heads', 'Heads'],
      dtype='<U5')

In [41]:
# It'll be easier to check for two heads if the head =1 and tails = 0
outcomes=[1,0]
flips = np.random.choice(outcomes, size=(100_000, 2),p=[0.55, 0.45])
flips

array([[1, 1],
       [0, 1],
       [1, 1],
       ...,
       [1, 0],
       [0, 1],
       [0, 1]])

In [42]:
#axis = 1 to sum across the row
num_of_heads = flips.sum(axis=1)
num_of_heads

array([2, 1, 2, ..., 1, 1, 1])

In [43]:
(num_of_heads == 2).mean()

0.30397

In [44]:
#What if it was a fair coin?
outcomes=[1,0]
flips = np.random.choice(outcomes, size=(100_000, 2))
num_of_heads = flips.sum(axis=1)
(num_of_heads == 2).mean()

0.25084

In [47]:
#Let's add some logic to probability
# np.random.normal? #how to get the docs example

numbers = np.random.randint(-50, 100, 100_000)
numbers 

array([ 17,  23, -50, ...,  53, -16,  93])

In [48]:
#What is the prob that any number is negative?
(numbers < 0).mean()


0.33304

In [49]:
#What is the prob a number is odd?
(numbers % 2 != 0).mean()

0.49896

In [57]:
#Prob of a number being odd and neg?

is_odd = (numbers % 2 != 0)
is_negative = (numbers > 0)
(is_odd & is_negative).mean()

0.333

In [52]:
# Prob of a number being even OR positve
is_even = (numbers % 2 == 0)
is_even

array([False, False,  True, ..., False,  True, False])

In [53]:
is_positive = (numbers > 0)
is_positive

array([ True,  True, False, ...,  True, False,  True])

In [54]:
(is_even | is_positive).mean()

0.83404

In [58]:
# Rolling two dice at a time, what is the prob of rolling an odd and then an even?

In [59]:
first_die = np.random.choice([1, 2, 3, 4, 5, 6], size=100_000)
second_die = np.random.choice([1, 2, 3, 4, 5, 6], size=100_000)
first_die, second_die

(array([5, 5, 4, ..., 5, 2, 3]), array([5, 3, 6, ..., 5, 6, 1]))

In [60]:
first_die_is_odd = (first_die % 2 != 0)

In [61]:
second_die_is_even = (second_die % 2 == 0)

In [62]:
first_odd_second_even = (first_die_is_odd & second_die_is_even)
first_odd_second_even

array([False, False, False, ..., False, False, False])

In [63]:
first_odd_second_even.mean()

0.25012