# Foundations of Probability in Python

## Let's start flipping coins

Binary outcome: Bernulli trial

### Flipping coins

In [3]:
import numpy as np

In [4]:

from scipy.stats import bernoulli


np.random.seed(42)


coin_flip = bernoulli.rvs(p=.35, size=1)
print(coin_flip)

[0]


In [5]:

ten_coin_flips = bernoulli.rvs(p=.35, size=10)
coin_flips_sum = sum(ten_coin_flips)
print(coin_flips_sum)

4


### Using binom to flip even more coins

In [7]:
from scipy.stats import binom

In [8]:

draws = binom.rvs(n=10, p=.35, size=20)
print(draws)

[6 5 2 2 2 3 4 3 3 4 2 3 3 3 5 2 4 4 1 4]


### Predicting the probability of defects

* `binom.pmf()` - calculates the probability of having exactly k heads out of n coin flips.
* `binom.cdf()` - calculates the probability of having k heads or less out of n coin flips.
* `binom.sf()` - calculates the probability of having more than k heads out of n coin flips.

In [9]:

prob_one_defect = binom.pmf(k=1, n=50, p=0.02)
print(prob_one_defect)

0.37160171437460954


In [10]:

prob_no_defects = binom.pmf(k=0, n=50, p=0.02)
print(prob_no_defects)

0.36416968008711675


In [11]:

prob_two_or_less_defects = binom.cdf(k=2, n=50, p=0.02)
print(prob_two_or_less_defects)

0.921572251649031


### Predicting employment status

In [13]:

prob_five_yes = binom.pmf(k=5, n=8, p=0.65)
print(prob_five_yes)

0.2785857790625


In [14]:

prob_three_or_less_no = 1-binom.cdf(k=3, n=8, p=0.65)
print(prob_three_or_less_no)

0.8939090951171875


In [15]:

prob_more_than_three_yes = binom.sf(k=3, n=8, p=0.65)
print(prob_more_than_three_yes)

0.8939090951171875


### Predicting burglary conviction rate


In [16]:

four_solved = binom.pmf(k=4, n=9, p=0.20)
print(four_solved)

0.04587520000000002


In [17]:

more_than_three_solved = binom.sf(k=3, n=9, p=0.2)
print(more_than_three_solved)

0.08564172800000006


In [18]:


two_or_three_solved = binom.pmf(k=2, n=9, p=0.2) + binom.pmf(k=3, n=9, p=0.2)
print(two_or_three_solved)

0.4781506560000002


In [19]:

tail_probabilities = binom.cdf(k=1, n=9, p=0.2) + binom.sf(k=7, n=9, p=0.2)
print(tail_probabilities)

0.4362265599999997


### Calculating the sample mean

In [20]:
from scipy.stats import describe

In [21]:

sample_of_100_flips = binom.rvs(n=1, p=0.5, size=100)
sample_mean_100_flips = describe(sample_of_100_flips).mean
print(sample_mean_100_flips)

0.49


In [22]:

sample_mean_1000_flips = describe(binom.rvs(n=1, p=0.5, size=1000)).mean
print(sample_mean_1000_flips)

0.505


In [23]:

sample_mean_2000_flips = describe(binom.rvs(n=1, p=0.5, size=2000)).mean
print(sample_mean_2000_flips)

0.5045


### Checking the result

In [24]:
sample = binom.rvs(n=10, p=0.3, size=2000)


sample_describe = describe(sample)


mean = 10*0.3


variance = mean*(1-0.3)


binom_stats = binom.stats(n=10, p=0.3)

print(sample_describe.mean, sample_describe.variance, mean, variance, binom_stats)

2.9655 2.002310905452726 3.0 2.0999999999999996 (array(3.), array(2.1))


### Calculating the mean and variance of a sample

In [27]:
averages = []
variances = []

In [29]:
for i in range(0, 1500):
    
    sample = binom.rvs(n=10, p=0.25, size=10)
    
    averages.append(describe(sample).mean)
    variances.append(describe(sample).variance)
  

print("Mean {}".format(describe(averages).mean))


print("Variance {}".format(describe(variances).mean))


print(binom.stats(n=10, p=0.25))

Mean 2.5001333333333333
Variance 1.8629703703703704
(array(2.5), array(1.875))
