In [1]:
import pandas as pd
from scipy.stats import binom, expon, norm, poisson
# scipy.stats API reference:
# https://docs.scipy.org/doc/scipy/reference/stats.html

In [6]:
# Small demo frame: the single column 'a' holds the integers 1 through 10.
df = pd.DataFrame({'a': list(range(1, 11))})
df

Unnamed: 0,a
0,1
1,2
2,3
3,4
4,5
5,6
6,7
7,8
8,9
9,10


## Discrete probability distributions

### Binomial distribution

In [2]:
# Probability mass function of X ~ Binomial(n, p): P(X = k).
#   k = 3   -> number of occurrences we ask about
#   n = 20  -> number of independent trials
#   p = 0.3 -> probability of an occurrence in each trial
binom.pmf(k=3, n=20, p=0.3)

0.07160367220526222

In [3]:
# Percent point function (inverse of the CDF). For a discrete variable it
# returns the smallest k whose cumulative probability P(X <= k) is at least q.
# Here q = 0.4 yields k = 5 because P(X <= 4) < 0.4 <= P(X <= 5) ~= 0.416,
# so the cumulative probability at 5 is not exactly 0.4.
binom.ppf(q=0.4, n=20, p=0.3)

5.0

In [4]:
# Cumulative distribution function: P(X <= 5) for X ~ Binomial(n=20, p=0.3).
binom.cdf(k=5, n=20, p=0.3)

0.41637082944748144

In [7]:
# Evaluate the Binomial(n=20, p=0.3) PMF at every value in column 'a'.
# binom.pmf is vectorised, so the whole column goes through in one call
# instead of invoking a Python lambda once per row via .apply.
df['binom_pmf'] = binom.pmf(df['a'].to_numpy(), 20, 0.3)
df

Unnamed: 0,a,binom_pmf
0,1,0.006839
1,2,0.027846
2,3,0.071604
3,4,0.130421
4,5,0.178863
5,6,0.191639
6,7,0.164262
7,8,0.114397
8,9,0.06537
9,10,0.030817


### Poisson distribution

In [8]:
# Probability mass function: P(X = 4) for X ~ Poisson(mu=5).
# `poisson` is imported in the notebook's top import cell.
poisson.pmf(k=4, mu=5)

0.17546736976785063

In [11]:
# Draw 5 random variates from a Poisson distribution with mu = 10.
# random_state pins the generator so re-running the notebook reproduces
# the same draws; remove it to get fresh samples on every run.
poisson.rvs(10, size=5, random_state=42)

array([13, 11,  8,  9, 10])

In [13]:
# Cumulative distribution function: P(X <= 10) for X ~ Poisson(mu=4).
poisson.cdf(k=10, mu=4)

0.9971602338794863

## Continuous probability distributions

### Normal distribution

In [14]:
# Probability density of the standard normal N(0, 1) at x = 20, i.e. far
# out in the tail, so the density is vanishingly small.
# `norm` is imported in the notebook's top import cell.
norm.pdf(x=20, loc=0, scale=1)

5.520948362159764e-88

In [18]:
# Cumulative distribution function of N(0, 1): P(X <= 2), i.e. the area
# under the density curve to the left of 2.
norm.cdf(x=2, loc=0, scale=1)

0.9772498680518208

In [15]:
# Draw 20 random variates from a normal distribution with mean 0 and
# standard deviation 1. random_state pins the generator so re-running the
# notebook reproduces the same draws; remove it for fresh samples.
norm.rvs(size=20, loc=0, scale=1, random_state=42)

array([ 0.28572838, -0.90820854,  0.15611328,  0.49621967,  0.21797481,
       -0.01130368,  0.13242866,  1.73990268,  0.17765875, -0.06662965,
       -1.27042603,  0.1863341 ,  0.04304107,  0.78938017,  0.48683264,
        1.57322182,  0.8457876 ,  1.49592496, -0.67847275, -0.33379425])

### Exponential distribution

In [20]:
# Probability density of the exponential distribution (loc=0, scale=1)
# at x = 20; with these parameters the density is exp(-x).
# `expon` is imported in the notebook's top import cell.
expon.pdf(x=20, loc=0, scale=1)

2.061153622438558e-09

In [21]:
# Cumulative distribution function of the exponential distribution
# (loc=0, scale=1): P(X <= 2), the area under the density up to 2.
expon.cdf(x=2, loc=0, scale=1)

0.8646647167633873

In [22]:
# Draw 20 random variates from an exponential distribution with loc = 0
# and scale = 1 (not a normal distribution: all draws are non-negative).
# random_state pins the generator so re-running the notebook reproduces
# the same draws; remove it for fresh samples.
expon.rvs(size=20, loc=0, scale=1, random_state=42)

array([0.13595851, 2.91199096, 0.36797418, 1.03257427, 0.16664366,
       2.34761691, 0.22580475, 0.32473147, 2.04045015, 0.11018252,
       0.09978225, 0.97318483, 1.25106744, 0.22053716, 0.51916134,
       2.68604693, 0.53526842, 0.04590027, 0.27356904, 0.71763698])