# Types of Distribution

## Binomial Distribution

In [2]:
from scipy.stats import binom

# Parameters of the binomial example used by the cells below
k = 5    # number of successes of interest
n = 10   # number of trials
p = 0.7  # probability of success in each trial

`binom.pmf` can be used to calculate the probability of exactly `k` successes for a given `n` and `p`.

In [5]:
# Probability of exactly k successes in n trials with per-trial success probability p
P = binom.pmf(k, n=n, p=p)
print(P)

0.10291934520000004


`binom.cdf` gives the cumulative probability of `k` or fewer successes for a given `n` and `p`.

In [13]:
# Cumulative probability of k or fewer successes in n trials
cp = binom.cdf(k, n=n, p=p)
print(cp)

0.15026833260000005


The *mean* and *variance* of the distribution are calculated using `binom.stats`.

In [24]:
# Request the mean ('m') and the variance ('v') of the binomial distribution
# in a single call, then report them on one line.
mean, var = binom.stats(n, p, moments='mv')
print(f"Mean:{mean}, Variance:{var}")

Mean:7.0, Variance:2.1000000000000005


## Hypergeometric Distribution

In [49]:
from scipy.stats import hypergeom
import matplotlib.pyplot as plt

# Parameters of the hypergeometric example used by the cells below
k = 12  # number of successes in the sample
N = 52  # size of the population
x = 26  # number of successes in the population
n = 20  # size of the sample

`hypergeom.pmf` can be used to calculate the probability of `k` successes for a given `N`, `x`, and `n`.

In [None]:
# Probability of exactly k successes in the sample.
# The distribution is built from (population size, successes in population,
# sample size); SciPy names these M, n, N, which differs from the notebook's
# N, x, n, so they are passed positionally.
P = hypergeom(N, x, n).pmf(k)
print(P)

`hypergeom.cdf` gives the cumulative probability of `k` or fewer successes for a given `N`, `x`, and `n`.

In [34]:
# Cumulative probability of k or fewer successes in the sample, for a
# population of size N containing x successes and a sample of size n
cp = hypergeom(N, x, n).cdf(k)
print(cp)

0.923319801344445


The *mean* and *variance* are calculated using `hypergeom.stats`.

In [51]:
# Request the mean ('m') and the variance ('v') of the hypergeometric
# distribution in a single call, then report them on one line.
mean, var = hypergeom.stats(N, x, n, moments='mv')
print(f'Mean:{mean}, Variance:{var}')

Mean:10.0, Variance:3.1372549019607843


## Poisson Distribution

In [2]:
from scipy.stats import poisson

# Desired number of successes (x) and the mean of the distribution (lam)
x = 12
lam = 9

# poisson.pmf: probability of exactly x successes for the given mean
P = poisson.pmf(x, mu=lam)
print(P)

# poisson.cdf: cumulative probability of x or fewer successes for the given mean
cp = poisson.cdf(x, mu=lam)
print(cp)

# poisson.stats: mean ('m') and variance ('v') of the distribution,
# requested together and printed one per line
mean, var = poisson.stats(lam, moments='mv')
print(mean)
print(var)

# poisson.rvs: draw 10 random numbers from the Poisson distribution with mean lam
r = poisson.rvs(mu=lam, size=10)
print(r)

0.0727650466416229
0.8757734291709649
9.0
9.0
[11  7 13  7  9 11 10 13 16  7]


## t-Distribution

In [None]:
import scipy.stats as st

# NOTE: after the degrees of freedom, SciPy's t-distribution takes `loc` and
# `scale`. `loc` is the mean (for df > 1), but `scale` is not the standard
# deviation: the standard deviation is scale * sqrt(df / (df - 2)), which only
# approaches `scale` as df grows. The "standard deviation" quoted in the
# questions below is therefore passed as the scale parameter.

# t.cdf(x, df, loc, scale) returns the lower-tail probability P(t < x).
# For a t-distribution with 30 degrees of freedom, mean 0, and scale 1,
# what is P(t < -0.25)?
print(st.t.cdf(-0.25, df=30, loc=0, scale=1))

# Same distribution: what is P(t < 1.5)?
print(st.t.cdf(1.5, df=30, loc=0, scale=1))

# t.sf(x, df, loc, scale) returns the upper-tail probability P(t > x).
# Same distribution: what is P(t > -0.25)?
print(st.t.sf(-0.25, df=30, loc=0, scale=1))

# Same distribution: what is P(t > 1.5)?
print(st.t.sf(1.5, df=30, loc=0, scale=1))

# To find the probability between two critical values,
# the difference between the two cumulative probabilities is calculated.
# Same distribution: what is P(-0.25 < t < 1.5)?
print(st.t.cdf(1.5, df=30, loc=0, scale=1) - st.t.cdf(-0.25, df=30, loc=0, scale=1))

# Same distribution: what is P(1.5 < t < 2.85)?
print(st.t.cdf(2.85, df=30, loc=0, scale=1) - st.t.cdf(1.5, df=30, loc=0, scale=1))

# Both t.cdf() and t.sf() can also be used for t-distributions
# with different degrees of freedom and when the mean is
# not 0 or the scale is not 1.
# For a t-distribution with 59 degrees of freedom, mean 55, and scale 7.5,
# what is P(t < 62)?
print(st.t.cdf(62, df=59, loc=55, scale=7.5))

# For a t-distribution with 34 degrees of freedom, mean 55, and scale 7.5,
# what is P(t > 51)?
print(st.t.sf(51, df=34, loc=55, scale=7.5))

# For a t-distribution with 59 degrees of freedom, mean 55, and scale 7.5,
# what is P(49 < t < 60)?
print(st.t.cdf(60, df=59, loc=55, scale=7.5) - st.t.cdf(49, df=59, loc=55, scale=7.5))

## F-distribution

In [None]:
import scipy.stats as st

# f.cdf(x, dfn, dfd) returns the probability of F being less than a critical
# value x for an F-distribution whose numerator degrees of freedom (between
# samples) equal dfn and whose denominator degrees of freedom (within samples)
# equal dfd.
# For an F-distribution, if the degrees of freedom between samples is 2
# and the degrees of freedom within samples is 5, what is P(F < 2)?
print(st.f.cdf(2, dfn=2, dfd=5))

# f.sf(x, dfn, dfd) returns the probability of F being greater than a critical
# value x for an F-distribution with between-samples degrees of freedom equal
# to dfn and within-samples degrees of freedom equal to dfd.
# For an F-distribution, if the degrees of freedom between samples is 2
# and the degrees of freedom within samples is 5, what is P(F > 3.5)?
print(st.f.sf(3.5, dfn=2, dfd=5))

# To find the probability between two critical values, the difference between
# the two cumulative probabilities is calculated.
# For an F-distribution, if the degrees of freedom between samples is 2
# and the degrees of freedom within samples is 5, what is P(2 < F < 3)?
print(st.f.cdf(3, dfn=2, dfd=5) - st.f.cdf(2, dfn=2, dfd=5))

## Chi Square Distribution

In [4]:
from scipy.stats import chi2

# Degrees of freedom and the chi-square_0 critical value
df, x2 = 7, 1.689

# chi2.cdf gives the area under the curve between 0 and chi-square_0
area = chi2.cdf(x2, df=df)
print(area)

# Conversely, when the area under the curve is given, chi2.ppf returns the
# chi-square_0 value (percentile) needed to obtain that area.
a = 0.025
perc = chi2.ppf(a, df=df)
print(perc)

0.02496315095256541
1.689869180677355
