## Bayes Theorem: Walkthrough

In [1]:
#from scipy import stats 
import scipy.stats as st

### Useful functions for probability and statistics:

In [None]:
# rvs - draws random variates (random samples) from a distribution

In [2]:
# Build a frozen binomial distribution: 1000 trials, success probability 0.1
distribution = st.binom(n=1000, p=0.1)
# Draw 10 random samples from the distribution above
x = distribution.rvs(size=10)
x

array([ 91,  95,  78, 102,  94, 120,  94,  95, 116,  95])

In [5]:
# show the built-in documentation for the binomial distribution object
help(st.binom)

Help on binom_gen in module scipy.stats._discrete_distns object:

class binom_gen(scipy.stats._distn_infrastructure.rv_discrete)
 |  binom_gen(a=0, b=inf, name=None, badvalue=None, moment_tol=1e-08, values=None, inc=1, longname=None, shapes=None, extradoc=None, seed=None)
 |  
 |  A binomial discrete random variable.
 |  
 |  %(before_notes)s
 |  
 |  Notes
 |  -----
 |  The probability mass function for `binom` is:
 |  
 |  .. math::
 |  
 |     f(k) = \binom{n}{k} p^k (1-p)^{n-k}
 |  
 |  for ``k`` in ``{0, 1,..., n}``.
 |  
 |  `binom` takes ``n`` and ``p`` as shape parameters.
 |  
 |  %(after_notes)s
 |  
 |  %(example)s
 |  
 |  Method resolution order:
 |      binom_gen
 |      scipy.stats._distn_infrastructure.rv_discrete
 |      scipy.stats._distn_infrastructure.rv_generic
 |      builtins.object
 |  
 |  Methods inherited from scipy.stats._distn_infrastructure.rv_discrete:
 |  
 |  __init__(self, a=0, b=inf, name=None, badvalue=None, moment_tol=1e-08, values=None, inc=1, long

In [3]:
# pmf - probability mass function: P(X == k) for a discrete distribution
distribution.pmf(100) # there is a ~4.2% probability that a randomly generated number will be exactly 100

0.042016790861120215

In [4]:
# pmf also accepts a list of values and returns one probability per value
distribution.pmf([99,100,101])

array([0.04197016, 0.04201679, 0.04160078])

In [6]:
# cdf - cumulative distribution function: P(X <= k)
distribution.cdf(100) # there is a ~52.7% probability that a randomly generated number will be 100 or lower

0.5265990812946334

In [7]:
# cdf evaluated at several points at once; results come back in the same order as the inputs
distribution.cdf([150,100])

array([0.99999972, 0.52659908])

In [None]:
# pdf - probability density function
# continuous equivalent of pmf (continuous distributions expose pdf instead of pmf)

In [8]:
# mean() - expected value of the distribution (n*p = 1000*0.1 for this binomial)
distribution.mean()

100.0

In [9]:
# std() - standard deviation of the distribution (sqrt(n*p*(1-p)) = sqrt(90) for this binomial)
distribution.std()

9.486832980505138

For more details, see the [scipy.stats reference documentation](https://docs.scipy.org/doc/scipy-0.18.1/reference/stats.html).

In [10]:
# names of all discrete distributions exposed by scipy.stats
# (module attributes that are instances of st.rv_discrete, in dir() order)
list(filter(lambda name: isinstance(getattr(st, name), st.rv_discrete), dir(st)))

['bernoulli',
 'betabinom',
 'binom',
 'boltzmann',
 'dlaplace',
 'geom',
 'hypergeom',
 'logser',
 'nbinom',
 'planck',
 'poisson',
 'randint',
 'skellam',
 'yulesimon',
 'zipf']

In [11]:
# names of all continuous distributions exposed by scipy.stats
# (module attributes that are instances of st.rv_continuous, in dir() order)
list(filter(lambda name: isinstance(getattr(st, name), st.rv_continuous), dir(st)))

['alpha',
 'anglit',
 'arcsine',
 'argus',
 'beta',
 'betaprime',
 'bradford',
 'burr',
 'burr12',
 'cauchy',
 'chi',
 'chi2',
 'cosine',
 'crystalball',
 'dgamma',
 'dweibull',
 'erlang',
 'expon',
 'exponnorm',
 'exponpow',
 'exponweib',
 'f',
 'fatiguelife',
 'fisk',
 'foldcauchy',
 'foldnorm',
 'frechet_l',
 'frechet_r',
 'gamma',
 'gausshyper',
 'genexpon',
 'genextreme',
 'gengamma',
 'genhalflogistic',
 'geninvgauss',
 'genlogistic',
 'gennorm',
 'genpareto',
 'gilbrat',
 'gompertz',
 'gumbel_l',
 'gumbel_r',
 'halfcauchy',
 'halfgennorm',
 'halflogistic',
 'halfnorm',
 'hypsecant',
 'invgamma',
 'invgauss',
 'invweibull',
 'johnsonsb',
 'johnsonsu',
 'kappa3',
 'kappa4',
 'ksone',
 'kstwo',
 'kstwobign',
 'laplace',
 'levy',
 'levy_l',
 'levy_stable',
 'loggamma',
 'logistic',
 'loglaplace',
 'lognorm',
 'loguniform',
 'lomax',
 'maxwell',
 'mielke',
 'moyal',
 'nakagami',
 'ncf',
 'nct',
 'ncx2',
 'norm',
 'norminvgauss',
 'pareto',
 'pearson3',
 'powerlaw',
 'powerlognorm',
 

## Bayes Theorem: Drug Tester Example

A drug-testing machine is 99% accurate: 99% of drug users test positive and 99% of non-users test negative. 0.5% of the population are drug users.

What is the probability that a person who tests positive is actually a drug user?

We will use **Bayes Theorem**:

<img src="bayes_theorem.png" alt="Drawing" style="width: 600px;"/>

In [12]:
# Bayes' theorem: P(A|B) = P(B|A) * P(A) / P(B)
#   A - the person is a drug user
#   B - the test comes back positive
prob_a = 0.005        # P(A): prior probability of being a drug user
prob_a_neg = 0.995    # P(not A): probability of being a non-user
prob_b_a = 0.99       # P(B|A): positive test given the person is a drug user
prob_b_a_neg = 0.01   # P(B|not A): positive test given a non-user (test error)

# P(B) by the law of total probability: true positives + false positives
true_positive = prob_b_a * prob_a
false_positive = prob_b_a_neg * prob_a_neg
prob_b = true_positive + false_positive

# posterior P(A|B): probability of being a drug user given a positive test
prob_a_b = true_positive / prob_b
print(prob_a_b)

0.33221476510067116
