In [None]:
%pylab inline
rcParams['figure.figsize'] = (10, 4) #wide graphs by default
from __future__ import print_function
from __future__ import division
from IPython.display import display, HTML, Audio

# Random Distributions
- [List of probability distributions](https://en.wikipedia.org/wiki/List_of_probability_distributions)

In [None]:
# Four samples from the continuous uniform distribution.
random.random(4)

In [None]:
# A shape tuple instead of a count gives a 4x4 grid of uniform samples.
random.random( (4,4) )

In [None]:
# Keep the 4x4 sample grid in a variable; the bare name on the last line
# makes the notebook display it.
mat = random.random((4,4))
mat

In [None]:
# 100 uniform samples drawn as unconnected dots; the trailing semicolon
# keeps the plot's return value out of the cell output.
signal = random.random(size=100)
plot(signal, 'o');

In [None]:
# 44100 uniform samples rescaled from [0, 1) to [-1, 1), i.e. centred on zero.
signal = 2 * random.random(size=44100) - 1
plot(signal);

In [None]:
# Inline audio player for the noise; at rate 44100 the 44100 samples
# generated above last one second.
Audio(data = signal, rate = 44100)

## Mean, median, mode

In [None]:
# Average of the noise samples; they are spread evenly over [-1, 1), so this
# should land close to 0.
mean(signal)

In [None]:
# Mean of only 10 uniform samples: a noisy estimate of the true mean, 0.5.
random.random(10).mean()

In [None]:
# Same estimate from 100 samples.
random.random(100).mean()

In [None]:
# Same estimate from 1,000 samples.
random.random(1000).mean()

In [None]:
# Same estimate from 100,000 samples.
random.random(100000).mean()

In [None]:
# Same estimate from 10,000,000 samples; the sample mean keeps closing in on 0.5.
random.random(10000000).mean()

In [None]:
# Same estimate from 100,000,000 samples.
# NOTE(review): this allocates 10^8 samples at once (roughly 800 MB if they
# are 8-byte floats) -- confirm the target machines can afford it.
random.random(100000000).mean()

In [None]:
# Type of the value returned when no size is requested.
type(random.random())

In [None]:
# Type of the value returned when four samples are requested.
type(random.random(4))

In [None]:
# Element type of the generated samples.
random.random(4).dtype

In [None]:
# Ten rolls of a six-sided die: `high` is exclusive, so values are 1..6.
randint(low=1, high=7, size=10)

In [None]:
# Ten rolls of a twenty-sided die (values 1..20).
randint(low=1, high=21, size=10)

In [None]:
# Average of ten die rolls.
randint(low=1, high=7, size=10).mean()

In [None]:
# Average of a thousand die rolls.
randint(low=1, high=7, size=1000).mean()

In [None]:
# Keep a thousand die rolls for the mode / histogram cells that follow.
die = randint(low=1, high=7, size=1000)

In [None]:
# `mode` is not in the pylab namespace (it is imported from scipy.stats in the
# next cell), so on a fresh kernel this call raises NameError. Catch and show
# the error so that "Run All" does not halt here.
try:
    print(mode(die))
except NameError as err:
    print(err)

In [None]:
# numpy (and therefore pylab) has no mode(); SciPy's stats module provides one.
from scipy.stats import mode
# The "mode" is the value that occurs most often.
mode(die)

In [None]:
# Expected count per face in 1000 rolls of a six-sided die.
1000 / 6 # each face is equally likely

In [None]:
# Histogram of the thousand rolls with six bins; the trailing semicolon
# suppresses the textual return value.
hist(die, bins=6);

In [None]:
# A small hand-written data set (nine integers) and its arithmetic mean.
signal = array([13, 18, 13, 14, 13, 16, 14, 21, 13])
signal.mean()

In [None]:
# Sorted view of the data: with nine values, the fifth one is the median.
sort(signal)

In [None]:
# numpy arrays have a mean() method but no median() method, so
# `signal.median()` raises AttributeError; use the function form instead.
median(signal)

In [None]:
# Mean and median side by side, displayed as a tuple.
mean(signal), median(signal)

In [None]:
# Sorted again so repeated values sit next to each other; the most frequent
# one is the mode.
sort(signal)

In [None]:
# Most frequent value of the data set, via the scipy.stats.mode imported earlier.
mode(signal)

In [None]:
# Extract just the modal value. Older SciPy returns a length-1 array in
# `.mode` for 1-D input, newer releases return a scalar (where `[0]` raises
# IndexError); atleast_1d makes the lookup valid for both.
atleast_1d(mode(signal).mode)[0]

In [None]:
# Histogram of the small data set with the default binning.
hist(signal);

In [None]:
# Same data plus three extra 18s, built with the r_ indexing shortcut.
# 13 and 18 now both occur four times, so the data has two modes.
# NOTE(review): scipy.stats.mode reports a single value for a tie (the
# smallest, per the SciPy docs) -- confirm against the installed version.
signal = r_[13, 18, 13, 14, 13, 16, 14, 21, 13, 18, 18, 18]
print(type(signal))
mode(signal)

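What is `r_`? It is a NumPy shortcut that concatenates the comma-separated values (or slices) written inside the brackets into a single array. Read: https://numpy.org/doc/stable/reference/generated/numpy.r_.html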

## Probability distributions
- normal (aka Gaussian)
- uniform
- exponential
- triangular
- gamma
- poisson
- binomial

In [None]:
# One draw from the normal distribution with default parameters.
normal()

In [None]:
# 100 standard-normal samples as a line plot, plus the largest value drawn.
signal = normal(loc=0.0, scale=1.0, size=100)
plot(signal)
print(signal.max())

In [None]:

# Draw both 128x128 sample grids first (normal, then uniform), then show them
# side by side as images, each with its own colour bar.
normal_matrix = normal(size=(128, 128))
# random.random is a "continuous uniform" distribution
uniform_matrix = random.random(size=(128, 128))

for _slot, _samples in ((121, normal_matrix), (122, uniform_matrix)):
    subplot(_slot)
    imshow(_samples)
    colorbar()

In [None]:
# Histogram of the first column of each matrix: normal on the left,
# uniform on the right.
subplot(1, 2, 1)
hist(normal_matrix[:, 0])
subplot(1, 2, 2)
hist(uniform_matrix[:, 0]);

In [None]:
# Ten die rolls, printed and then histogrammed with six bins.
# (The earlier stand-alone randint call was removed: its result was neither
# stored nor displayed, since only a cell's last expression is shown.)
die = randint(1, 7, 10)
print(die)
hist(die, bins=6)
pass

In [None]:
# A fresh set of ten rolls, histogrammed with six automatically placed bins.
die = randint(low=1, high=7, size=10)
hist(die, bins=6);

In [None]:
# Explicit bin edges 0.5, 1.5, ..., 6.5 so that each integer face 1..6 sits in
# the middle of its own bin.
mybins = arange(0.5, 7.0)
print(mybins)
hist(die, bins=mybins);

In [None]:
# 100 rolls with one bin per face.
die = randint(low=1, high=7, size=100)
hist(die, bins=arange(7) + 0.5);

In [None]:
# 10,000 rolls with one bin per face.
die = randint(low=1, high=7, size=10000)
hist(die, bins=arange(7) + 0.5);

Clearly, `randint` pulls integers from a discrete uniform distribution: with enough rolls, every face comes up about equally often.

## [Normal distribution](https://en.wikipedia.org/wiki/Normal_distribution)

> very common continuous probability distribution. Normal distributions are important in statistics and are often used in the natural and social sciences to represent real-valued random variables whose distributions are not known.

Parameters are the mean $\mu$ (the "center" or location) and the standard deviation $\sigma$ (the "scale").


In [None]:
# Ten standard-normal draws: far too few for the bell shape to show.
r = normal(loc=0.0, scale=1.0, size=10)
hist(r);

In [None]:
# One hundred standard-normal draws.
r = normal(loc=0.0, scale=1.0, size=100)
hist(r);

In [None]:
# Ten thousand draws with mean 0 and standard deviation 1.
r = normal(loc=0, scale=1, size=10000)
hist(r);

In [None]:
# Same samples with 31 bins.
hist(r, bins=31);

In [None]:
# Same mean, standard deviation raised to 5 (pull up the docs for `normal`
# to see the parameter order: loc, scale, size).
r = normal(loc=0, scale=5, size=10000)
hist(r, bins=31);

In [None]:
# The same samples with the default number of bins.
hist(r);

In [None]:
# The same samples with 30 bins.
hist(r, bins=30);

In [None]:
# One thousand draws (mean 0, standard deviation 5) and the type of the result.
r = normal(loc=0, scale=5, size=1000)
type(r)

In [None]:
# Histogram plus the sample mean, standard deviation and variance.
hist(r, bins=31)
print("mean:%f std:%f var:%f" % (mean(r), std(r), var(r)))

> The Standard Deviation (commonly $\sigma$) is a measure of how spread out numbers are. It is the square root of the Variance. The Variance ($\sigma^2$) is the average of the squared differences from the mean.

## [Exponential distribution](https://en.wikipedia.org/wiki/Exponential_distribution)

In [None]:
# 10,000 exponential draws with scale 1.
r = exponential(scale=1, size=10000)
hist(r, bins=30);

In [None]:
# Scale 2.
r = exponential(scale=2, size=10000)
hist(r, bins=30);

In [None]:
# Scale 10.
r = exponential(scale=10, size=10000)
hist(r, bins=30);

In [None]:
# Back to scale 1 for comparison with the wider histograms.
r = exponential(scale=1, size=10000)
hist(r, bins=30);

## [Triangular distribution](https://en.wikipedia.org/wiki/Triangular_distribution)

In [None]:
# Triangular distribution on [0, 5] whose peak (mode) sits at the left edge.
r = triangular(left=0, mode=0, right=5, size=10000)
hist(r);

In [None]:
# The same samples with 30 bins.
hist(r, bins=30);

In [None]:
# Triangular distribution on [1, 9] with its peak at 3.
r = triangular(
    left=1,
    mode=3,
    right=9,
    size=10000,
)
hist(r);

In [None]:
# The same samples with 31 bins.
hist(r, bins=31);

## [Gamma distribution](https://en.wikipedia.org/wiki/Gamma_distribution)

>  gamma distribution is a two-parameter family of continuous probability distributions. The common exponential distribution and chi-squared distribution are special cases of the gamma distribution.

Parameters are _shape_ $k$ and _scale_ $\theta$.

In [None]:
# Gamma draws with shape k = 1 and scale theta = 2.
r = gamma(shape=1, scale=2, size=10000)
hist(r, bins=30);

In [None]:
# Shape k = 5, scale theta = 1.
r = gamma(shape=5, scale=1, size=10000)
hist(r, bins=30);

In [None]:
# Shape k = 50, scale theta = 1.
r = gamma(shape=50, scale=1, size=10000)
hist(r, bins=30);

## [Poisson distribution](https://en.wikipedia.org/wiki/Poisson_distribution)

> a discrete probability distribution that expresses the probability of a given number of events occurring in a fixed interval of time and/or space if these events occur with a known average rate and independently of the time since the last event.

Parameter is $\lambda$ (lambda), the average number of events per interval.

- The number of meteors greater than 1 meter diameter that strike earth in a year
- The number of patients arriving in an emergency room between 10 and 11 pm

In [None]:
# Poisson counts with an average of 0.5 events per interval.
r = poisson(lam=0.5, size=10000)
hist(r, bins=30);

In [None]:
# Poisson counts with an average of 10 events per interval.
r = poisson(lam=10, size=10000)
hist(r, bins=30);

## [Binomial distribution](https://en.wikipedia.org/wiki/Binomial_distribution)

> the discrete probability distribution of the number of successes in a sequence of n independent experiments, each asking a yes–no question... The binomial distribution is frequently used to model the number of successes in a sample of size n drawn with replacement from a population of size N.

Parameters are the number of trials $n \in \{0, 1, 2, 3, \dots\}$ and the success probability $p \in [0, 1]$.

In [None]:
# Number of successes in 1000 trials that each succeed with probability 1/6.
binomial(n=1000, p=1.0/6.0)

In [None]:
# Expected number of successes n * p for the binomial(1000, 1/6) draw in the
# previous cell. The numerator was 100.0, which does not match n = 1000.
1000.0/6

In [None]:
# One draw with n = 10000 printed for reference, then 10,000 such draws
# stored for the histogram.
print(binomial(n=10000, p=1.0/6.0))
r = binomial(n=10000, p=1.0/6.0, size=10000)

In [None]:
# Distribution of the binomial success counts.
hist(r, bins=30);

## Probability density/mass functions

[Probability density function (PDF)](https://en.wikipedia.org/wiki/Probability_density_function)

> is used to specify the probability of the random variable falling within a particular range of values, as opposed to taking on any one value. This probability is given by the integral of this variable’s PDF over that range.

The area under the curve is 1.

[Probability mass function (PMF)](https://en.wikipedia.org/wiki/Probability_mass_function)

> gives the probability that a discrete random variable is exactly equal to some value. It differs from a probability density function (pdf) in that the latter is associated with continuous rather than discrete random variables.

The sum of all elements is 1.

In [None]:
# PDF of the standard uniform distribution, evaluated slightly beyond [0, 1]
# on both sides so the edges of the rectangle are visible.
from scipy import stats
quantiles = linspace(-0.5, 1.5, num=100)
plot(quantiles, stats.uniform.pdf(quantiles))
ylim(0, 1.1)

In [None]:
# PDF of the standard normal distribution over [-4.5, 4.5], drawn on the same
# vertical range as the uniform PDF.
quantiles = linspace(-4.5, 4.5, num=1024)
plot(quantiles, stats.norm.pdf(quantiles))
ylim(0, 1.1)

The probability density function $p(z)$ of a Gaussian random variable $z$ with mean $\mu$ and standard deviation $\sigma$ is given by:

$$p(z) = \frac{1}{\sigma \sqrt{2 \pi}} e^{-\frac{(z - \mu)^2}{2\sigma^2}}$$

In [None]:
# Poisson PMF at the integers 0..49 for three different rates.
quantiles = arange(50)
plot(quantiles, stats.poisson.pmf(quantiles, 0.2), 'o-')
plot(quantiles, stats.poisson.pmf(quantiles, 5), 'o-')
plot(quantiles, stats.poisson.pmf(quantiles, 20), 'o-')
# Raw strings: '\l' is not a valid escape sequence, so the plain-string form
# triggers an invalid-escape warning on modern Python. The label text is
# unchanged.
legend([r'$\lambda$ = 0.2', r'$\lambda$ = 5', r'$\lambda$ = 20'])
pass

In [None]:
# the PMF of a fair die
# Six faces (1..6), each with probability 1/6; the six stems sum to 1.
stem(arange(1, 7), [1/6] * 6)
ylim((0, 1))

By: Andrés Cabrera mantaraya36@gmail.com
For MAT course MAT 201A at UCSB

Adapted by Karl Yerkes

This ipython notebook is licensed under the CC-BY-NC-SA license: http://creativecommons.org/licenses/by-nc-sa/4.0/

![Creative Commons BY-NC-SA 4.0 license badge](https://i.creativecommons.org/l/by-nc-sa/4.0/88x31.png)