# Basic statistics with Python

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
import mpld3
import numpy as np
import scipy.misc
import scipy.special

# Switch on mpld3's notebook integration for the figures drawn in the cells below
# (presumably renders them as interactive D3 plots -- confirm against the installed mpld3).
mpld3.enable_notebook()

## Properties of the exponential function

### The exponential function

In [None]:
# Compare exponential growth a**x for several bases a; the first curve is
# the natural exponential e**x.
x_val = np.arange(0.0, 20.0, 0.1)

exp_val = np.exp(x_val)

# Label every curve so the legend can tell the bases apart, in the same way
# as the legends of the other comparison plots in this notebook.
plt.plot(x_val, exp_val, label="e")
for base in (1.5, 2, 3, 4):
    plt.plot(x_val, base**x_val, label=str(base))

plt.title('exp')
plt.legend()
# x is sampled up to 20, but the view is clipped to x <= 10 and y <= 500
# so that the slower-growing curves stay visible.
plt.axis([0, 10, 0, 500])
plt.xlabel('x (x)')
plt.ylabel('f (y)')
plt.grid(True)
plt.show()

### How is the distribution function constructed?
Why is there a square instead of an absolute value?

In [None]:
# Two-sided exponential decay a**(-|x|), drawn for the natural base e
# and for the bases 1.5, 2, 3 and 4.
x_val = np.arange(-20.0, 20.0, 0.1)
neg_abs = -np.abs(x_val)

exp_val = np.exp(neg_abs)

# plt.plot returns a list of line handles; collect them in drawing order
# so that they line up with the legend labels below.
handles = plt.plot(x_val, exp_val)
for base in (1.5, 2, 3, 4):
    handles += plt.plot(x_val, base**neg_abs)

plt.title('exp abs')
plt.legend(handles, ["e", "1.5", "2", "3", "4"])
plt.axis([-15, 15, 0, 1])
plt.grid(True)
plt.xlabel('x (x)')
plt.ylabel('f (y)')

plt.show()

In [None]:
# Bell-shaped curves a**(-x**2): squaring x (instead of taking |x|)
# is used in the exponent here.
x_val = np.arange(-20.0, 20.0, 0.1)
neg_square = -(x_val**2)

exp_val = np.exp(neg_square)

# First handle: natural base e; remaining handles: one per explicit base,
# in the same order as the legend labels.
other_bases = (1.5, 2, 3, 4)
handles = plt.plot(x_val, exp_val) + [
    plt.plot(x_val, base**neg_square)[0] for base in other_bases
]

plt.title('exp')
plt.legend(handles, ["e", "1.5", "2", "3", "4"])
plt.axis([-5, 5, 0, 1])
plt.grid(True)
plt.xlabel('x (x)')
plt.ylabel('f (y)')
plt.show()

### Adding mean and standard deviation

In [None]:
# Same bell curves as before, now shifted by `mean` and scaled by `sd`.
x_val = np.arange(-20.0, 20.0, 0.1)

# play with these two values to see an effect:
mean = -1.0
sd = 1.0

# Exponent shared by all curves: -(x - mean)^2 / (2 * sd^2)
exponent = -(x_val-mean)**2/(2*sd**2)

exp_val = np.exp(exponent)

# Collect the line handles in drawing order for the legend.
handles = plt.plot(x_val, exp_val)
for base in (1.5, 2, 3, 4):
    handles.extend(plt.plot(x_val, base**exponent))

plt.title('exp')
plt.legend(handles, ["e", "1.5", "2", "3", "4"])
plt.axis([-5, 5, 0, 1])
plt.grid(True)
plt.xlabel('x (x)')
plt.ylabel('f (y)')
plt.show()

### Properties of the GAUSS distribution

- symmetrical, centered around mean
- width is governed by standard deviation
  - about 68 % of the data are within the __$1\sigma$__ interval around the mean
  - about 95 % of the data are within the __$2\sigma$__ interval
  - about 99.7 % of the data are within the __$3\sigma$__ interval

## POISSON distribution
Small, discrete (non-continuous) datasets are often better described by the POISSON distribution.

In [None]:
# POISSON probability mass function P(k) = mean**k * exp(-mean) / k!
# evaluated on the counts k = 0, 1, ..., 10. The support starts at k = 0,
# so that value is included; floats are used to avoid integer overflow in
# mean**k when a large integer mean is tried.
x_val = np.arange(0.0, 11.0, 1.0)

# play with this value to see an effect
# (the mean is the only parameter of the POISSON distribution):
mean = 2

# scipy.special.factorial is used because scipy.misc.factorial
# is no longer provided by current SciPy releases.
exp_val = mean**x_val * np.exp(-mean) / scipy.special.factorial(x_val)

# Markers show the actual (integer) support; the connecting line only guides the eye.
line_p, = plt.plot(x_val, exp_val, 'o-')

plt.title('POISSON distribution')
plt.xlabel('x (x)')
plt.ylabel('f (y)')
plt.grid(True)
plt.show()

In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Draw 10000 normally distributed samples (mean mu, standard deviation sigma)
# and show their normalized histogram.
mu, sigma = 100, 15

# A seeded generator makes the figure identical on every Restart & Run All.
rng = np.random.RandomState(42)
x = mu + sigma * rng.randn(10000)

# the histogram of the data; density=True rescales the bars to unit total area
# (it replaces the `normed` keyword, which current matplotlib no longer accepts)
plt.hist(x, 50, density=True, facecolor='g', alpha=0.75)


plt.xlabel('Smarts')
plt.ylabel('Probability')
plt.title('Histogram of IQ')
plt.text(60, .025, r'$\mu=100,\ \sigma=15$')
plt.axis([40, 160, 0, 0.03])
plt.grid(True)
plt.show()