# COURSE: Master math by coding in Python
# SECTION: Probability theory
# VIDEO: Building distributions from random numbers


### https://www.udemy.com/course/math-with-python/?couponCode=202312
#### INSTRUCTOR: Mike X Cohen (http://sincxpress.com)

This code roughly matches the code shown in the live recording: variable names, order of lines, and parameter settings may be slightly different.

<a target="_blank" href="https://colab.research.google.com/github/mikexcohen/MathWithPython/blob/main/probability/mathWithPython_prob_distributions.ipynb">
  <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/>
</a>

In [None]:
# import libraries
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# Normal (Gaussian) distribution refresher:
# draw standard-normal samples, then rescale and recentre them.

n = 1000      # number of samples
stretch = 2   # multiplies the standard-normal draws (sets the spread)
shift = 5     # added afterwards (sets the centre)

pnts = shift + stretch * np.random.randn(n)
print('Mean of %g, std of %g' % (pnts.mean(), pnts.std()))

# left panel: raw samples in draw order; right panel: 100-bin histogram
fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(6, 3))
ax[0].plot(pnts, 's', alpha=.5)
ax[1].hist(pnts, bins=100)

plt.show()

In [None]:
# Uniform distribution refresher:
# rand() draws from [0, 1); scale to the desired width and recentre on `shift`.

n = 1000      # number of samples
stretch = 2   # total width of the interval
shift = 5     # centre of the interval

pnts = np.random.rand(n) * stretch + shift - .5 * stretch
print('Mean of %g, range of %g' % (pnts.mean(), pnts.max() - pnts.min()))

# top panel: raw samples in draw order; bottom panel: histogram
fig, ax = plt.subplots(nrows=2, ncols=1, figsize=(4, 6))
ax[0].plot(pnts, 's', alpha=.5)

# The bin edges are given explicitly and are NOT equally spaced, so the bar
# heights reflect the differing bin widths as well as the data.
bin_edges = [4, 4.1, 5.2, 5.5, 6]
ax[1].hist(pnts, bins=bin_edges, edgecolor='k')

plt.show()

In [None]:
# Poisson distribution
# Draw integer counts with rate `lam`; for a Poisson distribution the sample
# mean and the sample variance should both come out close to `lam`.
lam = 3.4

# NOTE: n is not assigned in this cell; it carries over from an earlier cell.
pnts = np.random.poisson(lam,n)
print('Mean of %g, variance of %g' %(np.mean(pnts),np.var(pnts)))

# top panel: raw samples in draw order; bottom panel: histogram of the counts
fig,ax = plt.subplots(2,1,figsize=(4,6))
ax[0].plot(pnts,'s',alpha=.5)

# One unit-wide bin per integer value: edges 0,1,...,max+1.
# The edges must extend to max+1 (hence the "+2" in arange): histogram bins are
# half-open except the last one, which is closed on both sides, so stopping the
# edges at max would lump the counts for max-1 and max into a single bar
# (and would produce no valid bin at all if every sample were 0).
ax[1].hist(pnts,bins=np.arange(0,np.max(pnts)+2),edgecolor='w')

plt.show()

In [None]:
# Log-normal distribution
# `shift` and `stretch` are the mean and standard deviation of the underlying
# normal distribution (i.e. of the log of the samples).
shift = np.log(2)
stretch = 1

# NOTE: n is not assigned in this cell; it carries over from an earlier cell.
pnts = np.random.lognormal(mean=shift, sigma=stretch, size=n)
print('Mean of %g, variance of %g' % (pnts.mean(), pnts.var()))

# top panel: raw samples in draw order; bottom panel: 100-bin histogram
fig, ax = plt.subplots(nrows=2, ncols=1, figsize=(4, 6))
ax[0].plot(pnts, 's', alpha=.5)
ax[1].hist(pnts, bins=100)

plt.show()

# Exercise

In [None]:
# Exercise: reshape an arbitrary distribution into a bell-shaped one
# Recipe: rank the data -> rescale the ranks into (-1, 1) -> apply arctanh.
from scipy.stats import rankdata

n = 500

# starting data: 2 raised to standard-normal exponents (all positive, right-skewed)
orig_data = 2 ** np.random.randn(n)

# step 1: replace each value by its rank
trans_data = rankdata(orig_data)

# step 2: rescale to the open interval (-1, 1).
# Dividing by (largest rank + 1) rather than by the largest rank keeps the top
# value strictly below +1, so arctanh never returns infinity.
trans_data = 2 * (trans_data / (np.max(trans_data) + 1)) - 1

# step 3: inverse hyperbolic tangent stretches the tails out
trans_data = np.arctanh(trans_data)


# histograms of the data before (left) and after (right) the transform
fig, ax = plt.subplots(1, 2)

for panel, values in zip(ax, (orig_data, trans_data)):
    panel.hist(values, 30)
    panel.set_xlabel('Value')
    panel.set_ylabel('Count')

plt.show()