# Python and Probability Notes

This section introduces you to some familiar functions and code for probability and random variables.

## Visualizing a geometric series 

In [None]:
# Bar chart of the geometric sequence p**n for n = 1, ..., 9
import numpy as np
import matplotlib.pyplot as plt

p = 1 / 2                 # common ratio (a probability of 0.5)
n = np.arange(1, 10)      # exponents 1 through 9 (the upper bound is excluded)
X = p ** n                # element-wise power: X[i] = p ** n[i]
plt.bar(n, X)

## Compute N choose K

In [None]:
# Python code to compute (N choose K) and K!
from scipy.special import comb, factorial  # combinatorial helpers from scipy.special

n = 10    # size of the set
k = 2     # number of items chosen

n_choose_k = comb(n, k)       # binomial coefficient "n choose k"
k_factorial = factorial(k)    # k!

# A notebook only echoes a bare expression when it is the last line of the
# cell, so the value of comb(n, k) used to be silently discarded. Keep both
# results in variables and print them explicitly.
print(n_choose_k)
print(k_factorial)

## Inner product of two vectors 

In [None]:
# Inner product of two vectors: z = sum_i x[i] * y[i]
import numpy as np

x = np.array([1, 0, -1])
y = np.array([3, 2, 0])
z = x @ y      # for 1-D arrays the @ operator is the inner product
print(z)

## Norm of a vector 

In [None]:
# Euclidean (2-)norm of a vector: sqrt(sum_i x[i]**2)
import numpy as np

x = np.array([1, 0, -1])
x_norm = np.linalg.norm(x, ord=2)   # ord=2 spells out the default vector norm

## Weighted Norm of a vector 

In [None]:
# Python code to compute the weighted norm ||x||_W = sqrt(x^T W x)
import numpy as np

x = np.array([1, 0, -1])

# Example weight matrix. It is diagonal with positive entries, so x^T W x is
# non-negative and the square root is real. Replace W with the weights you need.
W = np.diag([1.0, 2.0, 3.0])

# The cell previously repeated the plain norm np.linalg.norm(x) and never
# applied any weights; the quadratic form below is the weighted version.
x_norm = np.sqrt(x @ W @ x)

## Matrix Inverse

In [None]:
# Invert the 2x2 matrix X^T X built from a 3x2 matrix X
import numpy as np

X = np.array([[1, 3],
              [-2, 7],
              [0, 1]])
XtX = X.T @ X                  # X transpose times X
XtXinv = np.linalg.inv(XtX)    # its inverse
print(XtXinv)

## System of Linear Equations

In [None]:
# Least-squares solution of the overdetermined system X beta = y
import numpy as np

X = np.array([[1, 3],
              [-2, 7],
              [0, 1]])
y = np.array([[2],
              [1],
              [0]])

# lstsq returns a tuple; its first entry is the least-squares solution.
lstsq_result = np.linalg.lstsq(X, y, rcond=None)
beta = lstsq_result[0]
print(beta)

## Generate a histogram 

In [None]:
# Bar chart of the values stored in ch3_data_english.txt, one bar per letter a-z
import numpy as np
import matplotlib.pyplot as plt

f = np.loadtxt('./ch3_data_english.txt')   # presumably 26 per-letter values in percent; verify against the data file
n = np.arange(26)                          # one bar position per letter
plt.bar(n, f / 100)                        # divide by 100 before plotting
ntag = list('abcdefghijklmnopqrstuvwxyz')  # tick labels 'a' through 'z'
plt.xticks(n, ntag)
plt.show()


# Python code to generate the histogram of 100 rolls of a six-sided die
import numpy as np
import matplotlib.pyplot as plt

# randint excludes its upper bound, so (1, 7) yields the six faces 1..6.
# The previous randint(7) drew from 0..6: seven distinct values pushed into
# six bins, which makes the bars unequal by construction.
q = np.random.randint(1, 7, size=100)

# Keep the 0.5 shift, but give explicit unit-width bin edges 1, 2, ..., 7 so
# that every face has its own bin even if some face never appears in the sample.
plt.hist(q + 0.5, bins=np.arange(1, 8))
plt.show()



## Mean of a vector

In [None]:
# Empirical mean of 10000 random samples
import numpy as np

X = np.random.rand(10000)   # 10000 draws from np.random.rand
mX = X.mean()               # arithmetic mean of the samples

## Mean from PMF

In [None]:
# Expectation of a discrete random variable: E[X] = sum_i x[i] * p[i]
import numpy as np

p = np.array([0.25, 0.5, 0.25])   # probability of each state
x = np.array([0, 1, 2])           # the states
EX = np.dot(p, x)                 # probability-weighted sum of the states


## Binomial Random Variable

In [None]:
# Draw 5000 Binomial(n, p) samples and plot their histogram
import numpy as np
import matplotlib.pyplot as plt

p = 0.5    # success probability of each trial
n = 10     # number of trials per sample
X = np.random.binomial(n=n, p=p, size=5000)
plt.hist(X, bins='auto');   # trailing semicolon suppresses the notebook echo

## Binomial CDF

In [None]:
# Plot the CDF of a Binomial(n, p) random variable at the integers 0..n
import numpy as np
import matplotlib.pyplot as plt
import scipy.stats as stats

p = 0.5
n = 10
rv = stats.binom(n, p)    # frozen binomial distribution
x = np.arange(n + 1)      # support 0, 1, ..., n
F = rv.cdf(x)             # CDF evaluated at every support point

# Markers at the CDF values, with translucent stems dropping to the axis.
plt.plot(x, F, 'bo', ms=10);
plt.vlines(x, 0, F, colors='b', lw=5, alpha=0.5);

## Generate a uniform random variable

In [None]:
# Python code to generate 1000 uniform random numbers on [a, b]
import matplotlib.pyplot as plt   # previously missing: the cell only ran after an earlier cell had imported plt
import scipy.stats as stats

a = 0   # lower end of the interval
b = 1   # upper end of the interval

# scipy parameterizes the uniform distribution as [loc, loc + scale], so the
# width b - a (not b itself) must be passed as the scale. The two coincide
# only when a == 0.
X = stats.uniform.rvs(loc=a, scale=b - a, size=1000)
plt.hist(X);

## Mean, variance, median, mode of a uniform random variable

In [None]:
# Python code to compute empirical mean, var, median, mode
# The imports and the interval are stated here so the cell runs on its own
# instead of relying on names left behind by earlier cells.
import numpy as np
import scipy.stats as stats

a = 0   # lower end of the interval
b = 1   # upper end of the interval

# scipy's uniform lives on [loc, loc + scale]; pass the width b - a as scale.
X = stats.uniform.rvs(loc=a, scale=b - a, size=1000)

M = np.mean(X)        # empirical mean
V = np.var(X)         # empirical variance
Med = np.median(X)    # empirical median
# NOTE(review): for continuous draws repeated values are unlikely, so the
# reported mode is illustrative rather than informative.
Mod = stats.mode(X)

## Probability of a uniform random variable 

In [None]:
 Python code to compute the probability P(0.2 < X < 0.3)
import scipy.stats as stats
a = 0; b = 1;
F = stats.uniform.cdf(0.3,a,b)-stats.uniform.cdf(0.2,a,b)

## Skewness and Kurtosis of a random variable

In [None]:
# Python code to compute skewness and kurtosis
import scipy.stats as stats

# Gamma samples with shape 3 and scale 5. The scale must be passed by keyword:
# gamma.rvs(3, 5, ...) treats the second positional argument as loc, i.e. a
# shift of 5 with scale 1, not a scale of 5.
X = stats.gamma.rvs(3, scale=5, size=10000)

s = stats.skew(X)       # sample skewness
k = stats.kurtosis(X)   # sample kurtosis with stats.kurtosis defaults

## Principles of Linear Regression 

In [None]:
# Least-squares fit of a straight line to noisy synthetic data

import numpy as np
import matplotlib.pyplot as plt

N = 50                                  # number of training points
x = np.random.rand(N)                   # random inputs
a = 2.5                                 # true slope
b = 1.3                                 # true intercept
y = a*x + b + 0.2*np.random.randn(N)    # line plus Gaussian noise scaled by 0.2

# Design matrix with rows [x_i, 1], so that y is approximately X @ theta.
X = np.column_stack((x, np.ones(N)))
lstsq_result = np.linalg.lstsq(X, y, rcond=None)
theta = lstsq_result[0]                 # [estimated slope, estimated intercept]

# Evaluate the fitted line on a dense grid and draw it over the samples.
t = np.linspace(0, 1, 200)
yhat = theta[0]*t + theta[1]
plt.plot(x, y, 'o')
plt.plot(t, yhat, 'r', linewidth=4)