In [1]:
import numpy as np
from numpy.linalg import eig
from numpy import mean
from scipy import stats
from math import factorial

def variance(x):
    mean = np.mean(x)
    total = 0
    for element in x:
        total += (element - mean)**2
    return total/(len(x)-1)

def std(x):
    return np.sqrt(variance(x))

def skew(x):
    var = variance(x)
    mean = np.mean(x)
    total = 0
    for element in x:
        total += (element - mean)**3
    return total / (len(x) * (var**(3/2)))

def covariance(x, y):
    mean_x, mean_y = np.mean(x), np.mean(y)
    total = 0
    for i in range(len(x)):
        total += (x[i] - mean_x)*(y[i] - mean_y)
    
    return total/(len(x)-1)

def spearman(x, y):
    """Calculates Spearman Rank Coefficient for two arrays of data,
    x and y."""
    sorted_x_idx = stats.rankdata(x)
    sorted_y_idx = stats.rankdata(y)
    total = 0
    N = len(x)
    for i in range(N):
        total += (sorted_x_idx[i] - sorted_y_idx[i])**2
        
    return (1 - (6*total) / (N * (N**2 - 1)))

def binomial(r, p, n):
    frac = (factorial(n) / (factorial(r)*factorial(n-r)))
    return p**r * (1 - p)**(n - r) * frac

def poisson(r, lam):
    return (lam**r * np.exp(-lam)) / factorial(r)

def gaussian(x, mu, sigma):
    return (1 / (sigma*np.sqrt(2*np.pi))) * np.exp(-(x - mu)**2/2*sigma**2)

def Gamma(n):
    return factorial(n-1)

def chi_squared(chisq, nu):
    return (2**(-nu/2) * chisq**(nu/2 - 1) * np.exp(-chisq/2)) / Gamma(nu/2)

def uniform(x, N):
    return 1/N
    

In [2]:
omega = [.5, .9, 1.2, 1.5, 1.8, 2, 3.4, 4.1, 5, 5.1, 7.5, 8.5]
k = [.7, .8, 1.1, 1.2, 1.5, 1.8, 1.9, 2, 2.5, 2.6, 2.9, 3.5]

In [3]:
V = np.array([[variance(omega), covariance(omega, k)],
             [covariance(omega, k), variance(k)]])

eigenvalues, eigenvectors = eig(V)
eigenvectors

array([[ 0.95135429, -0.30809905],
       [ 0.30809905,  0.95135429]])

In [4]:
omega = [1, 2.5, 3, 4, 4.5, 6]
print(np.mean(omega))
print(variance(omega))
print(std(omega))
print(skew(omega))

3.5
3.0
1.7320508075688772
0.0


In [5]:
omega = [.5, 1, 1.5, 1.6, 3, 2.1, 2.5]
print(np.mean(omega))
print(variance(omega))
print(std(omega))
print(skew(omega))

1.7428571428571427
0.7428571428571429
0.8618916073713346
0.026638133469581587


In [6]:
x = [.1, .22, .25, .5, .55, .7, .8, .9, 1, 1.11, 1.12]
y = [1, 1.1, 1.1, 1.2, 1.3, 1.4, 1.4, 1.3, 1.6, 1.5, 1.4]
z = [.1, -.2, .3, .4, .1, -.4, .1, -.1, .6, .7, -.3]

covariance(y,z)

0.017000000000000008

In [7]:
.017 / (std(y) * std(z))

0.25806092613071363

In [8]:
x = [0, .2, .3, .4, .5, .7, .8, .9, 1, .9, 1.1]
y = [.9, 1.1, 1.2, 1.2, 1.3, 1.4, 1.5, 1.3, 1.6, 1.5, 1.3]
z = [-.1, -.2, .1, .2, .1, 0, .2, .1, .5, .6, .3]

covariance(x, y) / (std(x) * std(y))

0.8332164480906891

In [9]:
x = [.5, .7, .8, .9, 1.1, 1.3]
y = [.9, .8, 1.1, 1.2, 1.2, 1]

spearman(x, y)

0.5857142857142856

In [10]:
x = [.1, .3, .2, 0, .4, .5, .1, .2, .6, .5]
y = [.5, .7, .2, .3, .8, .1, .9, 0, .4, .6]

spearman(x, y)

-0.027272727272727337

In [11]:
binomial(3, .4, 5)

0.23040000000000005

In [12]:
poisson(0, 3)

0.049787068367863944

In [13]:
poisson(1, 4), poisson(5, 4)

(0.07326255555493671, 0.15629345185053165)

In [14]:
poisson(1, 4) / poisson(5, 4)

0.46875000000000006

In [15]:
gaussian(1, 0, 1) / gaussian(0, 0, 1)

0.6065306597126334

## Chi-squared

In [2]:
def chi_squared(chisq, nu):
    return (2**(-nu/2) * chisq**(nu/2 - 1) * np.exp(-chisq/2)) / Gamma(nu/2)

In [17]:
chi_squared(5,4)

0.1026062482798735

In [18]:
chi_squared(1, 10)

0.0007897534631674915

In [19]:
chi_squared(2, 8)

0.030656620097620196

In [20]:
stats.chi2.sf(8, 2)

0.018315638888734182

# Uncertainties

## Weighted Averages

In [3]:
def weighted_av_single(x, errors):
    """Calculates the weighted average of a set of measurements for a 
    single observation.
    
    Returns the weighted average and the error.
    
    -+-+-+-+-+-
    PARAMETERS
    -+-+-+-+-+-
    
    x: list containing the measurements
    errors: list containing the errors.
    """
    
    total_top = 0
    total_bottom = 0
    
    for i in range(len(x)):
        var = errors[i]**2
        total_top += x[i] / var
        total_bottom += 1/var
        
    mean_x = total_top/total_bottom
    sigma_x = 1/np.sqrt(total_bottom)
        
    return mean_x, sigma_x

In [4]:
weighted_av_single([.655, .59, .789], [.024, .08, .071])

(0.6628564627744128, 0.021870089601599807)

In [5]:
weighted_av_single([1, 2], [.5, .5])

(1.5, 0.35355339059327373)

In [6]:
covariance([2, 1.5], [0, -1])

0.25

In [7]:
def weighted_av_set(measurements, errors, correlation, precision=None):
    """Calculates the weighted average of a set of measurements for a 
    set of observables.
    
    Returns the weighted average and the error.
    
    CURRENTLY ONLY BUILT FOR 2D.
    
    -+-+-+-+-+-
    PARAMETERS
    -+-+-+-+-+-
    
    measurements: list of lists containing the measurements for each
                    experiment
    errors: list of lists containing the errors for each experiment
    correlation: list of lists containing the correlations for each 
                    experiment
    """
    V_list = []
    x_list = []
    size = len(measurements)
    
    for i in range(size):
        corr_mat = np.ones((size, size))
        cov_mat = np.ones((size, size))*correlation[i][0]*errors[i][0]*errors[i][1]
        for j in range(len(errors[i])):
            cov_mat[j, j] = errors[i][j]**2
            
            #corr_mat[j, -j-1] = correlation[i][0]

        inv_cov = np.linalg.inv(cov_mat)
        
        V_list.append(inv_cov)
        x_list.append(inv_cov @ measurements[i])
            
    V = np.linalg.inv(sum(V_list))
    x = V @ (sum(x_list))
    
    if precision == None:
        return x, V
    
    else:
        return np.round(x, precision), np.round(V, precision)
            
            


# format: [expt1], [expt2], ...
measurements = [[2, 0], [1.5, -1]]
errors = [[.2, .5], [.5, .3]]
correlation = [[.5], [.1]]

weighted_av_set(measurements, errors, correlation)

(array([ 1.81907285, -0.73774834]),
 array([[0.02930861, 0.01299338],
        [0.01299338, 0.06615894]]))

In [26]:
# format: [expt1], [expt2], ...
measurements = [[2, 0], [1.5, .5]]
errors = [[.5, .1], [.5, .2]]
correlation = [[0], [.3]]

weighted_av_set(measurements, errors, correlation)

(array([1.58506224, 0.10995851]),
 array([[0.12033195, 0.00311203],
        [0.00311203, 0.00792531]]))

In [27]:
A = [3, 2]
B = [0, 1]
C = [0, 1]
weighted_av_single(B, [1, 1])

(0.5, 0.7071067811865475)

In [10]:
def hypothesis_ratio(data, hypotheses=[gaussian, uniform], params=[[0, 1], [10]]):
    """Calculates the product of all ratios between the two hypotheses
    for a given set of data.
    
    -+-+-+-+-+-
    PARAMETERS
    -+-+-+-+-+-
    
    data: the data to which the two hypotheses apply
    hypotheses: the two distributions that we wish to compare for the data
    params: the parameters for the given hypotheses"""
    h0, h1 = hypotheses
    PI = 1
    for w in data:
        PI *= h0(w, *params[0]) / h1(w, *params[1])
    
    return PI

In [41]:
omega = [-1.0, -0.9, -0.7, -0.1, 0.0, 0.1, 0.2, 0.5, 0.6, 1.0]
hypothesis_ratio(omega)

140289.8050224156

In [42]:
omega = {-4, -3, -2, -1, 0, 1, 2, 3, 4, 5}
hypothesis_ratio(omega)

3.5611082650219095e-13

In [43]:
omega = {0, .1, .15, .2, .21}
hypothesis_ratio(omega, hypotheses=[uniform, gaussian], params=[[len(omega)], (.15, .9)])

0.018935743084422987

In [12]:
0.05 / 100

0.0005

In [19]:
def bayes_theorem(p_c_a, p_c_b, p_a):
    """
    Calculates Bayes' Theorem for a scenario with two possible main
    outcomes.
    
    -+-+-+-+-+-
    PARAMETERS
    -+-+-+-+-+-
    
    p_c_a: probability of event c given a is true
    p_c_b: probability of event c given b is true
    p_a: probability of event a being true
    
    -+-+-+-+-+-
    EXAMPLE
    -+-+-+-+-+-
    
    Consider a test that correctly predicts infection at a rate of 98%,
    and incorrectly predicts infection at a rate of 0.05%. The percentage
    of infected individuals in the population is 0.01%. The probability
    that someone is infected given a positive test result can be found
    using the following logic:
    
    - Event a is infection
    - Event b is health
    - Event c is a positive test
    
    bayes_theorem(0.98, 0.0005, 0.0001)
    
    """
    
    p_b = 1 - p_a
    p_c = p_c_a*p_a + p_c_b*p_b
    
    p_a_c = (p_c_a*p_a) / (p_c)
    
    return p_a_c

In [20]:
bayes_theorem(.98, .0005, .0001)

0.1638933021155615

In [21]:
bayes_theorem(.9999, .0001, .001)

0.9091653027823241

In [23]:
bayes_theorem(.9999, 1/1_000_000, .1)

0.9999909991809255

In [24]:
omega = [0, 1, 2, 4, 6]
hypothesis_ratio(omega, [poisson, poisson], [[3], [4]])

3.525890604554078

In [26]:
omega = [1, 2, 3, 5, 7]
hypothesis_ratio(omega, [poisson, poisson], [[3], [4]])

0.8367103680728915

In [27]:
omega = [1, 2, 3, 4, 5]
hypothesis_ratio(omega, [binomial, gaussian], [[.4, len(omega)], [3, 1]])

0.23838255092637145