In [834]:
import numpy as np
from scipy.special import gamma
import matplotlib.pyplot as plt
import random
import scipy
from sklearn.preprocessing import normalize
%matplotlib tk

In [620]:
# Loading in the data
data = np.loadtxt('MineRecord_teamC.txt',dtype=int)

## Part B

In [891]:
def lambda1(n0,data):
    '''
    Picking out a random lambda 1 value from the Gamma(a1,b1)
    '''
    # Calculating a & b values
    x = data[:n0]
    a = 3 + np.sum(x)
    b = 1/(1 + n0)
    
    # Picking a lambda 1 values
    lambda1 = np.random.gamma(a,b)
    return lambda1

def lambda2(n0,data):
    '''
    Picking out a random lambda 2 from Gamma(a2,b2)
    '''
    # Calculating a & b values
    x = data[n0:]
    a = 3 + np.sum(x)
    b = 1/(1 + (112-n0))
    
    # Picking a lambda 2 value
    lambda2 = np.random.gamma(a,b)
    return lambda2

    
def lnprob(n0,data):
    '''
    Generates a new n0 value given the data and n0 value
    '''
    # splits the data up
    x1 = data[:int(n0)]
    x2 = data[int(n0):]
    
    # Gets lambda values
    l1 = lambda1(n0,data)
    l2 = lambda2(n0,data)
    
    # Creates a probability distribution
    prob_vals = []
    years = np.array(np.arange(1,113))
    for i in years:
        xx1 = data[:i]
        xx2 = data[i:]
        prob_vals.append(np.exp((np.log(l1)*np.sum(xx1))-(i*l1)+(np.log(l2)*np.sum(xx2))-(112-i)*l2))
    prob_vals = np.array((prob_vals-min(prob_vals))/(max(prob_vals)-min(prob_vals)))
    prob_n0 = prob_vals[n0-1]
    
    # Picks a new n0
    new_n0 = random.choices(years,weights=prob_vals)

    return int(np.array(new_n0)),prob_n0,l1,l2
    
    

In [991]:
def mcmc(n0_int,data,steps=100):
    '''
    MCMC to figure out the year regulations changed using Gibbs Sampling
    
    Inputs
    ------
    Intial year n0 guess
    Data
    Number of steps (default = 100)    
    
    Returns
    -------
    The mean of n0, lambda 1, and lambda 2 as a 1D array
    The arrays of n0, lamda 1, and lambda 2 as a 3D array
    
    
    
    Algorithm
    ---------
    From n0 picks lambda 1 and 2
    then using those values
    calculates the probability of n0
    choose another n0 value
    repeats 
    ''' 
    n0_vals = []
    n0_prob = []
    l1_vals = []
    l2_vals = []
    n0 = n0_int
    
    for i in range(steps):
        # Calculating probability of n0 being the year
        n0,prob,l1,l2 = lnprob(n0,data)
            
        # appending values
        n0_vals.append(n0)
        n0_prob.append(prob)
        l1_vals.append(l1)
        l2_vals.append(l2)

        
    # Changing into numpy arrays
    n0_vals = np.array(n0_vals)
    n0_prob = np.array(n0_prob)
    l1_vals = np.array(l1_vals)
    l2_vals = np.array(l2_vals)
    
    # Plotting data
    years = np.arange(1851,1963)
    d = plt.figure(1)
    plt.plot(years,data,marker='o',color='black')
    plt.xlabel('Year')
    plt.ylabel('Number of Accidents')
    d.show()
    
    # Plotting the histogram
    h = plt.figure(2)
    plt.hist(n0_vals,color='black',histtype='step',bins=100,density=True,weights=n0_prob)
    h.show()
    
    # Plotting lambda 1 and lambda 2
    l = plt.figure(3)
    plt.scatter(l1_vals,l2_vals,marker='o',color='black',s=.5)
    plt.xlabel('$\lambda 1$')
    plt.ylabel('$\lambda 2$')
    plt.show()

    return np.array([np.mean(n0_vals),np.mean(l1_vals),np.mean(l2_vals)]), [n0_vals,l1_vals,l2_vals]   
    

In [1001]:
means,values = mcmc(3,data,steps=10**4)

## Part C

The already saved the plots

## Part D

In [1002]:
means

array([53.0429    ,  3.51485443,  1.44935242])

## Part E

In [995]:
values[0]

array([55, 52, 55, ..., 53, 54, 54])

In [984]:
def partition(arr,bins):
    '''
    Breaks up an array into sub-arrays.
    If the array doesn't perfect break up, will just take the remainder at the end
    '''
    # makes list for N number of parameteres
    if len(arr) != 1:
        lists = [[] for _ in range(len(arr))]
    else:
        lists = []
        
    # Loops through each parameter and bins it up
    for i in range(len(lists)):
        # Putting the data into one array
        data = arr[i]
        
        # Placing the new array
        new_arr = lists[i]

        # How many time we can fully bin the array
        iterations = len(data)//bins

        # Breaking up the array
        for j in range(iterations):
            new_arr.append(data[:bins])
            data = data[bins:]
        
        # The remainder of the array is put at the end
        new_arr.append(arr[i])
        
        # Putting it back into the list 
        lists[i] = np.array(new_arr)
        
    return np.array(lists)

def meanError(arr,bins):
    '''
    Takes in an array resulting from an MCMC and caluclates the error associated with it 
    '''
    # This splits the array
    new_arr = partition(arr,bins)
    
    # Makes new arrays for the means
    means = [[] for _ in range(len(new_arr))]
    err = [[] for _ in range(len(new_arr))]
    
    # Takes the mean of each sub array and puts it into the mean array
    # Calculates the error through usual error estiamte: sqrt(variance/N)
    for i in range(len(means)):
        data = np.array(new_arr[i])
        for j in range(len(data)):
            means[i].append(np.mean(data[j]))
        err[i].append(np.sqrt(np.var(np.array(means[i]))/np.array(means[i]).shape[0]))
    err = np.array(err)
    means = np.array(means)
    
    # Calculates total error
    # error = sqrt(sigma_1**2+sigma_2**2+...+sigma_n**2)
    error = []
    for i in range(len(err)):
        error.append(err[i]**2)
    error = np.sqrt(np.sum(np.array(error)))/len(error)
    
    return err,means


In [976]:
test = partition(values,bins=1000)

In [990]:
test[2]

array([[[1.17130848, 1.56117329, 1.39189058, ..., 1.64295891,
         1.77695515, 1.41156922]]])

In [998]:
err, mean = meanError(values,bins=1000)

In [999]:
err, mean

(array([[0.0223357 ],
        [0.00214986],
        [0.00129798]]),
 array([[53.106     , 52.932     , 52.975     , 52.978     , 52.984     ,
         52.945     , 53.14      , 52.959     , 53.111     , 53.111     ,
         53.0241    ],
        [ 3.50664115,  3.51811607,  3.51719209,  3.52080286,  3.53275406,
          3.51807315,  3.52683166,  3.5195621 ,  3.52372231,  3.50779956,
          3.5191495 ],
        [ 1.44731598,  1.45712724,  1.44466654,  1.44959421,  1.44159849,
          1.44670035,  1.44601799,  1.45503594,  1.44547091,  1.45032702,
          1.44838547]]))