In [1]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import scipy.stats as stats

experiment = 'Exp1'

# Importing and selecting data: keep only the rows of the chosen experiment
# and drop the bookkeeping columns that the model does not use.
all_data = pd.read_csv('total_data.csv')
all_data = all_data[all_data['Experiment']==experiment]
all_data = all_data.drop(columns=['Unnamed: 0','TransDate','Test_name','Transect','Transect_Num','Experiment','Dist_Travelled'])

# Importing and selecting meta data
data_summary = pd.read_csv('data_summary.csv')
experiment_meta_data = data_summary[data_summary['Experiment']==experiment]
if experiment_meta_data.empty:
    # Fail with a readable message instead of an IndexError on .values[0]
    raise ValueError("No meta data found for experiment '" + experiment + "' in data_summary.csv")
boat_lat = experiment_meta_data['boat.lat'].values[0]
boat_lon = experiment_meta_data['boat.lon'].values[0]
wind_dir = experiment_meta_data['Wind_Dir'].values[0]
wind_speed = experiment_meta_data['WindSpeed'].values[0]

# Converting lat and lon to distances from boat in downwind and crosswind directions.
# NOTE(review): assumes gps.lat / gps.lon / boat.lat / boat.lon are decimal degrees
# and Wind_Dir is in degrees (it is combined with 90 / 180 offsets) -- confirm.
metres_per_degree = 111000
all_data['dist_lat']=(all_data['gps.lat']-boat_lat)*metres_per_degree
# A degree of longitude shrinks with cos(latitude); without this factor the
# east-west distances are overestimated away from the equator.
all_data['dist_lon']=(all_data['gps.lon']-boat_lon)*metres_per_degree*np.cos(np.deg2rad(boat_lat))

# np.cos / np.sin expect radians, so the degree-valued rotation angles are
# converted before use (passing degrees directly gives meaningless projections).
x_angle = np.deg2rad(90-wind_dir+180)
y_angle = np.deg2rad(180-wind_dir+180)
all_data['x']=all_data['dist_lon']*np.cos(x_angle) + all_data['dist_lat']*np.sin(x_angle)
all_data['y']=all_data['dist_lon']*np.cos(y_angle) + all_data['dist_lat']*np.sin(y_angle)
all_data['z']=all_data['altitudeRelative']



In [2]:
# Gaussian Plume Model for concentration
def C_func(x,y,z,u,a,b,Q,H):
    """Gaussian plume concentration at position (x, y, z).

    The plume variance is modelled as ``a * x**b`` and is used for both the
    y and the z spread. The z term includes the mirrored source at ``-H``.

    Parameters
    ----------
    x, y, z : float or array_like
        Coordinates; they are broadcast against each other.
    u : float
        Wind speed (appears in the denominator).
    a, b : float
        Coefficient and exponent of the variance law ``a * x**b``.
    Q : float
        Source strength (linear scale factor of the result).
    H : float
        Source height.

    Returns
    -------
    numpy scalar or ndarray
        Concentration; exactly 0 wherever ``x <= 0``. The original expression
        produced NaN there (negative base with a fractional exponent) or a
        division by zero, which then contaminated any sum over the data.
    """
    x = np.asarray(x, dtype=float)
    downwind = x > 0
    # Use a harmless placeholder where x <= 0 so no NaN/inf is ever generated;
    # those entries are overwritten with 0 below.
    safe_x = np.where(downwind, x, 1.0)
    variance = a*safe_x**b
    two_var = 2*variance
    C = Q/(two_var*np.pi*u)*(np.exp(-(y**2)/two_var))*(np.exp(-(z-H)**2/two_var)+np.exp(-(z+H)**2/two_var))
    # [()] turns a 0-d result back into a scalar and leaves n-d arrays untouched
    return np.where(downwind, C, 0.0)[()]

In [3]:
# Utility function for traceplots
def traceplots(x, xnames=None, title=None):
    """Draw a trace plot with a side histogram for every column of ``x``.

    Parameters
    ----------
    x : 2-D array, shape (N, d)
        One row per sample, one column per traced quantity.
    xnames : sequence of str, optional
        y-axis label for each column.
    title : str, optional
        Title, set while the last (topmost) trace axes is current.
    """
    _, n_dims = x.shape
    figure = plt.figure()
    left_edge, trace_w, hist_w = 0.1, 0.65, 0.15
    base, row_h = 0.1, 0.8/n_dims

    first_trace = None
    for dim in range(n_dims):
        # Rows are stacked upwards: column 0 sits at the bottom of the canvas
        row_y = base + dim*row_h
        column = x[:, dim]
        trace_box = (left_edge, row_y, trace_w, row_h)
        hist_box = (left_edge + trace_w, row_y, hist_w, row_h)

        if first_trace is None:
            # Bottom row owns the shared x-axis and its label
            ax_trace = figure.add_axes(trace_box)
            ax_trace.plot(column)
            ax_trace.set_xlabel("Sample Count")
            first_trace = ax_trace
        else:
            # Upper rows reuse the bottom x-axis; hide their tick labels
            ax_trace = figure.add_axes(trace_box, sharex=first_trace)
            ax_trace.plot(column)
            plt.setp(ax_trace.get_xticklabels(), visible=False)

        if title is not None and dim == n_dims - 1:
            plt.title(title)
        if xnames is not None:
            ax_trace.set_ylabel(xnames[dim])

        # Horizontal histogram to the right, sharing the trace's y-axis
        ax_hist = figure.add_axes(hist_box, sharey=ax_trace)
        ax_hist.hist(column, orientation='horizontal', bins=50)
        for tick_labels in (ax_hist.get_xticklabels(), ax_hist.get_yticklabels()):
            plt.setp(tick_labels, visible=False)
        lower, upper = ax_hist.get_xlim()
        ax_hist.set_xlim([lower, 1.1*upper])

In [4]:
# Inference parameters.
# sigma_data is the standard deviation used in the squared-error log-likelihood;
# each (k, theta) pair parameterises the prior term of one of a, b and Q.
sigma_data = 0.1
k_a, theta_a = 0.33, 1
k_b, theta_b = 0.86, 1
k_Q, theta_Q = 1.1e-3, 1e-3

# Stacked in the order (a, b, Q); k_0 is also the sampler's default start point.
k_0 = np.array([k_a, k_b, k_Q])
theta_0 = np.array([theta_a, theta_b, theta_Q])

# Model parameters: source height and wind speed fed to the plume model
H = 0
u = wind_speed

# Function for calculating the log likelihood
def log_likelihood(abQ,u,H,all_data,sigma_data):
    """Gaussian log-likelihood (up to an additive constant) of the measurements.

    Parameters
    ----------
    abQ : sequence of 3 floats
        Plume parameters in the order (a, b, Q).
    u, H : float
        Wind speed and source height passed through to ``C_func``.
    all_data : pandas.DataFrame
        Must provide the columns 'x', 'y', 'z' and 'Concentration'.
    sigma_data : float
        Standard deviation of the measurement error.

    Returns
    -------
    float
        ``sum(-(model - measured)**2 / (2 * sigma_data**2))`` over all rows.
    """
    # Pull each column out once and evaluate the model for all rows in a single
    # vectorised call instead of re-extracting the columns on every iteration.
    x = all_data['x'].values
    y = all_data['y'].values
    z = all_data['z'].values
    # NOTE(review): the 0.01**3 factor is presumably a unit conversion -- confirm.
    measured = all_data['Concentration'].values*0.01**3

    modelled = C_func(x,y,z,u,abQ[0],abQ[1],abQ[2],H)
    return np.sum(-(modelled-measured)**2/(2*sigma_data**2))

# Function for calculating the conditional probability for parameters a, b or Q
def log_cond_prob_abQ(abQ,k,theta,log_lhood):
    """Unnormalised log conditional probability of a single parameter value.

    Combines the prior term ``-(k-1)*log(abQ) - abQ/theta`` with the supplied
    log-likelihood.

    Parameters
    ----------
    abQ : float or array_like
        Value of the parameter (a, b or Q) being evaluated.
    k, theta : float
        Parameters of the prior term.
    log_lhood : float
        Log-likelihood of the data at this parameter value.

    Returns
    -------
    numpy scalar or ndarray
        The log probability; ``-inf`` for non-positive ``abQ``, where the
        logarithm is undefined. Previously such values produced NaN plus a
        runtime warning rather than a clean zero-probability result.

    NOTE(review): a Gamma(k, theta) log-density would be
    ``+(k-1)*log(x) - x/theta``; the sign of the first term here is the
    opposite. It is left unchanged -- confirm the intended prior.
    """
    value = np.asarray(abQ, dtype=float)
    positive = value > 0
    # Placeholder keeps np.log quiet for invalid entries; they are masked below
    safe = np.where(positive, value, 1.0)
    log_p = -(k-1)*np.log(safe) - safe/theta + log_lhood
    # [()] turns a 0-d result back into a scalar and leaves n-d arrays untouched
    return np.where(positive, log_p, -np.inf)[()]

# MCMC Sampler for a, b or Q
def sample_abQ(abQ,k,theta,u,H,all_data,sigma_data,ss,ab_or_Q):
    """One random-walk Metropolis update of a single parameter (a, b or Q).

    Parameters
    ----------
    abQ : sequence of 3 floats
        Current parameter vector in the order (a, b, Q); it is not modified.
    k, theta : float
        Prior parameters of the component being updated.
    u, H : float
        Wind speed and source height for the plume model.
    all_data : pandas.DataFrame
        Measurements passed to ``log_likelihood``.
    sigma_data : float
        Measurement standard deviation.
    ss : sequence of floats
        Proposal standard deviation for each component.
    ab_or_Q : int
        Index of the component to update (0 = a, 1 = b, 2 = Q).

    Returns
    -------
    float
        The proposed value if accepted, otherwise the current value.
    """
    # Set current and proposed values for a, b or Q
    current_abQ = abQ[ab_or_Q]
    proposed_abQ = stats.norm.rvs(loc=current_abQ, scale=ss[ab_or_Q])

    # The likelihood of the proposal has to be evaluated at the parameter
    # vector with the proposed component substituted in. Evaluating it at the
    # unchanged vector makes the likelihood cancel in the acceptance ratio,
    # so the data would never influence the chain.
    proposed_vec = np.array(abQ, dtype=float)
    proposed_vec[ab_or_Q] = proposed_abQ

    # Calculating the conditional probability of current and proposed a,b or Q
    log_l_prop_abQ = log_likelihood(proposed_vec,u,H,all_data,sigma_data)
    log_p_prop_abQ = log_cond_prob_abQ(proposed_abQ,k,theta,log_l_prop_abQ)

    log_l_curr_abQ = log_likelihood(abQ,u,H,all_data,sigma_data)
    log_p_curr_abQ = log_cond_prob_abQ(current_abQ,k,theta,log_l_curr_abQ)

    # Acceptance criteria, compared in log space so a large positive log ratio
    # cannot overflow in exp(). A NaN ratio compares False and is rejected.
    log_accept = log_p_prop_abQ - log_p_curr_abQ
    if np.log(np.random.uniform(low=0, high=1)) < log_accept:
        return proposed_abQ
    return current_abQ

# Full MCMC sampler
def sample_process(N_samples,ss,u,H,all_data,sigma_data,abQ_init=k_0):
    """Run the component-wise MCMC sampler for (a, b, Q).

    Each sweep updates a, then b, then Q with ``sample_abQ``. Every update
    conditions on the most recent values of the other components.

    Parameters
    ----------
    N_samples : int
        Number of sweeps (one stored sample per sweep).
    ss : sequence of floats
        Proposal standard deviations for a, b and Q.
    u, H : float
        Wind speed and source height for the plume model.
    all_data : pandas.DataFrame
        Measurements passed on to the likelihood.
    sigma_data : float
        Measurement standard deviation.
    abQ_init : array_like of 3 floats, optional
        Starting point; it is copied and never modified.

    Returns
    -------
    abQ_samples : list of ndarray
        State after each sweep.
    abQ_means : list of ndarray
        Running mean of the samples after each sweep.
    abQ : ndarray
        Final state of the chain.
    """
    abQ_samples = []
    abQ_means = []

    # Work on a private copy so in-place updates never touch the caller's array
    # (or the shared default argument).
    abQ = np.array(abQ_init, dtype=float)
    running_sum = np.zeros_like(abQ)
    # (k, theta) prior parameters in the component order a, b, Q
    priors = ((k_a, theta_a), (k_b, theta_b), (k_Q, theta_Q))

    # Run sampler
    for count in range(1, N_samples+1):
        print('Running Sample '+str(count))
        # Write each accepted value back immediately so the next component is
        # sampled conditional on it, rather than on the previous sweep's state.
        for idx, (k, theta) in enumerate(priors):
            abQ[idx] = sample_abQ(abQ,k,theta,u,H,all_data,sigma_data,ss,idx)

        abQ_samples.append(abQ.copy())
        # Incremental running mean avoids re-averaging the full history each sweep
        running_sum += abQ
        abQ_means.append(running_sum/count)

    return abQ_samples, abQ_means, abQ

# Proposal standard deviations for a, b and Q respectively
ss = [0.1, 0.1, 1e-4]
# Number of sweeps for this run (the author's alternative setting was 10000)
N_samples = 50

# Seed NumPy's global generator before sampling
np.random.seed(117)
abQ_samples, abQ_means, abQ = sample_process(
    N_samples=N_samples,
    ss=ss,
    u=u,
    H=H,
    all_data=all_data,
    sigma_data=sigma_data,
)

Running Sample 1
Running Sample 2
Running Sample 3


In [None]:
# Trace plots of the raw samples first, then of their running means
for chain in (abQ_samples, abQ_means):
    traceplots(np.array(chain))

In [None]:
# Horizontal slice of the plume at a fixed height Z, evaluated with the final
# running-mean values of (a, b, Q)
abQ_mean = abQ_means[-1]
Z = 5

# 101-point axes: x from 0.1 to 200, y from -20 to 20, z from 0 to 400
x, y, z = (np.linspace(start, stop, 101) for start, stop in ((0.1, 200), (-20, 20), (0, 400)))
X, Y = np.meshgrid(x, y)

C = C_func(X, Y, Z, u, *abQ_mean[:3], H)
plt.pcolor(X, Y, C, shading='auto')
plt.colorbar()
print(abQ_mean)


In [None]:
# Calculating the RMSE of this new model based on the data
def RMSE_func(abQ,u,H,all_data,sigma_data):
    """Root-mean-square error between modelled and measured concentrations.

    Parameters
    ----------
    abQ : sequence of 3 floats
        Plume parameters in the order (a, b, Q).
    u, H : float
        Wind speed and source height passed to ``C_func``.
    all_data : pandas.DataFrame
        Must provide the columns 'x', 'y', 'z' and 'Concentration'.
    sigma_data : float
        Unused; kept so existing calls keep working.

    Returns
    -------
    float
        ``sqrt(mean((model - measured)**2))``; NaN when ``all_data`` has no
        rows. The value is also printed.
    """
    x = all_data['x'].values
    y = all_data['y'].values
    z = all_data['z'].values
    # NOTE(review): the 0.01**3 factor is presumably a unit conversion -- confirm.
    measured = all_data['Concentration'].values*0.01**3

    residuals = C_func(x,y,z,u,abQ[0],abQ[1],abQ[2],H)-measured
    if all_data.shape[0] == 0:
        # The mean of zero residuals is undefined
        RMSE = float('nan')
    else:
        # Average the squared errors before taking the root. The plain sum
        # grows with the number of rows and is not an RMSE.
        RMSE = np.sqrt(np.mean(residuals**2))
    print('RMSE = '+str(RMSE))
    return RMSE

# Error of the plume model evaluated at the mean parameter values
RMSE_func(abQ=abQ_mean, u=u, H=H, all_data=all_data, sigma_data=sigma_data)

# # saving_samples = pd.DataFrame({'a':abQ_samples[:,0],'b':abQ_samples[:,1],'Q':abQ_samples[:,2]})
# saving_samples = pd.DataFrame(abQ_samples,columns=['a','b','Q'])
# saving_samples.to_csv('samples.csv')

In [None]:
# Print the values of a, b and Q
def print_vals(abQ_samples):
    """Print the mean and standard deviation of every sampled parameter.

    ``abQ_samples`` is converted to a 2-D array with one row per sample.
    Its columns are labelled 'a', 'b' and 'Q' in that order, and one line
    ``<name> = <mean> +/- <std>`` is printed per column.
    """
    samples = np.array(abQ_samples)
    labels = ['a','b','Q']
    n_columns = samples.shape[1]
    for col in range(n_columns):
        label = labels[col]
        column = samples[:, col]
        centre = str(np.mean(column))
        spread = str(np.std(column))
        print(label+' = '+centre+' +/- '+spread)
        
# Summarise the sampled chain as mean +/- standard deviation per parameter
print_vals(abQ_samples=abQ_samples)