In [1]:
# import modules
import numpy as np
from scipy import stats 
import matplotlib.pyplot as plt
from matplotlib import cm
from matplotlib import rc
from mpl_toolkits.axes_grid1.inset_locator import (inset_axes, InsetPosition, mark_inset)
import pandas as pd
import math
%matplotlib inline

In [2]:
# Load the posterior sample table written by Dakota into a dataframe.
# The file is parsed as whitespace-delimited text.
Filename = "posterior_SM_test1.dat"  # alternative input: "RPM_dakota_w.dat"
# r"\s+" matches any run of spaces and/or tabs, so the former alternation
# ("\s+|\t+|\s+\t+|\t+\s+") was redundant; the raw string also avoids the
# invalid "\s" escape-sequence warning raised by newer Python versions.
# engine='python' is kept so the separator is still applied as a regex split.
DF = pd.read_csv(Filename, sep=r"\s+", engine='python')
DF.head()

Unnamed: 0,mcmc_id,interface,FR,K,Y,rmse,CRNrmse
0,1,NO_ID,2.0,-4.0,-1.4,0.780884,3530.62
1,2,NO_ID,2.0,-4.0,-1.4,0.780884,3530.62
2,3,NO_ID,1.641706,-3.478398,-1.58879,1.83196,1549.09
3,4,NO_ID,1.855062,-3.408226,-1.382437,0.839622,2299.01
4,5,NO_ID,1.855062,-3.408226,-1.382437,0.839622,2299.01


In [3]:
# Flag chain entries whose rmse equals that of the row directly above them:
# an unchanged rmse is taken to mean the proposal was rejected and the previous
# sample was repeated. The first row has no predecessor, so it is never flagged.
# These flags are used to drop repeated samples before building posteriors.
# NOTE(review): equality of rmse alone is the test; this assumes two distinct
# consecutive accepted samples never share the same rmse value — confirm.
DF['Reject'] = DF['rmse'] == DF['rmse'].shift(1)

DF.head()

Unnamed: 0,mcmc_id,interface,FR,K,Y,rmse,CRNrmse,Reject
0,1,NO_ID,2.0,-4.0,-1.4,0.780884,3530.62,False
1,2,NO_ID,2.0,-4.0,-1.4,0.780884,3530.62,True
2,3,NO_ID,1.641706,-3.478398,-1.58879,1.83196,1549.09,False
3,4,NO_ID,1.855062,-3.408226,-1.382437,0.839622,2299.01,False
4,5,NO_ID,1.855062,-3.408226,-1.382437,0.839622,2299.01,True


In [4]:
# Load the Dakota file containing the calculated log-likelihoods.
Like_file = "raw_chain_loglikelihood_SM_test1.m"
# r"\s+" matches any run of spaces and/or tabs, so the longer alternation was
# redundant; the raw string also avoids the invalid "\s" escape-sequence
# warning. engine='python' is kept so the separator is applied as a regex.
DF_Like = pd.read_csv(Like_file, sep=r"\s+", engine='python')
DF_Like.head()

Unnamed: 0,Like
0,-6.57974
1,-6.57974
2,-6.41606
3,-4.35371
4,-4.35371


In [5]:
# Add the log-likelihood values as a new 'Like' column of the posterior dataframe.
# Assigning a Series to a column aligns on the index, so a row-count mismatch
# between the two files would silently produce NaN or dropped values; check the
# lengths first and fail loudly instead.
if len(DF_Like) != len(DF):
    raise ValueError(
        f"Row count mismatch: posterior table has {len(DF)} rows, "
        f"likelihood table has {len(DF_Like)} rows"
    )
DF['Like'] = DF_Like.Like
DF.head(10)

Unnamed: 0,mcmc_id,interface,FR,K,Y,rmse,CRNrmse,Reject,Like
0,1,NO_ID,2.0,-4.0,-1.4,0.780884,3530.62,False,-6.57974
1,2,NO_ID,2.0,-4.0,-1.4,0.780884,3530.62,True,-6.57974
2,3,NO_ID,1.641706,-3.478398,-1.58879,1.83196,1549.09,False,-6.41606
3,4,NO_ID,1.855062,-3.408226,-1.382437,0.839622,2299.01,False,-4.35371
4,5,NO_ID,1.855062,-3.408226,-1.382437,0.839622,2299.01,True,-4.35371
5,6,NO_ID,1.198227,-4.001504,-1.469654,1.61286,2213.28,False,-6.37413
6,7,NO_ID,1.198227,-4.001504,-1.469654,1.61286,2213.28,True,-6.37413
7,8,NO_ID,1.198227,-4.001504,-1.469654,1.61286,2213.28,True,-6.37413
8,9,NO_ID,1.198227,-4.001504,-1.469654,1.61286,2213.28,True,-6.37413
9,10,NO_ID,1.198227,-4.001504,-1.469654,1.61286,2213.28,True,-6.37413


In [6]:
# Keep only the rows that were not flagged as repeats, then renumber the
# remaining rows from zero (the old index is discarded, not kept as a column).
# 'Reject' is a boolean column, so negating it selects the non-repeated rows.
new_DF = DF.loc[~DF['Reject']].reset_index(drop=True)
new_DF.head(10)

Unnamed: 0,mcmc_id,interface,FR,K,Y,rmse,CRNrmse,Reject,Like
0,1,NO_ID,2.0,-4.0,-1.4,0.780884,3530.62,False,-6.57974
1,3,NO_ID,1.641706,-3.478398,-1.58879,1.83196,1549.09,False,-6.41606
2,4,NO_ID,1.855062,-3.408226,-1.382437,0.839622,2299.01,False,-4.35371
3,6,NO_ID,1.198227,-4.001504,-1.469654,1.61286,2213.28,False,-6.37413
4,11,NO_ID,2.86735,-2.658376,-1.472506,1.35962,1714.91,False,-4.88589
5,13,NO_ID,2.297888,-2.456457,-1.418616,1.15005,1786.63,False,-4.37228
6,24,NO_ID,1.8271,-2.967671,-1.361243,0.975747,1713.89,False,-3.87017
7,27,NO_ID,2.63269,-3.091843,-1.547736,0.945069,1926.82,False,-4.05546
8,30,NO_ID,2.690065,-3.749509,-1.657428,0.915836,2375.45,False,-4.62127
9,34,NO_ID,2.8765,-2.656612,-1.652666,1.16576,1625.02,False,-4.23424


In [7]:
# Write the filtered table to disk as CSV, then preview the first rows.
new_DF.to_csv(path_or_buf="Accepted_Final_SM_test1.csv")
new_DF.head()

Unnamed: 0,mcmc_id,interface,FR,K,Y,rmse,CRNrmse,Reject,Like
0,1,NO_ID,2.0,-4.0,-1.4,0.780884,3530.62,False,-6.57974
1,3,NO_ID,1.641706,-3.478398,-1.58879,1.83196,1549.09,False,-6.41606
2,4,NO_ID,1.855062,-3.408226,-1.382437,0.839622,2299.01,False,-4.35371
3,6,NO_ID,1.198227,-4.001504,-1.469654,1.61286,2213.28,False,-6.37413
4,11,NO_ID,2.86735,-2.658376,-1.472506,1.35962,1714.91,False,-4.88589
