Coding up mass-radius-eccentricty hierarhcial Bayesian model in PyStan.

In [14]:
from __future__ import division, print_function
from matplotlib import rcParams
rcParams["savefig.dpi"] = 100
rcParams["font.size"] = 20
import os
import requests
import pandas as pd
from cStringIO import StringIO
import numpy as np
import pystan


# load data from input files
# TTV amplitudes
inputdata1 = "../data/input/TTVamplitudes.twos.t2.ready.for.jags.3_NAamps_posreal_uncs2.0.txt"
fx1 = open(inputdata1,'r')
datax1 = fx1.read()    
# Get column names
firstlinex1 = datax1.split('\n', 1)[0].split(' ')
firstlinex1 = [w.replace('"', '') for w in firstlinex1]
firstlinex1 = [w.replace('.', '_') for w in firstlinex1]
#print firstlinex1
fx1.close()
# organize data from each file name in list
datax1 = np.genfromtxt(inputdata1, dtype=None, skip_header=1,names=firstlinex1 )

# Outer planet pair data
inputdata2 = "../data/input/Planet_and_Star_data_from_Q1_to_Q12_period_sorting_Pouter_2_sorted.txt"
fx2 = open(inputdata2,'r')
datax2 = fx2.read()    
# Get column names
firstlinex2 = datax2.split('\n', 1)[0].split(' ')
firstlinex2 = [w.replace('"', '') for w in firstlinex2]
firstlinex2 = [w.replace('.', '_') for w in firstlinex2]
#print firstlinex1
fx2.close()
# organize data from each file name in list
datax2 = np.genfromtxt(inputdata2, dtype=None, skip_header=2,names=firstlinex2 )

# Inner planet pair data
inputdata3 = "../data/input/Planet_and_Star_data_from_Q1_to_Q12_period_sorting_Pinner_2_sorted.txt" 
fx3 = open(inputdata2,'r')
datax3 = fx3.read()    
# Get column names
firstlinex3 = datax3.split('\n', 1)[0].split(' ')
firstlinex3 = [w.replace('"', '') for w in firstlinex3]
firstlinex3 = [w.replace('.', '_') for w in firstlinex3]
#print firstlinex1
fx3.close()
# organize data from each file name in list
datax3 = np.genfromtxt(inputdata3, dtype=None, skip_header=3,names=firstlinex3 )

#print(datax3['koi_slogg'])
#print(datax2['kepoi_name'])
#print(datax1['Re_V_ext'])

#### Constants
Ndata = length(inputdata$KOI)
P.ext = inputdata$P.ext
P.int = inputdata$P.int
j = inputdata$j
g = inputdata$g
f = inputdata$f
delta = inputdata$Delta
print(delta)
pi = 3.14
m.sun = 332996.4274 #Msun in Mearth units
r.sun = 110.0
min.radius.s = 0.5*r.sun #put priors over this
max.radius.s = 2.0*r.sun  #put priors over this
min.mass.s = 0.8*m.sun
max.mass.s = 3.0*m.sun
####



sigma_radius_s_obs = r_sun * ( ( inputdata2$koi_srad_err1 + abs( inputdata2$koi_srad_err2 ) ) / 2.0 )
sigma_mass_s_obs = m_sun * ( ( inputdata2$koi_smass_err1 + abs( inputdata2$koi_smass_err2))/2.0)
sigma_radius_ratio_obs_int = ( inputdata3$koi_ror_err1 + abs(inputdata3$koi_ror_err2 ) ) / 2.0
sigma_radius_ratio_obs_ext = ( inputdata2$koi_ror_err1 + abs(inputdata2$koi_ror_err2 ) ) / 2.0
print(sigma_radius_ratio_obs_ext)
print(sigma_radius_s_obs)
print(sigma_mass_s_obs)

sigma.va.obs.int = inputdata1$sig.Re.V.int 
sigma.vb.obs.int = inputdata1$sig.Im.V.int 
sigma.va.obs.ext = inputdata1$sig.Re.V.ext 
sigma.vb.obs.ext = inputdata1$si.Im.V.ext 

va.obs.int = inputdata1$Re.V.int
va.obs.ext = inputdata1$Re.V.ext
vb.obs.int = inputdata1$Im.V.int
vb.obs.ext = inputdata1$Im.V.ext

radius.ratio.obs.int = inputdata3$koi.ror
radius.ratio.obs.ext = inputdata2$koi.ror
radius.s.obs = r.sun*(inputdata2$koi.srad)
mass.s.obs = m.sun*(inputdata2$koi.smass)

print(radius.s.obs)
print(mass.s.obs)


In [15]:

mass_radius_powerlaw_model = """

data {    

    int<lower=1> Nm;
    int<lower=1> Ndata;
    
}

parameters {
    simplex[Nm] f;
    real<lower=0> e_sigma[Nm];
    real<lower=-1,upper=1> h_int[Ndata];
    real<lower=-1,upper=1> k_int[Ndata];
    real<lower=-1,upper=1> h_ext[Ndata];
    real<lower=-1,upper=1> k_ext[Ndata];


model {
        
    real ps_h_int[Nm];
    real ps_k_int[Nm];
    real ps_h_ext[Nm];
    real ps_k_ext[Nm];


    mass_const ~ uniform(0.0,3.0);
    mass_radius_exp ~ uniform(0.0,2.0);
    sigma_mass_radius ~ uniform(0.0,3.0); 
    
    // eccentricity distribution model section (generalize for interior/exterior pairs):
    
    // interior planet eccentricty
    e_sigma ~ uniform(0, 0.5);
    for (n in 1:Ndata)
      for (j in 1:Nm) {
        ps_h_int[j] <- log(f[j]) + normal_log(h_int[n],0.0,e_sigma[j]);
      }
      increment_log_prob(log_sum_exp(ps_h_int));
    for (n in 1:Ndata)
      for (j in 1:Nm) {
        ps_k_int[j] <- log(f[j]) + normal_log(k_int[n],0.0,e_sigma[j]);
      }
      increment_log_prob(log_sum_exp(ps_k_int));
      
      //exterior planet eccentricity
    for (n in 1:Ndata)
      for (j in 1:Nm) {
        ps_h_ext[j] <- log(f[j]) + normal_log(h_ext[n],0.0,e_sigma[j]);
      }
      increment_log_prob(log_sum_exp(ps_h_ext));
    for (n in 1:Ndata)
      for (j in 1:Nm) {
        ps_k_ext[j] <- log(f[j]) + normal_log(k_ext[n],0.0,e_sigma[j]);
      }
      increment_log_prob(log_sum_exp(ps_k_ext));


    m_sun <- 332996.4274; // Msun in Mearth units
    r_sun <- 110.0;  // Rsun in Rearth units
    // m_star_const <- 1.37 * m_sun;
    
    min_radius_s <- 0.5 * r_sun; // put priors over this
    max_radius_s <- 2.0 * r_sun; // put priors over this
    
    min_radius_p <- 0.1;
    max_radius_p <- 10.0;
    // radius_p_exp ~ uniform(0.0,2.0);
    
    min_mass_s <- 0.8 * m_sun; // put priors over this
    max_mass_s <- 3.0 * m_sun;  // put priors over this
    
    // Constants (depends on P_ext, P_int, j)
    pi <- 3.14;
    // radius_s_true <- r_sun;
    // mass_s_true <- m_sun;
    
    for (i in 1:Ndata) {
    
        // DEFINITIONS //
    
        C1[i] <- P_int[i] / ( ( pi * ( j[i]^(2/3) ) * ( (j[i]-1.0)^(1/3) * delta[i] ) ) );

        C2[i] <- 3.0 / ( 2.0 * delta[i] );
        
        C3[i] <- P_ext[i] / ( pi * j[i] * delta[i] );
    

        radius_ratio_true_int[i] <- radius_p_true_int[i] / radius_s_true[i];

        mass_ref_int[i] <- mass_const * ( radius_p_true_int[i]^mass_radius_exp );


        radius_ratio_true_ext[i] <- radius_p_true_ext[i] / radius_s_true[i];

        mass_ref_ext[i] <- mass_const * ( radius_p_true_ext[i]^mass_radius_exp );


        mass_ratio_true_int[i] <- mass_p_true_int[i] / mass_s_true[i];

        va_true_int[i] <- ( (-1.0) * C1[i] * mass_ratio_true_ext[i] * fc[i] ) - ( ( C1[i] * C2[i] * mass_ratio_true_ext[i] ) * ( ( fc[i] * h_int[i] ) + ( g[i] * h_ext[i] ) ) );

        vb_true_int[i] <- ( C1[i] * C2[i] * mass_ratio_true_ext[i] ) * ( ( fc[i] * k_int[i] ) + ( g[i] * k_ext[i] ) );


        mass_ratio_true_ext[i] <- mass_p_true_ext[i] / mass_s_true[i];

        va_true_ext[i] <- ( (-1.0) * C3[i] * mass_ratio_true_int[i] * g[i] ) + ( ( C2[i] * C3[i] * mass_ratio_true_int[i] ) * ( ( fc[i] * h_int[i] ) + ( g[i] * h_ext[i] ) ) );

        vb_true_ext[i] <- ( (-1.0) * C2[i] * C3[i] * mass_ratio_true_int[i] ) * ( ( fc[i] * k_int[i] ) + ( g[i] * k_ext[i] ) );
    

        // DISTRIBUTIONS //
	
	    radius_s_true[i] ~ uniform( min_radius_s, max_radius_s );
        
        radius_s_obs[i] ~ normal( radius_s_true[i], 1.0 / ( sigma_radius_s[i] * sigma_radius_s[i] ) ) T[0,]; 
        mass_s_true[i] ~ uniform( min_mass_s, max_mass_s );
        mass_s_obs[i] ~ normal( mass_s_true[i], 1.0 / ( sigma_mass_s[i] * sigma_mass_s[i] ) );

        radius_p_true_int[i] ~ uniform( min_radius_p, max_radius_p ); 
        radius_ratio_obs_int[i] ~ normal( radius_ratio_true_int[i], 1.0 / ( sigma_radius_ratio_obs_int[i] * sigma_radius_ratio_obs_int[i] ) );

        mass_p_true_int[i] ~ normal( mass_ref_int[i], 1.0 / ( sigma_mass_radius * sigma_mass_radius ) ) T[0,];

        va_obs_int[i] ~ normal( va_true_int[i], 1.0 / ( sigma_va_obs_int[i] * sigma_va_obs_int[i] ) );
        vb_obs_int[i] ~ normal( vb_true_int[i], 1.0/ ( sigma_vb_obs_int[i] * sigma_vb_obs_int[i] ) );   

        radius_p_true_ext[i] ~ uniform( min_radius_p, max_radius_p ); 
        radius_ratio_obs_ext[i] ~ normal( radius_ratio_true_ext[i], 1.0 / ( sigma_radius_ratio_obs_ext[i] * sigma_radius_ratio_obs_ext[i] ) ); 

        mass_p_true_ext[i] ~ normal( mass_ref_ext[i], 1.0 / ( sigma_mass_radius * sigma_mass_radius ) ) T[0,];

        va_obs_ext[i] ~ normal( va_true_ext[i], 1.0 / ( sigma_va_obs_ext[i] * sigma_va_obs_ext[i] ) );
        vb_obs_ext[i] ~ normal( vb_true_ext[i], 1.0 / ( sigma_vb_obs_ext[i] * sigma_vb_obs_ext[i] ) ); 
    }
}

"""

data = {}

#sm = StanModel(model_code=mass_radius_powerlaw_model)
#fit = sm.sampling(data=data, iter=10, chains=2, init=init, n_jobs=-1)

fit = pystan.stan(model_code=mass_radius_powerlaw_model, data=data, iter=1, chains=1, n_jobs=-1);

la = fit.extract(permuted=True)  # return a dictionary of arrays
alpha = la['mass.radius.exp']
beta = la['mass.const']
lnf0 = la['sigma.mass.radius']
print(mass.radius.exp)
print(mass.const)
print(sigma.mass.radius)

a = fit.extract(permuted=False)
print(a)
print(fit)

print(mass.radius.exp)

plt.hist(mass.radius.exp)
fit.plot()




ValueError: Failed to parse Stan model 'anon_model_489ace20ccf74c7923a1bc29bfc6bb89'. Error message:
SYNTAX ERROR, MESSAGE(S) FROM PARSER:


ERROR at line 19

 18:    
 19:    model {
        ^
 20:            

PARSER EXPECTED: <one of the following:
  a variable declaration, beginning with type,
      (int, real, vector, row_vector, matrix, unit_vector,
       simplex, ordered, positive_ordered,
       corr_matrix, cov_matrix,
       cholesky_corr, cholesky_cov
  or '}' to close variable declarations>
