In [4]:
# From: Bayesian Models for Astrophysical Data, Cambridge Univ. Press
# (c) 2017,  Joseph M. Hilbe, Rafael S. de Souza and Emille E. O. Ishida 
# 
# you are kindly asked to include the complete citation if you used this 
# material in a publication
#
# Code 10.19 Bernoulli logit model, in Python using Stan, for assessing 
#            the relationship between Seyfert AGN activity and 
#            galactocentric distance
#
# Statistical Model: Bernoulli mixed model in Python using Stan
#
# Astronomy case: Relationship between Seyfert activity and 
#                 cluster centric distance - taken from 
#                 de Souza et al., 2016, MNRAS in  press, 
#                 arXiv:astro-ph/1603.06256
#
# 1 response variable (Y - galaxy class Seyfert - 1/AGN - 0)
# 2 explanatory variables (x1 - logM200, x2 - cluster-centric distance)
#
# Data from: Trevisan, Mamon & Khosroshahi, 2017, MNRAS, 464, p.4593-4610
#            https://github.com/COINtoolbox/LOGIT_AGNs/tree/master/data

import numpy as np
import pandas as pd
import pystan 
import statsmodels.api as sm

# Data
path_to_data = 'https://raw.githubusercontent.com/astrobayes/BMAD/master/data/Section_10p8/Seyfert.csv'

# load the catalogue and expose its columns through a plain dict
data_frame = dict(pd.read_csv(path_to_data))

x1 = data_frame['logM200']
x2 = data_frame['r_r200']
zoo = data_frame['zoo']

# inputs handed to the Stan model; X holds the two predictors side by side
# with a constant column added by statsmodels
data = {
    'Y': data_frame['bpt'],
    'X': sm.add_constant(np.column_stack((x1, x2))),
    'P': 2,
}
data['N'], data['K'] = data['X'].shape

# rows sharing the 'zoo' value of the first row are flagged 0, all others 1
data['gal'] = [int(item != zoo[0]) for item in zoo]


In [5]:
# Show the 0/1 population flags passed to Stan; the call form of print
# works on both Python 2 and Python 3.
print(data['gal'])

[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 

In [6]:
# Fit
# Bernoulli-logit model with one column of regression coefficients per
# population; every coefficient shares the same normal(mu, sigma) prior.
# Comments inside the Stan program use `//` (the `#` form is deprecated).
stan_code="""
data{
    int<lower=0> N;                // number of data points
    int<lower=0> K;                // number of coefficients
    int<lower=0> P;                // number of populations
    matrix[N,K] X;                 // [constant, logM200, r_r200]
    int<lower=0, upper=1> Y[N];    // Seyfert 1/AGN 0
    int<lower=0, upper=1> gal[N];  // elliptical 0/spiral 1
}
parameters{
    matrix[K,P] beta;
    real<lower=0> sigma;
    real mu;
}
model{
    vector[N] eta;                 // linear predictor on the logit scale

    // rows with the same flag as the first row use column 1 of beta,
    // all other rows use column 2
    for (i in 1:N) {
        if (gal[i] == gal[1]) 
            eta[i] = dot_product(col(beta,1), X[i]);
        else 
            eta[i] = dot_product(col(beta,2), X[i]);
    }

    // shared hyperpriors
    sigma ~ gamma(0.001, 0.001);
    mu ~ normal(0, 100);

    // priors and likelihood
    for (i in 1:K) {
        for (j in 1:P) beta[i,j] ~ normal(mu, sigma);
    }

    Y ~ bernoulli_logit(eta);
}
"""

# Run mcmc
# A fixed seed makes the chains reproducible across Restart & Run All.
fit = pystan.stan(model_code=stan_code, data=data, iter=5000, chains=3,
                  warmup=3000, thin=10, n_jobs=3, seed=42)

# Output
print(fit)


INFO:pystan:COMPILING THE C++ CODE FOR MODEL anon_model_628c866c11a853fa82debba736af5cf4 NOW.
  tree = Parsing.p_module(s, pxd, full_module_name)


Inference for Stan model: anon_model_628c866c11a853fa82debba736af5cf4.
3 chains, each with iter=5000; warmup=3000; thin=10; 
post-warmup draws per chain=200, total post-warmup draws=600.

            mean se_mean     sd   2.5%    25%    50%    75%  97.5%  n_eff   Rhat
beta[1,1]   0.04  3.9e-3   0.09  -0.12  -0.02   0.04    0.1   0.23  497.0    1.0
beta[2,1]  -0.15  5.4e-3    0.1  -0.34  -0.22  -0.16  -0.09   0.03  320.0   1.01
beta[3,1]   0.18  5.2e-3   0.12  -0.02   0.09   0.17   0.26   0.42  517.0    1.0
beta[1,2] 4.0e-3  2.3e-3   0.05   -0.1  -0.03 3.5e-3   0.04   0.11  535.0    1.0
beta[2,2]  -0.02  2.4e-3   0.05  -0.12  -0.05  -0.02   0.02   0.09  467.0    1.0
beta[3,2] 1.6e-3  2.4e-3   0.06  -0.12  -0.03 8.7e-4   0.04   0.11  530.0    1.0
sigma       0.14  4.0e-3   0.09   0.02   0.08   0.12   0.18   0.39  451.0    1.0
mu          0.01  3.0e-3   0.08  -0.13  -0.03 5.3e-3   0.05   0.18  677.0    1.0
lp__       -1194    0.14    2.6  -1199  -1195  -1193  -1192  -1188  340.0    1.0

S