In [1]:
import numpy as np
import pandas as pd
import pystan

%matplotlib inline

In [2]:
def wbic(log_likelihood):
    return - np.mean(np.sum(log_likelihood, axis=1))

In [3]:
sample = np.array(pd.read_csv("./data.csv")['value'])

In [4]:
pareto = """
data {
    int<lower=0> N;
    real<lower=1.0> x[N];
    real<lower=0> minx;
}
parameters {
    real<lower=0.0001,upper=minx>   alpha;
    real<lower=0.0001>  beta; 
}
model {
    for(n in 1:N){
        target += 1/log(N) * pareto_lpdf(x[n] | alpha, beta);
    }
}
generated quantities{
    vector[N] log_lik;

    for(n in 1:N){
        log_lik[n] = pareto_lpdf(x[n] | alpha, beta);
    }
}
"""

In [5]:
log_normal = """
data {
  int<lower=0> N;
  real<lower=0.999> x[N];
}
parameters {
  real  mu;  
  real<lower=0>   sigma;
}
model {
  for(n in 1:N){
      target += 1/log(N) * lognormal_lpdf(x[n] | mu, sigma);
  }
}
generated quantities{
  vector[N] log_lik;
  
  for(i in 1:N){
    log_lik[i] = lognormal_lpdf(x[i] | mu, sigma);
  }
}
"""

In [6]:
gamma = """
data {
  int<lower=0> N;
  real<lower=0.999> x[N];
}
parameters {
  real<lower=0.0001>  alpha;
  real<lower=0.0001>   beta;
}
model {
  for(n in 1:N){
    target += gamma_lpdf(x[n] | alpha, beta);
    x[n] ~ gamma(alpha,beta);
  }
}
generated quantities{
  vector[N] log_lik;

  for(n in 1:N){
    log_lik[n] = gamma_lpdf(x[n] | alpha, beta);
  }
}

"""

In [7]:
data = {
    'N': len(sample),
    'x':  sample,
    'minx': 1
}

sm1 = pystan.stan(model_code=pareto, data=data)

INFO:pystan:COMPILING THE C++ CODE FOR MODEL anon_model_91927e1a511f0e96c2d7df9ec165d6a7 NOW.


In [8]:
data = {
    'N': len(sample),
    'x':  sample
}

sm2 = pystan.stan(model_code=log_normal, data=data)

INFO:pystan:COMPILING THE C++ CODE FOR MODEL anon_model_3047010197cac5706b4bd9d9f57169cb NOW.


In [9]:
data = {
    'N': len(sample),
    'x':  sample
}

sm3 = pystan.stan(model_code=gamma, data=data)

INFO:pystan:COMPILING THE C++ CODE FOR MODEL anon_model_c5f91b18028ef6dfbe2ed75cb9265a4e NOW.


In [10]:
print "pareto: ", wbic(sm1.extract()['log_lik'])
print "log_normal: ", wbic(sm2.extract()['log_lik'])
print "gamma", wbic(sm3.extract()['log_lik'])

pareto:  1440.73164437
log_normal:  1956.23255718
gamma 2293.03421784
