# Libraries

In [1]:
import numpy             as np
import pandas            as pd
import matplotlib.pyplot as plt
import statsmodels.api   as sm
import seaborn           as sns
import pystan

# Configuring the regression parameters

In [3]:
# Load the regression catalogue from CSV (path is relative to this notebook).
my_data = pd.read_csv(filepath_or_buffer='../../Catalogue/binom_reg_dataset.csv')

In [5]:
# Row selection: keep only the objects with redshift z <= 0.4.
redshifts = my_data['Z']
index     = np.nonzero(redshifts.values <= 0.4)   # tuple of row positions, same result as one-argument np.where

# Columns used by the regression, restricted to the selected rows.
redshift    = redshifts.values[index]                                  # 2nd parameter
whan_class  = my_data['WHAN(0-NA;1-RP;2-wA;3-sA;4-SF)'].values[index]  # 1st parameter
logit_class = my_data['LOGIT_CLASS(1-UVUP;0-UVWEAK)'].values[index]    # y axis: logit class -- uv upturn yes or no

In [6]:
# original parameters
x1    = redshift      # regressor entering the model as x1 and x1**2
x2    = whan_class    # regressor entering the model linearly
y     = logit_class   # binary response
n_obs = x1.size

# new parameters - important for plotting!
n_obs2 = 50           # number of grid points along each axis of the prediction grid
x1_sim = np.linspace(x1.min(), x1.max(), n_obs2)
x2_sim = np.linspace(x2.min(), x2.max(), n_obs2)

plot_x1, plot_x2 = np.meshgrid(x1_sim, x2_sim)  # THIS IS WHERE THE GRID IS DONE

# Flatten the (n_obs2, n_obs2) grids into column vectors. Using -1 lets numpy infer the
# n_obs2**2 rows, so changing n_obs2 no longer requires editing a hard-coded size.
plot_x1 = plot_x1.reshape(-1, 1)
plot_x2 = plot_x2.reshape(-1, 1)

# dataset to be used in the regression
regression_data         = {}     # the regression data must be in the shape of a dictionary, as stated in the pystan manual
regression_data['X']    = sm.add_constant(np.column_stack((x1, x1**2, x2)))
regression_data['X2']   = sm.add_constant(np.column_stack((plot_x1, plot_x1**2, plot_x2)))
regression_data['K']    = regression_data['X'].shape[1]    # number of betas, taken from the design matrix itself
regression_data['N']    = n_obs
regression_data['Y']    = y
regression_data['LogN'] = np.log(n_obs)
regression_data['N2']   = regression_data['X2'].shape[0]   # number of grid rows (n_obs2**2)

In [7]:
# Sanity check on the prediction design matrix: expect (n_obs2**2, K).
# The parenthesised form prints the same text on Python 2 and is valid syntax on Python 3.
print(regression_data['X2'].shape)

(2500, 4)


In [8]:
# Fit: STAN code -----------------------------------------------------------------------------------------------
# Bernoulli-logit regression of Y on the design matrix X, with predicted probabilities (pnew)
# evaluated on the second design matrix X2.
# NOTE(review): the original model block sampled `tau` and `mu` without declaring them or using
# them anywhere; they are now declared and used as the shared prior of every beta, following the
# original "shared priors" comment. Confirm this is the intended model.
# NOTE(review): the original literal `1-E3` was read as `1E3` to match the gamma line; if `1E-3`
# was meant (a JAGS-style precision), remember that Stan's normal() takes a standard deviation.
stan_code = """
data{
    int<lower=0> N;
    int<lower=0> N2;
    int<lower=0> K;
    int Y[N];
    matrix[N,K] X;
    matrix[N2,K] X2;
    real LogN;
    }

parameters{
    vector[K] beta;
    real mu;              // shared prior location of the betas
    real<lower=0> tau;    // shared prior scale of the betas
    }

transformed parameters{
    vector[N] eta;
    eta = X * beta;
    }

model{
    tau ~ gamma(1E3,1E3);   // tau and mu configure the shared priors
    mu ~ normal(0,1E3);
    beta ~ normal(mu, tau);
    Y ~ bernoulli_logit(eta);
    }

generated quantities{
    vector[N2] etanew;
    real<lower=0, upper=1.0> pnew[N2];
    etanew = X2 * beta;
    for (j in 1:N2){
        pnew[j] = inv_logit(etanew[j]);
        }
    }

"""

In [None]:
# Compile the Stan program and sample from it: 3 chains of 5000 iterations each, the first 2000
# of every chain being warmup, run in a single process (n_jobs=1).
# A fixed seed makes the draws reproducible after a kernel restart.
fit = pystan.stan(model_code=stan_code, data=regression_data, iter=5000, chains=3, warmup=2000, n_jobs=1,
                  seed=42)