In [1]:
%matplotlib inline
import importlib
import importlib.resources
import json
from collections import defaultdict
from pathlib import Path

import matplotlib.pylab as plt
import nest_asyncio
import numpy as np
import pandas as pd
from cmdstanpy import CmdStanModel
from IPython.display import display, HTML
from tqdm import tqdm

nest_asyncio.apply()


  from .autonotebook import tqdm as notebook_tqdm


In [None]:

from bayesianquilts.models.logistic_regression import LogisticRegression
from bayesianquilts.metrics.classification import classification_metrics
from bayesianquilts.sampler import psis, nppsis

ModuleNotFoundError: No module named 'tensorflow'

In [4]:
# Stan program text for a Bernoulli-logit regression whose coefficients
# `beta = z .* lambda_tilde * tau` carry the regularized-horseshoe prior named in
# the inline Stan comments (global scale `tau`, local scales `lambda`, slab scale `c`).
# `y`, `x`, `f` and `log_lik` are all declared with N-1 rows, so the program
# expects `N` to be one larger than the number of rows actually supplied.
# `generated quantities` emits a per-row `log_lik`; the commented-out
# `N_tilde` / `x_tilde` / `y_obs` / `loo_log_lik` pieces are currently disabled.
logistic_horseshoe_code = """
data {
  int <lower=0> N;                // number  of  observations
  int <lower=0> d;                // number  of  predictors
  array[N-1] int<lower=0,upper=1> y;      // outputs
  matrix[N-1,d] x;                  // inputs
  real <lower=0>  scale_icept;    // prior  std for  the  intercept
  real <lower=0>  scale_global;   // scale  for  the half -t prior  for  tau
  real <lower=1>  nu_global;      // degrees  of  freedom  for the half -t prior for tau
  real <lower=1> nu_local;        // degrees  of  freedom  for  the half -t priors for  lambdas
  real <lower=0>  slab_scale;     // slab  scale  for  the  regularized  horseshoe
  real <lower=0> slab_df;         // slab  degrees  of  freedom  for the  regularized horseshoe

  //int<lower=0> N_tilde;
  //matrix[N_tilde, d] x_tilde;
  //array[N_tilde] int<lower=0,upper=1> y_obs;
}
parameters {
  real  beta0;
  vector[d] z;
  real <lower=0> tau;             // global  shrinkage  parameter
  vector <lower =0>[d] lambda;    // local  shrinkage  parameter
  real <lower=0> caux;
}
transformed  parameters {
  vector <lower =0>[d] lambda_tilde;    // ’truncated ’ local  shrinkage  parameter
  real <lower=0> c;                     // slab  scale
  vector[d] beta;                       // regression  coefficients
  vector[N-1] f;                          // latent  function  values
  c = slab_scale * sqrt(caux);
  lambda_tilde = sqrt( c^2 * square(lambda) ./ (c^2 + tau^2* square(lambda )) );
  beta = z .*  lambda_tilde*tau;
  f = beta0 + x*beta;
}
model {
  z ~ normal(0.0, 1.0); // half -t priors  for  lambdas  and tau , and  inverse -gamma  for c^2
  lambda ~ student_t(nu_local , 0.0, 1.0);
  tau ~ student_t(nu_global , 0.0, scale_global);
  caux ~ inv_gamma (0.5* slab_df , 0.5* slab_df );
  beta0 ~ normal(0.0,  scale_icept );
  y ~ bernoulli_logit(f);
}
generated quantities {
  vector[N-1] log_lik;
  // vector[N_tilde] loo_log_lik;

  for (nn in 1:(N-1))
    log_lik[nn] = bernoulli_logit_lpmf(y[nn] | x[nn] * beta + beta0);

  //for (nn in 1:N_tilde)
  //  loo_log_lik[nn] = bernoulli_logit_lpmf(y_obs[nn] | x_tilde[nn] * beta + beta0);
}
"""

# Persist the Stan program to disk so cmdstanpy can compile it from a file path.
# The program text contains non-ASCII characters (typographic quotes in a Stan
# comment), so the encoding is pinned instead of relying on the locale default,
# and the text is written with a single `write` call: `writelines` on a `str`
# would iterate over it one character at a time.
with open("/tmp/ovarian_model.stan", 'w', encoding="utf-8") as f:
  f.write(logistic_horseshoe_code)


In [5]:
# Locate the CSVs shipped inside the `bayesianquilts` package's `data` directory.
# `importlib.resources.path(...)` returns a context manager, so interpolating it
# into an f-string embeds the manager's repr rather than a directory name;
# `files(...)` returns a traversable that can be joined with `/`, and `as_file`
# yields a real filesystem path for pandas to read.
data_dir = importlib.resources.files('bayesianquilts') / 'data'
with importlib.resources.as_file(data_dir / 'overianx.csv') as x_csv:
    X = pd.read_csv(x_csv, header=None)
with importlib.resources.as_file(data_dir / 'overiany.csv') as y_csv:
    y = pd.read_csv(y_csv, header=None)
batch_size = 6

# Column-wise standardization; any NaN left afterwards (for example from a
# column whose standard deviation is zero) is replaced by 0.
X_scaled = (X - X.mean())/X.std()
X_scaled = X_scaled.fillna(0)
n = X_scaled.shape[0]
p = X_scaled.shape[1]

print((n, p))


(54, 1536)


In [6]:


# Slice the standardized features and the labels into a dataset of {'X', 'y'} records.
# NOTE(review): `tf` is not imported in any cell visible here — confirm where it comes from.
tfdata = tf.data.Dataset.from_tensor_slices(dict(X=X_scaled, y=y))

def data_factory_factory(batch_size=batch_size, repeat=False, shuffle=False):
    """Return a callable that produces a batched view of the global ``tfdata``.

    Args:
        batch_size: Default batch size handed on to the returned callable.
        repeat: When true, ``.repeat()`` is applied before batching.
        shuffle: When true, the dataset is shuffled first with a buffer of ten
            times the batch size passed to the returned callable.

    Returns:
        A function ``data_factory(batch_size=...)`` that returns
        ``tfdata`` (optionally shuffled and repeated) split into batches.
    """
    def data_factory(batch_size=batch_size):
        # Inside this function `batch_size` is the inner argument, so the shuffle
        # buffer follows whatever batch size the caller of `data_factory` picks.
        dataset = tfdata.shuffle(batch_size*10) if shuffle else tfdata
        if repeat:
            dataset = dataset.repeat()
        return dataset.batch(batch_size)

    return data_factory

In [7]:

# Hyperparameters passed to the Stan program's data block.
slab_scale, slab_df = 2.5, 1
scale_icept = 5.0
nu_global = nu_local = 1

# Global shrinkage scale derived from a guessed count of relevant covariates
# (one tenth of the number of rows), the number of columns p and sqrt(n).
guessnumrelevcov = n / 10
scale_global = guessnumrelevcov / ((p - guessnumrelevcov) * np.sqrt(n))

# Sampler settings; NOTE(review): no visible cell reads `control`.
control = dict(adapt_delta=0.9999, max_treedepth=15)


In [8]:
# Build the cmdstanpy model object from the Stan program stored at
# /tmp/ovarian_model.stan; the log output of this cell shows it being compiled
# to an executable next to that file.
sm = CmdStanModel(stan_file="/tmp/ovarian_model.stan")

15:09:18 - cmdstanpy - INFO - compiling stan file /tmp/ovarian_model.stan to exe file /tmp/ovarian_model


15:09:34 - cmdstanpy - INFO - compiled model executable: /tmp/ovarian_model


In [12]:
# Reload one saved .npy file per left-out row index into a list.
# The files are read with allow_pickle=True, so np.load unpickles their contents;
# only load files produced by a trusted run.
params = []
for i in tqdm(range(n)):
    params.append(np.load(f'/tmp/ovarian/ovarian_loo_{i}.npy', allow_pickle=True))

100%|██████████| 54/54 [00:05<00:00, 10.73it/s]


In [13]:
# Display the first element of `params` as the cell output.
params[0]

array({'beta0': array([[1.73014],
       [1.995  ],
       [2.85325],
       ...,
       [5.50569],
       [2.66707],
       [5.86636]]), 'z': array([[ 0.951916  ,  0.361695  , -0.166305  , ..., -2.26088   ,
         1.60362   , -1.06377   ],
       [ 0.697351  ,  0.011118  , -0.0799946 , ..., -1.70535   ,
         1.73602   ,  0.0647172 ],
       [ 0.364159  , -0.0798547 ,  0.493215  , ...,  1.62581   ,
        -1.69351   , -0.114188  ],
       ...,
       [ 1.09704   ,  1.7801    , -0.358455  , ..., -0.524287  ,
        -0.629063  ,  1.24526   ],
       [ 1.22724   , -0.78208   , -0.559709  , ...,  0.43503   ,
        -0.658181  ,  1.00718   ],
       [-0.613617  ,  1.31096   , -0.585694  , ...,  0.00828814,
        -0.142699  , -1.0438    ]]), 'tau': array([[0.0102673 ],
       [0.00129425],
       [0.00302402],
       ...,
       [0.163678  ],
       [0.0157506 ],
       [0.0315607 ]]), 'lambda': array([[ 0.757861 ,  0.562305 , 13.7651   , ...,  1.17892  ,  0.218025 ,
         1.12

In [9]:
# Leave-one-out refits: for every row index i, drop that row, sample the Stan
# model on the remaining n-1 rows and save the posterior draws to disk.
# Draws go to /tmp/ovarian/ovarian_loo_{i}.npy, the location the reload cell
# reads from; the directory is created up front so np.save cannot fail on it.
loo_dir = Path("/tmp/ovarian")
loo_dir.mkdir(parents=True, exist_ok=True)

for i in tqdm(range(n)):
    y_ = y.drop(i)
    X_ = X_scaled.drop(i)

    # "N" stays at the full row count: the Stan program sizes y and x as N-1.
    _ovarian_data = {
        "N": n,
        "d": p,
        "slab_df": slab_df,
        "slab_scale": slab_scale,
        "scale_icept": scale_icept,
        "nu_global": nu_global,
        "nu_local": nu_local,
        "scale_global": np.abs(scale_global),
        "y": y_.astype(int)[0].to_numpy().tolist(),
        "x": X_.to_numpy().tolist(),
    }

    with open("/tmp/_ovarian_data.json", "w") as f:
        json.dump(_ovarian_data, f)

    # NOTE(review): adapt_delta here (0.9995) differs from the `control` dict
    # defined earlier (0.9999) — confirm which value is intended.
    fit = sm.sample(
        data="/tmp/_ovarian_data.json",
        iter_warmup=20000,
        iter_sampling=2000,
        thin=2,
        adapt_delta=0.9995,
        max_treedepth=15,
    )

    params = fit.stan_variables()
    # Give the scalar parameters an explicit trailing axis, shape (draws, 1).
    # np.newaxis is used so this step does not depend on TensorFlow.
    for name in ('c', 'tau', 'caux', 'beta0'):
        params[name] = params[name][:, np.newaxis]

    np.save(loo_dir / f'ovarian_loo_{i}.npy', params)

  0%|          | 0/54 [00:00<?, ?it/s]15:09:34 - cmdstanpy - INFO - CmdStan start processing

[A

[A[A


[A[A[A


[A[A[A

[A[A

[A[A


[A[A[A
[A


[A[A[A

[A[A


[A[A[A
[A

[A[A
[A
[A

[A[A


[A[A[A
[A

[A[A
[A


[A[A[A

[A[A


[A[A[A
[A

[A[A
[A


[A[A[A
[A

[A[A
[A


[A[A[A

[A[A


[A[A[A

[A[A
[A

[A[A


[A[A[A

[A[A
[A

[A[A


[A[A[A
[A


[A[A[A

[A[A
[A


[A[A[A


[A[A[A

[A[A
[A


[A[A[A
[A


[A[A[A

[A[A


[A[A[A
[A


[A[A[A

[A[A


[A[A[A
[A


[A[A[A

[A[A


[A[A[A

[A[A
[A

[A[A


[A[A[A
[A


[A[A[A

[A[A
[A

[A[A
[A


[A[A[A

[A[A
[A

[A[A


[A[A[A
[A
[A

[A[A


[A[A[A
[A


[A[A[A
[A

[A[A


[A[A[A
[A


[A[A[A

[A[A
[A


[A[A[A
[A

[A[A


[A[A[A
[A

[A[A
[A


[A[A[A

[A[A


[A[A[A
[A

[A[A


[A[A[A
[A

[A[A


[A[A[A
[A


[A[A[A
[A

[A[A


[A[A[A
[A

[A[

KeyboardInterrupt: 




[A[A[A
[A