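# Measure Sampling Time - NumPyro

This notebook times NumPyro's NUTS sampler on 1PL/2PL IRT models for simulated data sets of increasing size, on CPU or GPU, and exports the measured times as a CSV file.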


# Select Data, IRT model, and Device


In [1]:
# Data set selector (labels as given by the notebook author):
#   0 -> bone
#   1 (others) -> brain
# The value is interpolated into the name of the downloaded posterior-predictive
# file and into the name of the exported CSV.

DATA = 0 # bone
#DATA = 1



In [2]:
# IRT model selector:
#   0 -> 1PL-IRT
#   any other value -> 2PL-IRT
# The value is also interpolated into the download and export file names.

MODEL = 0
#MODEL = 1

In [3]:
# Execution target for the benchmark:
#   0 -> CPU
#   1 -> GPU
#   2 -> GPU with chain_method='vectorized'

#DEVICE = 0
DEVICE = 1

###########################
# Author's note: DEVICE = 2 does not work.
#DEVICE = 2
###########################

In [4]:
# Number of MCMC chains used for every timing measurement.
num_chains = 2

# DEVICE == 2 selects NumPyro's 'vectorized' chain method; every other
# setting uses 'parallel'.
chain_method = 'vectorized' if DEVICE == 2 else 'parallel'

# Prepare

In [5]:
#! cat /proc/cpuinfo

In [6]:
#! pip install -q "jax[cuda11_cudnn805]"==0.3.25 -f https://storage.googleapis.com/jax-releases/jax_cuda_releases.html
! pip install -q numpyro==0.10.1 arviz==0.12.1

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m292.7/292.7 KB[0m [31m5.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.6/1.6 MB[0m [31m41.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m5.2/5.2 MB[0m [31m63.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m33.2 MB/s[0m eta [36m0:00:00[0m
[?25h[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
pymc 5.1.2 requires arviz>=0.13.0, but you have arviz 0.12.1 which is incompatible.[0m[31m
[0m

In [7]:
import numpy as np
import pandas as pd
import datetime as dt
import time

import matplotlib.pyplot as plt
import seaborn as sns

In [8]:
import numpyro
import numpyro.distributions as dist

import jax
import arviz as az
import jax.numpy as jnp

In [9]:
if DEVICE == 0:
    numpyro.set_platform('cpu')
    numpyro.set_host_device_count(num_chains)
else:
    numpyro.set_platform('gpu')
    n = jax.device_count()
    print("number of GPU", n)
    if n < 1:
        raise Exception("no GPU")
    else:
        ! nvidia-smi

number of GPU 1
Fri Mar 24 01:38:22 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 525.85.12    Driver Version: 525.85.12    CUDA Version: 12.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |
| N/A   66C    P0    29W /  70W |    105MiB / 15360MiB |      4%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------

## Import Data

In [10]:
fff = "idata_ppc_for_data%s_model%s.nc" % (DATA, MODEL)

! wget https://filedn.com/lpAczQGgeBjkX6l7SpI5JJy/__ws/stan_irt_nrm_rad/{fff} -O idata_ppc.nc

--2023-03-24 01:38:22--  https://filedn.com/lpAczQGgeBjkX6l7SpI5JJy/__ws/stan_irt_nrm_rad/idata_ppc_for_data0_model0.nc
Resolving filedn.com (filedn.com)... 74.120.9.25
Connecting to filedn.com (filedn.com)|74.120.9.25|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 28875064 (28M) [application/x-netcdf]
Saving to: ‘idata_ppc.nc’


2023-03-24 01:38:37 (1.97 MB/s) - ‘idata_ppc.nc’ saved [28875064/28875064]



In [11]:
# Load the downloaded NetCDF file with ArviZ; its `posterior_predictive` group
# supplies the simulated responses used for the timing runs.
idata_ppc = az.from_netcdf('idata_ppc.nc')

In [12]:
# Posterior-predictive draws of the 'obs' variable as a plain NumPy array.
# The recorded run displayed shape (6, 3000, 60, 7); the last axis is later
# used as the number of doctors.
# NOTE(review): the leading axes are presumably (chain, draw, case) — confirm
# against the file.
y_ppc = idata_ppc.posterior_predictive['obs'].to_numpy()
y_ppc.shape

(6, 3000, 60, 7)

# Run NumPyro

## Define Model

In [13]:
def model_1pl(y=None, num_cases=0, num_doctors=0):
    """One-parameter logistic (1PL) IRT model.

    Each doctor gets a latent ``theta`` and each case a latent ``beta`` (in IRT
    terms: ability and difficulty), both with a Normal(0, 2) prior. A response
    is Bernoulli distributed with logit ``theta - beta``.

    Args:
        y: responses passed as ``obs`` to the 'obs' site, laid out as
            (num_cases, num_doctors); ``None`` leaves the site unobserved.
        num_cases: size of the 'case' plate.
        num_doctors: size of the 'doctor' plate.
    """
    # One theta per doctor.
    with numpyro.plate('doctor', num_doctors):
        theta = numpyro.sample('theta', dist.Normal(0, 2))

    # One beta per case. The case plate is pinned to dim -2, which leaves the
    # nested doctor plate its own batch dimension.
    with numpyro.plate('case', num_cases, dim=-2):
        beta = numpyro.sample('beta', dist.Normal(0, 2))
        with numpyro.plate('doctor', num_doctors):
            logits = theta - beta
            numpyro.sample('obs', dist.Bernoulli(logits=logits), obs=y)

In [14]:
def model_2pl(y=None, num_cases=0, num_doctors=0):
    """Two-parameter logistic (2PL) IRT model.

    Each doctor gets a latent ``theta`` and each case a latent ``beta``, both
    with a Normal(0, 2) prior. Each case additionally gets ``log_d`` with a
    Normal(0.5, 1) prior. A response is Bernoulli distributed with logit
    ``exp(log_d) * (theta - beta)``.

    Args:
        y: responses passed as ``obs`` to the 'obs' site, laid out as
            (num_cases, num_doctors); ``None`` leaves the site unobserved.
        num_cases: size of the 'case' plate.
        num_doctors: size of the 'doctor' plate.
    """
    # One theta per doctor.
    with numpyro.plate('doctor', num_doctors):
        theta = numpyro.sample('theta', dist.Normal(0, 2))

    # Per-case parameters. The case plate is pinned to dim -2, which leaves the
    # nested doctor plate its own batch dimension.
    with numpyro.plate('case', num_cases, dim=-2):
        beta = numpyro.sample('beta', dist.Normal(0, 2))
        log_d = numpyro.sample('log_d', dist.Normal(0.5, 1))
        with numpyro.plate('doctor', num_doctors):
            # Exponentiating log_d keeps the multiplier positive.
            discrimination = jnp.exp(log_d)
            logits = discrimination*(theta - beta)
            numpyro.sample('obs', dist.Bernoulli(logits=logits), obs=y)

In [15]:
# Pick the model function matching the MODEL switch (0 -> 1PL, otherwise 2PL).
if MODEL == 0:
    model = model_1pl
else:
    model = model_2pl

In [16]:
# NUTS kernel for the selected IRT model.
nuts = numpyro.infer.NUTS(model)

# 500 warm-up iterations followed by 3000 kept draws per chain. This one MCMC
# object is reused for every data size in the timing loop.
mcmc = numpyro.infer.MCMC(
    nuts,
    num_warmup=500,
    num_samples=3000,
    num_chains=num_chains,
    chain_method=chain_method,
)


  mcmc = numpyro.infer.MCMC(nuts, num_warmup=500, num_samples=3000, num_chains=num_chains, chain_method=chain_method)


## Measure Inference Time

In [17]:
# Fixed PRNG key: every mcmc.run call in the timing loop receives this same key.
key = jax.random.PRNGKey(0)

# Number of posterior-predictive draws stacked into one simulated data set.
# NOTE(review): the leading 1 is repeated — presumably so the first,
# compile-heavy run can be discarded as a warm-up; confirm with the analysis.
factors = [1, 1, 2, 5, 10, 20, 50, 100, 200, 500, 1000] # 60 cases per draw in the recorded run
#factors = [1, 50, 20, 10, 5, 2, 1] 

# The last axis of y_ppc has one entry per doctor.
num_doctors = y_ppc.shape[-1]

In [18]:
# One CSV row per run: "num_cases,start_time,end_time,elapsed_time".
lines = []

for factor in factors:
    # Stack the first `factor` posterior-predictive draws of chain 0 into one
    # 2-D response matrix with num_doctors columns.
    y_simulated = y_ppc[0, :factor, :, :].reshape(-1, num_doctors)
    print("simulated data shape:", y_simulated.shape)

    num_cases = y_simulated.shape[0]

    # Wall-clock timestamps are kept for the log. The duration itself comes
    # from a monotonic clock, so system clock adjustments cannot distort it.
    start_time = dt.datetime.now()
    tic = time.perf_counter()

    mcmc.run(key, y=y_simulated, num_cases=num_cases, num_doctors=num_doctors)
    # JAX dispatches device work asynchronously: wait until the samples are
    # actually computed before stopping the clock.
    jax.tree_util.tree_map(lambda x: x.block_until_ready(), mcmc.get_samples())

    elapsed_time = time.perf_counter() - tic
    end_time = dt.datetime.now()
    lines.append(f'{num_cases},{start_time},{end_time},{elapsed_time}')

    time.sleep(1) # pause 1 s between runs

simulated data shape: (60, 7)


sample: 100%|██████████| 3500/3500 [00:23<00:00, 149.57it/s, 15 steps of size 4.31e-01. acc. prob=0.85]
sample: 100%|██████████| 3500/3500 [00:26<00:00, 131.72it/s, 15 steps of size 3.78e-01. acc. prob=0.89]


simulated data shape: (60, 7)


sample: 100%|██████████| 3500/3500 [00:23<00:00, 150.25it/s, 15 steps of size 4.31e-01. acc. prob=0.85]
sample: 100%|██████████| 3500/3500 [00:25<00:00, 136.87it/s, 15 steps of size 3.78e-01. acc. prob=0.89]


simulated data shape: (120, 7)


sample: 100%|██████████| 3500/3500 [00:30<00:00, 114.61it/s, 15 steps of size 3.60e-01. acc. prob=0.87]
sample: 100%|██████████| 3500/3500 [00:27<00:00, 127.19it/s, 15 steps of size 3.76e-01. acc. prob=0.85]


simulated data shape: (300, 7)


sample: 100%|██████████| 3500/3500 [00:30<00:00, 114.90it/s, 15 steps of size 2.96e-01. acc. prob=0.86]
sample: 100%|██████████| 3500/3500 [00:28<00:00, 124.77it/s, 15 steps of size 2.94e-01. acc. prob=0.86]


simulated data shape: (600, 7)


sample: 100%|██████████| 3500/3500 [00:32<00:00, 108.91it/s, 15 steps of size 2.59e-01. acc. prob=0.85]
sample: 100%|██████████| 3500/3500 [00:30<00:00, 113.81it/s, 15 steps of size 2.35e-01. acc. prob=0.88]


simulated data shape: (1200, 7)


sample: 100%|██████████| 3500/3500 [00:34<00:00, 100.04it/s, 15 steps of size 2.45e-01. acc. prob=0.82]
sample: 100%|██████████| 3500/3500 [00:32<00:00, 107.21it/s, 15 steps of size 2.32e-01. acc. prob=0.84]


simulated data shape: (3000, 7)


sample: 100%|██████████| 3500/3500 [00:59<00:00, 58.91it/s, 31 steps of size 1.90e-01. acc. prob=0.83]
sample: 100%|██████████| 3500/3500 [00:57<00:00, 60.91it/s, 31 steps of size 1.74e-01. acc. prob=0.86]


simulated data shape: (6000, 7)


sample: 100%|██████████| 3500/3500 [01:01<00:00, 56.92it/s, 31 steps of size 1.56e-01. acc. prob=0.84]
sample: 100%|██████████| 3500/3500 [00:57<00:00, 60.91it/s, 31 steps of size 1.60e-01. acc. prob=0.83]


simulated data shape: (12000, 7)


sample: 100%|██████████| 3500/3500 [01:08<00:00, 50.98it/s, 31 steps of size 1.31e-01. acc. prob=0.85]
sample: 100%|██████████| 3500/3500 [01:05<00:00, 53.17it/s, 31 steps of size 1.37e-01. acc. prob=0.83]


simulated data shape: (30000, 7)


sample: 100%|██████████| 3500/3500 [01:59<00:00, 29.38it/s, 63 steps of size 9.67e-02. acc. prob=0.86]
sample: 100%|██████████| 3500/3500 [01:56<00:00, 30.14it/s, 63 steps of size 9.96e-02. acc. prob=0.85]


simulated data shape: (60000, 7)


sample: 100%|██████████| 3500/3500 [02:27<00:00, 23.78it/s, 63 steps of size 9.12e-02. acc. prob=0.83]
sample: 100%|██████████| 3500/3500 [02:22<00:00, 24.57it/s, 63 steps of size 9.62e-02. acc. prob=0.81]


## Export Data

In [19]:
# Output file name encodes the data set, model and device of this run.
path = "time_measured_numpyro_data%s_model%s_device%s.csv" % (DATA, MODEL, DEVICE)

header = ['num_cases,start_time,end_time,elapsed_time']

# Header row first, then one row per timed run, newline-separated.
rows = header + lines

with open(path, mode='w') as f:
    f.write('\n'.join(rows))

In [20]:
# Hand the CSV to the browser when running on Google Colab. Elsewhere the
# import fails, so just report where the file was written instead of crashing
# the final cell.
try:
    from google.colab import files
except ImportError:
    print("google.colab is not available; results were saved to", path)
else:
    files.download(path)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>