In [1]:
%pylab inline
import theano
import theano.tensor as tt
import pymc3 as pm
import numpy.testing as npt

Populating the interactive namespace from numpy and matplotlib


  from ._conv import register_converters as _register_converters


In [2]:
from pymc3.blocking import DictToArrayBijection, ArrayOrdering


def build_joined(cost, args, ordering):
    """Re-express ``cost`` as a function of one flat input vector.

    Parameters
    ----------
    cost : theano expression
        Graph that depends on the variables in ``args``.
    args : iterable of theano variables
        Variables to substitute; each one's ``.name`` must match the ``var``
        field of an entry in ``ordering.vmap``.
    ordering : object with ``.vmap`` and ``.size``
        ``.vmap`` is iterated for entries exposing ``slc``, ``shp`` and
        ``var``; ``.size`` is the length of the flat vector.

    Returns
    -------
    tuple
        ``(flat_input, cloned_cost)`` where ``flat_input`` is the new
        ``'__args_joined'`` vector and ``cloned_cost`` is ``cost`` cloned with
        every variable in ``args`` replaced by its reshaped slice.
    """
    entries = ordering.vmap
    total_size = ordering.size
    float_dtype = theano.config.floatX

    flat_input = tt.vector('__args_joined')
    flat_input.tag.test_value = np.zeros(total_size, dtype=float_dtype)

    # One named, reshaped view into the flat vector per mapped variable.
    views_by_name = {}
    for entry in entries:
        view = flat_input[entry.slc].reshape(entry.shp)
        view.name = entry.var
        views_by_name[entry.var] = view

    substitutions = {}
    for var in args:
        substitutions[var] = views_by_name[var.name]
    return flat_input, theano.clone(cost, replace=substitutions)


def logp_func(cost, grad_vars, **kwargs):
    """Compile a function returning ``cost`` and its gradient.

    The variables in ``grad_vars`` are packed into a single flat vector (via
    ``ArrayOrdering`` and ``build_joined``); the compiled function takes that
    vector as its only input and returns ``[cost, gradient]``, the gradient
    being taken with respect to the flat vector.

    Parameters
    ----------
    cost : theano expression
        Expression to evaluate and differentiate.
    grad_vars : list of theano variables
        Variables that ``cost`` is differentiated with respect to.
    **kwargs
        Forwarded unchanged to ``theano.function``.
    """
    flat_input, flat_cost = build_joined(
        cost, grad_vars, ArrayOrdering(grad_vars))

    flat_grad = tt.grad(flat_cost, flat_input)
    flat_grad.name = '__grad'

    outputs = [flat_cost, flat_grad]
    return theano.function([flat_input], outputs, **kwargs)

In [3]:
# Edge case discovered in #2948
# Small model with one vector-valued and one scalar free variable; it supplies
# the log-probability factors compared in the cells that follow.
ndim = 3
with pm.Model() as m:
    # NOTE(review): the "correlation matrix" remark below looks carried over
    # from the original report; this model only defines `sigma` and `nu`.
    pm.Lognormal('sigma',
                 mu=np.zeros(ndim),
                 tau=np.ones(ndim),
                 shape=ndim)  # variance for the correlation matrix
    pm.HalfCauchy('nu', beta=10)
    # Fixed seed so a fresh Restart & Run All reproduces the same draws.
    tr = pm.sample(random_seed=42)

Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 4 jobs)
NUTS: [nu_log__, sigma_log__]
100%|██████████| 1000/1000 [00:04<00:00, 203.55it/s]
INFO (theano.gof.compilelock): Waiting for existing lock by process '13530' (I am process '13531')
INFO (theano.gof.compilelock): To manually release the lock, delete /home/laoj/.theano/compiledir_Linux-4.4--generic-x86_64-with-Ubuntu-16.04-xenial-x86_64-3.5.2-64/lock_dir
INFO (theano.gof.compilelock): Waiting for existing lock by process '13530' (I am process '13532')
INFO (theano.gof.compilelock): To manually release the lock, delete /home/laoj/.theano/compiledir_Linux-4.4--generic-x86_64-with-Ubuntu-16.04-xenial-x86_64-3.5.2-64/lock_dir
INFO (theano.gof.compilelock): Waiting for existing lock by process '13531' (I am process '13532')
INFO (theano.gof.compilelock): To manually release the lock, delete /home/laoj/.theano/compiledir_Linux-4.4--generic-x86_64-with-Ubuntu-16.04-xenial-x86_

In [4]:
# Log-probability terms of every basic random variable plus any model potentials.
factors = [var.logpt for var in m.basic_RVs] + m.potentials 
# Variant 1: reduce each factor with tt.sum, then combine the results with a
# single variadic tt.add call.
logpt_map = tt.add(*map(tt.sum, factors))
# Variant 2: reduce each factor with tt.sum, then apply tt.sum to the list of
# results. The two variants are built to be compared in the cells below.
logpt_sum = tt.sum([tt.sum(factor) for factor in factors])

In [15]:
# Profile the tt.add-based total through the model's `profile` helper and print
# the summary (the recorded output reports timings over 1000 calls).
m.profile(logpt_map).summary()

Function profiling
  Message: /home/laoj/Documents/Github/pymc3/pymc3/model.py:902
  Time in 1000 calls to Function.__call__: 1.322675e-02s
  Time in Function.fn.__call__: 1.751661e-03s (13.243%)
  Time in thunks: 5.264282e-04s (3.980%)
  Total compile time: 1.107237e-01s
    Number of Apply nodes: 3
    Theano Optimizer time: 5.706906e-02s
       Theano validate time: 4.184246e-04s
    Theano Linker time (includes C, CUDA code generation/compiling): 1.852036e-03s
       Import time 0.000000e+00s
       Node make_thunk time 1.621962e-03s
           Node Elemwise{Composite{(i0 + Switch(Cast{int8}(GE(exp(i1), i2)), (i3 - log1p(sqr((i4 * exp(i1))))), i5) + i1)}}[(0, 0)](__logp_sigma_log__, nu_log__, TensorConstant{0}, TensorConstant{-2.7541678283542828}, TensorConstant{0.1}, TensorConstant{-inf}) time 6.444454e-04s
           Node Elemwise{Composite{((i0 + (i1 * sqr(Composite{log(exp(i0))}(i2))) + i2) - Composite{log(exp(i0))}(i2))}}(TensorConstant{(1,) of -0..5332046727}, TensorConstant{

In [16]:
# Same profiling run for the tt.sum-based total, for side-by-side comparison
# with the tt.add-based profile.
m.profile(logpt_sum).summary()

Function profiling
  Message: /home/laoj/Documents/Github/pymc3/pymc3/model.py:902
  Time in 1000 calls to Function.__call__: 1.394749e-02s
  Time in Function.fn.__call__: 2.378464e-03s (17.053%)
  Time in thunks: 7.913113e-04s (5.674%)
  Total compile time: 1.033812e-01s
    Number of Apply nodes: 5
    Theano Optimizer time: 5.309653e-02s
       Theano validate time: 3.561974e-04s
    Theano Linker time (includes C, CUDA code generation/compiling): 2.921343e-03s
       Import time 0.000000e+00s
       Node make_thunk time 2.716064e-03s
           Node Elemwise{Composite{(Switch(Cast{int8}(GE(exp(i0), i1)), (i2 - log1p(sqr((i3 * exp(i0))))), i4) + i0)}}(nu_log__, TensorConstant{0}, TensorConstant{-2.7541678283542828}, TensorConstant{0.1}, TensorConstant{-inf}) time 1.189947e-03s
           Node Elemwise{Composite{((i0 + (i1 * sqr(Composite{log(exp(i0))}(i2))) + i2) - Composite{log(exp(i0))}(i2))}}(TensorConstant{(1,) of -0..5332046727}, TensorConstant{(1,) of -0.5}, sigma_log__) time 

In [5]:
# Compile value-only functions (no gradient) that take the model's basic RVs
# as inputs, one per way of building the total log-probability.
func_map = theano.function(m.basic_RVs, logpt_map)
func_sum = theano.function(m.basic_RVs, logpt_sum)

In [6]:
# Compile value-and-gradient functions over a single flat input vector using
# the local `logp_func` helper, again one per variant.
func_map2 = logp_func(logpt_map, m.basic_RVs)
func_sum2 = logp_func(logpt_sum, m.basic_RVs)

In [7]:
# Render the tt.add variant three ways: the raw expression, the compiled
# value-only function, and the compiled value-and-gradient function.
for graph, outfile in (
        (logpt_map, "map.png"),
        (func_map, "func_map.png"),
        (func_map2, "func_map2.png"),
):
    theano.printing.pydotprint(graph, outfile=outfile, var_with_name_simple=True)

The output file is available at map.png
The output file is available at func_map.png
The output file is available at func_map2.png


![unopt](map.png)
![opt](func_map.png)
![opt2](func_map2.png)

In [8]:
# Render the tt.sum variant three ways: the raw expression, the compiled
# value-only function, and the compiled value-and-gradient function.
for graph, outfile in (
        (logpt_sum, "sum.png"),
        (func_sum, "func_sum.png"),
        (func_sum2, "func_sum2.png"),
):
    theano.printing.pydotprint(graph, outfile=outfile, var_with_name_simple=True)

The output file is available at sum.png
The output file is available at func_sum.png
The output file is available at func_sum2.png


![unopt](sum.png)
![opt](func_sum.png)
![opt2](func_sum2.png)

In [9]:
# Rebuild the value-and-gradient functions with profiling enabled;
# `profile=True` is forwarded by `logp_func` to `theano.function`.
func_map2_ = logp_func(logpt_map, m.basic_RVs, profile=True)
func_sum2_ = logp_func(logpt_sum, m.basic_RVs, profile=True)

In [10]:
# NOTE(review): the function has not been called at this point (the recorded
# summary shows 0 calls), so only the compile-time figures carry information.
func_map2_.profile.summary()

Function profiling
  Message: <ipython-input-2-78031b7e3b98>:31
  Time in 0 calls to Function.__call__: 0.000000e+00s
  Total compile time: 2.107468e-01s
    Number of Apply nodes: 19
    Theano Optimizer time: 1.465194e-01s
       Theano validate time: 2.043962e-03s
    Theano Linker time (includes C, CUDA code generation/compiling): 1.086473e-02s
       Import time 0.000000e+00s
       Node make_thunk time 9.767771e-03s
           Node Elemwise{Composite{(Composite{Switch(LT(i0, i1), i1, i0)}(Composite{Switch(GE(i0, i1), i1, i0)}(i0, i1), i2) - Switch(LT(Composite{Switch(LT(i0, i1), i1, i0)}(Composite{Switch(GE(i0, i1), i1, i0)}(i3, i1), i2), Composite{Switch(LT(i0, i1), i1, i0)}(Composite{Switch(GE(i0, i1), i1, i0)}(i0, i1), i2)), Composite{Switch(LT(i0, i1), i1, i0)}(Composite{Switch(GE(i0, i1), i1, i0)}(i3, i1), i2), Composite{Switch(LT(i0, i1), i1, i0)}(Composite{Switch(GE(i0, i1), i1, i0)}(i0, i1), i2)))}}[(0, 1)](TensorConstant{4}, Shape_i{0}.0, TensorConstant{0}, TensorConstan

In [11]:
# NOTE(review): as with the tt.add variant, the recorded summary shows 0 calls,
# so this compares compile-time statistics only.
func_sum2_.profile.summary()

Function profiling
  Message: <ipython-input-2-78031b7e3b98>:31
  Time in 0 calls to Function.__call__: 0.000000e+00s
  Total compile time: 2.195935e-01s
    Number of Apply nodes: 21
    Theano Optimizer time: 1.533418e-01s
       Theano validate time: 2.004147e-03s
    Theano Linker time (includes C, CUDA code generation/compiling): 1.212978e-02s
       Import time 0.000000e+00s
       Node make_thunk time 1.125216e-02s
           Node Elemwise{Composite{Cast{int8}(GE(i0, i1))}}(Elemwise{exp,no_inplace}.0, TensorConstant{0}) time 2.016544e-03s
           Node Elemwise{Composite{(Composite{Switch(LT(i0, i1), i1, i0)}(Composite{Switch(GE(i0, i1), i1, i0)}(i0, i1), i2) - Switch(LT(Composite{Switch(LT(i0, i1), i1, i0)}(Composite{Switch(GE(i0, i1), i1, i0)}(i3, i1), i2), Composite{Switch(LT(i0, i1), i1, i0)}(Composite{Switch(GE(i0, i1), i1, i0)}(i0, i1), i2)), Composite{Switch(LT(i0, i1), i1, i0)}(Composite{Switch(GE(i0, i1), i1, i0)}(i3, i1), i2), Composite{Switch(LT(i0, i1), i1, i0)}(Co

In [12]:
# Build the same two totals with PyMC3's own ValueGradFunction.
# NOTE(review): presumably the library counterpart of the local `logp_func`
# helper defined above - confirm against the installed PyMC3 version.
func_map3 = pm.model.ValueGradFunction(logpt_map, m.basic_RVs)
func_sum3 = pm.model.ValueGradFunction(logpt_sum, m.basic_RVs)

In [13]:
# Render the graphs held by the ValueGradFunction objects.
# NOTE(review): `_theano_function` is a private attribute, so this may break
# across PyMC3 versions.
theano.printing.pydotprint(func_map3._theano_function, outfile="func_map3.png", var_with_name_simple=True)
theano.printing.pydotprint(func_sum3._theano_function, outfile="func_sum3.png", var_with_name_simple=True)

The output file is available at func_map3.png
The output file is available at func_sum3.png


![opt3](func_map3.png)
![opt3](func_sum3.png)

In [17]:
# Leftover interactive introspection, disabled so that Run All does not open a
# pager; uncomment to browse the source of `pm.model` in IPython.
# pm.model??