This notebook demonstrates how models can be constructed using pangolin's friendlier interface, rather than by directly using the IR (as demonstrated in `IR.ipynb`).

In [1]:
from pangolin import ir
from pangolin import interface as pi

In [2]:
# In pangolin, an RV is basically just a Distribution together with a list of parent RVs.
# For example, here's how you could create an RV that is a constant of 0 "with your bare hands"

constant_dist = ir.Constant(0)
constant_rv = ir.RV(constant_dist)  # no parents are passed for a constant

print(repr(constant_rv))

RV(Constant(0))


In [3]:
# That's tedious! So pangolin provides a friendlier interface.
# For example, you can create constants using pi.constant.
# (An InfixRV is just an RV but it also supports operator overloading, as we'll discuss below.)

constant_rv = pi.constant(0)

print(repr(constant_rv))

InfixRV(Constant(0))


In [4]:
# Here's a more complex example of creating a standard-normal RV.
# Here's how you'd do it with the IR, using your bare hands

constant_0_dist = ir.Constant(0)
constant_0_rv = ir.RV(constant_0_dist)
constant_1_dist = ir.Constant(1)
constant_1_rv = ir.RV(constant_1_dist)
normal_dist = ir.Normal()
# The parent RVs follow the distribution: here the constants 0 and 1
normal_rv = ir.RV(normal_dist, constant_0_rv, constant_1_rv)

print(repr(normal_rv))

RV(Normal(), RV(Constant(0)), RV(Constant(1)))


In [5]:
# That's unbelievably tedious.
# Fortunately, pangolin's friendly interface allows you to do the same thing in a much easier way.
# (As the repr shows, the plain numbers 0 and 1 end up wrapped as Constant RVs.)

normal_rv = pi.normal(0, 1)

print(repr(normal_rv))

InfixRV(Normal(), InfixRV(Constant(0)), InfixRV(Constant(1)))


In [6]:
# If you use repr(rv), you get the full explicit representation, as shown above.
# If you use str(rv), you get a friendlier-looking, math-like notation.

print(repr(normal_rv))
print(str(normal_rv))

InfixRV(Normal(), InfixRV(Constant(0)), InfixRV(Constant(1)))
normal(0, 1)


In [7]:
# Here's how you could create an RV that represents the sum of two other RVs using the low-level notation

x = ir.RV(ir.Constant(0))
y = ir.RV(ir.Constant(2))
z = ir.RV(ir.Add(), x, y)

print(repr(z))
print(z)  # print() uses str(z), i.e. the math-like notation

RV(Add(), RV(Constant(0)), RV(Constant(2)))
add(0, 2)


In [8]:
# And here's how you can do the same thing using the friendly interface.
# (The result has the same structure as before, but is built from InfixRVs.)

x = pi.constant(0)
y = pi.constant(2)
z = pi.add(x,y)

print(repr(z))
print(z)

InfixRV(Add(), InfixRV(Constant(0)), InfixRV(Constant(2)))
add(0, 2)


In [9]:
# But it's still annoying to write pi.add.
# Fortunately, InfixRVs have operator overloading. So you can instead just do this:

x = pi.constant(0)
y = pi.constant(2)
z = x + y

print(repr(z))
print(z)

InfixRV(Add(), InfixRV(Constant(0)), InfixRV(Constant(2)))
add(0, 2)


In [10]:
# In pangolin, you can create constants that are vectors or matrices, etc.
# Here a plain Python list gives a vector constant.

z = pi.constant([1,2,3])

print(repr(z))
print(z)

InfixRV(Constant([1,2,3]))
[1 2 3]


In [11]:
# Pangolin also allows you to work with vectors or matrices, and supports matrix multiplication and all that kind of stuff

x = pi.constant([[2,1],[0,3]])
y = pi.constant([3,-3])
z = x @ y  # the @ operator creates a Matmul op (see the repr below)

print(repr(z))
print(z)

InfixRV(Matmul(), InfixRV(Constant([[2,1],[0,3]])), InfixRV(Constant([3,-3])))
matmul([[2 1] [0 3]], [ 3 -3])


In [12]:
# How would you create a diagonal normal distribution?
# In pangolin, ir.Normal represents a scalar distribution ONLY.
# If you want a vectorized distribution, you need to "vmap" it.
# Here's how you'd do that in the raw IR

vectorized_normal = ir.VMap(ir.Normal(), (0, 0)) # (0,0) means vectorize loc and scale
loc = ir.RV(ir.Constant([0,1,2]))
scale = ir.RV(ir.Constant([3,4,5]))
z = ir.RV(vectorized_normal, loc, scale)

print(repr(z))
print(z)
ir.print_upstream(z)  # prints a table of z and the RVs it depends on, with their shapes

RV(VMap(Normal(), (0, 0)), RV(Constant([0,1,2])), RV(Constant([3,4,5])))
vmap(normal, (0, 0))([0 1 2], [3 4 5])
shape | statement
----- | ---------
(3,)  | a = [0 1 2]
(3,)  | b = [3 4 5]
(3,)  | c ~ vmap(normal, (0, 0))(a,b)


In [13]:
# And here's how you'd do that in a convenient way.
# (Note in the output that the VMap built here carries a third argument, 3 -- the length
# of the vectors -- which the hand-written IR version did not specify.)

loc = pi.constant([0,1,2])
scale = pi.constant([3,4,5])

z = pi.vmap(pi.normal)(loc, scale)

print(repr(z))
print(z)
ir.print_upstream(z)

InfixRV(VMap(Normal(), (0, 0), 3), InfixRV(Constant([0,1,2])), InfixRV(Constant([3,4,5])))
vmap(normal, (0, 0), 3)([0 1 2], [3 4 5])
shape | statement
----- | ---------
(3,)  | a = [0 1 2]
(3,)  | b = [3 4 5]
(3,)  | c ~ vmap(normal, (0, 0), 3)(a,b)


In [14]:
# The same thing is true for addition. ir.Add() only adds scalars.
# But you can vmap it to add vectors.
# Here's how you'd do that in a very explicit way

vectorized_add = ir.VMap(ir.Add(), (0, 0))  # (0, 0): vectorize both arguments
x = ir.RV(ir.Constant([0,1,2]))
y = ir.RV(ir.Constant([3,4,5]))
z = ir.RV(vectorized_add, x, y)

print(repr(z))
print(z)
ir.print_upstream(z)

RV(VMap(Add(), (0, 0)), RV(Constant([0,1,2])), RV(Constant([3,4,5])))
vmap(add, (0, 0))([0 1 2], [3 4 5])
shape | statement
----- | ---------
(3,)  | a = [0 1 2]
(3,)  | b = [3 4 5]
(3,)  | c = vmap(add, (0, 0))(a,b)


In [15]:
# As always, directly writing in the IR is very tedious.
# Here's how you could do that using the friendly interface

vectorized_add = pi.vmap(pi.add, (0, 0))  # (0, 0): vectorize both arguments
# Use pi.constant, like every other cell in this notebook, to build the vector constants
x = pi.constant([0,1,2])
y = pi.constant([3,4,5])
z = vectorized_add(x, y)

ir.print_upstream(z)

shape | statement
----- | ---------
(3,)  | a = [0 1 2]
(3,)  | b = [3 4 5]
(3,)  | c = vmap(add, (0, 0), 3)(a,b)


In [16]:
# Alternatively (and much simpler), if you simply do x+y when x and y are vectors,
# pangolin will do the broadcasting / vmapping for you.
# (The printed result matches the explicit pi.vmap version.)

x = pi.constant([0,1,2])
y = pi.constant([3,4,5])
z = x + y

ir.print_upstream(z)

shape | statement
----- | ---------
(3,)  | a = [0 1 2]
(3,)  | b = [3 4 5]
(3,)  | c = vmap(add, (0, 0), 3)(a,b)


In [17]:
# By default, Pangolin only supports "simple" broadcasting as in Stan or JAGS:
# 1. only for "all-scalar ops" (all inputs and output)
# 2. all inputs must either be scalar or exactly the same shape
#
# So, for example, you can't add a 2x2 matrix to a length-2 vector.

x = pi.constant([[2,1],[0,3]])
y = pi.constant([3,-3])
try:
    # Only the addition is expected to fail, so it is the only statement guarded.
    z = x + y
except ValueError as e:
    print("Got ValueError (as expected)", e)
else:
    # Fail loudly instead of passing silently if the demonstrated error never occurs.
    raise AssertionError("expected a ValueError when adding shapes (2, 2) and (2,)")

Got ValueError (as expected) Can't broadcast non-matching shapes (2,) and (2, 2)


In [18]:
# If you want, you can turn broadcasting off by:
# 1. setting the environment variable SCALAR_BROADCASTING to "off" before importing
# 2. setting pi.SCALAR_BROADCASTING[0] to "off" (after importing)
# 3. temporarily with a context manager

with pi.ScalarBroadcasting("off"):
    # Inside the context manager the module-level setting reads "off".
    assert pi.SCALAR_BROADCASTING == ["off"]
    x = pi.constant([0,1,2])
    y = pi.constant([3,4,5])
    try:
        # Only the addition is expected to fail, so it is the only statement guarded.
        z = x+y
    except ValueError as e:
        print("Got (expected) ValueError:", e)
    else:
        # Fail loudly instead of passing silently if the demonstrated error never occurs.
        raise AssertionError("expected a ValueError from x + y while broadcasting is off")

Got (expected) ValueError: Add op got parent shapes ((3,), (3,)) not all scalar.


In [19]:
# Alternatively, you can enable numpy-style broadcasting.
# The 2x2 matrix plus length-2 vector that was rejected under the default mode now works,
# and (as the output shows) is expressed as two nested vmaps of the scalar add.

with pi.ScalarBroadcasting("numpy"):
    x = pi.constant([[2,1],[0,3]])
    y = pi.constant([3,-3])
    z = x + y

pi.print_upstream(z)

shape  | statement
------ | ---------
(2, 2) | a = [[2 1] [0 3]]
(2,)   | b = [ 3 -3]
(2, 2) | c = vmap(vmap(add, (0, 0), 2), (0, None), 2)(a,b)


In [20]:
# Crucially, no matter what style of broadcasting you use, broadcasting is ENTIRELY
# the job of the interface. The IR does not know about broadcasting at all.
# It only sees vmapped Ops.