# Introduction

In [1]:
from collections import OrderedDict
import functools

import math
import torch
from torch.distributions import constraints

%env FUNSOR_TYPECHECK=1
import funsor
from funsor.terms import Funsor, Variable, Number, Lambda, Slice
from funsor.tensor import Tensor
from funsor.domains import Array, Bint, Real, Reals
from funsor.factory import Bound, Fresh, Has, Value, make_funsor, to_funsor
import funsor.ops as ops
from funsor.cnf import Contraction
from funsor.testing import random_tensor
from funsor.interpretations import reflect, memoize

# Run all funsor computations in this notebook on the PyTorch backend.
funsor.set_backend("torch")
# Floating-point torch tensors created below default to float32.
torch.set_default_dtype(torch.float32)

env: FUNSOR_TYPECHECK=1


# Informal Overview

## Named Tensors

In [15]:
# 3x3 matrix whose two positional dims are given the names
# "height" (rows) and "width" (columns).
A = Tensor(
    torch.tensor([[3., 1., 4.],
                  [1., 5., 9.],
                  [2., 6., 5.]])
)["height", "width"]

In [16]:
# Substituting an integer for a named input selects that slice (row 0 here);
# only the "width" input remains.
A(height=0)

Tensor(tensor([3., 1., 4.]), OrderedDict([('width', Bint[3, ])]), 'real')

In [17]:
# Select column 2; only the "height" input remains.
A(width=2)

Tensor(tensor([4., 9., 5.]), OrderedDict([('height', Bint[3, ])]), 'real')

## Named tensor operations

### Elementwise operations and broadcasting

In [5]:
# Elementwise op: every entry is transformed, the named inputs are unchanged.
A.sigmoid()  # 1 / (1 + (-A).exp())

Tensor(tensor([[0.9526, 0.7311, 0.9820],
        [0.7311, 0.9933, 0.9999],
        [0.8808, 0.9975, 0.9933]]), OrderedDict([('height', Bint[3, ]), ('width', Bint[3, ])]), 'real')

In [6]:
# Vector indexed by "height".
x = Tensor(torch.tensor([2., 7., 1.]))["height"]

# Vector indexed by "width".
y = Tensor(torch.tensor([1., 4., 1.]))["width"]

In [7]:
# Broadcasting is by name: x["height"] is added to every entry of the matching row.
A + x

Tensor(tensor([[ 5.,  3.,  6.],
        [ 8., 12., 16.],
        [ 3.,  7.,  6.]]), OrderedDict([('height', Bint[3, ]), ('width', Bint[3, ])]), 'real')

In [8]:
# y["width"] is added to every entry of the matching column.
A + y

Tensor(tensor([[ 4.,  5.,  5.],
        [ 2.,  9., 10.],
        [ 3., 10.,  6.]]), OrderedDict([('height', Bint[3, ]), ('width', Bint[3, ])]), 'real')

### Reductions

In [28]:
# Sum out "height" (column sums); only "width" remains.
A.reduce(ops.add, reduced_vars="height")

Tensor(tensor([ 6., 12., 18.]), OrderedDict([('width', Bint[3, ])]), 'real')

In [32]:
# A2.reduce(ops.add, "hw")

In [10]:
# Sum out "width" (row sums); only "height" remains.
A.reduce(ops.add, reduced_vars="width")

Tensor(tensor([ 8., 15., 13.]), OrderedDict([('height', Bint[3, ])]), 'real')

In [11]:
# Sum out both names at once; the result has no inputs left (a scalar).
A.reduce(ops.add, reduced_vars={"height", "width"})

Tensor(36.0, OrderedDict(), 'real')

In [12]:
# Contraction = broadcasted multiply followed by a sum over the shared name:
# here a matrix-vector product over "width".
(A * y).reduce(ops.add, reduced_vars="width")

Tensor(tensor([11., 30., 31.]), OrderedDict([('height', Bint[3, ])]), 'real')

In [13]:
# Four kinds of product written with the same multiply-then-reduce pattern.
# Only the value of the last expression is displayed by the notebook.
(x * x).reduce(ops.add, reduced_vars="height")  # inner product
x * y  # outer product
(A * y).reduce(ops.add, reduced_vars="width")  # matrix-vector product
# the vector-matrix product is the same expression, reducing over the other name
(A * x).reduce(ops.add, reduced_vars="height")  # vector-matrix product

Tensor(tensor([15., 43., 76.]), OrderedDict([('width', Bint[3, ])]), 'real')

In [14]:
# Second matrix, indexed by "width" and a new name "width2". It shares "width"
# with A, which is the name contracted in the matrix-matrix product below.
B = Tensor(
    torch.tensor([[3, 2, 5],
                  [5, 4, 0],
                  [8, 3, 6]]),
)["width", "width2"]

In [15]:
# Multiply on the shared name "width" and sum it out; "height" and "width2" remain.
(A * B).reduce(ops.add, reduced_vars="width")  # matrix-matrix product

Tensor(tensor([[ 46.,  22.,  39.],
        [100.,  49.,  59.],
        [ 76.,  43.,  40.]]), OrderedDict([('height', Bint[3, ]), ('width2', Bint[3, ])]), 'real')

### Renaming and reshaping

In [18]:
# Substituting a string renames the input: "height" becomes "height2", data unchanged.
A(height="height2")

Tensor(tensor([[3., 1., 4.],
        [1., 5., 9.],
        [2., 6., 5.]]), OrderedDict([('height2', Bint[3, ]), ('width', Bint[3, ])]), 'real')

In [20]:
# A string substitution is (most of the time) shorthand for substituting a
# Variable with that name; here the Variable and its domain are spelled out.
A(height=Variable("height2", Bint[3]))

Tensor(tensor([[3., 1., 4.],
        [1., 5., 9.],
        [2., 6., 5.]]), OrderedDict([('height2', Bint[3, ]), ('width', Bint[3, ])]), 'real')

# Examples

## Building blocks

### Feedforward neural networks

In [17]:
# Three sigmoid layers written out by hand. Each layer multiplies the weights
# with the previous activations, sums out the shared (incoming) name, adds a
# bias indexed by the outgoing name, and applies a sigmoid.
# All parameters and the input are random placeholders.

# Input activations, indexed by "input_layer".
input_dim = 100
X0 = random_tensor(
    OrderedDict([("input_layer", Bint[input_dim])])
)

# Layer 1: "input_layer" (100) -> "hidden_layer_1" (32).
hidden_1_dim = 32
W1 = random_tensor(
    OrderedDict([
        ("input_layer", Bint[input_dim]),
        ("hidden_layer_1", Bint[hidden_1_dim])
    ])
)
b1 = random_tensor(
    OrderedDict([("hidden_layer_1", Bint[hidden_1_dim])])
)
X1 = ((W1 * X0).reduce(ops.add, "input_layer") + b1).sigmoid()

# Layer 2: "hidden_layer_1" (32) -> "hidden_layer_2" (16).
hidden_2_dim = 16
W2 = random_tensor(
    OrderedDict([
        ("hidden_layer_1", Bint[hidden_1_dim]),
        ("hidden_layer_2", Bint[hidden_2_dim])
    ])
)
b2 = random_tensor(
    OrderedDict([("hidden_layer_2", Bint[hidden_2_dim])])
)
X2 = ((W2 * X1).reduce(ops.add, "hidden_layer_1") + b2).sigmoid()

# Layer 3: "hidden_layer_2" (16) -> "hidden_layer_3" (8).
hidden_3_dim = 8
W3 = random_tensor(
    OrderedDict([
        ("hidden_layer_2", Bint[hidden_2_dim]),
        ("hidden_layer_3", Bint[hidden_3_dim])
    ])
)
b3 = random_tensor(
    OrderedDict([("hidden_layer_3", Bint[hidden_3_dim])])
)
X3 = ((W3 * X2).reduce(ops.add, "hidden_layer_2") + b3).sigmoid()

In [9]:
@make_funsor
def FullConnLayer(
    x: Has[{"layer"}],
    W: Has[{"layer"}],
    b: Funsor,
    layer: Bound
) -> Fresh[lambda x: x]:
    """Fully connected sigmoid layer.

    x: input activations; must have the ``layer`` input.
    W: weights; must have the ``layer`` input.
    b: bias added after the contraction.
    layer: the variable summed out of ``W * x`` (bound by this term).

    Returns ``sigmoid(sum_layer(W * x) + b)``.
    """
    contracted = (W * x).reduce(ops.add, layer)
    pre_activation = contracted + b
    return pre_activation.sigmoid()

In [10]:
# Same first layer as in the hand-written network, now expressed with
# FullConnLayer. The incoming name must be "layer" because that is the name
# FullConnLayer's signature requires on x and W.
input_dim = 100
X0 = random_tensor(
    OrderedDict([("layer", Bint[input_dim])])
)
hidden_1_dim = 32
W1 = random_tensor(
    OrderedDict([
        ("layer", Bint[input_dim]),
        ("out_layer", Bint[hidden_1_dim])
    ])
)
b1 = random_tensor(
    OrderedDict([("out_layer", Bint[hidden_1_dim])])
)

# "layer" is summed out; the result is indexed by "out_layer" only.
X1 = FullConnLayer(X0, W1, b1, "layer")
X1

Tensor(tensor([5.1250e-02, 3.9130e-01, 2.1275e-02, 9.1966e-07, 1.1757e-01, 9.2585e-09,
        2.8901e-01, 2.3617e-02, 9.9739e-01, 9.9966e-01, 1.2796e-05, 7.0259e-01,
        8.9353e-05, 1.0000e+00, 6.9404e-01, 7.0413e-05, 4.4894e-01, 1.4566e-02,
        9.8803e-01, 9.7485e-01, 9.9999e-01, 9.8401e-01, 5.6814e-02, 9.9978e-01,
        8.4199e-06, 7.8148e-01, 1.0000e+00, 9.5772e-01, 3.3361e-01, 4.8448e-05,
        2.9235e-07, 6.8713e-02]), OrderedDict([('out_layer', Bint[32, ])]), 'real')

### Recurrent neural networks

In [None]:
# NOTE(review): draft cell with no recorded execution. Issues visible in the code:
#   * `h` is used in the body but is neither a parameter nor defined in any
#     visible cell; presumably the previous hidden state was meant to be an
#     argument — confirm the intended signature.
#   * the reductions use the string literals "hidden" / "input" instead of the
#     bound parameters `hidden` / `input` (FullConnLayer reduces over its
#     bound variable).
#   * the parameter name `input` shadows the Python builtin.
@make_funsor
def RecurrentLayer(
    x: Funsor,
    Wh: Funsor,
    Wi: Funsor,
    b: Funsor,
    hidden: Bound,
    input: Bound
) -> Fresh[lambda x: x]:
    # Sum of a hidden-state term and an input term plus bias, squashed by a sigmoid.
    output = ((Wh * h).reduce(ops.add, "hidden") + (Wi * x).reduce(ops.add, "input") + b).sigmoid()
    # Substitute the name "new_hidden" for "hidden" in the result.
    return output(hidden="new_hidden")

### Attention

In [67]:
@make_funsor
def Softmax(
    x: Funsor,
    ax: Bound,
    ax2: Fresh[lambda ax: ax]
) -> Fresh[lambda x: x]:
    """Softmax of ``x`` along ``ax``; the result is indexed by the fresh name ``ax2``.

    Computed as ``exp(x - logsumexp(x))`` after renaming ``ax`` to ``ax2``.
    """
    renamed = x(**{ax.name: ax2.name})
    log_normalizer = renamed.reduce(ops.logaddexp, ax2)
    log_probs = renamed - log_normalizer
    return log_probs.exp()

In [68]:
# Softmax over "key"; the normalized result is indexed by the new name "key2".
q = random_tensor(OrderedDict([("key", Bint[10])]))
Softmax(q, "key", "key2")

Tensor(tensor([0.0568, 0.1457, 0.0798, 0.0350, 0.0371, 0.2761, 0.2280, 0.0646, 0.0557,
        0.0213]), OrderedDict([('key2', Bint[10, ])]), 'real')

In [69]:
@make_funsor
def Attention(
    Q: Has[{"key"}],
    K: Has[{"key", "seq"}],
    V: Has[{"seq2"}],
    M: Has[{"seq"}],
    key: Bound,
    seq: Bound,
    seq2: Bound
) -> Fresh[lambda Q: Q]:
    """Scaled dot-product attention with an additive mask.

    Q, K: queries and keys, contracted over ``key``.
    V: values, indexed by ``seq2``.
    M: mask added to the scaled scores, indexed by ``seq``.

    The scores are divided by ``sqrt(size of key)``, soft-maxed along ``seq``
    (renamed to ``seq2``), and used to take a weighted sum of ``V`` over ``seq2``.
    """
    dot = (Q * K).reduce(ops.add, key)
    scores = dot / math.sqrt(key.output.size) + M
    weights = Softmax(scores, seq, seq2)
    return (weights * V).reduce(ops.add, seq2)

In [70]:
# Random query, keys, values and mask for a 3-position sequence.
q = random_tensor(OrderedDict([("key", Bint[10])]))
k = random_tensor(OrderedDict([("key", Bint[10]), ("seq", Bint[3])]))
# Values are indexed by "seq2", the name Softmax produces inside Attention.
v = random_tensor(OrderedDict([("seq2", Bint[3]), ("val", Bint[5])]))
m = random_tensor(OrderedDict([("seq", Bint[3])]))
# "key", "seq" and "seq2" are all summed out; only "val" remains.
Attention(q, k, v, m, "key", "seq", "seq2")

Tensor(tensor([ 0.2047, -0.2971, -0.8347, -0.6226, -0.0110]), OrderedDict([('val', Bint[5, ])]), 'real')

### Convolution

In [22]:
@make_funsor
def Unroll(
    x: Has[{"seq"}],
    seq: Bound,
    k: Value[int],
    kernel: Fresh[lambda k: Bint[k]],
    seq2: Fresh[lambda seq, k: Bint[seq.size - k + 1]]
) -> Fresh[lambda x: x]:
    """Expand ``seq`` into overlapping windows of width ``k``.

    The ``seq`` input of ``x`` is replaced by the index ``seq2 + kernel``, so
    the result is indexed by a window start ``seq2`` (``size - k + 1``
    positions) and an offset ``kernel`` (``k`` positions) within the window.
    """
    window_index = seq2 + kernel
    return x(**{seq.name: window_index})

In [30]:
@make_funsor
def Unroll2(
    x: Funsor,
    seq: Bound,
    kernel: Funsor,
    seq2: Fresh[lambda seq, kernel: Bint[seq.size - kernel.size + 1]]
) -> Fresh[lambda x: x]:
    """Variant of Unroll that takes the window offset ``kernel`` as a funsor.

    The window width is read from ``kernel`` instead of a separate integer;
    ``seq`` in ``x`` is replaced by the index ``seq2 + kernel``.
    """
    window_index = seq2 + kernel
    return x(**{seq.name: window_index})

In [33]:
@make_funsor
def Conv1d(
    X: Has[{"chans", "seq"}],
    W: Has[{"chans", "kernel"}],
    b: Funsor,
    chans: Bound,
    k: Value[int],
    kernel: Bound,
    seq: Bound,
    seq2: Fresh[lambda seq, k: Bint[seq.size - k + 1]]
) -> Fresh[lambda X: X]:
    """1-D convolution of ``X`` with filter ``W`` plus bias ``b``.

    ``X`` is unrolled into width-``k`` windows along ``seq``, multiplied by
    ``W``, and summed over channels and kernel offsets. The output is indexed
    by ``seq2`` (``size - k + 1`` positions).
    """
    windows = Unroll(X, seq, k, kernel, seq2)
    weighted = W * windows
    summed = weighted.reduce(ops.add, frozenset({chans, kernel}))
    return summed + b

In [34]:
# Random 3-channel signal of length 10, a 3x3 (channels x kernel) filter and a scalar bias.
x = random_tensor(OrderedDict([("chans", Bint[3]), ("seq", Bint[10])]))
# NOTE(review): this module-level `kernel` Variable is not referenced by the
# visible cells that follow (Conv1d is called with the string "kernel") — confirm it is needed.
kernel = Variable("kernel", Bint[3])
w = random_tensor(OrderedDict([("chans", Bint[3]), ("kernel", Bint[3])]))
b = random_tensor(OrderedDict([]))

In [35]:
# Kernel width 3 on a length-10 sequence gives 10 - 3 + 1 = 8 output positions ("seq2").
Conv1d(x, w, b, "chans", 3, "kernel", "seq", "seq2")

Tensor(tensor([1.1559, 2.6513, 1.8861, 1.7262, 2.6523, 3.2156, 6.6995, 0.6863]), OrderedDict([('seq2', Bint[8, ])]), 'real')

In [36]:
@make_funsor
def Conv2d(
    X: Has[{"chans", "height", "width"}],
    W: Has[{"chans", "kh", "kw"}],
    b: Funsor,
    chans: Bound,
    kh_size: Value[int],
    kh: Bound,
    height: Bound,
    height2: Fresh[lambda height, kh_size: Bint[height.size - kh_size + 1]],
    kw_size: Value[int],
    kw: Bound,
    width: Bound,
    width2: Fresh[lambda width, kw_size: Bint[width.size - kw_size + 1]]
) -> Fresh[lambda X: X]:
    """2-D convolution of ``X`` with filter ``W`` plus bias ``b``.

    ``X`` is unrolled into windows along ``width`` and then along ``height``,
    multiplied by ``W``, and summed over channels and both kernel offsets. The
    output is indexed by ``height2`` and ``width2``.
    """
    windows_w = Unroll(X, width, kw_size, kw, width2)
    windows_hw = Unroll(windows_w, height, kh_size, kh, height2)
    weighted = W * windows_hw
    summed = weighted.reduce(ops.add, frozenset({chans, kh, kw}))
    return summed + b

In [37]:
# Random 3-channel 10x8 image, a 3x4 filter per channel and a scalar bias.
x = random_tensor(OrderedDict([("chans", Bint[3]), ("height", Bint[10]), ("width", Bint[8])]))
w = random_tensor(OrderedDict([("chans", Bint[3]), ("kh", Bint[3]), ("kw", Bint[4])]))
b = random_tensor(OrderedDict([]))

# Output is (10 - 3 + 1) x (8 - 4 + 1) = 8 x 5, indexed by "height2" and "width2".
Conv2d(x, w, b, "chans", 3, "kh", "height", "height2", 4, "kw", "width", "width2")

Tensor(tensor([[  7.5981,   2.8738,  -4.6288,  -2.5880,   1.3028],
        [-12.0374,   0.9402,   2.3095,   5.8484,   2.0857],
        [ -4.9819,   4.1024,  -1.9617,  -6.0453,  -1.8697],
        [ -6.6176,  -1.8181,   1.5590,   3.1369,  -4.0031],
        [-11.5290,   2.9589,   6.5625,  -1.1461,  -4.2302],
        [ -0.7835,   3.8283,  -6.6212,   2.4097,  11.8789],
        [  4.7318,  -7.4386,  -0.3856,   2.9603,  15.1632],
        [  2.6932,   2.0555,  10.2560, -12.5641, -13.1350]]), OrderedDict([('height2', Bint[8, ]), ('width2', Bint[5, ])]), 'real')

### Max pooling

In [38]:
@make_funsor
def Pool(
    x: Has[{"seq"}],
    seq: Bound,
    k: Value[int],
    kernel: Fresh[lambda k: Bint[k]],
    seq2: Fresh[lambda seq, k: Bint[seq.size // k]],  # number of blocks
) -> Fresh[lambda x: x]:  # same output domain as x
    """Split ``seq`` into non-overlapping blocks of width ``k``.

    The ``seq`` input of ``x`` is replaced by the index ``seq2 * k + kernel``,
    so the result is indexed by a block number ``seq2`` and an offset
    ``kernel`` within the block. The size of ``seq`` must be a multiple of
    ``k`` (asserted).
    """
    assert seq.output.size % k == 0
    stride = Number(k, k+1)
    block_index = seq2 * stride + kernel
    return x(**{seq.name: block_index})

In [39]:
# A length-10 sequence split into 5 blocks ("seq2") of 2 entries ("kernel").
X = random_tensor(OrderedDict([("seq", Bint[10])]))
Y = Pool(X, "seq", 2, "kernel", "seq2")
Y

Tensor(tensor([[-1.8575,  1.0439],
        [-0.1903,  0.5067],
        [ 1.2351,  0.6842],
        [-0.2534,  0.5630],
        [-0.4049,  0.2481]]), OrderedDict([('seq2', Bint[5, ]), ('kernel', Bint[2, ])]), 'real')

In [40]:
@make_funsor
def MaxPool1d(
    X: Has[{"seq"}],
    seq: Bound,
    k: Value[int],
    kernel: Fresh[lambda k: Bint[k]],
    seq2: Fresh[lambda seq, k: Bint[seq.size // k]]
) -> Fresh[lambda X: X]:
    """1-D max pooling: split ``seq`` into width-``k`` blocks and take each block's maximum.

    The result is indexed by the block number ``seq2``.
    """
    blocks = Pool(X, seq, k, kernel, seq2)
    return blocks.reduce(ops.max, kernel)

In [41]:
# Max over each of the 5 width-2 blocks of a fresh random length-10 sequence.
X = random_tensor(OrderedDict([("seq", Bint[10])]))
Y = MaxPool1d(X, "seq", 2, "kernel", "seq2")
Y

Tensor(tensor([ 0.9910,  0.4459,  0.9832,  0.4149, -0.0399]), OrderedDict([('seq2', Bint[5, ])]), 'real')

In [42]:
@make_funsor
def MaxPool2d(
    X: Has[{"height", "width"}],
    height: Bound,
    kh_size: Value[int],
    kh: Fresh[lambda kh_size: Bint[kh_size]],
    height2: Fresh[lambda height, kh_size: Bint[height.size // kh_size]],
    width: Bound,
    kw_size: Value[int],
    kw: Fresh[lambda kw_size: Bint[kw_size]],
    width2: Fresh[lambda width, kw_size: Bint[width.size // kw_size]],
) -> Fresh[lambda X: X]:
    """2-D max pooling over ``kh_size`` x ``kw_size`` non-overlapping blocks.

    ``X`` is blocked along ``height`` and then along ``width`` with Pool, and
    the maximum is taken over both within-block offsets. The result is indexed
    by ``height2`` and ``width2``.
    """
    blocked_h = Pool(X, height, kh_size, kh, height2)
    blocked_hw = Pool(blocked_h, width, kw_size, kw, width2)
    return blocked_hw.reduce(ops.max, frozenset({kh, kw}))

In [43]:
# 9 (width) x 4 (height) input pooled with 2 (height) x 3 (width) blocks,
# giving a 3 ("width2") x 2 ("height2") result.
X = random_tensor(OrderedDict([("width", Bint[9]), ("height", Bint[4])]))
Y = MaxPool2d(X, "height", 2, "kh", "height2", "width", 3, "kw", "width2")
Y

Tensor(tensor([[1.3635, 1.2425],
        [1.7722, 1.2450],
        [1.3647, 0.3712]]), OrderedDict([('width2', Bint[3, ]), ('height2', Bint[2, ])]), 'real')

In [44]:
@make_funsor
def Pool2(
    x: Funsor,
    seq: Bound,
    kernel: Funsor,
    seq2: Fresh[lambda seq, kernel: Bint[seq.size // kernel.size]],  # number of blocks
) -> Fresh[lambda x: x]:  # same output domain as x
    """Variant of Pool that takes the within-block offset ``kernel`` as a funsor.

    The block width is read from ``kernel``'s output size; ``seq`` in ``x`` is
    replaced by the index ``seq2 * width + kernel``. Unlike Pool, divisibility
    of ``seq`` by the block width is not asserted.
    """
    width = kernel.output.size
    stride = Number(width, width+1)
    block_index = seq2 * stride + kernel
    return x(**{seq.name: block_index})

### Normalization layers

In [None]:
# Scratch comparison of reduction spellings (cell has no recorded execution).
# NOTE(review): it is unclear which `x` these lines are meant to run on; the
# name "i" does not appear on any `x` defined in the visible cells — confirm.

# PyTorch
x.sum(dim=0)
x.mean(dim=0)

# Funsor
x.reduce(ops.add, "i")
x.reduce(ops.mean, "i") # Wrong
x.mean("i")

# First attempt at a mean along a named input: turn `ax` into a positional
# dim with Lambda and apply ops.mean along dim 0.
# NOTE(review): `Mean` is defined again in the next cell (sum / size); that
# later definition replaces this one when the notebook is run top to bottom.
@make_funsor
def Mean(
    X: Funsor,
    ax: Bound
) -> Fresh[lambda X: X]:
    return ops.mean(funsor.terms.Lambda(X, ax), 0)

In [16]:
@make_funsor
def Mean(
    X: Has[{"ax"}],
    ax: Bound
) -> Fresh[lambda X: X]:
    """Arithmetic mean of ``X`` along ``ax``: the sum over ``ax`` divided by its size."""
    total = X.reduce(ops.add, ax)
    return total / ax.output.size

@make_funsor
def Mean2(
    X: Has[{"ax", "ax2"}],
    ax: Bound,
    ax2: Bound
) -> Fresh[lambda X: X]:
    """Arithmetic mean of ``X`` jointly over ``ax`` and ``ax2``."""
    total = X.reduce(ops.add, frozenset({ax, ax2}))
    count = ax.output.size * ax2.output.size
    return total / count

@make_funsor
def Variance(
    X: Has[{"ax"}],
    ax: Bound
) -> Fresh[lambda X: X]:
    """Population variance of ``X`` along ``ax``: the mean squared deviation from the mean."""
    deviation = X - Mean(X, ax)
    return Mean(deviation**2, ax)


@make_funsor
def Variance2(
    X: Has[{"ax", "ax2"}],
    ax: Bound,
    ax2: Bound
) -> Fresh[lambda X: X]:
    """Population variance of ``X`` jointly over ``ax`` and ``ax2``."""
    deviation = X - Mean2(X, ax, ax2)
    return Mean2(deviation**2, ax, ax2)

@make_funsor
def Standardize(
    X: Has[{"ax"}],
    ax: Bound,
    new_ax: Fresh[lambda ax: ax]
) -> Fresh[lambda X: X]:
    """Shift and scale ``X`` to zero mean and unit variance along ``ax``.

    The result is indexed by the fresh name ``new_ax`` in place of ``ax``.
    Machine epsilon for ``X.data``'s dtype is added to the variance before the
    square root to avoid dividing by zero.
    """
    renamed = X(**{ax.name: new_ax})
    centered = renamed - Mean(X, ax)
    variance = Variance(X, ax)
    eps = ops.finfo(X.data).eps
    return centered / (variance + eps).sqrt()

@make_funsor
def Standardize2(
    X: Has[{"ax", "ax2"}],
    ax: Bound,
    ax2: Bound,
    new_ax: Fresh[lambda ax: ax],
    new_ax2: Fresh[lambda ax2: ax2]
) -> Fresh[lambda X: X]:
    """Shift and scale ``X`` to zero mean and unit variance jointly over ``ax`` and ``ax2``.

    The result is indexed by the fresh names ``new_ax`` / ``new_ax2`` in place
    of ``ax`` / ``ax2``. Machine epsilon for ``X.data``'s dtype is added to the
    variance before the square root to avoid dividing by zero.
    """
    renamed = X(**{ax.name: new_ax, ax2.name: new_ax2})
    centered = renamed - Mean2(X, ax, ax2)
    variance = Variance2(X, ax, ax2)
    eps = ops.finfo(X.data).eps
    return centered / (variance + eps).sqrt()

In [26]:
@make_funsor
def BatchNorm(
    X: Has[{"batch", "layer"}],
    gamma: Funsor,
    beta: Funsor,
    batch: Bound,
    layer: Bound,
    batch2: Fresh[lambda batch: batch],
    layer2: Fresh[lambda layer: layer]
) -> Fresh[lambda X: X]:
    """Batch normalization: standardize jointly over ``batch`` and ``layer``,
    then scale by ``gamma`` and shift by ``beta``.

    The result is indexed by ``batch2`` / ``layer2`` in place of ``batch`` / ``layer``.
    """
    standardized = Standardize2(X, batch, layer, batch2, layer2)
    return standardized * gamma + beta

@make_funsor
def InstanceNorm(
    X: Has[{"layer"}],
    gamma: Funsor,
    beta: Funsor,
    layer: Bound,
    layer2: Fresh[lambda layer: layer]
) -> Fresh[lambda X: X]:
    """Instance normalization: standardize over ``layer`` only, then scale by
    ``gamma`` and shift by ``beta``.

    The result is indexed by ``layer2`` in place of ``layer``.
    """
    standardized = Standardize(X, layer, layer2)
    return standardized * gamma + beta

# Same computation as BatchNorm, but standardizing over "chans" and "layer".
@make_funsor
def LayerNorm(
    X: Has[{"chans", "layer"}],
    gamma: Funsor,
    beta: Funsor,
    chans: Bound,
    layer: Bound,
    chans2: Fresh[lambda chans: chans],
    layer2: Fresh[lambda layer: layer]
) -> Fresh[lambda X: X]:
    """Layer normalization: standardize jointly over ``chans`` and ``layer``,
    then scale by ``gamma`` and shift by ``beta``.

    The result is indexed by ``chans2`` / ``layer2`` in place of ``chans`` / ``layer``.
    """
    standardized = Standardize2(X, chans, layer, chans2, layer2)
    return standardized * gamma + beta

In [27]:
# Random activations with batch, channel and layer names; the scale (g) and
# shift (b) parameters are per-channel.
x = random_tensor(OrderedDict([("batch", Bint[4]), ("chans", Bint[3]), ("layer", Bint[5])]))
g = random_tensor(OrderedDict([("chans", Bint[3])]))
b = random_tensor(OrderedDict([("chans", Bint[3])]))

# Statistics are taken over "batch" and "layer"; "chans" passes through untouched.
BatchNorm(x, g, b, "batch", "layer", "batch2", "layer2")

Tensor(tensor([[[ 0.7576, -1.2169, -0.7666, -1.5257, -0.3424],
         [ 0.9459,  1.8852,  2.5786,  0.3519,  0.5665],
         [-0.4537, -0.0716,  0.8138,  0.2152,  0.7517]],

        [[-0.4904,  1.3478, -1.4733, -0.1942, -0.4029],
         [ 1.7668,  0.4327,  1.5611,  1.6400,  0.1874],
         [ 0.1398, -0.2263,  1.2294, -0.0144,  0.0602]],

        [[ 0.5309, -0.6889, -0.2520,  0.0214, -2.1402],
         [ 1.0447, -1.0365,  1.2881,  1.8869, -0.1701],
         [ 1.0926, -0.0163,  1.3467,  1.1819, -0.6966]],

        [[-0.9197, -0.9453, -1.2733, -1.6563,  0.0826],
         [ 0.4735, -0.4679, -1.1864, -1.6190,  1.5410],
         [ 0.7206, -0.1800, -0.3657,  0.1576,  0.2392]]]), OrderedDict([('batch2', Bint[4, ]), ('chans', Bint[3, ]), ('layer2', Bint[5, ])]), 'real')

In [28]:
# Statistics are taken over "layer" only; "batch" and "chans" pass through.
InstanceNorm(x, g, b, "layer", "layer2")

Tensor(tensor([[[ 0.8866, -1.2136, -0.7346, -1.5421, -0.2834],
         [ 0.2589,  1.5063,  2.4273, -0.5299, -0.2449],
         [-0.5643, -0.0977,  0.9832,  0.2524,  0.9074]],

        [[-0.8083,  0.9048, -1.7243, -0.5323, -0.7268],
         [ 1.7700, -0.4627,  1.4257,  1.5577, -0.8731],
         [ 0.1829, -0.2404,  1.4430,  0.0046,  0.0909]],

        [[ 0.3900, -0.7483, -0.3406, -0.0854, -2.1027],
         [ 1.1500, -1.0459,  1.4068,  2.0386, -0.1318],
         [ 0.6735, -0.1453,  0.8610,  0.7394, -0.6475]],

        [[-0.5441, -0.5816, -1.0625, -1.6239,  0.9253],
         [ 1.3921,  0.4723, -0.2296, -0.6522,  2.4350],
         [ 1.2515, -0.1676, -0.4602,  0.3643,  0.4930]]]), OrderedDict([('batch', Bint[4, ]), ('chans', Bint[3, ]), ('layer2', Bint[5, ])]), 'real')

## Transformer

## LeNet

In [79]:
@make_funsor
def Relu(
    X: Funsor
) -> Fresh[lambda X: X]:
    """Rectified linear unit: the elementwise maximum of ``X`` and 0."""
    zero = Number(0.0)
    return ops.max(X, zero)

In [80]:
# LeNet-style forward pass on random data: conv -> ReLU -> max-pool ->
# dense -> dense -> softmax. All parameters are random placeholders.

# Convolution filter: 3 input channels, 3x4 kernel, 3 output channels ("chans2").
W1 = random_tensor(
    OrderedDict([
        ("chans", Bint[3]),
        ("kh", Bint[3]),
        ("kw", Bint[4]),
        ("chans2", Bint[3])
    ]),
)
b1 = random_tensor(OrderedDict([("chans2", Bint[3])]))
# Dense layer from the pooled feature map (4 x 4 x 3) to 3 hidden units.
W3 = random_tensor(
    OrderedDict([
        ("hidden", Bint[3]),
        ("height3", Bint[4]),
        ("width3", Bint[4]),
        ("chans2", Bint[3])
    ]),
)
b3 = random_tensor(OrderedDict([("hidden", Bint[3])]))
# Output layer from 3 hidden units to 5 classes.
W4 = random_tensor(
    OrderedDict([
        ("hidden", Bint[3]),
        ("classes", Bint[5]),
    ]),
)
b4 = random_tensor(OrderedDict([("classes", Bint[5])]))

# Batch of 4 random 3-channel 14x15 images.
X0 = random_tensor(
    OrderedDict([
        ("batch", Bint[4]),
        ("chans", Bint[3]),
        ("height", Bint[14]),
        ("width", Bint[15])
    ])
)

# Conv output is (14 - 3 + 1) x (15 - 4 + 1) = 12 x 12 per output channel.
T1 = Relu(
    Conv2d(X0, W1, b1, "chans", 3, "kh", "height", "height2", 4, "kw", "width", "width2")
)
# 3x3 max pooling reduces 12 x 12 to 4 x 4 ("height3" x "width3").
X1 = MaxPool2d(T1, "height2", 3, "kh", "height3", "width2", 3, "kw", "width3")
# Dense layer: contract the whole pooled feature map into "hidden" (no nonlinearity here).
X3 = (W3 * X1).reduce(ops.add, frozenset({"height3", "width3", "chans2"})) + b3
# Class probabilities, indexed by "classes2" and "batch".
O = Softmax(((W4 * X3).reduce(ops.add, "hidden") + b4), "classes", "classes2")

In [81]:
# Display the class probabilities for the 4 batch elements.
O

Tensor(tensor([[0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
        [0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
        [0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
        [0.0000e+00, 3.2374e-23, 2.6446e-35, 5.3192e-34],
        [1.0000e+00, 1.0000e+00, 1.0000e+00, 1.0000e+00]]), OrderedDict([('classes2', Bint[5, ]), ('batch', Bint[4, ])]), 'real')

## Other examples

### Discrete random variables

In [55]:
# Conditional probability table P(B | A): one row per value of A, each row sums to 1.
B_given_A = Tensor(
    torch.tensor([[0.2, 0.3, 0.5],
                  [0.8, 0.1, 0.1]]),
)["A", "B"]

# Prior P(A).
# Note: this rebinds the name `A`, which held the 3x3 demo matrix earlier in the notebook.
A = Tensor(
    torch.tensor([0.6, 0.4]),
)["A"]

In [49]:
# chain rule: P(A, B) = P(B | A) * P(A), broadcast over the shared name "A"
A_and_B = B_given_A * A
A_and_B

Tensor(tensor([[0.1200, 0.1800, 0.3000],
        [0.3200, 0.0400, 0.0400]]), OrderedDict([('A', Bint[2, ]), ('B', Bint[3, ])]), 'real')

In [50]:
# marginalization: P(B) = sum over A of P(A, B)
# Note: this rebinds the name `B`, which held a demo matrix earlier in the notebook.
B = A_and_B.reduce(ops.add, "A")
B

Tensor(tensor([0.4400, 0.2200, 0.3400]), OrderedDict([('B', Bint[3, ])]), 'real')

In [51]:
# Bayes' rule: P(A | B) = P(A, B) / P(B); each column (fixed B) sums to 1
A_given_B = A_and_B / B
A_given_B

Tensor(tensor([[0.2727, 0.8182, 0.8824],
        [0.7273, 0.1818, 0.1176]]), OrderedDict([('A', Bint[2, ]), ('B', Bint[3, ])]), 'real')

### Advanced indexing

In [83]:
# Random embedding table: 5 vocabulary entries, each a 3-dim embedding.
E = random_tensor(
    OrderedDict([
        ("vocab", Bint[5]),
        ("emb", Bint[3]),
    ]),
)
E

Tensor(tensor([[ 1.4886,  1.0406, -0.1764],
        [-1.1459, -0.2191,  0.2646],
        [-0.8871, -0.0130,  1.0426],
        [ 0.3290,  0.2357, -0.4487],
        [ 1.6252,  1.9409,  0.5970]]), OrderedDict([('vocab', Bint[5, ]), ('emb', Bint[3, ])]), 'real')

In [85]:
# partial indexing: fix "vocab" to entry 2, leaving its embedding vector
E(vocab=2)

Tensor(tensor([-0.8871, -0.0130,  1.0426]), OrderedDict([('emb', Bint[3, ])]), 'real')

In [106]:
# integer array indexing: I holds one vocabulary index per "seq" position
# (dtype=5 matches the size of "vocab"); substituting it for "vocab" looks up
# one embedding row per position, giving a result indexed by "seq" and "emb".
I = Tensor(torch.tensor([3, 2, 4, 0]), dtype=5)["seq"]
E(vocab=I)

Tensor(tensor([[ 0.3290,  0.2357, -0.4487],
        [-0.8871, -0.0130,  1.0426],
        [ 1.6252,  1.9409,  0.5970],
        [ 1.4886,  1.0406, -0.1764]]), OrderedDict([('seq', Bint[4, ]), ('emb', Bint[3, ])]), 'real')

In [108]:
# gather: P is indexed by both "vocab" and "seq"
P = random_tensor(
    OrderedDict([
        ("vocab", Bint[5]),
        ("seq", Bint[4]),
    ]),
)
P

Tensor(tensor([[-2.8599, -1.0175, -0.1075, -1.0871],
        [-0.3646,  0.2439,  0.5320,  0.3238],
        [-0.3164,  0.1753, -0.2921, -0.9764],
        [ 2.2078,  0.2989, -0.2367, -0.8313],
        [-2.0726,  0.1750,  2.4904,  0.2678]]), OrderedDict([('vocab', Bint[5, ]), ('seq', Bint[4, ])]), 'real')

In [109]:
# I is itself indexed by "seq", so this picks P[I[s], s] for each position s.
P(vocab=I)

Tensor(tensor([ 2.2078,  0.1753,  2.4904, -1.0871]), OrderedDict([('seq', Bint[4, ])]), 'real')

In [110]:
# Index both inputs at once with two index vectors sharing the name "subseq":
# entry j of the result is P[vocab=I2[j], seq=I1[j]].
I1 = Tensor(torch.tensor([1, 2, 0]), dtype=4)["subseq"]
I2 = Tensor(torch.tensor([3, 0, 4]), dtype=5)["subseq"]
P(seq=I1, vocab=I2)

Tensor(tensor([ 0.2989, -0.1075, -2.0726]), OrderedDict([('subseq', Bint[3, ])]), 'real')

### Continuous bag of words

In [114]:
# One-hot encoding of a 4-word sequence over a 3-word vocabulary: column s has
# a single 1 in the row of the word at position s.
# NOTE(review): dtype=1 presumably declares a bounded-integer domain of size 1,
# yet the data contains the value 1 — confirm whether dtype=2 (or a
# real-valued tensor) was intended.
X = Tensor(torch.tensor([[0, 1, 1, 0],
                         [1, 0, 0, 0],
                         [0, 0, 0, 1]]),
           dtype=1)["vocab", "seq"]
# Sanity check: summing out "vocab" gives 1 for every position.
X.reduce(ops.add, "vocab")

Tensor(tensor([1, 1, 1, 1]), OrderedDict([('seq', Bint[4, ])]), 1)

In [116]:
# Random embedding table (3 vocabulary entries x 4 embedding dims); rebinds `E`.
E = random_tensor(
    OrderedDict([
        ("vocab", Bint[3]),
        ("emb", Bint[4]),
    ]),
)
# Random output weights mapping the 4 embedding dims to 5 classes.
W = random_tensor(
    OrderedDict([
        ("classes", Bint[5]),
        ("emb", Bint[4]),
    ]),
)

@make_funsor
def CBOW(
    X: Has[{"vocab", "seq"}],
    E: Has[{"emb", "vocab"}],
    W: Has[{"emb", "classes"}],
    emb: Bound,
    vocab: Bound,
    seq: Bound,
    classes: Bound,
    classes2: Fresh[lambda classes: classes]
) -> Fresh[lambda X: X]:
    """Continuous bag-of-words classifier.

    X: word indicators indexed by ``vocab`` and ``seq``.
    E: embedding table indexed by ``vocab`` and ``emb``.
    W: output weights indexed by ``emb`` and ``classes``.

    Class scores per vocabulary entry (``W * E`` summed over ``emb``) are
    weighted by ``X`` and summed over ``vocab`` and then ``seq``. The result
    is soft-maxed along ``classes`` and indexed by ``classes2``.
    """
    word_scores = (W * E).reduce(ops.add, emb)
    position_scores = (word_scores * X).reduce(ops.add, vocab)
    logits = position_scores.reduce(ops.add, seq)
    return Softmax(logits, classes, classes2)

# Class probabilities for the one-hot sequence X; indexed by "classes2" only.
CBOW(X, E, W, "emb", "vocab", "seq", "classes", "classes2")

Tensor(tensor([7.2377e-01, 9.4771e-05, 2.4277e-01, 2.4577e-02, 8.7923e-03]), OrderedDict([('classes2', Bint[5, ])]), 'real')

### Sudoku ILP

### K-means clustering

In [120]:
# 10 random 4-dim data points and 3 random 4-dim cluster centres.
X = random_tensor(OrderedDict([("batch", Bint[10]), ("d", Bint[4])]))
C = random_tensor(OrderedDict([("clusters", Bint[3]), ("d", Bint[4])]))

@make_funsor
def Norm(
    X: Has[{"d"}],
    d: Bound
) -> Fresh[lambda X: X]:
    """Euclidean (L2) norm of ``X`` along ``d``: the square root of the sum of squares."""
    squared = X**2
    return squared.reduce(ops.add, d).sqrt()

In [124]:
# Intended cluster assignment: for each data point, the index of the nearest
# centre. C - X broadcasts to every (cluster, point) pair; Norm gives distances.
# TODO implement reduce for ops.argmin and ops.argmax
Q = Norm(C - X, "d").reduce(ops.argmin, "clusters")

In [None]:
# NOTE(review): unfinished sketch (no recorded execution). As written, Q * X / Q
# multiplies and divides by the same Q, and the result is summed over "batch";
# the intended update step is not clear from this expression — confirm.
(Q * X / Q).reduce(ops.add, "batch") # ???

### Beam search

### Multivariate normal distribution

In [15]:
@make_funsor
def Determinant(
    F: Funsor,
    ax1: Bound,
    ax2: Bound
) -> Fresh[lambda F: F]:
    """Determinant of the square matrix stored in ``F`` along ``ax1`` x ``ax2``.

    Uses Laplace (cofactor) expansion along index 0 of ``ax1``:
    ``det(F) = sum_i (-1)**i * F[0, i] * det(minor(0, i))``.
    The recursion visits every permutation, so this is only suitable for
    small demonstration matrices.

    F: funsor with inputs ``ax1`` and ``ax2`` of equal size.
    ax1, ax2: the two matrix inputs; both are eliminated from the result.

    Raises AssertionError if the two inputs differ in size.
    """
    assert ax1.output.size == ax2.output.size
    m = ax1.output.size
    # Always substitute by the bound variables' actual names instead of the
    # literal keywords "ax1"/"ax2", so the body follows its own arguments.
    row, col = ax1.name, ax2.name
    if m == 1:
        return F(**{row: 0, col: 0})
    # Every minor drops row 0, so this index is shared by all loop iterations.
    rows = Tensor(torch.tensor([k for k in range(1, m)]), dtype=m)[row]
    result = Number(0.0)
    for i in range(m):
        # Columns of the minor: all columns except i.
        cols = Tensor(torch.tensor([k for k in range(m) if k != i]), dtype=m)[col]
        minor = F(**{row: rows, col: cols})
        # Cofactor sign alternates along the expansion row.
        sign = Number(-1.0 if i % 2 else 1.0)
        # The minor is (m-1) x (m-1), so pass the names (not the size-m
        # variables) and let the recursive call infer the smaller domains.
        result = result + sign * F(**{row: 0, col: i}) * Determinant(minor, row, col)
    return result

In [16]:
# Try Determinant on a random 2x2 matrix indexed by "ax1" and "ax2".
X = random_tensor(OrderedDict([("ax1", Bint[2]), ("ax2", Bint[2])]))
Determinant(X, "ax1", "ax2")

AssertionError: Must provide exactly one type per subexpression

# LaTeX Macros

# Formal Definitions

## Records and shapes

## Named tensors

## Named tensor operations

## Common operations

# Differentiation

## Definition

## Rules

## Example

## Broadcasting

# Alternatives

# Extra

In [22]:
# dynamic programming
@make_funsor
def Fibbonaci(
    k: Value[int]
) -> Fresh[Real]:
    """Return the ``k``-th Fibonacci number as a real-valued funsor Number.

    Defined by F(0) = 0, F(1) = 1 and F(k) = F(k - 1) + F(k - 2). The
    recursion is deliberately naive; the notebook evaluates it under
    ``memoize()`` as its dynamic-programming example.

    Raises ValueError if ``k`` is negative (the recursion would otherwise
    never reach a base case).
    """
    if k < 0:
        raise ValueError("Fibbonaci is only defined for k >= 0, got {}".format(k))
    if k == 0:
        return Number(0.0)
    if k == 1:
        return Number(1.0)
    return Fibbonaci(k - 1) + Fibbonaci(k - 2)

# Evaluate inside funsor's memoize() interpretation context, then display the result.
with memoize():
    y = Fibbonaci(20)
y

Number(6765.0)

In [23]:
# Same call outside the memoize() context; the displayed value is identical.
Fibbonaci(20)

Number(6765.0)