# Introduction

In [1]:
from collections import OrderedDict
import functools

import math
import torch
from torch.distributions import constraints

%env FUNSOR_TYPECHECK=1
import funsor
from funsor.terms import Funsor, Variable, Number
from funsor.tensor import Tensor
from funsor.domains import Array, Bint, Real, Reals
from funsor.factory import Bound, Fresh, Has, Value, make_funsor, to_funsor
import funsor.ops as ops
from funsor.cnf import Contraction
from funsor.testing import random_tensor
from funsor.interpretations import reflect

funsor.set_backend("torch")
torch.set_default_dtype(torch.float32)

env: FUNSOR_TYPECHECK=1


# Informal Overview

## Named Tensors

In [2]:
A = Tensor(
    torch.tensor([[3., 1., 4.],
                  [1., 5., 9.],
                  [2., 6., 5.]])
)["height", "width"]

In [3]:
A(height=0)

Tensor(tensor([3., 1., 4.]), OrderedDict([('width', Bint[3, ])]), 'real')

In [4]:
A(width=2)

Tensor(tensor([4., 9., 5.]), OrderedDict([('height', Bint[3, ])]), 'real')

## Named tensor operations

### Elementwise operations and broadcasting

In [5]:
A.sigmoid()  # 1 / (1 + (-A).exp())

Tensor(tensor([[0.9526, 0.7311, 0.9820],
        [0.7311, 0.9933, 0.9999],
        [0.8808, 0.9975, 0.9933]]), OrderedDict([('height', Bint[3, ]), ('width', Bint[3, ])]), 'real')

In [6]:
x = Tensor(
    torch.tensor([2., 7., 1.]),
    inputs=OrderedDict([("height", Bint[3])]),
    dtype="real"
)

y = Tensor(
    torch.tensor([1., 4., 1.]),
    inputs=OrderedDict([("width", Bint[3])]),
    dtype="real"
)

In [7]:
A + x

Tensor(tensor([[ 5.,  3.,  6.],
        [ 8., 12., 16.],
        [ 3.,  7.,  6.]]), OrderedDict([('height', Bint[3, ]), ('width', Bint[3, ])]), 'real')

In [8]:
A + y

Tensor(tensor([[ 4.,  5.,  5.],
        [ 2.,  9., 10.],
        [ 3., 10.,  6.]]), OrderedDict([('height', Bint[3, ]), ('width', Bint[3, ])]), 'real')

### Reductions

In [9]:
A.reduce(ops.add, reduced_vars="height")

Tensor(tensor([ 6., 12., 18.]), OrderedDict([('width', Bint[3, ])]), 'real')

In [10]:
A.reduce(ops.add, reduced_vars="width")

Tensor(tensor([ 8., 15., 13.]), OrderedDict([('height', Bint[3, ])]), 'real')

In [11]:
A.reduce(ops.add, reduced_vars={"height", "width"})

Tensor(36.0, OrderedDict(), 'real')

In [12]:
(A * y).reduce(ops.add, reduced_vars="width")

Tensor(tensor([11., 30., 31.]), OrderedDict([('height', Bint[3, ])]), 'real')

In [13]:
(x * x).reduce(ops.add, reduced_vars="height")  # innder product
x * y  # outer product
(A * y).reduce(ops.add, reduced_vars="width")  # matrix-vector product
# vector-matrix product is the same as matrix-vector product
(A * x).reduce(ops.add, reduced_vars="height")  # vector-matrix product

Tensor(tensor([15., 43., 76.]), OrderedDict([('width', Bint[3, ])]), 'real')

In [15]:
B = Tensor(
    torch.tensor([[3, 2, 5], [5, 4, 0], [8, 3, 6]]),
)["width", "width2"]

In [16]:
(A * B).reduce(ops.add, reduced_vars="width")  # matrix-matrix product

Tensor(tensor([[ 46.,  22.,  39.],
        [100.,  49.,  59.],
        [ 76.,  43.,  40.]]), OrderedDict([('height', Bint[3, ]), ('width2', Bint[3, ])]), 'real')

### Renaming and reshaping

In [17]:
A(height="height2")

Tensor(tensor([[3., 1., 4.],
        [1., 5., 9.],
        [2., 6., 5.]]), OrderedDict([('height2', Bint[3, ]), ('width', Bint[3, ])]), 'real')

# Examples

## Building blocks

### Feedforward neural networks

In [18]:
input_dim = 100
X0 = random_tensor(
    OrderedDict([("input_layer", Bint[input_dim])])
)

hidden_1_dim = 32
W1 = random_tensor(
    OrderedDict([
        ("input_layer", Bint[input_dim]),
        ("hidden_layer_1", Bint[hidden_1_dim])
    ])
)
b1 = random_tensor(
    OrderedDict([("hidden_layer_1", Bint[hidden_1_dim])])
)
X1 = ((W1 * X0).reduce(ops.add, "input_layer") + b1).sigmoid()

hidden_2_dim = 16
W2 = random_tensor(
    OrderedDict([
        ("hidden_layer_1", Bint[hidden_1_dim]),
        ("hidden_layer_2", Bint[hidden_2_dim])
    ])
)
b2 = random_tensor(
    OrderedDict([("hidden_layer_2", Bint[hidden_2_dim])])
)
X2 = ((W2 * X1).reduce(ops.add, "hidden_layer_1") + b2).sigmoid()

hidden_3_dim = 8
W3 = random_tensor(
    OrderedDict([
        ("hidden_layer_2", Bint[hidden_2_dim]),
        ("hidden_layer_3", Bint[hidden_3_dim])
    ])
)
b3 = random_tensor(
    OrderedDict([("hidden_layer_3", Bint[hidden_3_dim])])
)
X3 = ((W3 * X2).reduce(ops.add, "hidden_layer_2") + b3).sigmoid()

In [19]:
@make_funsor
def FullConnLayer(
    x: Has[{"layer"}],
    W: Has[{"layer"}],
    b: Funsor,
    layer: Bound
) -> Fresh[lambda x: x]:
    result = ((W * x).reduce(ops.add, layer) + b).sigmoid()
    return result

In [20]:
input_dim = 100
X0 = random_tensor(
    OrderedDict([("layer", Bint[input_dim])])
)
hidden_1_dim = 32
W1 = random_tensor(
    OrderedDict([
        ("layer", Bint[input_dim]),
        ("out_layer", Bint[hidden_1_dim])
    ])
)
b1 = random_tensor(
    OrderedDict([("out_layer", Bint[hidden_1_dim])])
)

X1 = FullConnLayer(X0, W1, b1, "layer")
X1

Tensor(tensor([9.7559e-02, 1.0285e-03, 1.4649e-02, 1.9483e-02, 9.9043e-01, 9.7151e-01,
        1.0000e+00, 9.9994e-01, 9.9996e-01, 9.9990e-01, 9.9932e-01, 9.4716e-01,
        9.9996e-01, 9.8531e-01, 1.0000e+00, 6.8583e-01, 1.6765e-02, 6.8039e-01,
        2.9870e-08, 9.9885e-01, 8.7812e-01, 4.6573e-05, 2.9322e-05, 9.9925e-01,
        1.0000e+00, 3.9329e-04, 5.7612e-01, 9.9984e-01, 1.0000e+00, 2.4674e-02,
        2.0099e-08, 9.8046e-01]), OrderedDict([('out_layer', Bint[32, ])]), 'real')

### Recurrent neural networks

In [None]:
@make_funsor
def RecurrentLayer(
    x: Funsor,
    Wh: Funsor,
    Wi: Funsor,
    b: Funsor,
    hidden: Bound,
    input: Bound
) -> Fresh[lambda x: x]:
    output = ((Wh * h).reduce(ops.add, "hidden") + (Wi * x).reduce(ops.add, "input") + b).sigmoid()
    return output(hidden="new_hidden")

### Attention

In [6]:
# Can Has be used here?
@make_funsor
def Softmax(
    x: Funsor,
    ax: Bound
) -> Fresh[lambda x: x]:
    return x.exp() / x.exp().reduce(ops.add, ax)

In [3]:
@make_funsor
def Attention(
    Q: Has[{"key"}],
    K: Has[{"key", "seq"}],
    V: Has[{"seq"}],
    M: Has[{"seq"}], # does only one of Q, K, and M need to have "seq"?
    key: Bound,
    seq: Bound
) -> Fresh[lambda Q: Q]:
    x = (Q * K).reduce(ops.add, key) / math.sqrt(key.output.size) + M
    return (Softmax(x, seq) * V).reduce(ops.add, seq)

In [4]:
q = random_tensor(OrderedDict([("key", Bint[10])]))
k = random_tensor(OrderedDict([("key", Bint[10]), ("seq", Bint[3])]))
v = random_tensor(OrderedDict([("seq", Bint[3]), ("val", Bint[5])]))
m = random_tensor(OrderedDict([("seq", Bint[3])]))
Attention(q, k, v, m, "key", "seq")

Tensor(tensor([ 0.6105,  0.0171, -0.4626,  0.8110, -0.1807]), OrderedDict([('val', Bint[5, ])]), 'real')

In [8]:
Softmax(q, Variable("key", Bint[10]))

Tensor(tensor([0.0453, 0.0634, 0.0353, 0.0719, 0.1049, 0.0253, 0.1217, 0.0526, 0.1404,
        0.3392]), OrderedDict([('key', Bint[10, ])]), 'real')

### Convolution

In [31]:
@make_funsor
def Unroll(
    x: Funsor,
    seq: Bound,
    kernel: Funsor
) -> Fresh[lambda x: x]:
    temp_seq = Variable("temp_seq", Bint[seq.output.size - kernel.output.size + 1])
    return x(**{seq.name: temp_seq+kernel})(**{"temp_seq": seq.name})

In [32]:
X = random_tensor(OrderedDict([("chans", Bint[3]), ("seq", Bint[10])]))
kernel = Variable("kernel", Bint[3])
Y = Unroll(X, "seq", kernel)
Y

Tensor(tensor([[[ 0.7573, -0.7584,  0.6781],
         [-0.7584,  0.6781,  0.4517],
         [ 0.6781,  0.4517,  0.3617],
         [ 0.4517,  0.3617, -0.1271],
         [ 0.3617, -0.1271, -0.7190],
         [-0.1271, -0.7190,  0.3197],
         [-0.7190,  0.3197, -1.3162],
         [ 0.3197, -1.3162,  0.4374]],

        [[-0.4557,  1.2475,  0.3826],
         [ 1.2475,  0.3826, -0.9396],
         [ 0.3826, -0.9396, -0.3442],
         [-0.9396, -0.3442, -0.5095],
         [-0.3442, -0.5095,  0.3357],
         [-0.5095,  0.3357,  0.9981],
         [ 0.3357,  0.9981,  0.2339],
         [ 0.9981,  0.2339,  0.1737]],

        [[ 0.7336,  0.4420,  0.2416],
         [ 0.4420,  0.2416, -0.2261],
         [ 0.2416, -0.2261, -0.4358],
         [-0.2261, -0.4358,  1.1235],
         [-0.4358,  1.1235,  0.3109],
         [ 1.1235,  0.3109,  0.1073],
         [ 0.3109,  0.1073, -0.6010],
         [ 0.1073, -0.6010, -0.0119]]]), OrderedDict([('chans', Bint[3, ]), ('seq', Bint[8, ]), ('kernel', Bint[3, 

In [33]:
@make_funsor
def Conv1d(
    X: Has[{"chans", "seq"}],
    W: Has[{"chans", "kernel"}],
    b: Funsor,
    chans: Bound,
    kernel: Bound,
    seq: Bound
) -> Fresh[lambda X: X]:
    return (W * Unroll(X, seq.name, kernel)).reduce(ops.add, frozenset({chans, kernel})) + b

In [34]:
x = random_tensor(OrderedDict([("chans", Bint[3]), ("seq", Bint[10])]))
kernel = Variable("kernel", Bint[3])
w = random_tensor(OrderedDict([("chans", Bint[3]), ("kernel", Bint[3])]))
b = random_tensor(OrderedDict([]))

In [35]:
Conv1d(x, w, b, "chans", "kernel", "seq")

Tensor(tensor([ 5.0808,  3.3734,  2.6978,  1.4531,  2.6193,  0.3668, -3.4585, -0.3620]), OrderedDict([('seq', Bint[8, ])]), 'real')

In [36]:
@make_funsor
def Conv2d(
    X: Has[{"chans", "seq"}],
    W: Has[{"chans", "kernel"}],
    b: Funsor,
    chans: Bound,
    kh: Bound,
    height: Bound,
    kw: Bound,
    width: Bound
) -> Fresh[lambda X: X]:
    return (W * Unroll(Unroll(X, width.name, kw), height.name, kh)).reduce(ops.add, frozenset({chans, kh, kw})) + b

In [37]:
x = random_tensor(OrderedDict([("chans", Bint[3]), ("height", Bint[10]), ("width", Bint[8])]))
kh = Variable("kh", Bint[3])
kw = Variable("kw", Bint[4])
w = random_tensor(OrderedDict([("chans", Bint[3]), ("kh", Bint[3]), ("kw", Bint[4])]))
b = random_tensor(OrderedDict([]))

Conv2d(x, w, b, "chans", "kh", "height", "kw", "width")

KeyError: 'seq'

### Max pooling

In [2]:
@make_funsor
def Unflatten(
    x: Funsor,
    i: Bound,
    i_over_2: Fresh[lambda i: Bint[i.size // 2]],
    i_mod_2: Fresh[lambda: Bint[2]],
) -> Fresh[lambda x: x]:
    assert i.output.size % 2 == 0
    return x(**{i.name: i_over_2 * Number(2, 3) + i_mod_2})

In [3]:
# Partial evaluation
X = random_tensor(OrderedDict([("seq", Bint[10])]))
X_var = Variable("x", Bint[10])
Y = Unflatten(X_var, "seq", "seq2", "kernel")
Y(x=X)

ValueError: Cannot infer domain of i=seq

In [40]:
@make_funsor
def Pool(
    x: Funsor,
    seq: Bound,
    k: Value[int],
    kernel: Fresh[lambda k: Bint[k]],
    seq2: Fresh[lambda seq, k: Bint[seq.size // k]], # seq -> Bint[]
) -> Fresh[lambda x: x]: # x -> x.output (Bint[] or Real)
    return x(**{seq.name: seq2 * Number(k, k+1) + kernel})

In [42]:
X = random_tensor(OrderedDict([("seq", Bint[10])]))
# kernel = Variable("kernel", Bint[2])
Y = Pool(X, "seq", 2, "kernel", "seq2")
Y

Tensor(tensor([[ 0.4005,  0.0188],
        [ 0.9411,  0.4531],
        [-0.5512, -0.9998],
        [ 0.5990, -0.8712],
        [-0.7337,  0.4031]]), OrderedDict([('seq2', Bint[5, ]), ('kernel', Bint[2, ])]), 'real')

In [45]:
@make_funsor
def Pool2(
    x: Funsor,
    seq: Bound,
    kernel: Funsor,
    seq2: Fresh[lambda seq, kernel: Bint[seq.size // kernel.size]], # seq -> Bint[]
) -> Fresh[lambda x: x]: # x -> x.output (Bint[] or Real)
    return x(**{seq.name: seq2 * Number(kernel.output.size, kernel.output.size+1) + kernel})

In [47]:
Pool2(X, "seq", Variable("kernel", Bint[2]), "seq2")

Tensor(tensor([[ 0.4005,  0.0188],
        [ 0.9411,  0.4531],
        [-0.5512, -0.9998],
        [ 0.5990, -0.8712],
        [-0.7337,  0.4031]]), OrderedDict([('seq2', Bint[5, ]), ('kernel', Bint[2, ])]), 'real')

In [55]:
Y.inputs

OrderedDict([('kernel', Bint[2, ]), ('seq2', Bint[5, ])])

In [18]:
funsor.terms.Tuple(kernel, temp_seq).output

Product[Bint[2, ], Bint[5, ]]

In [32]:
((temp_seq * Number(1, 3))).output

Bint[5, ]

In [14]:
(funkernel * temp_seq).output

Bint[5, ]

In [6]:
X(seq=temp_seq+kernel)

NameError: name 'temp_seq' is not defined

In [7]:
temp_seq = Variable("temp_seq", Bint[5])
X(seq=kernel+temp_seq)

ValueError: Output mismatch: Bint[6, ] vs Bint[10, ]

### Normalization layers

In [None]:
# Pytorch
x.sum(dim=0)
x.mean(dim=0)

# Funsor
x.reduce(ops.add, "i")
x.reduce(ops.mean, "i") # Wrong
x.mean("i")

@make_funsor
def Mean(
    X: Funsor,
    ax: Bound
) -> Fresh[lambda X: X]:
    return ops.mean(funsor.terms.Lambda(X, ax), 0)

In [61]:
@make_funsor
def Mean(
    X: Funsor,
    ax: Bound
) -> Fresh[lambda X: X]:
    return X.reduce(ops.add, ax) / ax.output.size

@make_funsor
def Variance(
    X: Funsor,
    ax: Bound
) -> Fresh[lambda X: X]:
    return Mean((X - Mean(X, ax))**2, ax)

@make_funsor
def Standardize(
    X: Funsor,
    ax: Bound
) -> Fresh[lambda X: X]:
    return (X - Mean(X, ax)) / (Variance(X, ax) + ops.finfo(X.data).eps)

@make_funsor
def BatchNorm(
    X: Funsor,
    gamma: Funsor,
    beta: Funsor,
    batch: Bound,
    layer: Bound
) -> Fresh[lambda X: X]:
    return Standardize(X, batch)

In [62]:
x = random_tensor(OrderedDict([("i", Bint[10])]))
Standardize(x, "i")

Tensor(tensor([-2.0574, -0.4460,  1.0813,  0.4329,  0.9293, -0.0551,  0.2744, -0.5563,
        -1.1598,  1.5569]), OrderedDict([('i', Bint[10, ])]), 'real')

## Transformer

## LeNet

## Other examples

### Discrete random variables

In [55]:
B_given_A = Tensor(
    torch.tensor([[0.2, 0.3, 0.5],
                  [0.8, 0.1, 0.1]]),
)["A", "B"]

A = Tensor(
    torch.tensor([0.6, 0.4]),
)["A"]

In [49]:
A_and_B = B_given_A * A
A_and_B

Tensor(tensor([[0.1200, 0.1800, 0.3000],
        [0.3200, 0.0400, 0.0400]]), OrderedDict([('A', Bint[2, ]), ('B', Bint[3, ])]), 'real')

In [50]:

B = A_and_B.reduce(ops.add, "A")
B

Tensor(tensor([0.4400, 0.2200, 0.3400]), OrderedDict([('B', Bint[3, ])]), 'real')

In [51]:
A_given_B = A_and_B / B
A_given_B

Tensor(tensor([[0.2727, 0.8182, 0.8824],
        [0.7273, 0.1818, 0.1176]]), OrderedDict([('A', Bint[2, ]), ('B', Bint[3, ])]), 'real')

### Advanced indexing

### Continuous bag of words

### Sudoku ILP

### K-means clustering

In [None]:
x = random_tensor(OrderedDict([("batch", Bint[10]), ("d", Bint[4])]))
c = random_tensor(OrderedDict([("clusters", Bint[3]), ("d", Bint[4])]))

### Beam search

### Multivariate normal distribution

# LaTeX Macros

# Formal Definitions

## Records and shapes

## Named tensors

## Named tensor operations

## Common operations

# Differentiation

## Definition

## Rules

## Example

## Broadcasting

# Alternatives