# Introduction

In [1]:
from collections import OrderedDict
import functools

import math
import torch
from torch.distributions import constraints

%env FUNSOR_TYPECHECK=1
import funsor
from funsor.terms import Funsor, Variable, Number
from funsor.tensor import Tensor
from funsor.domains import Array, Bint, Real, Reals
from funsor.factory import Bound, Fresh, Has, Value, make_funsor, to_funsor
import funsor.ops as ops
from funsor.cnf import Contraction
from funsor.testing import random_tensor
from funsor.interpretations import reflect

# Route all funsor tensor operations in this notebook through PyTorch.
funsor.set_backend("torch")
# Seed PyTorch's global RNG so cells that draw random tensors give the same
# numbers on every Restart & Run All.
# NOTE(review): assumes funsor.testing.random_tensor draws from torch's global
# RNG under the torch backend -- confirm.
torch.manual_seed(0)
# Kept last so the cell has no displayed return value (set_default_dtype returns None).
torch.set_default_dtype(torch.float32)

env: FUNSOR_TYPECHECK=1


# Informal Overview

## Named Tensors

In [2]:
# 3x3 real-valued matrix; indexing the Tensor with two strings names its
# positional dims "height" (rows) and "width" (columns).
A = Tensor(
    torch.tensor([[3., 1., 4.],
                  [1., 5., 9.],
                  [2., 6., 5.]])
)["height", "width"]

In [3]:
# Substitute height=0: picks the first row, leaving only the "width" input.
A(height=0)

Tensor(tensor([3., 1., 4.]), OrderedDict([('width', Bint[3, ])]), 'real')

In [4]:
# Substitute width=2: picks the last column, leaving only the "height" input.
A(width=2)

Tensor(tensor([4., 9., 5.]), OrderedDict([('height', Bint[3, ])]), 'real')

## Named tensor operations

### Elementwise operations and broadcasting

In [5]:
# Elementwise logistic sigmoid; the named inputs of A are unchanged.
A.sigmoid()  # 1 / (1 + (-A).exp())

Tensor(tensor([[0.9526, 0.7311, 0.9820],
        [0.7311, 0.9933, 0.9999],
        [0.8808, 0.9975, 0.9933]]), OrderedDict([('height', Bint[3, ]), ('width', Bint[3, ])]), 'real')

In [6]:
# Vector indexed by the named dim "height".
x = Tensor(torch.tensor([2., 7., 1.]))["height"]

# Vector indexed by the named dim "width".
y = Tensor(torch.tensor([1., 4., 1.]))["width"]

In [7]:
# Broadcast by name: x is indexed by "height", so x[h] is added to every entry of row h.
A + x

Tensor(tensor([[ 5.,  3.,  6.],
        [ 8., 12., 16.],
        [ 3.,  7.,  6.]]), OrderedDict([('height', Bint[3, ]), ('width', Bint[3, ])]), 'real')

In [8]:
# Broadcast by name: y is indexed by "width", so y[w] is added to every entry of column w.
A + y

Tensor(tensor([[ 4.,  5.,  5.],
        [ 2.,  9., 10.],
        [ 3., 10.,  6.]]), OrderedDict([('height', Bint[3, ]), ('width', Bint[3, ])]), 'real')

### Reductions

In [9]:
# Sum over "height" (column sums); the result is indexed by "width" only.
A.reduce(ops.add, reduced_vars="height")

Tensor(tensor([ 6., 12., 18.]), OrderedDict([('width', Bint[3, ])]), 'real')

In [10]:
# Sum over "width" (row sums); the result is indexed by "height" only.
A.reduce(ops.add, reduced_vars="width")

Tensor(tensor([ 8., 15., 13.]), OrderedDict([('height', Bint[3, ])]), 'real')

In [11]:
# Sum over both named dims at once; the result has no inputs left.
A.reduce(ops.add, reduced_vars={"height", "width"})

Tensor(36.0, OrderedDict(), 'real')

In [12]:
# Multiply elementwise (broadcasting y along "width"), then sum out "width".
(A * y).reduce(ops.add, reduced_vars="width")

Tensor(tensor([11., 30., 31.]), OrderedDict([('height', Bint[3, ])]), 'real')

In [13]:
# Common linear-algebra products written as "multiply, then reduce by name".
# Only the value of the last expression is displayed as the cell output.
(x * x).reduce(ops.add, reduced_vars="height")  # inner product
x * y  # outer product
(A * y).reduce(ops.add, reduced_vars="width")  # matrix-vector product
# vector-matrix product is the same operation as matrix-vector product;
# only the name of the reduced dim differs
(A * x).reduce(ops.add, reduced_vars="height")  # vector-matrix product

Tensor(tensor([15., 43., 76.]), OrderedDict([('width', Bint[3, ])]), 'real')

In [14]:
# Second 3x3 matrix (entries written as Python ints) with dims "width" and
# "width2"; it shares the name "width" with A so the two can be contracted.
B = Tensor(
    torch.tensor([[3, 2, 5],
                  [5, 4, 0],
                  [8, 3, 6]]),
)["width", "width2"]

In [15]:
# Contract A and B over their shared dim "width"; the result is indexed by "height" and "width2".
(A * B).reduce(ops.add, reduced_vars="width")  # matrix-matrix product

Tensor(tensor([[ 46.,  22.,  39.],
        [100.,  49.,  59.],
        [ 76.,  43.,  40.]]), OrderedDict([('height', Bint[3, ]), ('width2', Bint[3, ])]), 'real')

### Renaming and reshaping

In [16]:
# Substituting a string for a named input renames it: "height" becomes "height2".
A(height="height2")

Tensor(tensor([[3., 1., 4.],
        [1., 5., 9.],
        [2., 6., 5.]]), OrderedDict([('height2', Bint[3, ]), ('width', Bint[3, ])]), 'real')

# Examples

## Building blocks

### Feedforward neural networks

In [17]:
# A three-layer feedforward network written out by hand. Every layer
# multiplies the previous activations by a weight tensor, sums out the shared
# named dimension, adds a bias and applies a sigmoid.
input_dim = 100
X0 = random_tensor(OrderedDict(input_layer=Bint[input_dim]))

# Layer 1: input_layer -> hidden_layer_1
hidden_1_dim = 32
W1 = random_tensor(
    OrderedDict(input_layer=Bint[input_dim], hidden_layer_1=Bint[hidden_1_dim])
)
b1 = random_tensor(OrderedDict(hidden_layer_1=Bint[hidden_1_dim]))
X1 = ((W1 * X0).reduce(ops.add, "input_layer") + b1).sigmoid()

# Layer 2: hidden_layer_1 -> hidden_layer_2
hidden_2_dim = 16
W2 = random_tensor(
    OrderedDict(hidden_layer_1=Bint[hidden_1_dim], hidden_layer_2=Bint[hidden_2_dim])
)
b2 = random_tensor(OrderedDict(hidden_layer_2=Bint[hidden_2_dim]))
X2 = ((W2 * X1).reduce(ops.add, "hidden_layer_1") + b2).sigmoid()

# Layer 3: hidden_layer_2 -> hidden_layer_3
hidden_3_dim = 8
W3 = random_tensor(
    OrderedDict(hidden_layer_2=Bint[hidden_2_dim], hidden_layer_3=Bint[hidden_3_dim])
)
b3 = random_tensor(OrderedDict(hidden_layer_3=Bint[hidden_3_dim]))
X3 = ((W3 * X2).reduce(ops.add, "hidden_layer_2") + b3).sigmoid()

In [34]:
@make_funsor
def FullConnLayer(
    x: Has[{"layer"}],
    W: Has[{"layer"}],
    b: Funsor,
    layer: Bound
) -> Fresh[lambda x: x]:
    """Fully connected layer: ``sigmoid(sum_layer(W * x) + b)``.

    ``x`` and ``W`` are annotated as having ``layer`` among their inputs;
    ``layer`` is bound (summed out) by this term. The declared output domain
    is that of ``x``.
    """
    # Contract weights against activations over the bound dimension.
    pre_activation = (W * x).reduce(ops.add, layer) + b
    return pre_activation.sigmoid()

In [35]:
# Same first layer as the hand-written network, now expressed through
# FullConnLayer with the contracted dimension named "layer".
input_dim = 100
X0 = random_tensor(OrderedDict(layer=Bint[input_dim]))
hidden_1_dim = 32
W1 = random_tensor(
    OrderedDict(layer=Bint[input_dim], out_layer=Bint[hidden_1_dim])
)
b1 = random_tensor(OrderedDict(out_layer=Bint[hidden_1_dim]))

# "layer" is summed out; the activations are indexed by "out_layer".
X1 = FullConnLayer(X0, W1, b1, "layer")
X1

Tensor(tensor([1.0000e+00, 9.6420e-04, 5.9377e-01, 9.9878e-01, 3.0042e-03, 9.8825e-01,
        6.0447e-03, 8.1884e-04, 9.0660e-02, 9.9972e-01, 4.1371e-01, 8.4666e-01,
        9.9900e-01, 2.1682e-01, 8.3070e-02, 2.3368e-02, 5.5098e-03, 5.9357e-01,
        1.9522e-04, 9.7225e-01, 7.8749e-03, 1.7118e-01, 1.0000e+00, 1.0000e+00,
        4.8502e-01, 5.7865e-04, 1.0000e+00, 8.2699e-05, 9.9985e-01, 4.4409e-06,
        6.7093e-01, 1.6391e-08]), OrderedDict([('out_layer', Bint[32, ])]), 'real')

### Recurrent neural networks

In [None]:
@make_funsor
def RecurrentLayer(
    x: Funsor,
    Wh: Funsor,
    Wi: Funsor,
    b: Funsor,
    hidden: Bound,
    input: Bound
) -> Fresh[lambda x: x]:
    """Sketch of one recurrent step: sigmoid of a hidden-state term, an input term and a bias.

    NOTE(review): this cell has never been executed (``In [None]``) and cannot
    run as written; see the inline notes. Fixing it needs an extra parameter
    for the previous hidden state, which changes the signature.
    """
    # NOTE(review): `h` is not a parameter and is not defined in this function;
    # presumably it is meant to be the previous hidden state -- TODO add it.
    # NOTE(review): the reductions use the string literals "hidden"/"input"
    # rather than the bound parameters `hidden`/`input` declared above.
    output = ((Wh * h).reduce(ops.add, "hidden") + (Wi * x).reduce(ops.add, "input") + b).sigmoid()
    # NOTE(review): "hidden" was reduced on the previous line, so this rename
    # looks inverted; presumably the intent is new_hidden -> hidden -- confirm.
    return output(hidden="new_hidden")

### Attention

In [36]:
@make_funsor
def Softmax(
    x: Funsor,
    ax: Bound,
    ax2: Fresh[lambda ax: ax]
) -> Fresh[lambda x: x]:
    """Softmax of ``x`` along ``ax``, with the result indexed by the fresh name ``ax2``.

    ``ax2`` is declared with the same domain as ``ax``.
    """
    # Rename the normalised dimension to its fresh name first.
    renamed = x(**{ax.name: ax2.name})
    # Log of the normaliser: log-sum-exp over the renamed dimension.
    log_normalizer = renamed.reduce(ops.logaddexp, ax2)
    return (renamed - log_normalizer).exp()

In [37]:
# Random length-10 vector indexed by "key"; the softmax result is indexed by "key2".
q = random_tensor(OrderedDict([("key", Bint[10])]))
Softmax(q, "key", "key2")

Tensor(tensor([0.2437, 0.0257, 0.0642, 0.1169, 0.0384, 0.1301, 0.0492, 0.1176, 0.0133,
        0.2010]), OrderedDict([('key2', Bint[10, ])]), 'real')

In [38]:
@make_funsor
def Attention(
    Q: Has[{"key"}],
    K: Has[{"key", "seq"}],
    V: Has[{"seq2"}],
    M: Has[{"seq"}],
    key: Bound,
    seq: Bound,
    seq2: Bound
) -> Fresh[lambda Q: Q]:
    """Scaled dot-product attention with an additive mask ``M``.

    Scores are ``sum_key(Q * K) / sqrt(|key|) + M``. They are soft-maxed along
    ``seq`` (renamed to ``seq2``) and used to weight ``V``, which is then
    summed over ``seq2``.
    """
    scale = math.sqrt(key.output.size)
    scores = (Q * K).reduce(ops.add, key) / scale + M
    weights = Softmax(scores, seq, seq2)
    return (weights * V).reduce(ops.add, seq2)

In [39]:
# Random inputs for a single query: key size 10, sequence length 3, value size 5.
# v is indexed by "seq2" (not "seq") to match the name the softmax output uses.
q = random_tensor(OrderedDict([("key", Bint[10])]))
k = random_tensor(OrderedDict([("key", Bint[10]), ("seq", Bint[3])]))
v = random_tensor(OrderedDict([("seq2", Bint[3]), ("val", Bint[5])]))
m = random_tensor(OrderedDict([("seq", Bint[3])]))
Attention(q, k, v, m, "key", "seq", "seq2")

Tensor(tensor([-0.8274, -1.4611, -0.2073, -0.7946,  0.5313]), OrderedDict([('val', Bint[5, ])]), 'real')

### Convolution

In [40]:
@make_funsor
def Unroll(
    x: Funsor,
    seq: Bound,
    k: Value[int],
    kernel: Fresh[lambda k: Bint[k]],
    seq2: Fresh[lambda seq, k: Bint[seq.size - k + 1]]
) -> Fresh[lambda x: x]:
    """Expose overlapping windows of ``x`` along ``seq``.

    ``seq`` is replaced by ``seq2 + kernel``, where the fresh ``kernel`` has
    size ``k`` and the fresh ``seq2`` has size ``seq.size - k + 1``.
    """
    # Position in the original sequence = window start + offset inside the window.
    window_position = seq2 + kernel
    return x(**{seq.name: window_position})

In [64]:
@make_funsor
def Unroll2(
    x: Funsor,
    seq: Bound,
    kernel: Funsor,
    seq2: Fresh[lambda seq, kernel: Bint[seq.size - kernel.size + 1]]
) -> Fresh[lambda x: x]:
    """Variant of ``Unroll`` that takes the kernel index as a funsor instead of a size.

    ``seq`` is replaced by ``seq2 + kernel``; the fresh ``seq2`` has size
    ``seq.size - kernel.size + 1``.
    """
    # Position in the original sequence = window start + offset inside the window.
    window_position = seq2 + kernel
    return x(**{seq.name: window_position})

In [66]:
# Unroll a 3-channel, length-10 sequence into windows of width 3:
# the result is indexed by "chans", "seq2" (10 - 3 + 1 = 8 windows) and "kernel".
X = random_tensor(OrderedDict([("chans", Bint[3]), ("seq", Bint[10])]))
Y = Unroll2(X, "seq", Variable("kernel", Bint[3]), "seq2")
Y

Tensor(tensor([[[-2.1018,  1.1907, -0.8056],
         [ 1.1907, -0.8056, -1.1722],
         [-0.8056, -1.1722,  0.5730],
         [-1.1722,  0.5730, -0.1003],
         [ 0.5730, -0.1003, -0.4544],
         [-0.1003, -0.4544, -0.0859],
         [-0.4544, -0.0859, -1.4297],
         [-0.0859, -1.4297,  0.2003]],

        [[ 1.0350, -0.2168,  1.6012],
         [-0.2168,  1.6012,  1.5195],
         [ 1.6012,  1.5195,  1.2746],
         [ 1.5195,  1.2746, -2.1400],
         [ 1.2746, -2.1400,  0.2788],
         [-2.1400,  0.2788,  0.0247],
         [ 0.2788,  0.0247, -0.2465],
         [ 0.0247, -0.2465, -0.9741]],

        [[ 0.0922, -0.5518, -0.9372],
         [-0.5518, -0.9372,  0.3317],
         [-0.9372,  0.3317,  0.7012],
         [ 0.3317,  0.7012,  0.4599],
         [ 0.7012,  0.4599,  0.4827],
         [ 0.4599,  0.4827, -1.1506],
         [ 0.4827, -1.1506,  1.1231],
         [-1.1506,  1.1231, -1.5177]]]), OrderedDict([('chans', Bint[3, ]), ('seq2', Bint[8, ]), ('kernel', Bint[3,

In [61]:
@make_funsor
def Conv1d(
    X: Has[{"chans", "seq"}],
    W: Has[{"chans", "kernel"}],
    b: Funsor,
    chans: Bound,
    k: Value[int],
    kernel: Bound,
    seq: Bound,
    seq2: Fresh[lambda seq, k: Bint[seq.size - k + 1]]
) -> Fresh[lambda X: X]:
    """1-D convolution: unroll ``X`` into windows, weight them by ``W``, sum and add ``b``.

    Channels and kernel offsets are summed out; the result is indexed by the
    fresh ``seq2`` of size ``seq.size - k + 1``.
    """
    windows = Unroll(X, seq, k, kernel, seq2)
    weighted = W * windows
    return weighted.reduce(ops.add, frozenset({chans, kernel})) + b

In [62]:
# Inputs for the Conv1d demo: 3-channel length-10 signal, a 3x3 (chans x kernel)
# filter, and a scalar bias (no named inputs).
x = random_tensor(OrderedDict([("chans", Bint[3]), ("seq", Bint[10])]))
# NOTE(review): this Variable appears unused -- the Conv1d call in the next
# cell passes the string "kernel" instead.
kernel = Variable("kernel", Bint[3])
w = random_tensor(OrderedDict([("chans", Bint[3]), ("kernel", Bint[3])]))
b = random_tensor(OrderedDict([]))

In [63]:
# Kernel size 3 over a length-10 sequence gives 10 - 3 + 1 = 8 output positions in "seq2".
Conv1d(x, w, b, "chans", 3, "kernel", "seq", "seq2")

Tensor(tensor([-0.6673,  1.9524, -1.1526, -2.9487,  4.3021,  1.8157, -0.9582, -1.5912]), OrderedDict([('seq2', Bint[8, ])]), 'real')

In [77]:
@make_funsor
def Conv2d(
    X: Has[{"chans", "height", "width"}],
    W: Has[{"chans", "kh", "kw"}],
    b: Funsor,
    chans: Bound,
    kh_size: Value[int],
    kh: Bound,
    height: Bound,
    height2: Fresh[lambda height, kh_size: Bint[height.size - kh_size + 1]],
    kw_size: Value[int],
    kw: Bound,
    width: Bound,
    width2: Fresh[lambda width, kw_size: Bint[width.size - kw_size + 1]]
) -> Fresh[lambda X: X]:
    """2-D convolution built from two nested ``Unroll`` calls.

    ``X`` is unrolled along ``width`` and then along ``height``. The windows
    are weighted by ``W``, summed over channels and both kernel offsets, and
    ``b`` is added. The result is indexed by the fresh ``height2`` and ``width2``.
    """
    unrolled_width = Unroll(X, width, kw_size, kw, width2)
    patches = Unroll(unrolled_width, height, kh_size, kh, height2)
    weighted = W * patches
    return weighted.reduce(ops.add, frozenset({chans, kh, kw})) + b

In [78]:
# Conv2d demo: 3-channel 10x8 image with a 3x4 (kh x kw) filter and scalar bias.
# Output sizes: height2 = 10 - 3 + 1 = 8, width2 = 8 - 4 + 1 = 5.
x = random_tensor(OrderedDict([("chans", Bint[3]), ("height", Bint[10]), ("width", Bint[8])]))
w = random_tensor(OrderedDict([("chans", Bint[3]), ("kh", Bint[3]), ("kw", Bint[4])]))
b = random_tensor(OrderedDict([]))

Conv2d(x, w, b, "chans", 3, "kh", "height", "height2", 4, "kw", "width", "width2")

Tensor(tensor([[  3.8090,  -1.9533,   7.4257,  -0.8424,  -3.7585],
        [-10.2185,   0.3733,   1.8906,   1.1670, -10.3593],
        [-11.2712,  -0.9658,  -6.6962,   6.9607,   7.3948],
        [ -2.3084,  -3.1277,   3.3852,  -2.3263,   0.1138],
        [-16.6359,   2.2360,  12.6339,  -2.7315,   0.9199],
        [  3.5712,   0.5392,  -8.0635, -11.6026,   1.2325],
        [ 14.3903,   3.6469,   4.7069,  -4.9921, -15.5302],
        [ 15.0121,  -1.5841,   0.6507,  -6.7394,  -0.1217]]), OrderedDict([('height2', Bint[8, ]), ('width2', Bint[5, ])]), 'real')

### Max pooling

In [100]:
@make_funsor
def Pool(
    x: Has[{"seq"}],
    seq: Bound,
    k: Value[int],
    kernel: Fresh[lambda k: Bint[k]],
    seq2: Fresh[lambda seq, k: Bint[seq.size // k]],
) -> Fresh[lambda x: x]:
    """Split ``seq`` into non-overlapping windows of length ``k``.

    ``seq`` is replaced by ``seq2 * k + kernel``, where the fresh ``kernel``
    has size ``k`` and the fresh ``seq2`` has size ``seq.size // k``. The
    declared output domain is that of ``x``.

    Raises ``AssertionError`` when the size of ``seq`` is not a multiple of ``k``.
    """
    assert seq.output.size % k == 0
    # The stride is the constant k, wrapped as a funsor Number with dtype k + 1.
    stride = Number(k, k+1)
    return x(**{seq.name: seq2 * stride + kernel})

In [101]:
# Split a length-10 sequence into 5 windows of size 2: the result is indexed by "seq2" and "kernel".
X = random_tensor(OrderedDict([("seq", Bint[10])]))
Y = Pool(X, "seq", 2, "kernel", "seq2")
Y

Tensor(tensor([[-1.3082, -1.4157],
        [-0.2013,  1.0847],
        [ 1.3470, -0.3982],
        [ 0.1646,  0.3407],
        [-0.1591,  0.1409]]), OrderedDict([('seq2', Bint[5, ]), ('kernel', Bint[2, ])]), 'real')

In [102]:
@make_funsor
def MaxPool1d(
    X: Has[{"seq"}],
    seq: Bound,
    k: Value[int],
    kernel: Fresh[lambda k: Bint[k]],
    seq2: Fresh[lambda seq, k: Bint[seq.size // k]]
) -> Fresh[lambda X: X]:
    """1-D max pooling: window ``X`` with ``Pool``, then take the max inside each window."""
    windows = Pool(X, seq, k, kernel, seq2)
    return windows.reduce(ops.max, kernel)

In [103]:
# Max-pool a length-10 sequence with window size 2: the result is indexed by "seq2" (size 5).
X = random_tensor(OrderedDict([("seq", Bint[10])]))
Y = MaxPool1d(X, "seq", 2, "kernel", "seq2")
Y

Tensor(tensor([3.0786, 0.7304, 1.4282, 1.7233, 1.3209]), OrderedDict([('seq2', Bint[5, ])]), 'real')

In [104]:
@make_funsor
def MaxPool2d(
    X: Has[{"height", "width"}],
    height: Bound,
    kh_size: Value[int],
    kh: Fresh[lambda kh_size: Bint[kh_size]],
    height2: Fresh[lambda height, kh_size: Bint[height.size // kh_size]],
    width: Bound,
    kw_size: Value[int],
    kw: Fresh[lambda kw_size: Bint[kw_size]],
    width2: Fresh[lambda width, kw_size: Bint[width.size // kw_size]],
) -> Fresh[lambda X: X]:
    """2-D max pooling: window ``X`` along ``height`` and ``width``, then max over both window offsets."""
    pooled_height = Pool(X, height, kh_size, kh, height2)
    windows = Pool(pooled_height, width, kw_size, kw, width2)
    return windows.reduce(ops.max, frozenset({kh, kw}))

In [107]:
# Max-pool a 9 (width) x 4 (height) input with a 2 x 3 (kh x kw) window:
# height2 = 4 // 2 = 2, width2 = 9 // 3 = 3.
X = random_tensor(OrderedDict([("width", Bint[9]), ("height", Bint[4])]))
Y = MaxPool2d(X, "height", 2, "kh", "height2", "width", 3, "kw", "width2")
Y

Tensor(tensor([[1.1734, 0.5261],
        [1.3100, 1.4246],
        [0.9372, 1.0184]]), OrderedDict([('width2', Bint[3, ]), ('height2', Bint[2, ])]), 'real')

In [45]:
@make_funsor
def Pool2(
    x: Funsor,
    seq: Bound,
    kernel: Funsor,
    seq2: Fresh[lambda seq, kernel: Bint[seq.size // kernel.size]],
) -> Fresh[lambda x: x]:
    """Variant of ``Pool`` that takes the kernel index as a funsor instead of a size.

    ``seq`` is replaced by ``seq2 * k + kernel``, where ``k`` is the size of
    ``kernel``'s output domain and the fresh ``seq2`` has size ``seq.size // k``.
    The declared output domain is that of ``x``.

    Raises ``AssertionError`` when the size of ``seq`` is not a multiple of
    ``k``, matching the check in ``Pool``.
    """
    k = kernel.output.size
    # Same precondition as Pool: windows must tile the sequence exactly.
    assert not seq.output.size % k
    # The stride is the constant k, wrapped as a funsor Number with dtype k + 1.
    return x(**{seq.name: seq2 * Number(k, k + 1) + kernel})

### Normalization layers

In [None]:
# Scratch comparison of positional (PyTorch) and named (funsor) reductions.
# NOTE(review): this cell has never been executed (In [None]); it reads as
# pseudo-code, and whatever `x` is bound to here comes from an earlier cell.

# PyTorch: reduce over a positional dim
x.sum(dim=0)
x.mean(dim=0)

# Funsor: reduce over a named dim
x.reduce(ops.add, "i")
# Marked wrong by the author; presumably ops.mean is not usable as a
# reduction operator with .reduce -- TODO confirm.
x.reduce(ops.mean, "i") # Wrong
x.mean("i")

@make_funsor
def Mean(
    X: Funsor,
    ax: Bound
) -> Fresh[lambda X: X]:
    """Mean of ``X`` over the named dimension ``ax``, via a positional reduction.

    ``ax`` is bound with ``Lambda``, which turns it into a leading positional
    dimension of the output, and ``ops.mean`` then averages over that
    dimension (axis 0).

    A later cell in this notebook defines another ``Mean``; whichever
    definition runs last is the one in effect.
    """
    # Lambda takes the variable to bind first and the expression second.
    return ops.mean(funsor.terms.Lambda(ax, X), 0)

In [16]:
@make_funsor
def Mean(
    X: Has[{"ax"}],
    ax: Bound
) -> Fresh[lambda X: X]:
    """Arithmetic mean of ``X`` over ``ax``: the sum over ``ax`` divided by its size."""
    total = X.reduce(ops.add, ax)
    return total / ax.output.size

@make_funsor
def Mean2(
    X: Has[{"ax", "ax2"}],
    ax: Bound,
    ax2: Bound
) -> Fresh[lambda X: X]:
    """Arithmetic mean of ``X`` jointly over ``ax`` and ``ax2``."""
    total = X.reduce(ops.add, frozenset({ax, ax2}))
    # Number of elements averaged: the product of the two dimension sizes.
    count = ax.output.size * ax2.output.size
    return total / count

@make_funsor
def Variance(
    X: Has[{"ax"}],
    ax: Bound
) -> Fresh[lambda X: X]:
    """Variance of ``X`` over ``ax``: the mean squared deviation from the mean over ``ax``."""
    deviation = X - Mean(X, ax)
    return Mean(deviation**2, ax)


@make_funsor
def Variance2(
    X: Has[{"ax", "ax2"}],
    ax: Bound,
    ax2: Bound
) -> Fresh[lambda X: X]:
    """Variance of ``X`` jointly over ``ax`` and ``ax2``: the mean squared deviation from the joint mean."""
    deviation = X - Mean2(X, ax, ax2)
    return Mean2(deviation**2, ax, ax2)

@make_funsor
def Standardize(
    X: Has[{"ax"}],
    ax: Bound,
    new_ax: Fresh[lambda ax: ax]
) -> Fresh[lambda X: X]:
    """Standardize ``X`` along ``ax``: subtract the mean and divide by ``sqrt(variance + eps)``.

    The result is indexed by the fresh ``new_ax`` (same domain as ``ax``)
    instead of ``ax``. ``eps`` comes from ``ops.finfo(X.data).eps``, so ``X``
    must expose a ``.data`` attribute.
    """
    renamed = X(**{ax.name: new_ax})
    centered = renamed - Mean(X, ax)
    scale = (Variance(X, ax) + ops.finfo(X.data).eps).sqrt()
    return centered / scale

@make_funsor
def Standardize2(
    X: Has[{"ax", "ax2"}],
    ax: Bound,
    ax2: Bound,
    new_ax: Fresh[lambda ax: ax],
    new_ax2: Fresh[lambda ax2: ax2]
) -> Fresh[lambda X: X]:
    """Standardize ``X`` jointly along ``ax`` and ``ax2``.

    Subtracts the joint mean and divides by ``sqrt(joint variance + eps)``.
    The result is indexed by the fresh ``new_ax`` and ``new_ax2`` instead of
    ``ax`` and ``ax2``. ``eps`` comes from ``ops.finfo(X.data).eps``, so ``X``
    must expose a ``.data`` attribute.
    """
    renamed = X(**{ax.name: new_ax, ax2.name: new_ax2})
    centered = renamed - Mean2(X, ax, ax2)
    scale = (Variance2(X, ax, ax2) + ops.finfo(X.data).eps).sqrt()
    return centered / scale

In [23]:
@make_funsor
def BatchNorm(
    X: Has[{"batch", "layer"}],
    gamma: Funsor,
    beta: Funsor,
    batch: Bound,
    layer: Bound,
    batch2: Fresh[lambda batch: batch],
    layer2: Fresh[lambda layer: layer]
) -> Fresh[lambda X: X]:
    """Standardize ``X`` jointly over ``batch`` and ``layer``, then scale by ``gamma`` and shift by ``beta``.

    The result is indexed by the fresh ``batch2`` and ``layer2`` instead of
    ``batch`` and ``layer``.
    """
    normalized = Standardize2(X, batch, layer, batch2, layer2)
    return normalized * gamma + beta

@make_funsor
def InstanceNorm(
    X: Has[{"layer"}],
    gamma: Funsor,
    beta: Funsor,
    layer: Bound,
    layer2: Fresh[lambda layer: layer]
) -> Fresh[lambda X: X]:
    """Standardize ``X`` over ``layer`` only, then scale by ``gamma`` and shift by ``beta``.

    The result is indexed by the fresh ``layer2`` instead of ``layer``.
    """
    normalized = Standardize(X, layer, layer2)
    return normalized * gamma + beta

In [24]:
# Normalisation demo: x is indexed by batch (4), chans (3) and layer (5);
# the scale g and shift b are per-channel.
# These assignments rebind the notebook-level names x and b used by earlier cells.
x = random_tensor(OrderedDict([("batch", Bint[4]), ("chans", Bint[3]), ("layer", Bint[5])]))
g = random_tensor(OrderedDict([("chans", Bint[3])]))
b = random_tensor(OrderedDict([("chans", Bint[3])]))

BatchNorm(x, g, b, "batch", "layer", "batch2", "layer2")

Tensor(tensor([[[ 1.4904,  1.4674,  1.4772,  1.4581,  1.4745],
         [ 1.3085,  1.4470,  1.3362,  1.3710,  1.3120],
         [-0.4153, -0.5301, -0.5862, -0.4013, -0.7001]],

        [[ 1.4705,  1.4701,  1.4662,  1.4754,  1.4709],
         [ 1.3233,  1.2909,  1.3478,  1.2751,  1.3455],
         [-0.6072, -0.2954, -0.2990, -0.4556, -0.8701]],

        [[ 1.4698,  1.4679,  1.4770,  1.4833,  1.4904],
         [ 1.3641,  1.3555,  1.3911,  1.3633,  1.3066],
         [-0.7784, -0.6248, -0.5895, -0.3076, -0.3127]],

        [[ 1.4795,  1.4730,  1.4691,  1.4822,  1.4694],
         [ 1.4017,  1.3826,  1.3396,  1.3068,  1.3966],
         [-0.1873, -0.4955, -0.4423, -0.1039, -0.5801]]]), OrderedDict([('batch2', Bint[4, ]), ('chans', Bint[3, ]), ('layer2', Bint[5, ])]), 'real')

In [25]:
# Same inputs, standardized over "layer" only: "batch" and "chans" remain as inputs.
InstanceNorm(x, g, b, "layer", "layer2")

Tensor(tensor([[[ 1.4865,  1.4696,  1.4768,  1.4628,  1.4748],
         [ 1.3104,  1.4234,  1.3330,  1.3614,  1.3132],
         [-0.2882, -0.4851, -0.5813, -0.2642, -0.7768]],

        [[ 1.4739,  1.4728,  1.4622,  1.4869,  1.4748],
         [ 1.3580,  1.3115,  1.3931,  1.2889,  1.3898],
         [-0.5691, -0.2935, -0.2966, -0.4350, -0.8015]],

        [[ 1.4667,  1.4649,  1.4735,  1.4794,  1.4860],
         [ 1.3604,  1.3473,  1.4013,  1.3591,  1.2732],
         [-0.7431, -0.5846, -0.5482, -0.2573, -0.2625]],

        [[ 1.4813,  1.4718,  1.4659,  1.4853,  1.4664],
         [ 1.3897,  1.3678,  1.3188,  1.2813,  1.3838],
         [-0.2981, -0.6177, -0.5626, -0.2117, -0.7055]]]), OrderedDict([('batch', Bint[4, ]), ('chans', Bint[3, ]), ('layer2', Bint[5, ])]), 'real')

## Transformer

## LeNet

## Other examples

### Discrete random variables

In [55]:
# Conditional probability table P(B | A): each row (one value of A) sums to 1.
B_given_A = Tensor(
    torch.tensor([[0.2, 0.3, 0.5],
                  [0.8, 0.1, 0.1]]),
)["A", "B"]

# Marginal P(A).
# NOTE: this rebinds the notebook-level name A, which held the 3x3 matrix
# defined at the top of the notebook.
A = Tensor(
    torch.tensor([0.6, 0.4]),
)["A"]

In [49]:
# Joint table: P(A, B) = P(B | A) * P(A), broadcast over the shared name "A".
A_and_B = B_given_A * A
A_and_B

Tensor(tensor([[0.1200, 0.1800, 0.3000],
        [0.3200, 0.0400, 0.0400]]), OrderedDict([('A', Bint[2, ]), ('B', Bint[3, ])]), 'real')

In [50]:

# Marginalise A out of the joint to get P(B).
# NOTE: this rebinds the notebook-level name B, which held a 3x3 matrix earlier.
B = A_and_B.reduce(ops.add, "A")
B

Tensor(tensor([0.4400, 0.2200, 0.3400]), OrderedDict([('B', Bint[3, ])]), 'real')

In [51]:
# Bayes' rule: P(A | B) = P(A, B) / P(B); each column (one value of B) sums to 1.
A_given_B = A_and_B / B
A_given_B

Tensor(tensor([[0.2727, 0.8182, 0.8824],
        [0.7273, 0.1818, 0.1176]]), OrderedDict([('A', Bint[2, ]), ('B', Bint[3, ])]), 'real')

### Advanced indexing

### Continuous bag of words

### Sudoku ILP

### K-means clustering

In [None]:
# Random inputs for a K-means example: x is indexed by batch (10) and d (4),
# c by clusters (3) and d (4). No clustering step is implemented in this cell.
x = random_tensor(OrderedDict([("batch", Bint[10]), ("d", Bint[4])]))
c = random_tensor(OrderedDict([("clusters", Bint[3]), ("d", Bint[4])]))

### Beam search

### Multivariate normal distribution

# LaTeX Macros

# Formal Definitions

## Records and shapes

## Named tensors

## Named tensor operations

## Common operations

# Differentiation

## Definition

## Rules

## Example

## Broadcasting

# Alternatives