# Review: Jacobian Matrix

In [1]:
import itertools
import os
import sys

# Absolute location of the project checkout; adjust for the local machine.
PATH: str = '/development/projects/statisticallyfit/github/learningmathstat/PythonNeuralNetNLP'

NEURALNET_PATH: str = PATH + '/src/MatrixCalculusStudy'

# Put both roots on the module search path so project packages can be imported.
# Entries already present are skipped, so re-running this cell does not pile
# up duplicate sys.path entries.
for _searchRoot in (PATH, NEURALNET_PATH):
    if _searchRoot not in sys.path:
        sys.path.append(_searchRoot)

In [2]:
from sympy import Matrix, MatrixSymbol, Symbol, derive_by_array, diff, sin, exp, symbols, Function
from sympy.abc import i, j

In [3]:
from src.utils.GeneralUtil import *
from src.MatrixCalculusStudy.MatrixDerivLib.symbols import Deriv
from src.MatrixCalculusStudy.MatrixDerivLib.diff import diffMatrix
from src.MatrixCalculusStudy.MatrixDerivLib.printingLatex import myLatexPrinter

from IPython.display import display, Math
from sympy.interactive import printing
def _latexWithProjectPrinter(expr, **_unusedSettings):
    """Render `expr` with the project's LaTeX printer, ignoring any settings passed in."""
    return myLatexPrinter.doprint(expr)

# Route sympy's MathJax output through the custom printer.
printing.init_printing(use_latex='mathjax', latex_printer=_latexWithProjectPrinter)

### Jacobian Matrix and Multivariable Functions
A vector $\mathbf{f} = \big( f_1, f_2, ..., f_m \big)$ of $m$ functions, each depending on $n$ variables $\mathbf{x} = \big(x_1, x_2, ..., x_n \big)$ defines a transformation or function from $\mathbb{R}^n$ to $\mathbb{R}^m$. Specifically, if $\mathbf{x} \in \mathbb{R}^n$ and if:
$$
\begin{aligned}
y_1 &= f_1 \big(x_1,x_2,...,x_n \big) \\
y_2 &= f_2 \big(x_1,x_2,...,x_n \big) \\
&\;\;\vdots \\
y_m &= f_m \big(x_1,x_2,...,x_n \big)
\end{aligned}
$$
then $\mathbf{y} = \big(y_1, y_2, ..., y_m \big)$ is the point in $\mathbb{R}^m$ that corresponds to $\mathbf{x}$ under the transformation $\mathbf{f}$. We can write these equations more compactly as:
$$
\mathbf{y} = \mathbf{f}(\mathbf{x})
$$
Information about the rate of change of $\mathbf{y}$ with respect to $\mathbf{x}$ is contained in the various partial derivatives $\frac{\partial y_i}{\partial x_j}$ for $1 \leq i \leq m, 1 \leq j \leq n$ and is conveniently organized into an $m \times n$ matrix $\frac{\partial \mathbf{y}}{\partial \mathbf{x}}$ called the **Jacobian matrix** of the transformation $\mathbf{f}$. The Jacobian matrix is the collection of all $m \times n$ possible partial derivatives ($m$ rows and $n$ columns), which is the stack of $m$ gradients with respect to $\mathbf{x}$:
$$
\Large
\begin{aligned}
\frac{\partial \mathbf{y}}{\partial \mathbf{x}} &= \begin{pmatrix}
   \nabla f_1(\mathbf{x}) \\
   \nabla f_2(\mathbf{x}) \\
   \vdots \\
   \nabla f_m(\mathbf{x})
\end{pmatrix}
= \begin{pmatrix}
   \frac{\partial}{\partial \mathbf{x}} f_1(\mathbf{x}) \\
   \frac{\partial}{\partial \mathbf{x}} f_2(\mathbf{x}) \\
   \vdots \\
   \frac{\partial}{\partial \mathbf{x}} f_m(\mathbf{x})
\end{pmatrix} \\
&= \begin{pmatrix}
  \frac{\partial}{\partial x_1} f_1(\mathbf{x}) & \frac{\partial}{\partial x_2} f_1(\mathbf{x}) & ... & \frac{\partial}{\partial x_n} f_1(\mathbf{x}) \\
  \frac{\partial}{\partial x_1} f_2(\mathbf{x}) & \frac{\partial}{\partial x_2} f_2(\mathbf{x}) & ... & \frac{\partial}{\partial x_n} f_2(\mathbf{x}) \\
  \vdots & \vdots &  & \vdots \\
  \frac{\partial}{\partial x_1} f_m(\mathbf{x}) & \frac{\partial}{\partial x_2} f_m(\mathbf{x}) & ... & \frac{\partial}{\partial x_n} f_m(\mathbf{x})
\end{pmatrix} \\
\frac{\partial \mathbf{y}}{\partial \mathbf{x}} &= \begin{pmatrix}
  \frac{\partial f_1}{\partial x_1} & \frac{\partial f_1}{\partial x_2} & ... & \frac{\partial f_1}{\partial x_n} \\
  \frac{\partial f_2}{\partial x_1} & \frac{\partial f_2}{\partial x_2} & ... & \frac{\partial f_2}{\partial x_n} \\
  \vdots & \vdots &  & \vdots \\
  \frac{\partial f_m}{\partial x_1} & \frac{\partial f_m}{\partial x_2} & ... & \frac{\partial f_m}{\partial x_n}
\end{pmatrix}
\end{aligned}
$$
This linear transformation represented by the Jacobian matrix is called **the derivative** of the transformation $\mathbf{f}$.

Each $\frac{\partial f_i}{\partial \mathbf{x}}$ is a row vector with $n$ entries because the partial derivative is taken with respect to the vector $\mathbf{x}$, whose length is $n = |\mathbf{x}|$. This makes the Jacobian $n$ columns wide: there are $n$ input variables, each potentially changing the function's value.


In [4]:
# Explicit 3x3 matrix whose entries are the individual elements of MatrixSymbol 'x'.
X = Matrix(MatrixSymbol('x', 3,3))
X

⎡x₀₀  x₀₁  x₀₂⎤
⎢             ⎥
⎢x₁₀  x₁₁  x₁₂⎥
⎢             ⎥
⎣x₂₀  x₂₁  x₂₂⎦

In [5]:
# Explicit 3x2 matrix whose entries are the individual elements of MatrixSymbol 'w'.
W = Matrix(MatrixSymbol('w', 3,2))
W

⎡w₀₀  w₀₁⎤
⎢        ⎥
⎢w₁₀  w₁₁⎥
⎢        ⎥
⎣w₂₀  w₂₁⎦

In [6]:
# Matrix product of X (3x3) and W (3x2): a 3x2 matrix whose entries are sums of products.
X*W

⎡w₀₀⋅x₀₀ + w₁₀⋅x₀₁ + w₂₀⋅x₀₂  w₀₁⋅x₀₀ + w₁₁⋅x₀₁ + w₂₁⋅x₀₂⎤
⎢                                                        ⎥
⎢w₀₀⋅x₁₀ + w₁₀⋅x₁₁ + w₂₀⋅x₁₂  w₀₁⋅x₁₀ + w₁₁⋅x₁₁ + w₂₁⋅x₁₂⎥
⎢                                                        ⎥
⎣w₀₀⋅x₂₀ + w₁₀⋅x₂₁ + w₂₀⋅x₂₂  w₀₁⋅x₂₀ + w₁₁⋅x₂₁ + w₂₁⋅x₂₂⎦

In [7]:
# Differentiate every entry of X*W with respect to every entry of X.
# The result is a 3x3 grid (one cell per entry of X) of 3x2 blocks (shape of X*W).
derive_by_array(X*W, X)

⎡⎡w₀₀  w₀₁⎤  ⎡w₁₀  w₁₁⎤  ⎡w₂₀  w₂₁⎤⎤
⎢⎢        ⎥  ⎢        ⎥  ⎢        ⎥⎥
⎢⎢ 0    0 ⎥  ⎢ 0    0 ⎥  ⎢ 0    0 ⎥⎥
⎢⎢        ⎥  ⎢        ⎥  ⎢        ⎥⎥
⎢⎣ 0    0 ⎦  ⎣ 0    0 ⎦  ⎣ 0    0 ⎦⎥
⎢                                  ⎥
⎢⎡ 0    0 ⎤  ⎡ 0    0 ⎤  ⎡ 0    0 ⎤⎥
⎢⎢        ⎥  ⎢        ⎥  ⎢        ⎥⎥
⎢⎢w₀₀  w₀₁⎥  ⎢w₁₀  w₁₁⎥  ⎢w₂₀  w₂₁⎥⎥
⎢⎢        ⎥  ⎢        ⎥  ⎢        ⎥⎥
⎢⎣ 0    0 ⎦  ⎣ 0    0 ⎦  ⎣ 0    0 ⎦⎥
⎢                                  ⎥
⎢⎡ 0    0 ⎤  ⎡ 0    0 ⎤  ⎡ 0    0 ⎤⎥
⎢⎢        ⎥  ⎢        ⎥  ⎢        ⎥⎥
⎢⎢ 0    0 ⎥  ⎢ 0    0 ⎥  ⎢ 0    0 ⎥⎥
⎢⎢        ⎥  ⎢        ⎥  ⎢        ⎥⎥
⎣⎣w₀₀  w₀₁⎦  ⎣w₁₀  w₁₁⎦  ⎣w₂₀  w₂₁⎦⎦

In [8]:
# Same derivative through Matrix.diff; the displayed result matches derive_by_array(X*W, X).
(X*W).diff(X)




⎡⎡w₀₀  w₀₁⎤  ⎡w₁₀  w₁₁⎤  ⎡w₂₀  w₂₁⎤⎤
⎢⎢        ⎥  ⎢        ⎥  ⎢        ⎥⎥
⎢⎢ 0    0 ⎥  ⎢ 0    0 ⎥  ⎢ 0    0 ⎥⎥
⎢⎢        ⎥  ⎢        ⎥  ⎢        ⎥⎥
⎢⎣ 0    0 ⎦  ⎣ 0    0 ⎦  ⎣ 0    0 ⎦⎥
⎢                                  ⎥
⎢⎡ 0    0 ⎤  ⎡ 0    0 ⎤  ⎡ 0    0 ⎤⎥
⎢⎢        ⎥  ⎢        ⎥  ⎢        ⎥⎥
⎢⎢w₀₀  w₀₁⎥  ⎢w₁₀  w₁₁⎥  ⎢w₂₀  w₂₁⎥⎥
⎢⎢        ⎥  ⎢        ⎥  ⎢        ⎥⎥
⎢⎣ 0    0 ⎦  ⎣ 0    0 ⎦  ⎣ 0    0 ⎦⎥
⎢                                  ⎥
⎢⎡ 0    0 ⎤  ⎡ 0    0 ⎤  ⎡ 0    0 ⎤⎥
⎢⎢        ⎥  ⎢        ⎥  ⎢        ⎥⎥
⎢⎢ 0    0 ⎥  ⎢ 0    0 ⎥  ⎢ 0    0 ⎥⎥
⎢⎢        ⎥  ⎢        ⎥  ⎢        ⎥⎥
⎣⎣w₀₀  w₀₁⎦  ⎣w₁₀  w₁₁⎦  ⎣w₂₀  w₂₁⎦⎦

In [9]:

# Scalar input symbols and three undefined functions named f, g, h.
x, y, z = symbols('x y z')
f, g, h = (Function(letter) for letter in 'fgh')

# The input variables grouped as a tuple.
xv = (x, y, z)
xv

(x, y, z)

In [10]:
# Apply each undefined function to all of the input variables.
yv = [func(*xv) for func in (f, g, h)]
yv

[f(x, y, z), g(x, y, z), h(x, y, z)]

In [11]:
# Stack the three function applications into a 3x1 column matrix.
Matrix(yv)

⎡f(x, y, z)⎤
⎢          ⎥
⎢g(x, y, z)⎥
⎢          ⎥
⎣h(x, y, z)⎦

In [12]:
# Jacobian of (f, g, h) with respect to (x, y, z):
# one row per function, one column per variable.
Matrix(yv).jacobian(Matrix(xv))

⎡∂               ∂               ∂             ⎤
⎢──(f(x, y, z))  ──(f(x, y, z))  ──(f(x, y, z))⎥
⎢∂x              ∂y              ∂z            ⎥
⎢                                              ⎥
⎢∂               ∂               ∂             ⎥
⎢──(g(x, y, z))  ──(g(x, y, z))  ──(g(x, y, z))⎥
⎢∂x              ∂y              ∂z            ⎥
⎢                                              ⎥
⎢∂               ∂               ∂             ⎥
⎢──(h(x, y, z))  ──(h(x, y, z))  ──(h(x, y, z))⎥
⎣∂x              ∂y              ∂z            ⎦

In [13]:
# derive_by_array lays the result out with one row per variable and one column
# per function, i.e. the transpose of the Jacobian layout.
derive_by_array(yv, xv)

⎡∂               ∂               ∂             ⎤
⎢──(f(x, y, z))  ──(g(x, y, z))  ──(h(x, y, z))⎥
⎢∂x              ∂x              ∂x            ⎥
⎢                                              ⎥
⎢∂               ∂               ∂             ⎥
⎢──(f(x, y, z))  ──(g(x, y, z))  ──(h(x, y, z))⎥
⎢∂y              ∂y              ∂y            ⎥
⎢                                              ⎥
⎢∂               ∂               ∂             ⎥
⎢──(f(x, y, z))  ──(g(x, y, z))  ──(h(x, y, z))⎥
⎣∂z              ∂z              ∂z            ⎦

In [14]:
# The transposed derive_by_array result must coincide with Matrix.jacobian.
jacobianViaArray = Matrix(derive_by_array(yv, xv)).transpose()
jacobianDirect = Matrix(yv).jacobian(xv)
assert jacobianViaArray == jacobianDirect

In [15]:
### TEST 2: substituting values
# Keep the symbolic Jacobian in `m`; the following cells substitute into it.
m = Matrix(yv).jacobian(xv)

# Plug a concrete point into the still-abstract derivatives.
evalPoint = {x: 1, y: 2, z: 3}
m.subs(evalPoint)

⎡⎛d             ⎞│     ⎛d             ⎞│     ⎛d             ⎞│   ⎤
⎢⎜──(f(x, 2, 3))⎟│     ⎜──(f(1, y, 3))⎟│     ⎜──(f(1, 2, z))⎟│   ⎥
⎢⎝dx            ⎠│x=1  ⎝dy            ⎠│y=2  ⎝dz            ⎠│z=3⎥
⎢                                                                ⎥
⎢⎛d             ⎞│     ⎛d             ⎞│     ⎛d             ⎞│   ⎥
⎢⎜──(g(x, 2, 3))⎟│     ⎜──(g(1, y, 3))⎟│     ⎜──(g(1, 2, z))⎟│   ⎥
⎢⎝dx            ⎠│x=1  ⎝dy            ⎠│y=2  ⎝dz            ⎠│z=3⎥
⎢                                                                ⎥
⎢⎛d             ⎞│     ⎛d             ⎞│     ⎛d             ⎞│   ⎥
⎢⎜──(h(x, 2, 3))⎟│     ⎜──(h(1, y, 3))⎟│     ⎜──(h(1, 2, z))⎟│   ⎥
⎣⎝dx            ⎠│x=1  ⎝dy            ⎠│y=2  ⎝dz            ⎠│z=3⎦

In [16]:
# Swap the abstract f, g, h for concrete expressions; the derivatives are
# left unevaluated at this stage.
concreteDefs = {
    f(*xv): x**2 * y*z,
    g(*xv): sin(x*y*z*3),
    h(*xv): y + z*exp(x),
}
m.subs(concreteDefs)

⎡   ∂ ⎛ 2    ⎞        ∂ ⎛ 2    ⎞        ∂ ⎛ 2    ⎞   ⎤
⎢   ──⎝x ⋅y⋅z⎠        ──⎝x ⋅y⋅z⎠        ──⎝x ⋅y⋅z⎠   ⎥
⎢   ∂x                ∂y                ∂z           ⎥
⎢                                                    ⎥
⎢∂                 ∂                 ∂               ⎥
⎢──(sin(3⋅x⋅y⋅z))  ──(sin(3⋅x⋅y⋅z))  ──(sin(3⋅x⋅y⋅z))⎥
⎢∂x                ∂y                ∂z              ⎥
⎢                                                    ⎥
⎢  ∂ ⎛       x⎞      ∂ ⎛       x⎞      ∂ ⎛       x⎞  ⎥
⎢  ──⎝y + z⋅ℯ ⎠      ──⎝y + z⋅ℯ ⎠      ──⎝y + z⋅ℯ ⎠  ⎥
⎣  ∂x                ∂y                ∂z            ⎦

In [17]:
# Jacobian with concrete expressions in place of f, g, h (derivatives still pending).
concreteF = x**2 * y*z
concreteG = sin(x*y*z*3)
concreteH = y + z*exp(x)
m_subs = m.subs({f(*xv): concreteF, g(*xv): concreteG, h(*xv): concreteH})

# Carry out the pending differentiations.
m_subs.doit()

⎡                            2                   2         ⎤
⎢     2⋅x⋅y⋅z               x ⋅z                x ⋅y       ⎥
⎢                                                          ⎥
⎢3⋅y⋅z⋅cos(3⋅x⋅y⋅z)  3⋅x⋅z⋅cos(3⋅x⋅y⋅z)  3⋅x⋅y⋅cos(3⋅x⋅y⋅z)⎥
⎢                                                          ⎥
⎢          x                                      x        ⎥
⎣       z⋅ℯ                  1                   ℯ         ⎦

In [18]:
# Evaluate the derivatives, then plug in the point (x, y, z) = (1, 2, 3).
m_subs.doit().subs({x:1, y:2, z:3})



⎡    12          3          2    ⎤
⎢                                ⎥
⎢18⋅cos(18)  9⋅cos(18)  6⋅cos(18)⎥
⎢                                ⎥
⎣   3⋅ℯ          1          ℯ    ⎦

In [19]:
# More general / abstract example:
# NOTE(review): `m` and `xv` are rebound in this cell (earlier cells use `m`
# for the Jacobian matrix and `xv` for the tuple (x, y, z)); re-run those
# earlier cells before reusing them.

n, m = 5, 7

# Column of the n input symbols, subscripted from 1.
xv = Matrix(n, 1, lambda row, _: var_i('x', row + 1))

# Column of m plain symbols, subscripted from 1.
fs = Matrix(m, 1, lambda row, _: var_i('f', row + 1))

# Column of m function applications over the n inputs.
fv = Matrix(m, 1, lambda row, _: func_i('f', row, xLetter = 'x', xLen = n))

# Lookup tables to switch between the applied-function and plain-symbol forms.
mapFFuncToF = {applied: plain for applied, plain in zip(fv, fs)}
mapFToFFunc = {plain: applied for plain, applied in zip(fs, fv)}

showGroup([xv, fv, fs])

⎡x₁⎤
⎢  ⎥
⎢x₂⎥
⎢  ⎥
⎢x₃⎥
⎢  ⎥
⎢x₄⎥
⎢  ⎥
⎣x₅⎦

⎡f₁(x₁, x₂, x₃, x₄, x₅)⎤
⎢                      ⎥
⎢f₂(x₁, x₂, x₃, x₄, x₅)⎥
⎢                      ⎥
⎢f₃(x₁, x₂, x₃, x₄, x₅)⎥
⎢                      ⎥
⎢f₄(x₁, x₂, x₃, x₄, x₅)⎥
⎢                      ⎥
⎢f₅(x₁, x₂, x₃, x₄, x₅)⎥
⎢                      ⎥
⎢f₆(x₁, x₂, x₃, x₄, x₅)⎥
⎢                      ⎥
⎣f₇(x₁, x₂, x₃, x₄, x₅)⎦

⎡f₁⎤
⎢  ⎥
⎢f₂⎥
⎢  ⎥
⎢f₃⎥
⎢  ⎥
⎢f₄⎥
⎢  ⎥
⎢f₅⎥
⎢  ⎥
⎢f₆⎥
⎢  ⎥
⎣f₇⎦

In [20]:
# The final jacobian (simplified): compute it once, then replace each applied
# function with its plain symbol (via mapFFuncToF) for a compact display.
jacF = fv.jacobian(xv).subs(mapFFuncToF)
jacF

⎡ d        d        d        d        d     ⎤
⎢───(f₁)  ───(f₁)  ───(f₁)  ───(f₁)  ───(f₁)⎥
⎢dx₁      dx₂      dx₃      dx₄      dx₅    ⎥
⎢                                           ⎥
⎢ d        d        d        d        d     ⎥
⎢───(f₂)  ───(f₂)  ───(f₂)  ───(f₂)  ───(f₂)⎥
⎢dx₁      dx₂      dx₃      dx₄      dx₅    ⎥
⎢                                           ⎥
⎢ d        d        d        d        d     ⎥
⎢───(f₃)  ───(f₃)  ───(f₃)  ───(f₃)  ───(f₃)⎥
⎢dx₁      dx₂      dx₃      dx₄      dx₅    ⎥
⎢                                           ⎥
⎢ d        d        d        d        d     ⎥
⎢───(f₄)  ───(f₄)  ───(f₄)  ───(f₄)  ───(f₄)⎥
⎢dx₁      dx₂      dx₃      dx₄      dx₅    ⎥
⎢                                           ⎥
⎢ d        d        d        d        d     ⎥
⎢───(f₅)  ───(f₅)  ───(f₅)  ───(f₅)  ───(f₅)⎥
⎢dx₁      dx₂      dx₃      dx₄      dx₅    ⎥
⎢                                           ⎥
⎢ d        d        d        d        d     ⎥
⎢───(f₆)  ───(f₆)  ───(f₆)  ───(f₆

In [21]:

# Doing it the derive_by_array way
# Flatten the column matrices (lists of one-element rows) into plain lists.
fv_list = list(itertools.chain.from_iterable(fv.tolist()))
xv_list = list(itertools.chain.from_iterable(xv.tolist()))

# derive_by_array puts variables along rows and functions along columns, so
# transpose to get the Jacobian layout before swapping in the plain symbols.
jacF_derive = Matrix(derive_by_array(fv_list, xv_list)).transpose().subs(mapFFuncToF)

jacF_derive

⎡ d        d        d        d        d     ⎤
⎢───(f₁)  ───(f₁)  ───(f₁)  ───(f₁)  ───(f₁)⎥
⎢dx₁      dx₂      dx₃      dx₄      dx₅    ⎥
⎢                                           ⎥
⎢ d        d        d        d        d     ⎥
⎢───(f₂)  ───(f₂)  ───(f₂)  ───(f₂)  ───(f₂)⎥
⎢dx₁      dx₂      dx₃      dx₄      dx₅    ⎥
⎢                                           ⎥
⎢ d        d        d        d        d     ⎥
⎢───(f₃)  ───(f₃)  ───(f₃)  ───(f₃)  ───(f₃)⎥
⎢dx₁      dx₂      dx₃      dx₄      dx₅    ⎥
⎢                                           ⎥
⎢ d        d        d        d        d     ⎥
⎢───(f₄)  ───(f₄)  ───(f₄)  ───(f₄)  ───(f₄)⎥
⎢dx₁      dx₂      dx₃      dx₄      dx₅    ⎥
⎢                                           ⎥
⎢ d        d        d        d        d     ⎥
⎢───(f₅)  ───(f₅)  ───(f₅)  ───(f₅)  ───(f₅)⎥
⎢dx₁      dx₂      dx₃      dx₄      dx₅    ⎥
⎢                                           ⎥
⎢ d        d        d        d        d     ⎥
⎢───(f₆)  ───(f₆)  ───(f₆)  ───(f₆

In [22]:
# Both routes (Matrix.jacobian and the transposed derive_by_array) must give the same matrix.
assert jacF == jacF_derive