# **Librerias**

In [1]:
# Importamos

import jax
import torch
import jax.numpy as jaxnp

from jax import grad

# Print the installed JAX and PyTorch versions (the outputs recorded below were produced with these)

print(f'JAX Version: {jax.__version__}')
print(f'Pytorch Version: {torch.__version__}')

JAX Version: 0.4.34
Pytorch Version: 2.5.0+cpu


# **Traza**

* **Primer Orden:**

$$\frac{\partial}{\partial X} \, \text{Tr}(X) = I$$
$$\frac{\partial}{\partial X} \, \text{Tr}(XA) = A^T$$
$$\frac{\partial}{\partial X} \, \text{Tr}(AXB) = A^T B^T$$
$$\frac{\partial}{\partial X} \, \text{Tr}(AX^T B) = BA$$
$$\frac{\partial}{\partial X} \, \text{Tr}(X^T A) = A$$
$$\frac{\partial}{\partial X} \, \text{Tr}(AX^T) = A$$
$$\frac{\partial}{\partial X} \, \text{Tr}(A \otimes X) = \text{Tr}(A) I$$

* **Segundo Orden:**

$$\frac{\partial}{\partial X} \, \text{Tr}(X^2) = 2X^T$$
$$\frac{\partial}{\partial X} \, \text{Tr}(X^2 B) = (XB + BX)^T$$
$$\frac{\partial}{\partial X} \, \text{Tr}(X^T B X) = BX + B^T X$$
$$\frac{\partial}{\partial X} \, \text{Tr}(B X X^T) = BX + B^T X$$
$$\frac{\partial}{\partial X} \, \text{Tr}(X X^T B) = BX + B^T X$$
$$\frac{\partial}{\partial X} \, \text{Tr}(X B X^T) = X B^T + XB$$
$$\frac{\partial}{\partial X} \, \text{Tr}(B X^T X) = X B^T + XB$$
$$\frac{\partial}{\partial X} \, \text{Tr}(X^T X B) = X B^T + XB$$
$$\frac{\partial}{\partial X} \, \text{Tr}(A X B X) = A^T X^T B^T + B^T X^T A^T$$
$$\frac{\partial}{\partial X} \, \text{Tr}(X^T X) = \frac{\partial}{\partial X} \, \text{Tr}(X X^T) = 2X$$
$$\frac{\partial}{\partial X} \, \text{Tr}(B^T X^T C X B) = C^T X B B^T + C X B B^T$$
$$\frac{\partial}{\partial X} \, \text{Tr}(X^T B X C) = B X C + B^T X C^T$$
$$\frac{\partial}{\partial X} \, \text{Tr}(A X B X^T C) = A^T C^T X B^T + C A X B$$
$$\frac{\partial}{\partial X} \, \text{Tr} \big( (A X B + C)(A X B + C)^T \big) = 2 A^T (A X B + C) B^T$$
$$\frac{\partial}{\partial X} \, \text{Tr}(X \otimes X) = \frac{\partial}{\partial X} \, \text{Tr}(X) \text{Tr}(X) = 2 \, \text{Tr}(X) I$$



### **Ejercicio #1**

**Funcion:**

$$F(X) = \text{Tr}(XA)$$

**Concepto Clave:** 

$$f'(X) = (\nabla X)^T$$
$$\nabla X = f'(X)^T$$

**Artificios:** 

$$d(\text{Tr}(X)) = \text{Tr}(d(X))$$
$$\text{Tr}(AB) = \text{Tr}(BA)$$

**Derivada:**

$$F(X) = \text{Tr}(AX)$$
$$df(X) = \text{Tr}(d(AX))$$
$$df(X) = \text{Tr}(AdX + dAX)$$
$$df(X) = \text{Tr}(AdX)$$

**Gradiente:** 

$$(\nabla X)^T = A$$
$$\nabla X = A^T$$

In [2]:
# Definimos las Matrices

X = torch.tensor([[1.0, 2.0, 7.0], [3.0, 4.0, 9.0]], requires_grad = True)
A = torch.tensor([[2.0, 8.0], [6.0, 1.0], [4.0, 3.0]], requires_grad = False)

# Definimos Nuestra Funcion Escalar

func_torch = torch.trace(torch.matmul(A, X))

# Calculamos el Gradiente 

func_torch.backward()

# Visualizamos 

print(f'El Gradiente Respecto a X: \n{X.grad}')

El Gradiente Respecto a X: 
tensor([[2., 6., 4.],
        [8., 1., 3.]])


In [3]:
# Definimos las Matrices

X = jaxnp.array([[1.0, 2.0, 7.0], [3.0, 4.0, 9.0]])
A = jaxnp.array([[2.0, 8.0], [6.0, 1.0], [4.0, 3.0]])

# Definimos Nuestra Funcion Escalar

def func_jax(X):
    return jaxnp.trace(jaxnp.matmul(A, X))

# Calculamos el Gradiente 

func_grad = grad(fun = func_jax)

# Obtenemos el Gradiente con respecto a X 

X_grad = func_grad(X)

# Visualizamos 

print(f'El Gradiente Respecto a X: \n{X_grad}')

El Gradiente Respecto a X: 
[[2. 6. 4.]
 [8. 1. 3.]]


In [4]:
# Constant matrix of the exercise

A = torch.tensor([[2.0, 8.0], [6.0, 1.0], [4.0, 3.0]])

# Closed-form gradient: d Tr(XA) / dX = A^T

gradient = A.t()

# Show the result

print(f'El Gradiente Respecto a X: \n{gradient}')

El Gradiente Respecto a X: 
tensor([[2., 6., 4.],
        [8., 1., 3.]])


### **Ejercicio #2**

**Funcion:**

$$F(X) = \text{Tr}(AXB)$$

**Concepto Clave:** 

$$f'(X) = (\nabla X)^T$$
$$\nabla X = f'(X)^T$$

**Artificios:** 

$$d(\text{Tr}(X)) = \text{Tr}(d(X))$$

**Derivada:**

$$F(X) = \text{Tr}(BAX)$$
$$df(X) = \text{Tr}(d(BAX))$$
$$df(X) = \text{Tr}(BAdX + BdAX + dBAX)$$
$$df(X) = \text{Tr}(BAdX)$$

**Gradiente:** 

$$(\nabla X)^T = BA$$
$$\nabla X = A^TB^T$$

In [5]:
# Input matrices: X (2x3) is tracked by autograd; A (3x2) and B (3x3) are constants

X = torch.tensor([[1.0, 2.0, 7.0], [3.0, 4.0, 9.0]], requires_grad=True)
A = torch.tensor([[2.0, 8.0], [6.0, 1.0], [4.0, 3.0]])
B = torch.tensor([[5.0, 2.0, 5.0], [1.0, 7.0, 2.0], [5.0, 1.0, 1.0]])

# Scalar objective F(X) = Tr(AXB), evaluated left to right as (AX)B

func_torch = torch.trace(A @ X @ B)

# Backpropagate so that X.grad holds dF/dX

func_torch.backward()

# Show the result

print(f'El Gradiente Respecto a X: \n{X.grad}')

El Gradiente Respecto a X: 
tensor([[42., 52., 20.],
        [57., 21., 44.]])


In [6]:
# Same matrices as JAX arrays

X = jaxnp.array([[1.0, 2.0, 7.0], [3.0, 4.0, 9.0]])
A = jaxnp.array([[2.0, 8.0], [6.0, 1.0], [4.0, 3.0]])
B = jaxnp.array([[5.0, 2.0, 5.0], [1.0, 7.0, 2.0], [5.0, 1.0, 1.0]])

# Scalar objective F(X) = Tr(AXB); A and B are read from the enclosing (module) scope

def func_jax(X):
    """Return Tr((A @ X) @ B) for the module-level matrices A and B."""
    product = A @ X @ B
    return jaxnp.trace(product)

# Build the gradient function and evaluate it at X

func_grad = grad(func_jax)
X_grad = func_grad(X)

# Show the result

print(f'El Gradiente Respecto a X: \n{X_grad}')

El Gradiente Respecto a X: 
[[42. 52. 20.]
 [57. 21. 44.]]


In [7]:
# Constant matrices of the exercise

A = torch.tensor([[2.0, 8.0], [6.0, 1.0], [4.0, 3.0]])
B = torch.tensor([[5.0, 2.0, 5.0], [1.0, 7.0, 2.0], [5.0, 1.0, 1.0]])

# Closed-form gradient: d Tr(AXB) / dX = A^T B^T

gradient = A.t() @ B.t()

# Show the result

print(f'El Gradiente Respecto a X: \n{gradient}')

El Gradiente Respecto a X: 
tensor([[42., 52., 20.],
        [57., 21., 44.]])


### **Ejercicio #3**

**Funcion:**

$$F(X) = \text{Tr}(X^2B)$$

**Concepto Clave:** 

$$f'(X) = (\nabla X)^T$$
$$\nabla X = f'(X)^T$$

**Artificios:** 

$$d(\text{Tr}(X)) = \text{Tr}(d(X))$$
$$\text{Tr}(ABC) = \text{Tr}(CAB) = \text{Tr}(BCA)$$
$$\text{Tr}(A + B) = \text{Tr}(A) + \text{Tr}(B)$$

**Derivada:**

$$F(X) = \text{Tr}(XXB)$$
$$F(X) = \text{Tr}(BXX)$$
$$df(X) = \text{Tr}(d(BXX))$$
$$df(X) = \text{Tr}(BXdX + BdXX + dBXX)$$
$$df(X) = \text{Tr}(BXdX + BdXX)$$
$$df(X) = \text{Tr}(BXdX) +  \text{Tr}(BdXX)$$
$$df(X) = \text{Tr}(BXdX) +  \text{Tr}(XBdX)$$

**Gradiente:** 

$$(\nabla X)^T = BX + XB$$
$$\nabla X = (BX + XB)^T$$

---------------

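**NOTA:** Si dentro de PyTorch y JAX definimos la función como $\text{trace}(\text{matmul}(X^2, B))$ usando `X**2`, el cálculo del gradiente sale errado, porque `X**2` eleva cada elemento al cuadrado (potencia elemento a elemento) en lugar de calcular el producto matricial $XX$. Por eso en el código usamos $\text{matmul}(X, X)$.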

In [8]:
# Input matrices: X (3x3) is tracked by autograd, B (3x3) is a constant

X = torch.tensor([[1.0, 2.0, 7.0], [3.0, 4.0, 9.0], [5.0, 4.0, 1.0]], requires_grad=True)
B = torch.tensor([[5.0, 2.0, 5.0], [1.0, 7.0, 2.0], [5.0, 1.0, 1.0]])

# Scalar objective F(X) = Tr(X X B); X @ X is the matrix product, not the element-wise square

func_torch = torch.trace(X @ X @ B)

# Backpropagate so that X.grad holds dF/dX

func_torch.backward()

# Show the result

print(f'El Gradiente Respecto a X: \n{X.grad}')

El Gradiente Respecto a X: 
tensor([[ 78.,  96.,  47.],
        [ 61.,  81.,  57.],
        [ 74., 104.,  79.]])


In [9]:
# Same matrices as JAX arrays

X = jaxnp.array([[1.0, 2.0, 7.0], [3.0, 4.0, 9.0], [5.0, 4.0, 1.0]])
B = jaxnp.array([[5.0, 2.0, 5.0], [1.0, 7.0, 2.0], [5.0, 1.0, 1.0]])

# Scalar objective F(X) = Tr(X X B); B is read from the enclosing (module) scope

def func_jax(X):
    """Return Tr((X @ X) @ B), using the matrix product X @ X."""
    squared = X @ X
    return jaxnp.trace(squared @ B)

# Build the gradient function and evaluate it at X

func_grad = grad(func_jax)
X_grad = func_grad(X)

# Show the result

print(f'El Gradiente Respecto a X: \n{X_grad}')

El Gradiente Respecto a X: 
[[ 78.  96.  47.]
 [ 61.  81.  57.]
 [ 74. 104.  79.]]


In [10]:
# Matrices of the exercise (no autograd tracking needed here)

X = torch.tensor([[1.0, 2.0, 7.0], [3.0, 4.0, 9.0], [5.0, 4.0, 1.0]])
B = torch.tensor([[5.0, 2.0, 5.0], [1.0, 7.0, 2.0], [5.0, 1.0, 1.0]])

# Closed-form gradient: d Tr(X^2 B) / dX = (BX + XB)^T

symmetric_sum = B @ X + X @ B
gradient = symmetric_sum.t()

# Show the result

print(f'El Gradiente Respecto a X: \n{gradient}')

El Gradiente Respecto a X: 
tensor([[ 78.,  96.,  47.],
        [ 61.,  81.,  57.],
        [ 74., 104.,  79.]])


### **Ejercicio #4**

**Funcion:**

$$F(X) = \text{Tr}(AXBX^TC)$$

**Concepto Clave:** 

$$f'(X) = (\nabla X)^T$$
$$\nabla X = f'(X)^T$$

**Artificios:** 

$$d(\text{Tr}(X)) = \text{Tr}(d(X))$$
$$\text{Tr}(ABC) = \text{Tr}(CAB) = \text{Tr}(BCA)$$
$$\text{Tr}(A + B) = \text{Tr}(A) + \text{Tr}(B)$$

**Derivada:**

$$df(X) = \text{Tr}(d(AXBX^TC))$$
$$df(X) = \text{Tr}(AXBX^TdC + AXBdX^TC + AXdBX^TC + AdXBX^TC + dAXBX^TC)$$
$$df(X) = \text{Tr}(AXBdX^TC + AdXBX^TC)$$
$$df(X) = \text{Tr}(AXBdX^TC) +  \text{Tr}(AdXBX^TC)$$
$$df(X) = \text{Tr}(dX^TCAXB) +  \text{Tr}(BX^TCAdX)$$
$$df(X) = \text{Tr}((dX^TCAXB)^T) +  \text{Tr}(BX^TCAdX)$$
$$df(X) = \text{Tr}(B^TX^TA^TC^TdX) +  \text{Tr}(BX^TCAdX)$$

**Gradiente:** 

$$(\nabla X)^T = B^TX^TA^TC^T + BX^TCA$$
$$\nabla X = CAXB + A^TC^TXB^T$$

In [11]:
# Input matrices: X (3x4) is tracked by autograd; A (2x3), B (4x4) and C (3x2) are constants

X = torch.tensor([[1.0, 2.0, 7.0, 5.0], [3.0, 4.0, 9.0, 1.0], [5.0, 4.0, 1.0, 3.0]], requires_grad=True)
A = torch.tensor([[5.0, 2.0, 5.0], [1.0, 7.0, 2.0]])
B = torch.tensor([[5.0, 2.0, 5.0, 9.0], [1.0, 7.0, 2.0, 9.0], [5.0, 1.0, 1.0, 9.0], [5.0, 1.0, 1.0, 9.0]])
C = torch.tensor([[5.0, 4.0], [2.0, 2.0], [8.0, 5.0]])

# Scalar objective F(X) = Tr(A X B X^T C), multiplied left to right

func_torch = torch.trace(A @ X @ B @ X.T @ C)

# Backpropagate so that X.grad holds dF/dX

func_torch.backward()

# Show the result

print(f'El Gradiente Respecto a X: \n{X.grad}')

El Gradiente Respecto a X: 
tensor([[12612.,  9502.,  7804., 18310.],
        [10815.,  8734.,  7179., 11827.],
        [16249., 11958.,  9793., 25043.]])


In [12]:
# Same matrices as JAX arrays

X = jaxnp.array([[1.0, 2.0, 7.0, 5.0], [3.0, 4.0, 9.0, 1.0], [5.0, 4.0, 1.0, 3.0]])
A = jaxnp.array([[5.0, 2.0, 5.0], [1.0, 7.0, 2.0]])
B = jaxnp.array([[5.0, 2.0, 5.0, 9.0], [1.0, 7.0, 2.0, 9.0], [5.0, 1.0, 1.0, 9.0], [5.0, 1.0, 1.0, 9.0]])
C = jaxnp.array([[5.0, 4.0], [2.0, 2.0], [8.0, 5.0]])

# Scalar objective F(X) = Tr(A X B X^T C); A, B and C are read from the enclosing (module) scope

def func_jax(X):
    """Return Tr(A X B X^T C), multiplying the factors left to right."""
    chained = A @ X @ B @ X.T @ C
    return jaxnp.trace(chained)

# Build the gradient function and evaluate it at X

func_grad = grad(func_jax)
X_grad = func_grad(X)

# Show the result

print(f'El Gradiente Respecto a X: \n{X_grad}')

El Gradiente Respecto a X: 
[[12612.  9502.  7804. 18310.]
 [10815.  8734.  7179. 11827.]
 [16249. 11958.  9793. 25043.]]


In [13]:
# Matrices of the exercise (no autograd tracking needed here)

X = torch.tensor([[1.0, 2.0, 7.0, 5.0], [3.0, 4.0, 9.0, 1.0], [5.0, 4.0, 1.0, 3.0]])
A = torch.tensor([[5.0, 2.0, 5.0], [1.0, 7.0, 2.0]])
B = torch.tensor([[5.0, 2.0, 5.0, 9.0], [1.0, 7.0, 2.0, 9.0], [5.0, 1.0, 1.0, 9.0], [5.0, 1.0, 1.0, 9.0]])
C = torch.tensor([[5.0, 4.0], [2.0, 2.0], [8.0, 5.0]])

# Closed-form gradient: d Tr(A X B X^T C) / dX = C A X B + A^T C^T X B^T

first_term = C @ A @ X @ B
second_term = A.t() @ C.t() @ X @ B.t()
gradient = first_term + second_term

# Show the result

print(f'El Gradiente Respecto a X: \n{gradient}')

El Gradiente Respecto a X: 
tensor([[12612.,  9502.,  7804., 18310.],
        [10815.,  8734.,  7179., 11827.],
        [16249., 11958.,  9793., 25043.]])


# **Norma**

**Frobenius:**

$$\frac{\partial}{\partial X} \, \|X\|_F^2 = \frac{\partial}{\partial X} \, \text{Tr}(X X^T)$$


### **Ejercicio #1**

**Funcion:**

$$F(X) = \, \|X\|_F^2$$

**Concepto Clave:** 

$$f'(X) = (\nabla X)^T$$
$$\nabla X = f'(X)^T$$

**Artificios:** 

$$\, \|X\|_F^2 = \text{Tr}(X X^T)$$
$$d(\text{Tr}(X)) = \text{Tr}(d(X))$$
$$\text{Tr}(ABC) = \text{Tr}(CAB) = \text{Tr}(BCA)$$
$$\text{Tr}(A + B) = \text{Tr}(A) + \text{Tr}(B)$$

**Derivada:**

$$df(X) = \text{Tr}(d(X X^T))$$
$$df(X) = \text{Tr}(XdX^T + dXX^T)$$
$$df(X) = \text{Tr}(dXX^T + dXX^T)$$
$$df(X) = \text{Tr}(dXX^T) +  \text{Tr}(dXX^T)$$
$$df(X) = \text{Tr}(X^TdX) +  \text{Tr}(X^TdX)$$

**Gradiente:** 

$$(\nabla X)^T = X^T + X^T = 2X^T$$
$$\nabla X = 2X$$

In [14]:
# Input matrix X (4x4), tracked by autograd

X = torch.tensor([[1.0, 2.0, 7.0, 5.0], [3.0, 4.0, 9.0, 1.0], [5.0, 4.0, 1.0, 3.0], [9.0, 5.0, 7.0, 2.0]], requires_grad = True)

# Scalar objective F(X) = ||X||_F^2.
# torch.norm is deprecated; torch.linalg.matrix_norm computes the Frobenius norm by default.

func_torch = torch.linalg.matrix_norm(X, ord = 'fro')**2

# Backpropagate so that X.grad holds dF/dX

func_torch.backward()

# Show the result

print(f'El Gradiente Respecto a X: \n{X.grad}')

El Gradiente Respecto a X: 
tensor([[ 2.,  4., 14., 10.],
        [ 6.,  8., 18.,  2.],
        [10.,  8.,  2.,  6.],
        [18., 10., 14.,  4.]])


In [15]:
# Input matrix X (4x4) as a JAX array

X = jaxnp.array([[1.0, 2.0, 7.0, 5.0], [3.0, 4.0, 9.0, 1.0], [5.0, 4.0, 1.0, 3.0], [9.0, 5.0, 7.0, 2.0]])

# Scalar objective F(X) = ||X||_F^2

def func_jax(X):
    """Return the squared norm of X as computed by jaxnp.linalg.norm with default arguments."""
    frobenius = jaxnp.linalg.norm(X)
    return frobenius**2

# Build the gradient function and evaluate it at X

func_grad = grad(func_jax)
X_grad = func_grad(X)

# Show the result

print(f'El Gradiente Respecto a X: \n{X_grad}')

El Gradiente Respecto a X: 
[[ 2.  4. 14. 10.]
 [ 6.  8. 18.  2.]
 [10.  8.  2.  6.]
 [18. 10. 14.  4.]]


In [16]:
# Matrix of the exercise (no autograd tracking needed here)

X = torch.tensor([[1.0, 2.0, 7.0, 5.0], [3.0, 4.0, 9.0, 1.0], [5.0, 4.0, 1.0, 3.0], [9.0, 5.0, 7.0, 2.0]])

# Closed-form gradient: d ||X||_F^2 / dX = 2X

gradient = X * 2

# Show the result

print(f'El Gradiente Respecto a X: \n{gradient}')

El Gradiente Respecto a X: 
tensor([[ 2.,  4., 14., 10.],
        [ 6.,  8., 18.,  2.],
        [10.,  8.,  2.,  6.],
        [18., 10., 14.,  4.]])


### **Ejercicio #2**

**Funcion:**

$$F(X) = ||AX - B||^{2}$$

**Concepto Clave:** 

$$f'(X) = (\nabla X)^T$$
$$\nabla X = f'(X)^T$$

**Artificios:** 

$$\, \|X\|_F^2 = \text{Tr}(X X^T)$$
$$d(\text{Tr}(X)) = \text{Tr}(d(X))$$
$$\text{Tr}(ABC) = \text{Tr}(CAB) = \text{Tr}(BCA)$$
$$\text{Tr}(A + B) = \text{Tr}(A) + \text{Tr}(B)$$

**Derivada:**

$$df(X) = \text{Tr}(d((AX - B) (AX - B)^T))$$
$$df(X) = \text{Tr}((AX - B) d(AX - B)^T + d(AX - B) (AX - B)^T)$$
$$df(X) = \text{Tr}((AX - B) dX^TA^T + AdX (AX - B)^T)$$
$$df(X) = \text{Tr}(AdX(AX - B)^T + AdX (AX - B)^T)$$
$$df(X) = \text{Tr}(AdX(AX - B)^T) +  \text{Tr}(AdX (AX - B)^T)$$
$$df(X) = \text{Tr}((AX - B)^TAdX) +  \text{Tr}((AX - B)^TAdX)$$

**Gradiente:** 

$$(\nabla X)^T = (AX - B)^TA + (AX - B)^TA = 2(AX - B)^TA $$
$$\nabla X = 2  A^T (AX - B)$$

In [17]:
# Input matrices: X (2x3) is tracked by autograd; A (4x2) and B (4x3) are constants

X = torch.tensor([[1.0, 2.0, 6.0], [3.0, 4.0, 7.0]], requires_grad = True)
A = torch.tensor([[1.0, 2.0], [3.0, 4.0], [7.0, 9.0], [5.0, 3.0]], requires_grad = False)
B = torch.tensor([[1.0, 2.0, 6.0], [3.0, 4.0, 7.0], [9.0, 5.0, 3.0], [7.0, 1.0, 2.0]], requires_grad = False)

# Scalar objective F(X) = ||AX - B||_F^2.
# torch.norm is deprecated; torch.linalg.matrix_norm computes the Frobenius norm by default.

func_torch = torch.linalg.matrix_norm(torch.matmul(A, X) - B, ord = 'fro')**2

# Backpropagate so that X.grad holds dF/dX

func_torch.backward()

# Show the result

print(f'El Gradiente Respecto a X: \n{X.grad}')

El Gradiente Respecto a X: 
tensor([[ 504.,  964., 2180.],
        [ 612., 1112., 2498.]])


In [18]:
# Same matrices as JAX arrays

X = jaxnp.array([[1.0, 2.0, 6.0], [3.0, 4.0, 7.0]])
A = jaxnp.array([[1.0, 2.0], [3.0, 4.0], [7.0, 9.0], [5.0, 3.0]])
B = jaxnp.array([[1.0, 2.0, 6.0], [3.0, 4.0, 7.0], [9.0, 5.0, 3.0], [7.0, 1.0, 2.0]])

# Scalar objective F(X) = ||AX - B||^2; A and B are read from the enclosing (module) scope

def func_jax(X):
    """Return the squared norm of the residual A @ X - B."""
    residual = A @ X - B
    return jaxnp.linalg.norm(residual)**2

# Build the gradient function and evaluate it at X

func_grad = grad(func_jax)
X_grad = func_grad(X)

# Show the result

print(f'El Gradiente Respecto a X: \n{X_grad}')

El Gradiente Respecto a X: 
[[ 504.  964. 2180.]
 [ 612. 1112. 2498.]]


In [19]:
# Matrices of the exercise (no autograd tracking needed here)

X = torch.tensor([[1.0, 2.0, 6.0], [3.0, 4.0, 7.0]])
A = torch.tensor([[1.0, 2.0], [3.0, 4.0], [7.0, 9.0], [5.0, 3.0]])
B = torch.tensor([[1.0, 2.0, 6.0], [3.0, 4.0, 7.0], [9.0, 5.0, 3.0], [7.0, 1.0, 2.0]])

# Closed-form gradient: d ||AX - B||^2 / dX = 2 A^T (AX - B)

residual = A @ X - B
gradient = 2 * (A.t() @ residual)

# Show the result

print(f'El Gradiente Respecto a X: \n{gradient}')

El Gradiente Respecto a X: 
tensor([[ 504.,  964., 2180.],
        [ 612., 1112., 2498.]])


### **Ejercicio #3**

**Funcion:**

$$F(X) = ||X||_F$$

**Concepto Clave:** 

$$f'(X) = (\nabla X)^T$$
$$\nabla X = f'(X)^T$$

**Artificios:** 

$$\, \|X\|_F^2 = \text{Tr}(X X^T)$$
$$d(\text{Tr}(X)) = \text{Tr}(d(X))$$
$$\text{Tr}(ABC) = \text{Tr}(CAB) = \text{Tr}(BCA)$$
$$\text{Tr}(A + B) = \text{Tr}(A) + \text{Tr}(B)$$

**Derivada:**

$$f(X) = \sqrt{\text{Tr}(X X^T)}$$
$$df(X) = \frac{1}{2 \sqrt{\text{Tr}(X X^T)}} \cdot d(\text{Tr}(X X^T))$$
$$df(X) = \frac{1}{2 ||X||_F} \cdot \text{Tr}(X dX^T + dX X^T)$$
$$df(X) = \frac{1}{2 ||X||_F} \cdot \text{Tr}(dX X^T + dX X^T)$$
$$df(X) = \frac{1}{2 ||X||_F} \cdot \left[ \text{Tr}(dX X^T) +  \text{Tr}(dX X^T) \right]$$
$$df(X) = \frac{1}{2 ||X||_F} \cdot \left[ \text{Tr}(X^T dX) +  \text{Tr}(X^T dX) \right]$$
$$df(X) = \frac{1}{2 ||X||_F} \cdot 2 \cdot \text{Tr}(X^T dX)$$
$$df(X) = \frac{1}{||X||_F} \cdot \text{Tr}(X^T dX)$$

**Gradiente:** 

$$(\nabla X)^T = \frac{X^T}{||X||_F} $$
$$\nabla X = \frac{X}{||X||_F}$$

In [20]:
# Input matrix X (3x3), tracked by autograd

X = torch.tensor([[1.0, 2.0, 6.0], [3.0, 4.0, 7.0], [5.0, 3.0, 8.0]], requires_grad = True)

# Scalar objective F(X) = ||X||_F.
# torch.norm is deprecated; torch.linalg.matrix_norm computes the Frobenius norm by default.

func_torch = torch.linalg.matrix_norm(X, ord = 'fro')

# Backpropagate so that X.grad holds dF/dX

func_torch.backward()

# Show the result

print(f'El Gradiente Respecto a X: \n{X.grad}')

El Gradiente Respecto a X: 
tensor([[0.0685, 0.1370, 0.4111],
        [0.2056, 0.2741, 0.4796],
        [0.3426, 0.2056, 0.5482]])


In [21]:
# Input matrix X (3x3) as a JAX array

X = jaxnp.array([[1.0, 2.0, 6.0], [3.0, 4.0, 7.0], [5.0, 3.0, 8.0]])

# Scalar objective F(X) = ||X||_F

def func_jax(X):
    """Return the norm of X as computed by jaxnp.linalg.norm with default arguments."""
    frobenius = jaxnp.linalg.norm(X)
    return frobenius

# Build the gradient function and evaluate it at X

func_grad = grad(func_jax)
X_grad = func_grad(X)

# Show the result

print(f'El Gradiente Respecto a X: \n{X_grad}')

El Gradiente Respecto a X: 
[[0.06851887 0.13703774 0.4111132 ]
 [0.2055566  0.27407548 0.47963208]
 [0.34259436 0.2055566  0.54815096]]


In [22]:
# Matrix of the exercise (no autograd tracking needed here)

X = torch.tensor([[1.0, 2.0, 6.0], [3.0, 4.0, 7.0], [5.0, 3.0, 8.0]])

# Closed-form gradient: d ||X||_F / dX = X / ||X||_F.
# torch.norm is deprecated; torch.linalg.matrix_norm computes the Frobenius norm by default.

gradient = X / torch.linalg.matrix_norm(X, ord = 'fro')

# Show the result

print(f'El Gradiente Respecto a X: \n{gradient}')

El Gradiente Respecto a X: 
tensor([[0.0685, 0.1370, 0.4111],
        [0.2056, 0.2741, 0.4796],
        [0.3426, 0.2056, 0.5482]])


# **Determinante**

$$\frac{\partial \, \det(X)}{\partial X} = \det(X) \, (X^{-1})^T$$
$$\frac{\partial \, \ln(\det(X))}{\partial X} = (X^{-1})^T$$
$$\frac{\partial \, \det(AX B)}{\partial X} = \det(AX B) \, (X^{-1})^T$$


### **Ejercicio #1**

**Funcion:**

$$F(X) = \ln(\det(X))$$

**Concepto Clave:** 

$$f'(X) = (\nabla X)^T$$
$$\nabla X = f'(X)^T$$

**Artificios:** 

$$\ln(\det(X)) = {\text{Tr}(\ln(X))}$$
$$d(\text{Tr}(X)) = \text{Tr}(d(X))$$

**Derivada:**

$$f(X) = \text{Tr}(\ln(X))$$
$$df(X) = \text{Tr}(d\ln(X))$$
$$df(X) = \text{Tr}(\frac{1}{X} \cdot dX)$$
$$df(X) = \text{Tr}(X^{-1} dX)$$

**Gradiente:** 

$$(\nabla X)^T = X^{-1} $$
$$\nabla X = (X^{-1})^T$$

In [23]:
# Input matrix X (3x3), tracked by autograd

X = torch.tensor([[1.0, 2.0, 6.0], [3.0, 4.0, 7.0], [5.0, 3.0, 8.0]], requires_grad = True)

# Scalar objective F(X) = ln|det(X)|.
# det(X) = -33 for this X, so torch.log(torch.det(X)) evaluates to NaN.
# slogdet returns ln|det(X)|, which is finite and has the same gradient (X^{-1})^T.

func_torch = torch.linalg.slogdet(X).logabsdet

# Backpropagate so that X.grad holds dF/dX

func_torch.backward()

# Show the result

print(f'El Gradiente Respecto a X: \n{X.grad}')

El Gradiente Respecto a X: 
tensor([[-0.3333, -0.3333,  0.3333],
        [-0.0606,  0.6667, -0.2121],
        [ 0.3030, -0.3333,  0.0606]])


In [24]:
# Input matrix X (3x3) as a JAX array

X = jaxnp.array([[1.0, 2.0, 6.0], [3.0, 4.0, 7.0], [5.0, 3.0, 8.0]])

# Scalar objective F(X) = ln|det(X)|

def func_jax(X):
    """Return ln|det(X)|.

    det(X) = -33 for the matrix used in this exercise, so jaxnp.log(jaxnp.linalg.det(X))
    evaluates to NaN. slogdet returns (sign, ln|det|); the second entry is finite and has
    the same gradient (X^{-1})^T.
    """
    _sign, log_abs_det = jaxnp.linalg.slogdet(X)
    return log_abs_det

# Build the gradient function

func_grad = grad(fun = func_jax)

# Evaluate the gradient at X

X_grad = func_grad(X)

# Show the result

print(f'El Gradiente Respecto a X: \n{X_grad}')

El Gradiente Respecto a X: 
[[-0.33333337 -0.33333337  0.33333334]
 [-0.06060606  0.6666667  -0.21212122]
 [ 0.3030303  -0.33333334  0.06060606]]


In [25]:
# Matrix of the exercise (no autograd tracking needed here)

X = torch.tensor([[1.0, 2.0, 6.0], [3.0, 4.0, 7.0], [5.0, 3.0, 8.0]])

# Closed-form gradient: d ln(det(X)) / dX = (X^{-1})^T

inverse = torch.linalg.inv(X)
gradient = inverse.t()

# Show the result

print(f'El Gradiente Respecto a X: \n{gradient}')

El Gradiente Respecto a X: 
tensor([[-0.3333, -0.3333,  0.3333],
        [-0.0606,  0.6667, -0.2121],
        [ 0.3030, -0.3333,  0.0606]])


### **Ejercicio #2**

**Funcion:**

$$F(X) = \det(X)$$

**Concepto Clave:** 

$$f'(X) = (\nabla X)^T$$
$$\nabla X = f'(X)^T$$

**Artificios:** 

$$\det(X) = e^{\text{Tr}(\ln(X))}$$
$$d(\text{Tr}(X)) = \text{Tr}(d(X))$$

**Derivada:**

$$f(X) = e^{\text{Tr}(\ln(X))}$$
$$df(X) = e^{\text{Tr}(\ln(X))} \cdot \text{Tr}(d\ln(X))$$
$$df(X) = e^{\text{Tr}(\ln(X))} \cdot \text{Tr}(\frac{1}{X} \cdot dX)$$
$$df(X) = e^{\text{Tr}(\ln(X))} \cdot \text{Tr}(X^{-1} dX)$$
$$df(X) = \det(X) \cdot \text{Tr}(X^{-1} dX)$$

**Gradiente:** 

$$(\nabla X)^T = \det(X) \cdot X^{-1} $$
$$\nabla X = \det(X) \cdot (X^{-1})^T$$

In [26]:
# Input matrix X (3x3), tracked by autograd

X = torch.tensor([[1.0, 2.0, 6.0], [3.0, 4.0, 7.0], [5.0, 3.0, 8.0]], requires_grad=True)

# Scalar objective F(X) = det(X)

func_torch = X.det()

# Backpropagate so that X.grad holds dF/dX

func_torch.backward()

# Show the result

print(f'El Gradiente Respecto a X: \n{X.grad}')

El Gradiente Respecto a X: 
tensor([[ 11.0000,  11.0000, -11.0000],
        [  2.0000, -22.0000,   7.0000],
        [-10.0000,  11.0000,  -2.0000]])


In [27]:
# Input matrix X (3x3) as a JAX array

X = jaxnp.array([[1.0, 2.0, 6.0], [3.0, 4.0, 7.0], [5.0, 3.0, 8.0]])

# Scalar objective F(X) = det(X)

def func_jax(X):
    """Return the determinant of X."""
    determinant = jaxnp.linalg.det(X)
    return determinant

# Build the gradient function and evaluate it at X

func_grad = grad(func_jax)
X_grad = func_grad(X)

# Show the result

print(f'El Gradiente Respecto a X: \n{X_grad}')

El Gradiente Respecto a X: 
[[ 11.  11. -11.]
 [  2. -22.   7.]
 [-10.  11.  -2.]]


In [28]:
# Matrix of the exercise (no autograd tracking needed here)

X = torch.tensor([[1.0, 2.0, 6.0], [3.0, 4.0, 7.0], [5.0, 3.0, 8.0]])

# Closed-form gradient: d det(X) / dX = det(X) * (X^{-1})^T

determinant = torch.linalg.det(X)
inverse_transposed = torch.linalg.inv(X).t()
gradient = determinant * inverse_transposed

# Show the result

print(f'El Gradiente Respecto a X: \n{gradient}')

El Gradiente Respecto a X: 
tensor([[ 11.0000,  11.0000, -11.0000],
        [  2.0000, -22.0000,   7.0000],
        [-10.0000,  11.0000,  -2.0000]])


### **Ejercicio #3**

**Funcion:**

$$F(X) = \det(AXB)$$

**Concepto Clave:** 

$$f'(X) = (\nabla X)^T$$
$$\nabla X = f'(X)^T$$

**Artificios:** 

$$\det(X) = e^{\text{Tr}(\ln(X))}$$
$$d(\text{Tr}(X)) = \text{Tr}(d(X))$$
$$\text{Tr}(ABC) = \text{Tr}(CAB) = \text{Tr}(BCA)$$

**Derivada:**

(Se asume que $A$, $X$ y $B$ son cuadradas e invertibles, de modo que $(AXB)^{-1} = B^{-1} X^{-1} A^{-1}$.)

$$f(X) = e^{\text{Tr}(\ln(AXB))}$$
$$df(X) = e^{\text{Tr}(\ln(AXB))} \cdot \text{Tr}(d\ln(AXB))$$
$$df(X) = e^{\text{Tr}(\ln(AXB))} \cdot \text{Tr}((AXB)^{-1} \cdot d(AXB))$$
$$df(X) = e^{\text{Tr}(\ln(AXB))} \cdot \text{Tr}((AXB)^{-1} \cdot (AdXB + dAXB + AXdB))$$
$$df(X) = e^{\text{Tr}(\ln(AXB))} \cdot \text{Tr}((AXB)^{-1} \cdot AdXB)$$
$$df(X) = e^{\text{Tr}(\ln(AXB))} \cdot \text{Tr}(B (AXB)^{-1} A \, dX)$$
$$df(X) = e^{\text{Tr}(\ln(AXB))} \cdot \text{Tr}(B B^{-1} X^{-1} A^{-1} A \, dX)$$
$$df(X) = \det(AXB) \cdot \text{Tr}(X^{-1} dX)$$

**Gradiente:** 

$$(\nabla X)^T = \det(AXB) \cdot X^{-1} $$
$$\nabla X = \det(AXB) \cdot (X^{-1})^T$$

In [29]:
# Input matrices (all 3x3): X is tracked by autograd; A and B are constants

X = torch.tensor([[1.0, 2.0, 7.0], [3.0, 4.0, 9.0], [5.0, 2.0, 3.0]], requires_grad=True)
A = torch.tensor([[2.0, 1.0, 0.5], [1.0, 3.0, 1.0], [0.5, 1.0, 2.0]])
B = torch.tensor([[5.0, 4.0, 5.0], [8.0, 7.0, 2.0], [10.0, 1.0, 1.0]])

# Scalar objective F(X) = det(AXB), with the product evaluated as (AX)B

func_torch = torch.det(A @ X @ B)

# Backpropagate so that X.grad holds dF/dX

func_torch.backward()

# Show the result

print(f'El Gradiente Respecto a X: \n{X.grad}')

El Gradiente Respecto a X: 
tensor([[ 11731.5000, -70388.9844,  27373.4941],
        [-15642.0000,  62567.9766, -15641.9951],
        [ 19552.5000, -23463.0176,   3910.5015]])


In [30]:
# Same matrices as JAX arrays

X = jaxnp.array([[1.0, 2.0, 7.0], [3.0, 4.0, 9.0], [5.0, 2.0, 3.0]])
A = jaxnp.array([[2.0, 1.0, 0.5], [1.0, 3.0, 1.0], [0.5, 1.0, 2.0]])
B = jaxnp.array([[5.0, 4.0, 5.0], [8.0, 7.0, 2.0], [10.0, 1.0, 1.0]])

# Scalar objective F(X) = det(AXB); A and B are read from the enclosing (module) scope

def func_jax(X):
    """Return det((A @ X) @ B) for the module-level matrices A and B."""
    product = A @ X @ B
    return jaxnp.linalg.det(product)

# Build the gradient function and evaluate it at X

func_grad = grad(func_jax)
X_grad = func_grad(X)

# Show the result

print(f'El Gradiente Respecto a X: \n{X_grad}')

El Gradiente Respecto a X: 
[[ 11731.5 -70389.   27373.5]
 [-15642.   62568.  -15642. ]
 [ 19552.5 -23463.    3910.5]]


In [31]:
# Matrices of the exercise (all 3x3; no autograd tracking needed here)

X = torch.tensor([[1.0, 2.0, 7.0], [3.0, 4.0, 9.0], [5.0, 2.0, 3.0]])
A = torch.tensor([[2.0, 1.0, 0.5], [1.0, 3.0, 1.0], [0.5, 1.0, 2.0]])
B = torch.tensor([[5.0, 4.0, 5.0], [8.0, 7.0, 2.0], [10.0, 1.0, 1.0]])

# Closed-form gradient: d det(AXB) / dX = det(AXB) * (X^{-1})^T
# (this form relies on X being invertible, since it calls torch.linalg.inv(X))

determinant = torch.linalg.det(A @ X @ B)
inverse_transposed = torch.linalg.inv(X).t()
gradient = determinant * inverse_transposed

# Show the result

print(f'El Gradiente Respecto a X: \n{gradient}')

El Gradiente Respecto a X: 
tensor([[ 11731.5010, -70389.0078,  27373.5000],
        [-15642.0000,  62568.0000, -15642.0000],
        [ 19552.4961, -23463.0000,   3910.5010]])
