In [4]:
import sympy as sp

# Independent variable t and the unknown function theta(t)
t = sp.Symbol('t')
theta = sp.Function('theta')(t)

# Successive time derivatives of sin(theta(t)). Each one is obtained by
# differentiating the previous result with respect to t.
d1 = sp.sin(theta).diff(t)
d2 = d1.diff(t)
d3 = d2.diff(t)

# Shorthand Derivative objects for theta' and theta''.
# They are not referenced by the display calls below.
theta_dot = sp.Derivative(theta, t)
theta_ddot = sp.Derivative(theta, (t, 2))

# Show each simplified derivative preceded by its label
display("d/dt sin(theta):", sp.simplify(d1))
display("d²/dt² sin(theta):", sp.simplify(d2))
display("d³/dt³ sin(theta):", sp.simplify(d3))

'd/dt sin(theta):'

cos(theta(t))*Derivative(theta(t), t)

'd²/dt² sin(theta):'

-sin(theta(t))*Derivative(theta(t), t)**2 + cos(theta(t))*Derivative(theta(t), (t, 2))

'd³/dt³ sin(theta):'

-3*sin(theta(t))*Derivative(theta(t), t)*Derivative(theta(t), (t, 2)) - cos(theta(t))*Derivative(theta(t), t)**3 + cos(theta(t))*Derivative(theta(t), (t, 3))

In [1]:
import torch
import einops

# Seed the global RNG so that A, and therefore the printed gradient, is the
# same on every Restart & Run All.
torch.manual_seed(0)

# Random 3x3 tensor; requires_grad=True makes autograd track operations on it
A = torch.randn(3, 3, requires_grad=True)

# Compute the matrix exponential of A
E = torch.matrix_exp(A)

# Scalar loss: sum of the squared entries of E (squared Frobenius norm)
loss = (E ** 2).sum()
loss.backward()

# backward() has stored d(loss)/dA in A.grad
print(A.grad)

tensor([[ 0.8604,  0.3529, -0.0859],
        [ 0.3847,  0.3696, -0.4524],
        [-0.0950, -0.4316,  0.8826]])


In [5]:
def flatten_batch(x_batch: torch.Tensor) -> torch.Tensor:
    """Merge the first two axes of a three-axis tensor into a single axis.

    Args:
        x_batch: Tensor whose axes match the pattern ``b h nx``.

    Returns:
        The same data rearranged to ``(b h) nx``, i.e. the ``b`` and ``h``
        axes are combined into one leading axis and ``nx`` is kept last.
    """
    merge_leading_axes = 'b h nx -> (b h) nx'
    return einops.rearrange(x_batch, merge_leading_axes)

def unflatten_batch(batch_size, x_flat_batch: torch.Tensor) -> torch.Tensor:
    """Split the leading axis of a two-axis tensor back into two axes.

    Args:
        batch_size: Length of the ``b`` axis in the result; the ``h`` axis
            length is inferred by einops from the leading axis.
        x_flat_batch: Tensor whose axes match the pattern ``(b h) nx``.

    Returns:
        The same data rearranged to ``b h nx``.
    """
    split_leading_axis = '(b h) nx -> b h nx'
    return einops.rearrange(x_flat_batch, split_leading_axis, b=batch_size)

In [7]:
# Round-trip sanity check: flattening and then unflattening a random
# (32, 100, 10) tensor should give back the original values.
x_batch = torch.randn(32, 100, 10)

x_flat_batch = flatten_batch(x_batch)
# Recover the leading size from the input itself rather than repeating it
x_batch_recon = unflatten_batch(x_batch.shape[0], x_flat_batch)

# The reconstructed tensor must match the original
assert torch.allclose(x_batch, x_batch_recon), "Reconstruction failed!"