In [15]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, Dataset, DataLoader
import functorch

from torchvision import datasets, transforms
import matplotlib.pyplot as plt

import Double_Pendulum.Lumped_Mass.robot_parameters as robot_parameters
import Double_Pendulum.Lumped_Mass.transforms as transforms
import Double_Pendulum.Lumped_Mass.dynamics as dynamics

In [2]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

rp = robot_parameters.LUMPED_PARAMETERS
print(rp)

{'l1': 2, 'l2': 2, 'm': 3, 'g': 9.81, 'xa': 5, 'ya': 1}


In [3]:
import numpy as np

# Number of samples
q1_low  = -torch.pi/2
q1_high =  torch.pi/2
q2_low  = -torch.pi/2
q2_high =  torch.pi/2
q1_d_low  = -torch.pi/2
q1_d_high =  torch.pi/2
q2_d_low  = -torch.pi/2
q2_d_high =  torch.pi/2


n_samples = 100000

# Generate uniformly distributed points for q1 and q2
q1 = torch.linspace(q1_low, q1_high, n_samples)
q2 = torch.linspace(q2_low, q2_high, n_samples)
q1_d = torch.linspace(q1_d_low, q1_d_high, n_samples)
q2_d = torch.linspace(q2_d_low, q2_d_high, n_samples)

idx = torch.randperm(q2.shape[0])
idx2 = torch.randperm(q1_d.shape[0])
idx3 = torch.randperm(q2_d.shape[0])

q2 = q2[idx]
q1_d = q1_d[idx2]
q2_d = q2_d[idx3]

# Stack q1 and q2 to get the 2D coordinates
points = torch.stack([q1, q2, q1_d, q2_d], axis=1)#.to(device)  # Shape will be (1000, 2)

In [4]:
# Use TensorDataset to create the dataset
dataset = TensorDataset(points)

# Create the DataLoader with batch size and shuffling
batch_size = 3
dataloader = DataLoader(dataset, 
                        batch_size=batch_size, 
                        shuffle=True,
                        num_workers=0,
                        pin_memory=True)


# Example usage: iterate through the DataLoader
for batch in dataloader:
    #print(batch[0])
    break  # Just to show one batch

In [26]:
class SinCosLayer(nn.Module):
    def __init__(self):
        super(SinCosLayer, self).__init__()

    def forward(self, x):
        # Apply sin() and cos() to both coordinates
        x_sin = torch.sin(x)
        x_cos = torch.cos(x)
        x_sin_cos_shape = (x.shape[0], x.shape[1]*2)
        x_sin_cos = torch.empty(x_sin_cos_shape, dtype=x_sin.dtype, device=x.device)
        x_sin_cos[:,0::2] = x_sin
        x_sin_cos[:,1::2] = x_cos
        return x_sin_cos
    
# Custom layer to reverse the interleaved sin() and cos() back to original coordinates
class InverseSinCosLayer(nn.Module):
    def __init__(self):
        super(InverseSinCosLayer, self).__init__()

    def forward(self, x):
        # x contains interleaved sin() and cos() values
        # Assuming input is of shape (batch_size, 4) for 2D coordinates
        sin_vals = x[:, 0::2]  # Extract sin values
        cos_vals = x[:, 1::2]  # Extract cos values

        # Use atan2 to recover the original angles from sin and cos
        original_coords = torch.atan2(sin_vals, cos_vals)
        return original_coords
    



class Autoencoder(nn.Module):
    def __init__(self, rp):
        super().__init__()
        self.encoder = nn.Sequential(
            #SinCosLayer(),
            nn.Linear(2, 4),
            nn.Sigmoid(),
            nn.Linear(4, 8),
            nn.Sigmoid(),
            nn.Linear(8, 8),
            nn.Sigmoid(),
            nn.Linear(8, 4),
            nn.Sigmoid(),
            nn.Linear(4, 1)#,
            #InverseSinCosLayer()
        )
        
        
        self.decoder = nn.Sequential(
            #SinCosLayer(),
            nn.Linear(2, 4),
            nn.Sigmoid(),
            nn.Linear(4, 8),
            nn.Sigmoid(),
            nn.Linear(8, 8),
            nn.Sigmoid(),
            nn.Linear(8, 4),
            nn.Sigmoid(),
            nn.Linear(4, 2)#,
            #InverseSinCosLayer()
        )
        
        self.rp = rp
        
    def encoder_ana(self, q):
        theta_1 = transforms.analytic_theta_1(self.rp, q).unsqueeze(0)
        return theta_1, theta_1
    

    def encoder_nn(self, q):
        theta_2 = self.encoder(q)
        return theta_2, theta_2
    
    def forward(self, q):
        
        Jh_1, theta_1 = torch.vmap(torch.func.jacfwd(self.encoder_ana, has_aux=True))(q)
        #print(theta_1.shape)
        print(Jh_1)
        
        Jh_2, theta_2 = torch.vmap(torch.func.jacfwd(self.encoder_nn, has_aux=True))(q)
        #print(theta_2.shape)
        print(Jh_2)
        
        theta = torch.stack((theta_1, theta_2), dim=1).squeeze(2)
        Jh = torch.cat((Jh_1, Jh_2), dim=1)
        #print(theta.shape)
        print(Jh)
        q_hat = self.decoder(theta)
        return(theta, Jh, q_hat)

In [27]:
%%time

rp = robot_parameters.LUMPED_PARAMETERS

model = Autoencoder(rp).to(device)  # Move model to GPU

criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3, weight_decay=1e-6)
scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.9)

num_epochs = 5
l_weights = [1,
             1,
             1,
             1]
outputs = []

for epoch in range(num_epochs):
    for index, batch in enumerate(dataloader):
        q = batch[0][:, 0:2].to(device)
        q.requires_grad = True
        q_d = batch[0][:, 2:4].to(device)
        
        theta, Jh, q_hat = model(q)
        
        #J_h_func = torch.vmap(torch.func.jacfwd(compute_theta)) #Doesn't work due to batch handling in transforms.py
        
        
        J_h_func = jacfwd(compute_theta)
        J_h_messy = J_h_func(q).to(device)
        
        
        batch_size = batch[0].shape[0]
        
        J_h_blocks = J_h_messy.view(batch_size, 2, batch_size, 2)  # [64, 2, 64, 2]
        J_h = torch.stack([J_h_blocks[i, :, i, :] for i in range(batch_size)])
        
        J_h_inv = torch.linalg.pinv(J_h).to(device)
        J_h_inv_trans = J_h_inv.transpose(1,2).to(device)

        
        matrices_vmap = torch.vmap(dynamics.dynamical_matrices, 
                                   in_dims=(None, 0, 0))
        
        M_q, C_q, G_q = matrices_vmap(rp, q, q_d)
        
        M_th, C_th, G_th = transforms.transform_dynamical_matrices(M_q, C_q, G_q, J_h_inv, J_h_inv_trans)
        
        M_th_loss = M_th.detach().clone() #Make it matrix multiplication for zeroes
        M_th_loss[:, 0, 0] = 0.
        M_th_loss[:, 1, 1] = 0.
        
        
        # Use J@J^T = eye to avoid needing to calculate the Jacobian inverse for efficiency. 
        
        loss_reconstruction = criterion(q_hat, q)
        loss_decoupling = criterion(M_th_loss, torch.zeros((batch_size, 2, 2)))
        loss_jacobian = criterion(torch.bmm(J_h.transpose(-2,-1), J_h), torch.eye(2))
        l1_norm = sum(p.abs().sum() for p in model.parameters())
        loss = loss_reconstruction + loss_decoupling + loss_jacobian#+ lambda_reg * l1_norm
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    scheduler.step()
    
        
    print(f'Epoch:{epoch+1}, Loss:{loss.item():.9f}')
    print("reconstruction loss:", (loss_reconstruction*l_weights[0]).item())
    print("decoupling loss:", (loss_decoupling*l_weights[1]).item())
    print("jacobian loss:", (loss_jacobian*l_weights[2]).item())
    print("l1 norm loss:", (l1_norm*l_weights[3]).item())
    outputs.append((epoch, q, q_hat, theta, M_th))

tensor([[[1.8751, 1.9107]],

        [[1.8776, 0.8944]],

        [[1.9880, 2.0000]]], device='cuda:0', grad_fn=<ViewBackward0>)
tensor([[[ 0.0002, -0.0001]],

        [[ 0.0001, -0.0001]],

        [[ 0.0002, -0.0001]]], device='cuda:0', grad_fn=<ViewBackward0>)
tensor([[[ 1.8751e+00,  1.9107e+00],
         [ 1.5688e-04, -1.4606e-04]],

        [[ 1.8776e+00,  8.9444e-01],
         [ 1.2739e-04, -1.2252e-04]],

        [[ 1.9880e+00,  2.0000e+00],
         [ 1.5821e-04, -1.3424e-04]]], device='cuda:0', grad_fn=<CatBackward0>)


NameError: name 'loss' is not defined

In [7]:
class DummyModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.encoder = nn.Linear(2, 5)
        
    
    def forward(self, q):
        return nn.functional.sigmoid(self.encoder(q))


dummy_model = DummyModel()

In [8]:
B = 3
q_in = torch.tensor([[1., 4.],
                     [2., 5.],
                     [3., 6.]])
q_in.requires_grad = True
out = dummy_model(q_in)

partials = torch.autograd.grad(outputs=out, inputs=q_in, grad_outputs=torch.ones_like(out), retain_graph=False, create_graph=False)[0]
print(partials)

tensor([[ 0.1664, -0.3315],
        [ 0.1982, -0.3101],
        [ 0.2271, -0.2889]])


In [9]:
from torch.func import jacfwd, vmap

x = torch.randn(64, 2)
jacobian = vmap(jacfwd(compute_theta))(x)
print(jacobian)
print(jacobian.shape)

IndexError: invalid index of a 0-dim tensor. Use `tensor.item()` in Python or `tensor.item<T>()` in C++ to convert a 0-dim tensor to a number

In [None]:
import random
import string

max_neurons = 8
blank_layer = [None for _ in range(max_neurons)]

table_layers = []
for idx, param in enumerate(model.parameters()):
    layer = param.data
    num_parallel = layer.shape[0]
    side_padding = int((max_neurons - num_parallel)/2)
    
    if idx % 2 == 0:
        
        table_layer = blank_layer.copy()
        table_layer[0] = "weights" + str(idx//2+1)
        table_layers.append(table_layer)
        for i in range(layer.shape[1]):
            table_layer = blank_layer.copy()
            for j in range(num_parallel):
                table_layer[j+side_padding] = '{:.2e}'.format(layer[j][i].item())
            table_layers.append(table_layer)
        table_layers.append(blank_layer)
            
    else:  
        
        table_layer = blank_layer.copy()
        table_layer[0] = "bias" + str(idx//2+1)
        table_layers.append(table_layer)
        table_layer = blank_layer.copy()
        for j in range(num_parallel):
            table_layer[j+side_padding] = '{:.2e}'.format(layer[j].item())
        table_layers.append(table_layer)
        table_layers.append(blank_layer)

numeric_values = np.zeros((len(table_layers), max_neurons))
for i, row in enumerate(table_layers):
    for j, item in enumerate(row):
        if item not in (None, "weights1", "weights2", "bias1", "bias2"):  # Replace with relevant layer names
            try:
                numeric_values[i, j] = (float(item))
            except ValueError:
                pass
        
min_val, max_val = numeric_values.min(), numeric_values.max()


# Step 2: Apply a logarithmic transformation, setting a small threshold to avoid log(0)
threshold = 1e-5
log_values = np.log10(np.clip(np.abs(numeric_values), threshold, None))

# Normalize the log-scaled values to range between 0 and 1
normalized_values = (log_values - log_values.min()) / (log_values.max() - log_values.min())
colors = plt.cm.Blues(normalized_values)

        
# Plot the table
fig, ax = plt.subplots(figsize=(10, 6))
ax.axis('tight')
ax.axis('off')

# Create table
table = plt.table(cellText=table_layers, cellColours=colors, loc='center', cellLoc='center')

plt.show()

In [None]:
test_q = torch.Tensor([1, 1])
test_q_d = torch.Tensor([1, 1])

M, C, G = dynamics.dynamical_matrices(rp, test_q, test_q_d)
print(M)
print(C)
print(G)

M2, C2, G2 = dynamics.dynamical_matrices_set(test_q, test_q_d)
print(M2)
print(C2)
print(G2)
