In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, Dataset, DataLoader
from functorch import jacrev  # Import from functorch

from torchvision import datasets, transforms
import matplotlib.pyplot as plt

import Double_Pendulum.Lumped_Mass.robot_parameters as robot_parameters
import Double_Pendulum.Lumped_Mass.transforms as transforms
import Double_Pendulum.Lumped_Mass.dynamics as dynamics

In [2]:
rp = robot_parameters.LUMPED_PARAMETERS
print(rp)

{'l1': 2, 'l2': 2, 'm': 3, 'g': 9.81, 'xa': 5, 'ya': 1}


In [3]:
import numpy as np

# Number of samples
q1_low  = -torch.pi/2
q1_high =  torch.pi/2
q2_low  = -torch.pi/2
q2_high =  torch.pi/2
q1_d_low  = -torch.pi/2
q1_d_high =  torch.pi/2
q2_d_low  = -torch.pi/2
q2_d_high =  torch.pi/2


n_samples = 100000

# Generate uniformly distributed points for q1 and q2
q1 = torch.linspace(q1_low, q1_high, n_samples)
q2 = torch.linspace(q2_low, q2_high, n_samples)
q1_d = torch.linspace(q1_d_low, q1_d_high, n_samples)
q2_d = torch.linspace(q2_d_low, q2_d_high, n_samples)

idx = torch.randperm(q2.shape[0])
idx2 = torch.randperm(q1_d.shape[0])
idx3 = torch.randperm(q2_d.shape[0])

q2 = q2[idx]
q1_d = q1_d[idx2]
q2_d = q2_d[idx3]

# Stack q1 and q2 to get the 2D coordinates
points = torch.stack([q1, q2, q1_d, q2_d], axis=1)  # Shape will be (1000, 2)

In [4]:
# Use TensorDataset to create the dataset
dataset = TensorDataset(points)

# Create the DataLoader with batch size and shuffling
batch_size = 64
dataloader = DataLoader(dataset, 
                        batch_size=batch_size, 
                        shuffle=True,
                        num_workers=0,
                        pin_memory=True)


# Example usage: iterate through the DataLoader
for batch in dataloader:
    print(batch[0])
    break  # Just to show one batch

tensor([[ 1.5892e-01,  4.5674e-01, -9.0392e-01,  9.6443e-01],
        [ 9.2353e-01, -1.4327e+00, -4.5087e-01, -2.7073e-01],
        [ 1.2334e+00, -1.3141e+00,  1.3051e+00,  1.2553e+00],
        [-5.1662e-01, -4.8219e-01,  6.0908e-01,  7.0451e-02],
        [-6.7958e-01,  1.3651e+00, -9.4169e-01, -3.2920e-01],
        [-9.1908e-02,  5.4317e-01,  3.2172e-01, -3.6206e-01],
        [-8.6164e-01,  1.2425e+00, -1.2464e-01, -2.8229e-01],
        [-1.3327e+00,  1.0898e+00, -1.3319e-01, -1.5842e-01],
        [-1.4402e+00, -3.0375e-01, -1.0055e+00,  1.3621e+00],
        [-9.2943e-01,  1.1025e+00,  1.0716e+00,  1.5571e+00],
        [-1.3445e+00, -1.3805e+00,  5.0827e-01, -9.2061e-01],
        [ 1.1577e+00,  1.5690e+00, -1.1193e+00,  7.8530e-01],
        [ 3.4572e-01, -5.9859e-01,  1.2621e-01, -5.5560e-02],
        [ 1.4858e+00, -4.4845e-01,  9.6974e-01,  1.4848e+00],
        [ 6.0544e-01,  7.0198e-01,  8.7826e-01,  1.2609e-01],
        [ 6.0162e-03, -1.2752e+00,  9.8045e-01, -1.5315e+00],
        

  return torch._C._cuda_getDeviceCount() > 0


In [7]:
class SinCosLayer(nn.Module):
    def __init__(self):
        super(SinCosLayer, self).__init__()

    def forward(self, x):
        # Apply sin() and cos() to both coordinates
        x_sin = torch.sin(x)
        x_cos = torch.cos(x)
        x_sin_cos_shape = (x.shape[0], x.shape[1]*2)
        x_sin_cos = torch.empty(x_sin_cos_shape, dtype=x_sin.dtype, device=x.device)
        x_sin_cos[:,0::2] = x_sin
        x_sin_cos[:,1::2] = x_cos
        return x_sin_cos
    
# Custom layer to reverse the interleaved sin() and cos() back to original coordinates
class InverseSinCosLayer(nn.Module):
    def __init__(self):
        super(InverseSinCosLayer, self).__init__()

    def forward(self, x):
        # x contains interleaved sin() and cos() values
        # Assuming input is of shape (batch_size, 4) for 2D coordinates
        sin_vals = x[:, 0::2]  # Extract sin values
        cos_vals = x[:, 1::2]  # Extract cos values

        # Use atan2 to recover the original angles from sin and cos
        original_coords = torch.atan2(sin_vals, cos_vals)
        return original_coords
    



class Autoencoder(nn.Module):
    def __init__(self):
        super().__init__()
        self.encoder = nn.Sequential(
            #SinCosLayer(),
            nn.Linear(2, 4),
            nn.ReLU(),
            nn.Linear(4, 8),
            nn.ReLU(),
            nn.Linear(8, 8),
            nn.ReLU(),
            nn.Linear(8, 4),
            nn.ReLU(),
            nn.Linear(4, 1)#,
            #InverseSinCosLayer()
        )
        
        self.decoder = nn.Sequential(
            #SinCosLayer(),
            nn.Linear(2, 4),
            nn.ReLU(),
            nn.Linear(4, 8),
            nn.ReLU(),
            nn.Linear(8, 8),
            nn.ReLU(),
            nn.Linear(8, 4),
            nn.ReLU(),
            nn.Linear(4, 2)#,
            #InverseSinCosLayer()
        )
    
    def forward(self, q):
        theta_1 = transforms.analytic_theta_1(rp, q).unsqueeze(1)
        theta_2 = self.encoder(q)
        theta = torch.stack((theta_1, theta_2), dim=1).squeeze(2)
        q_hat = self.decoder(theta)
        return(theta, q_hat)

In [14]:
%%time

rp = robot_parameters.LUMPED_PARAMETERS

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = Autoencoder().to(device)  # Move model to GPU

criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3, weight_decay=1e-6)
scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.9)
#scheduler.get_last_lr()


num_epochs = 30
lambda_reg = 1e-4
outputs = []

# Define a function that takes `q` as input and returns `theta`
def compute_theta(q):
    return model(q)[0]  # This will return `theta`


for epoch in range(num_epochs):
    for (batch) in dataloader:
        print("got to 1")
        q = batch[0][:, 0:2].to(device)
        print("got to 2")
        q_d = batch[0][:, 2:4].to(device)
        print("got to 3")
        theta, q_hat = model(q)
        
        #Jh_func = torch.vmap(jacrev(compute_theta)) Doesn't work due to batch handling in transforms.py
        Jh_func = jacrev(compute_theta)
        
        Jh_messy = Jh_func(q)
        
        Jh_blocks = Jh_messy.view(batch_size, 2, batch_size, 2)  # [64, 2, 64, 2]
        Jh = torch.stack([Jh_blocks[i, :, i, :] for i in range(batch_size)])
        
        Jh_inv = torch.linalg.inv(Jh)
        Jh_invtrans = Jh_inv.transpose(1, 2)
        
        print(Jh[0])
        print(Jh_inv[0])
        print(Jh_invtrans[0])
        
        matrices_vmap = torch.vmap(dynamics.dynamical_matrices, (None, 0, 0))
        Mq, Cq, Gq = matrices_vmap(rp, q, q_d)
        

        loss_reconstruction = criterion(q_hat, q)
        loss_theta = criterion(theta, q)
        l1_norm = sum(p.abs().sum() for p in model.parameters())
        loss = loss_reconstruction + loss_theta #+ lambda_reg * l1_norm
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    scheduler.step()
    #print(scheduler.get_last_lr())
        
    print(f'Epoch:{epoch+1}, Loss:{loss.item():.9f}')
    print("l1 norm loss:", (l1_norm*lambda_reg).item())
    outputs.append((epoch, q, q_hat, theta))

got to 1
got to 2
got to 3
tensor([[1.5696, 0.1522],
        [0.0032, 0.0091]], grad_fn=<SelectBackward0>)
tensor([[  0.6593, -11.0258],
        [ -0.2291, 113.6901]], grad_fn=<SelectBackward0>)
tensor([[  0.6593,  -0.2291],
        [-11.0258, 113.6901]], grad_fn=<SelectBackward0>)




RuntimeError: vmap: It looks like you're calling .item() on a Tensor. We don't support vmap over calling .item() on a Tensor, please try to rewrite what you're doing with other operations. If error is occurring somewhere inside PyTorch internals, please file a bug report.

In [None]:
import random
import string

max_neurons = 8
blank_layer = [None for _ in range(max_neurons)]

table_layers = []
for idx, param in enumerate(model.parameters()):
    layer = param.data
    num_parallel = layer.shape[0]
    side_padding = int((max_neurons - num_parallel)/2)
    
    if idx % 2 == 0:
        
        table_layer = blank_layer.copy()
        table_layer[0] = "weights" + str(idx//2+1)
        table_layers.append(table_layer)
        for i in range(layer.shape[1]):
            table_layer = blank_layer.copy()
            for j in range(num_parallel):
                table_layer[j+side_padding] = '{:.2e}'.format(layer[j][i].item())
            table_layers.append(table_layer)
        table_layers.append(blank_layer)
            
    else:  
        
        table_layer = blank_layer.copy()
        table_layer[0] = "bias" + str(idx//2+1)
        table_layers.append(table_layer)
        table_layer = blank_layer.copy()
        for j in range(num_parallel):
            table_layer[j+side_padding] = '{:.2e}'.format(layer[j].item())
        table_layers.append(table_layer)
        table_layers.append(blank_layer)

numeric_values = np.zeros((len(table_layers), max_neurons))
for i, row in enumerate(table_layers):
    for j, item in enumerate(row):
        if item not in (None, "weights1", "weights2", "bias1", "bias2"):  # Replace with relevant layer names
            try:
                numeric_values[i, j] = (float(item))
            except ValueError:
                pass
        
min_val, max_val = numeric_values.min(), numeric_values.max()


# Step 2: Apply a logarithmic transformation, setting a small threshold to avoid log(0)
threshold = 1e-5
log_values = np.log10(np.clip(np.abs(numeric_values), threshold, None))

# Normalize the log-scaled values to range between 0 and 1
normalized_values = (log_values - log_values.min()) / (log_values.max() - log_values.min())
colors = plt.cm.Blues(normalized_values)

        
# Plot the table
fig, ax = plt.subplots(figsize=(10, 6))
ax.axis('tight')
ax.axis('off')

# Create table
table = plt.table(cellText=table_layers, cellColours=colors, loc='center', cellLoc='center')

plt.show()