In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import gymnasium as gym
from mujoco import viewer
from stable_baselines3 import SAC


In [3]:
import torch
from torch import nn
from torch.distributions.normal import Normal
from transformers import DistilBertConfig, DistilBertModel


In [3]:
import numpy as np
import torch
from numpy.typing import NDArray
from torch import nn
from transformers import DistilBertConfig, DistilBertModel


class CustomModel(nn.Module):
    def __init__(self,
                #  observation_space,
                #  features_dim,
                #  action_space,
                #  lr_schedule,
                 num_struct_elements: int,
                 attention_mask: NDArray[np.int_],
                 components_mask: NDArray[np.int_]):
        self.bert_config = DistilBertConfig(
            vocab_size=10000,
            hidden_size=1,
            num_hidden_layers=2,
            num_attention_heads=1,
            intermediate_size=100,
            hidden_act="gelu",
            hidden_dropout_prob=0.1,
            attention_probs_dropout_prob=0.1,
            max_position_embeddings=50,
            type_vocab_size=2,
            initializer_range=0.02,
            layer_norm_eps=1e-12,
            pad_token_id=0,
            position_embedding_type="absolute",
            use_cache=True,
            classifier_dropout=None,
        )
        super().__init__()
        self.num_struct_elements = num_struct_elements
        self.attention_mask = attention_mask
        self.components_mask = components_mask
        self.distilbert_1 = DistilBertModel(self.bert_config)
        self.distilbert_2 = DistilBertModel(self.bert_config)

    def forward(
        self,
        inputs_embeds: NDArray[np.float_],
        # num_struct_elements: int = 9,
        # attention_mask: NDArray[np.int_] = None,
        # components_mask: NDArray[np.int_] = None,
    ):
        attention_mask = torch.from_numpy(self.attention_mask).to(torch.int64)
        components_mask = torch.from_numpy(self.components_mask).to(torch.int64)

        embeds = np.array([inputs_embeds for _ in range(self.num_struct_elements)])

        embeds = torch.from_numpy(embeds).to(torch.float32)

        embeds = embeds.view(embeds.size()[0], embeds.size()[1], 1)
        outputs_1 = self.distilbert_1(
            # input_ids=torch.ones(input_ids.size()),
            inputs_embeds=embeds,
            attention_mask=attention_mask,
        )

        last_hidden_state_1 = outputs_1["last_hidden_state"]

        input_2 = torch.sum(last_hidden_state_1, dim=2)
        input_2.mul_(components_mask)  # summing through columns
        input_2 = torch.sum(input_2, dim=0)
        ones_vector = torch.ones(self.num_struct_elements, 1)
        input_2 = ones_vector @ input_2.view(1, input_2.size()[0])

        input_2 = input_2.view(input_2.size()[0], input_2.size()[1], 1)

        outputs_2 = self.distilbert_2(
            # input_ids=torch.ones(input_2.size()),
            inputs_embeds=input_2,
            attention_mask=attention_mask,
        )
        last_hidden_state_2 = outputs_2["last_hidden_state"]

        input_2 = torch.sum(last_hidden_state_2, axis=2)
        input_2.mul_(components_mask)
        # summing through columns
        input_2 = torch.sum(input_2, axis=0)
        return input_2


In [126]:
class QHead(nn.Module):
    def __init__(self, size_input, size_action):
        super(QHead, self).__init__()
        self.size_input = size_input
        self.size_action = size_action
        self.layer = nn.Sequential(
            nn.Linear(self.size_input, self.size_action),
            nn.LeakyReLU(0.01),
            nn.Linear(self.size_action, 1),
        )

    def forward(self, input):
        return self.layer(input)


class PiHead(nn.Module):
    def __init__(self, size_input, size_action):
        super(PiHead, self).__init__()
        self.size_input = size_input
        self.size_action = size_action
        self.layer = nn.Sequential(
            nn.Linear(self.size_input, self.size_action),
            nn.LeakyReLU(0.01),
        )

        self.last_lin = nn.Linear(1, 2)

    def forward(self, input):
        output_1 = self.layer(input)
        # ones_vector = torch.ones(self.size_action, 1)
        # output_1 = ones_vector @ output_1.view(1, self.size_action)
        return self.last_lin(output_1.view(self.size_action, 1))


In [4]:
v = CustomModel(num_struct_elements=2,
    attention_mask=np.array([[1, 1, 1, 1, 1], [1, 1, 1, 1, 1]]),
    components_mask=np.array([[1, 1, 1, 0, 0], [0, 0, 0, 1, 1]]),)

In [5]:
out = v(
    inputs_embeds=np.array([0.9086, 0, 0.4586, 0.60, 0.1300])
)


In [6]:
out

tensor([0., 0., 0., 0., 0.], grad_fn=<SumBackward1>)

In [94]:
b = PiHead(5, 3)
n = b(out)
n

tensor([[-0.3234,  0.1756],
        [-0.3457,  0.1512],
        [-0.3450,  0.1520]], grad_fn=<AddmmBackward0>)

In [95]:
from pathlib import Path

import gymnasium as gym
import matplotlib.pyplot as plt
import numpy as np
from mujoco import viewer
from stable_baselines3 import SAC


In [127]:
ant_xml = Path("assets/ant.xml").resolve()
# env = gym.make('Ant-v4', ctrl_cost_weight=0.1, use_contact_forces=True, render_mode="human")
env = gym.make("Ant-v4", xml_file=str(ant_xml), render_mode="rgb_array")

In [128]:
observation, info = env.reset()

In [129]:
observation

array([ 8.34447477e-01,  9.92621542e-01,  6.75538196e-02, -9.93757526e-02,
       -1.62300769e-02,  1.15063916e-02,  8.59142021e-02, -1.91279499e-02,
        4.14099744e-02, -9.77306455e-02,  8.05495065e-02,  4.75159271e-03,
        8.14847604e-02, -4.77236883e-02, -6.88342638e-02, -6.63389699e-02,
        1.86202157e-03, -3.65168499e-02, -6.55175676e-02, -2.71269819e-02,
       -6.80848270e-02, -2.15879968e-01, -4.89127362e-02,  6.45033340e-02,
        8.09131860e-02, -4.23027574e-04,  1.49368267e-01])

In [130]:
policy_kwargs = dict(
    features_extractor_class=CustomModel,
    features_extractor_kwargs=dict(num_struct_elements=1,
    attention_mask=np.array([1]*27),
    components_mask=np.array([1]*27),),
)

In [131]:
model = SAC("MlpPolicy", env, policy_kwargs=policy_kwargs, verbose=1)

Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


AttributeError: 'CustomModel' object has no attribute 'features_dim'

In [12]:
import numpy as np

d = 1
d_k = 1

# Assume these are given
X = np.random.rand(4, d)  # Replace with actual embeddings
W_q = np.random.rand(d, d_k)  # Replace with actual W_q
W_k = np.random.rand(d, d_k)  # Replace with actual W_k
G = np.array([[1, 1, 0, 0],
              [1, 1, 1, 0],
              [0, 1, 1, 1],
              [0, 0, 1, 1]])

# Compute Q and K
Q = np.dot(X, W_q)
K = np.dot(X, W_k)

# Compute Scores
Scores = np.dot(Q, K.T)

# Mask Scores
Masked_Scores = Scores * G

# Apply Softmax
A = np.exp(Masked_Scores) / np.exp(Masked_Scores).sum(axis=1, keepdims=True)

# Compute New Node Embeddings
Z = np.dot(A, X)

In [13]:
print('X', X)
print('W_q', W_q)
print('W_k', W_k)
print('Q', Q)
print('K', K)
print('Scores', Scores)
print('Masked_Scores', Masked_Scores)
print('A', A)
print('Z', Z)

X [[0.08576015]
 [0.50997662]
 [0.34750299]
 [0.03052557]]
W_q [[0.21283992]]
W_k [[0.93008227]]
Q [[0.01825318]
 [0.10854339]
 [0.07396251]
 [0.00649706]]
K [[0.079764  ]
 [0.47432021]
 [0.32320637]
 [0.02839129]]
Scores [[0.00145595 0.00865785 0.00589955 0.00051823]
 [0.00865785 0.05148432 0.03508191 0.00308169]
 [0.00589955 0.03508191 0.02390515 0.00209989]
 [0.00051823 0.00308169 0.00209989 0.00018446]]
Masked_Scores [[0.00145595 0.00865785 0.         0.        ]
 [0.00865785 0.05148432 0.03508191 0.        ]
 [0.         0.03508191 0.02390515 0.00209989]
 [0.         0.         0.00209989 0.00018446]]
A [[0.24973041 0.25153544 0.24936708 0.24936708]
 [0.24618945 0.2569619  0.25278148 0.24406717]
 [0.24618416 0.25497405 0.25214013 0.24670166]
 [0.24985717 0.24985717 0.25038239 0.24990326]]
Z [[0.24396199]
 [0.24745041]
 [0.24629375]
 [0.24348617]]
