# Algo Playground

### Playground for GPT_Backbone

In [None]:
from robomimic.models.transformers import GPT_Backbone
import robomimic.utils.tensor_utils as TensorUtils
import torch 

# Smoke-test GPT_Backbone: build a small model, push one random sequence
# through it, and report the module structure and the output shape.
embed_dim = 16
context_length = 10

gpt = GPT_Backbone(context_length=context_length, embed_dim=embed_dim)
print(gpt)

# A single random sequence of context_length steps with embed_dim features.
# to_batch presumably prepends the batch axis the model expects -- TODO confirm.
sequence = torch.randn(context_length, embed_dim)
x = TensorUtils.to_batch(sequence)
y = gpt(x)
print("output shape:", y.shape, sep="\n")

### Playground for RNN_Base

In [None]:
from robomimic.models.base_nets import RNN_Base, MLP
import torch

# Smoke-test RNN_Base: build a two-layer recurrent net with an MLP attached
# as per_step_net, run a random batch with explicit initial state, and
# report the shapes of the output and of the returned state tensors.
batch_size, seq_len = 16, 10
input_dim = output_dim = 20
hidden_dim = 100
num_layers = 2  # used for both the RNN depth and the initial-state shapes

# MLP mapping hidden_dim features to output_dim through one 25-unit layer
per_step_net = MLP(
    input_dim=hidden_dim,
    output_dim=output_dim,
    layer_dims=(25,),
)
# create stacked LSTM
rnn = RNN_Base(
    input_dim=input_dim,
    rnn_hidden_dim=hidden_dim,
    rnn_num_layers=num_layers,
    per_step_net=per_step_net,
)
print(rnn)

x = torch.randn(batch_size, seq_len, input_dim)
# Initial hidden and context states share one shape; h0 is drawn before c0.
state_shape = (num_layers, batch_size, hidden_dim)
h0 = torch.randn(state_shape)
c0 = torch.randn(state_shape)

y, state = rnn(x, (h0, c0), return_state=True)
h, c = state
print("output shape:", y.shape, sep="\n")
print("hidden state shape:", h.shape, sep="\n")
print("context state shape:", c.shape, sep="\n")

### Playground for ObservationEncoder

In [None]:
from robomimic.models.obs_nets import ObservationEncoder
import robomimic.utils.tensor_utils as TensorUtils
import torch

# Smoke-test ObservationEncoder: register an image key and a low-dimensional
# key, build the encoder, then encode one random observation and report the
# shape of the result.
obs_encoder = ObservationEncoder()

# register rgb encoder
rgb_input_dim = (3, 224, 224)
net_kwargs = dict(
    input_shape=rgb_input_dim,  # don't include batch_size in input_shape
    backbone_class="ResNet18Conv",  # backbone
    backbone_kwargs=dict(pretrained=True, input_coord_conv=False),
    pool_class="SpatialSoftmax",  # maps features to embedding of shape (batch_size, num_kp, 2)
    pool_kwargs=dict(num_kp=32),
    flatten=True,  # default value, flattens embedding to shape (batch_size, num_kp*2)
    feature_dimension=64,  # default value, projects embedding to shape (batch_size, feature_dimension)
)
# net_class="VisualCore" combines the visual backbone with pooling
obs_encoder.register_obs_key(name="rgb", shape=rgb_input_dim, net_class="VisualCore", net_kwargs=net_kwargs)

# register low_dim encoder (ee_pos, ee_quat, etc)
# if we don't specify net_class, obs is flattened and concatenated
proprio_input_dim = 7
obs_encoder.register_obs_key(name="low_dim", shape=proprio_input_dim)

# Keys must be registered before make() is called to build the encoder.
obs_encoder.make()

# One random sample per registered key, each wrapped with to_batch;
# the rgb sample is drawn first, then the low_dim one.
x = {
    "rgb": TensorUtils.to_batch(torch.rand(rgb_input_dim)),
    "low_dim": TensorUtils.to_batch(torch.rand(proprio_input_dim)),
}
y = obs_encoder(x)
# concat (1, 64) and (1, 7) -> (1, 71)
print("encoded state shape:", y.shape, sep="\n")

