In [14]:
import torch
from torch import nn
import torch.nn.functional as F
import numpy as np
from sklearn.preprocessing import normalize

In [2]:
x = torch.rand((5)).reshape((1,-1))
y1 = torch.Tensor([0,0,0,1,0]).reshape((1,-1))
y2 = torch.Tensor([3]).long()
loss_fn = nn.CrossEntropyLoss()
print(loss_fn(x, y2))
print(loss_fn(x, y1))

tensor(1.8519)
tensor(1.8519)


In [3]:
def cross_entropy(p, q):
    assert len(p.shape) == 1
    assert len(q.shape) == 1
    assert len(p) == len(q)

    cross_entropy = 0
    for idx in range(len(p)):
        cross_entropy += (p[idx] * np.log(q[idx])).item()
    return -cross_entropy

In [4]:
# loss_fn = nn.CrossEntropyLoss()

p = np.array([100, 1, 1], dtype=np.float)
p = p / np.linalg.norm(p)
q1 = np.array([100, 1, 1], dtype=np.float)
q1 = q1 / np.linalg.norm(q1)
q2 = np.array([1, 1, 100], dtype=np.float)
q2 = q2 / np.linalg.norm(q2)
q3 = np.array([1, 1, 1], dtype=np.float)
q3 = q3 / np.linalg.norm(q3)

print(cross_entropy(p, q1))
print(cross_entropy(p, q2))
print(cross_entropy(p, q3))

0.09219617436462002
4.650858815019799
0.5602362463969972


Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  This is separate from the ipykernel package so we can avoid doing imports until
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  """
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  import sys
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  if __name__ == "__main__":


In [6]:
class MLP(nn.Module):
    def __init__(self, input_size, output_size, hidden_size=128, num_hidden=2):
        nn.Module.__init__(self)
        activation_fn = nn.ReLU
        layers = [
            nn.Linear(input_size, hidden_size),
            activation_fn(),
            nn.LayerNorm(hidden_size),
        ]
        for _ in range(num_hidden):
            layers.extend(
                [
                    nn.Linear(hidden_size, hidden_size),
                    activation_fn(),
                    nn.LayerNorm(hidden_size),
                ]
            )
        layers.append(nn.Linear(hidden_size, output_size))

        self.mlp = nn.Sequential(*layers)

    def forward(self, x):
        return self.mlp(x)



class EncoderDecoderModule(nn.Module):
    def __init__(self, input_size, latent_space_size, args):
        super().__init__()
        self.encoder = MLP(
            input_size, latent_space_size, args.hidden_size, args.layer_N
        )
        self.decoder = MLP(
            latent_space_size, input_size, args.hidden_size, args.layer_N
        )

    def forward(self, x):
        z = self.enc_forward(x)
        return self.dec_forward(z)

    def enc_forward(self, x):
        return self.encoder(x)

    def dec_forward(self, z):
        return self.decoder(z)


In [29]:
def pretrain_enc_dec(
        policy, obs_size, act_size, traj_len, n_samples, batch_size, n_episodes
):
    n_batches = n_samples // batch_size
    model = policy
    optim = torch.optim.Adam(model.parameters())
    obs_loss_fn = nn.MSELoss()
    act_loss_fn = nn.CrossEntropyLoss()
    steps = 0

    step_size = obs_size + act_size

    for _ in range(n_episodes):
        for _ in range(n_batches):
            optim.zero_grad()
            x = torch.rand((batch_size, step_size * traj_len))
            y_obs = x.reshape(-1, step_size)[:, :obs_size]
            y_act = F.softmax(x.reshape(-1, step_size)[:, -act_size:], 1)

            y_pred = model(x)
            y_pred_obs = y_pred.reshape(-1, step_size)[:, :obs_size]
            y_pred_act = y_pred.reshape(-1, step_size)[:, -act_size:]

            obs_loss = obs_loss_fn(y_pred_obs, y_obs)
            act_loss = act_loss_fn(y_pred_act, y_act)
            loss = obs_loss + act_loss
            loss.backward()
            optim.step()
            print(f"Obs loss at step {steps}: {obs_loss:.4f}")
            print(f"Act loss at step {steps}: {act_loss:.4f}")
            # self.log_train({"enc_dec_pretrain_loss": loss.item()}, steps)
            # steps += batch_size

In [27]:
class Object(object):
    pass

obs_size = 18
act_size = 5
traj_len = 5
input_size = (obs_size + act_size) * traj_len
latent_space_size = 64
args = Object()
args.hidden_size = 128
args.layer_N = 4
model = EncoderDecoderModule(input_size, latent_space_size, args)


In [30]:
pretrain_enc_dec(model, obs_size, act_size, traj_len, 1000, 100, 10)

Obs loss at step 0: 0.0827
Act loss at step 0: 1.6097
Obs loss at step 0: 0.1247
Act loss at step 0: 1.6266
Obs loss at step 0: 0.0938
Act loss at step 0: 1.6136
Obs loss at step 0: 0.0933
Act loss at step 0: 1.6138
Obs loss at step 0: 0.0947
Act loss at step 0: 1.6138
Obs loss at step 0: 0.0940
Act loss at step 0: 1.6121
Obs loss at step 0: 0.0926
Act loss at step 0: 1.6116
Obs loss at step 0: 0.0871
Act loss at step 0: 1.6122
Obs loss at step 0: 0.0886
Act loss at step 0: 1.6121
Obs loss at step 0: 0.0882
Act loss at step 0: 1.6124
Obs loss at step 0: 0.0889
Act loss at step 0: 1.6117
Obs loss at step 0: 0.0884
Act loss at step 0: 1.6104
Obs loss at step 0: 0.0872
Act loss at step 0: 1.6103
Obs loss at step 0: 0.0880
Act loss at step 0: 1.6101
Obs loss at step 0: 0.0864
Act loss at step 0: 1.6104
Obs loss at step 0: 0.0875
Act loss at step 0: 1.6102
Obs loss at step 0: 0.0843
Act loss at step 0: 1.6109
Obs loss at step 0: 0.0865
Act loss at step 0: 1.6105
Obs loss at step 0: 0.0862
A