In [2]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [4]:
from typing import List

from ur_sim.demonstrations import Demonstration
from ur_sim.push_env import UR3ePush
from pathlib import Path

import torch
from torch import nn
import pytorch_lightning as pl
from torch.utils.data import Dataset, DataLoader
from torch.nn.functional import mse_loss
import os
import numpy as np
import pickle
import wandb

In [5]:
# Two separate UR3ePush instances; both use state observations and disable the push primitive.
# `env` is the instance that collect_demos() gathers demonstrations from.
env = UR3ePush(state_observation=True, push_primitive=False,real_time=False)
# `test_env` is the instance BC.on_validation_epoch_end rolls the policy out in. It differs from
# `env` only by real_time=True.
# NOTE(review): real_time presumably paces the simulation at wall-clock speed — confirm in ur_sim.push_env.
test_env = UR3ePush(state_observation=True, push_primitive=False, real_time=True)

In [6]:
def collect_demos(n_episodes):
    """Ask the notebook-level `env` for `n_episodes` demonstrations.

    The second argument handed to `env.collect_demonstrations` is the string path of
    `push_demos.pkl` located in the parent of the current working directory.

    Returns:
        Whatever `env.collect_demonstrations` returns.
    """
    demo_file = Path(os.path.abspath('')).parent / "push_demos.pkl"
    return env.collect_demonstrations(n_episodes, str(demo_file))

In [7]:
def collect_dummy_demos(n_samples, n_episodes=21):
    """Build synthetic demonstrations whose actions are simple functions of the observations.

    Observations are 4-dimensional uniform random vectors in [0, 1). The 2-dimensional
    action for an observation `o` is `(o[1] - o[0], o[2] - o[3])` (e.g. a point-mass target).

    Args:
        n_samples: total number of observations generated over all episodes.
        n_episodes: number of episodes the samples are split into with `np.array_split`
            (near-equal chunks). Defaults to 21, the value that used to be hard-coded.
            When `n_samples < n_episodes` some episodes are empty.

    Returns:
        A list of `n_episodes` `Demonstration` objects. Each holds its observations as a
        list and one action fewer than observations, since the final observation has no action.
    """
    # Drawing the (n_samples, 4) array directly yields the same values as reshaping a flat
    # draw, but also works for n_samples == 0 (reshape with -1 is ambiguous for size 0).
    x = np.random.rand(n_samples, 4)
    y = np.zeros((n_samples, 2))
    # dummy actions made up of difference of inputs (e.g. point mass target)
    y[:, 0] = x[:, 1] - x[:, 0]
    y[:, 1] = x[:, 2] - x[:, 3]

    observation_chunks = np.array_split(x, n_episodes)
    action_chunks = np.array_split(y, n_episodes)

    demonstrations = []
    for observation_chunk, action_chunk in zip(observation_chunks, action_chunks):
        demonstration = Demonstration()
        demonstration.observations = observation_chunk.tolist()
        demonstration.actions = action_chunk[:-1].tolist()  # final obs has no action!
        demonstrations.append(demonstration)
    return demonstrations


In [8]:
class DemonstrationDataset(Dataset):
    """Flattens a list of demonstrations into individual (observation, action) samples.

    Every demonstration must expose `observations` and `actions` sequences, with exactly
    one more observation than actions: the final observation has no action and is dropped.

    Args:
        demonstrations: the rollouts to flatten; kept on `self.demonstrations`.

    Raises:
        ValueError: if a demonstration's observation and action counts do not line up.
            Concatenating such a demonstration would silently shift every pair after it.
    """

    # The annotation is a forward-reference string, so it is documentation only and does not
    # need `Demonstration` to be resolvable at the moment the class is defined.
    def __init__(self, demonstrations: List["Demonstration"]):
        super().__init__()
        self.demonstrations = demonstrations

        # preprocess from rollout to single list
        observations = []
        actions = []
        for index, demonstration in enumerate(self.demonstrations):
            paired_observations = demonstration.observations[:-1]  # remove final observation
            if len(paired_observations) != len(demonstration.actions):
                raise ValueError(
                    f"demonstration {index} has {len(demonstration.observations)} observations "
                    f"and {len(demonstration.actions)} actions; expected exactly one more "
                    "observation than actions"
                )
            observations.extend(paired_observations)
            actions.extend(demonstration.actions)
        self.observations = torch.tensor(np.array(observations))
        self.actions = torch.tensor(np.array(actions))

    def __len__(self):
        """Number of (observation, action) samples over all demonstrations."""
        return len(self.observations)

    def __getitem__(self, idx):
        """Return the `idx`-th (observation, action) pair, both cast to float32 tensors."""
        #todo: handle images

        obs = self.observations[idx].float()
        action = self.actions[idx].float()

        return obs, action


In [28]:
from pytorch_lightning.loggers import WandbLogger

class BC(pl.LightningModule):
    """Behavioural-cloning module: fits `agent` to demonstrated (observation, action) pairs.

    Training and validation minimise the MSE between `agent(observations)` and the
    demonstrated actions. At the end of selected validation epochs the policy is also
    rolled out in a simulator and the mean cumulative reward is logged.

    Args:
        agent: network mapping a batch of observations to a batch of actions.
        lr: learning rate for the Adam optimizer.
        eval_env: environment used for the rollouts; it must offer `reset()` and a
            `step(action)` returning `(obs, reward, done, info)`. When None, the
            notebook-level `test_env` is used (the original behaviour).
        eval_every_n_epochs: roll out only in epochs divisible by this value; values
            below 1 disable the rollouts. Defaults to the previously hard-coded 5.
        n_eval_episodes: number of rollout episodes to average over; values below 1
            disable the rollouts. Defaults to the previously hard-coded 10.
    """

    def __init__(self, agent, lr, eval_env=None, eval_every_n_epochs=5, n_eval_episodes=10):
        super().__init__()
        self.agent = agent
        self.lr = lr
        self.criterion = nn.MSELoss()
        self.eval_env = eval_env
        self.eval_every_n_epochs = eval_every_n_epochs
        self.n_eval_episodes = n_eval_episodes

    def configure_optimizers(self):
        """Adam over all module parameters with the configured learning rate."""
        return torch.optim.Adam(self.parameters(), lr=self.lr)

    def forward(self, x):
        """Predict actions for a batch of observations."""
        return self.agent(x)

    def _shared_step(self, batch, idx):
        """MSE between predicted and demonstrated actions for one batch."""
        observations, actions = batch
        predicted_actions = self.agent(observations)
        loss = self.criterion(predicted_actions, actions)
        return loss

    def training_step(self, train_batch, idx):
        """Compute and log the training loss under "train/loss"."""
        loss = self._shared_step(train_batch, idx)
        self.log("train/loss", loss, prog_bar=True)
        return loss

    def validation_step(self, val_batch, idx):
        """Compute and log the validation loss under "val/loss"."""
        loss = self._shared_step(val_batch, idx)
        self.log("val/loss", loss, prog_bar=True)
        return loss

    def on_validation_epoch_end(self):
        """Roll the current policy out and log the mean cumulative episode reward."""
        if self.eval_every_n_epochs < 1 or self.n_eval_episodes < 1:
            return
        if self.current_epoch % self.eval_every_n_epochs != 0:
            return

        # Fall back to the notebook-level environment when none was injected.
        rollout_env = self.eval_env if self.eval_env is not None else test_env

        rewards = []
        with torch.no_grad():  # rollouts never need gradients
            for _ in range(self.n_eval_episodes):
                done = False
                obs = rollout_env.reset()
                episode_cum_reward = 0.0
                while not done:
                    # Match the network's dtype and device explicitly: the environment may
                    # hand back float64 arrays and the module may live on an accelerator.
                    obs_batch = torch.tensor(obs, dtype=torch.float32, device=self.device)
                    obs_batch = torch.unsqueeze(obs_batch, 0)
                    action = self.forward(obs_batch)
                    # Drop only the batch dimension (a 1-D action must stay 1-D) and move
                    # to the CPU before converting to numpy.
                    action = torch.squeeze(action, 0).detach().cpu().numpy()
                    obs, reward, done, _ = rollout_env.step(action)
                    episode_cum_reward += reward
                rewards.append(episode_cum_reward)

        self.log("test/mean_cum_reward", sum(rewards) / len(rewards))

In [10]:
    # Collect 3000 demonstration episodes through collect_demos(); the result is not bound to a name.
    # NOTE(review): the training cell loads push_demos.pkl, the same path collect_demos() passes to the
    # environment — presumably this call writes that file; confirm in ur_sim.
    # NOTE(review): the recorded output of this cell ends in an AttributeError after a failed autoreload
    # of ur_sim.demonstrations; a kernel restart is probably needed before re-running — confirm.
    collect_demos(3000)

[autoreload of ur_sim.demonstrations failed: Traceback (most recent call last):
  File "/home/tlips/miniconda3/envs/reward-learning-block-puzzle/lib/python3.9/site-packages/IPython/extensions/autoreload.py", line 257, in check
    superreload(m, reload, self.old_objects)
  File "/home/tlips/miniconda3/envs/reward-learning-block-puzzle/lib/python3.9/site-packages/IPython/extensions/autoreload.py", line 480, in superreload
    update_generic(old_obj, new_obj)
  File "/home/tlips/miniconda3/envs/reward-learning-block-puzzle/lib/python3.9/site-packages/IPython/extensions/autoreload.py", line 377, in update_generic
    update(a, b)
  File "/home/tlips/miniconda3/envs/reward-learning-block-puzzle/lib/python3.9/site-packages/IPython/extensions/autoreload.py", line 329, in update_class
    if update_generic(old_obj, new_obj):
  File "/home/tlips/miniconda3/envs/reward-learning-block-puzzle/lib/python3.9/site-packages/IPython/extensions/autoreload.py", line 377, in update_generic
    update(a, 

AttributeError: 'Demonstration' object has no attribute 'observations'

In [29]:
    # Hyperparameters for the behavioural-cloning run.
    batch_size = 32
    lr = 1e-4
    epochs = 20
    seed = 42
    train_fraction = 0.8  # share of demonstrations used for training, the rest is validation

    # Seed python, numpy and torch so weight initialisation and batch shuffling are repeatable.
    pl.seed_everything(seed)

    # NOTE: pickle.load can execute arbitrary code — only load demonstration files that were
    # produced by this project.
    demo_path = Path(os.path.abspath('')).parent / "push_demos.pkl"
    with open(str(demo_path), "rb") as f:
        demos = pickle.load(f)

    # Split on whole demonstrations (not on single samples) so no episode is shared between
    # the training and validation sets.
    split_index = int(len(demos) * train_fraction)
    train_demos = demos[:split_index]
    val_demos = demos[split_index:]
    if len(train_demos) == 0 or len(val_demos) == 0:
        raise ValueError(
            f"need at least one training and one validation demonstration, got {len(demos)} in total"
        )
    print(len(train_demos))
    print(len(val_demos))

    train_set = DemonstrationDataset(train_demos)
    val_set = DemonstrationDataset(val_demos)
    train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_set, batch_size=batch_size, shuffle=False)

    print(len(train_set))
    print(train_set[0])

    # Layer sizes follow the data: the first sample fixes observation and action dimensions.
    first_observation, first_action = train_set[0]
    input_dim = first_observation.shape[0]
    output_dim = first_action.shape[0]
    agent = nn.Sequential(
        nn.Linear(input_dim, 64),
        nn.ReLU(),
        nn.Linear(64, 64),
        nn.ReLU(),
        nn.Linear(64, output_dim),
        nn.Tanh()  # squashes every action component into [-1, 1]
    )
    bc = BC(agent, lr)

800
200
9786
(tensor([-0.0050, -0.2569,  0.1063,  0.0430, -0.3394, -0.0500, -0.3500]), tensor([ 0.0500, -0.0242,  0.0046]))


In [30]:
    # Close any W&B run that is still open before a new logger is created for this run.
    wandb.finish()
    wandb_logger = WandbLogger(
        name="bc",
        project="ur_pusher",
    )
    # Log metrics on every optimisation step and stop after `epochs` epochs.
    trainer = pl.Trainer(
        logger=wandb_logger,
        max_epochs=epochs,
        log_every_n_steps=1,
    )
    trainer.fit(bc, train_loader, val_loader)

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▇▇▇▇▇▇████
train/loss,█▅▇▇▂▆▂▆▄▄▇▂▆▃▆▆▇▃▅▃█▆▃▃▅▄▄▆▃▇▆▃▃▆▅▄▁▃▆▃
trainer/global_step,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
val/loss,█▇▇▆▅▅▅▄▅▄▃▆▃▄▂▂▂▃▁▂

0,1
epoch,19.0
train/loss,0.00017
trainer/global_step,6119.0
val/loss,0.0003


GPU available: True (cuda), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(

  | Name      | Type       | Params
-----------------------------------------
0 | agent     | Sequential | 4.9 K 
1 | criterion | MSELoss    | 0     
-----------------------------------------
4.9 K     Trainable params
0         Non-trainable params
4.9 K     Total params
0.019     Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


/ImportURDFDemo/BulletUrdfImporter.cpp,126]:

b3Printf: base

b3Printf: No inertial data for link, using mass=1, localinertiadiagonal = 1,1,1, identity local inertial frame

b3Printf: base_link

b3Printf: No inertial data for link, using mass=1, localinertiadiagonal = 1,1,1, identity local inertial frame

b3Printf: base

b3Printf: No inertial data for link, using mass=1, localinertiadiagonal = 1,1,1, identity local inertial frame

b3Printf: base_link

b3Printf: No inertial data for link, using mass=1, localinertiadiagonal = 1,1,1, identity local inertial frame

b3Printf: base

b3Printf: No inertial data for link, using mass=1, localinertiadiagonal = 1,1,1, identity local inertial frame

b3Printf: base_link

b3Printf: No inertial data for link, using mass=1, localinertiadiagonal = 1,1,1, identity local inertial frame

b3Printf: base

b3Printf: No inertial data for link, using mass=1, localinertiadiagonal = 1,1,1, identity local inertial frame

b3Printf: base_link

b3Printf: No inertial 

  rank_zero_warn(


Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

l inertial frame

b3Printf: base

b3Printf: No inertial data for link, using mass=1, localinertiadiagonal = 1,1,1, identity local inertial frame

b3Printf: base_link

b3Printf: No inertial data for link, using mass=1, localinertiadiagonal = 1,1,1, identity local inertial frame

b3Printf: base

b3Printf: No inertial data for link, using mass=1, localinertiadiagonal = 1,1,1, identity local inertial frame

b3Printf: base_link

b3Printf: No inertial data for link, using mass=1, localinertiadiagonal = 1,1,1, identity local inertial frame

b3Printf: base

b3Printf: No inertial data for link, using mass=1, localinertiadiagonal = 1,1,1, identity local inertial frame

b3Printf: base_link

b3Printf: No inertial data for link, using mass=1, localinertiadiagonal = 1,1,1, identity local inertial frame

b3Printf: base

b3Printf: No inertial data for link, using mass=1, localinertiadiagonal = 1,1,1, identity local inertial frame

b3Printf: base_link

b3Printf: No inertial data for link, using mass=1,

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

ing mass=1, localinertiadiagonal = 1,1,1, identity local inertial frame

b3Printf: base

b3Printf: No inertial data for link, using mass=1, localinertiadiagonal = 1,1,1, identity local inertial frame

b3Printf: base_link

b3Printf: No inertial data for link, using mass=1, localinertiadiagonal = 1,1,1, identity local inertial frame

b3Printf: base

b3Printf: No inertial data for link, using mass=1, localinertiadiagonal = 1,1,1, identity local inertial frame

b3Printf: base_link

b3Printf: No inertial data for link, using mass=1, localinertiadiagonal = 1,1,1, identity local inertial frame

b3Printf: base

b3Printf: No inertial data for link, using mass=1, localinertiadiagonal = 1,1,1, identity local inertial frame

b3Printf: base_link

b3Printf: No inertial data for link, using mass=1, localinertiadiagonal = 1,1,1, identity local inertial frame

b3Printf: base

b3Printf: No inertial data for link, using mass=1, localinertiadiagonal = 1,1,1, identity local inertial frame

b3Printf: base_l

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

arning[examples/Importers/ImportURDFDemo/BulletUrdfImporter.cpp,126]:

b3Printf: No inertial data for link, using mass=1, localinertiadiagonal = 1,1,1, identity local inertial frame

b3Printf: base

b3Printf: No inertial data for link, using mass=1, localinertiadiagonal = 1,1,1, identity local inertial frame

b3Printf: base_link

b3Printf: No inertial data for link, using mass=1, localinertiadiagonal = 1,1,1, identity local inertial frame

b3Printf: base

b3Printf: No inertial data for link, using mass=1, localinertiadiagonal = 1,1,1, identity local inertial frame

b3Printf: base_link

b3Printf: No inertial data for link, using mass=1, localinertiadiagonal = 1,1,1, identity local inertial frame

b3Printf: base

b3Printf: No inertial data for link, using mass=1, localinertiadiagonal = 1,1,1, identity local inertial frame

b3Printf: base_link

b3Printf: No inertial data for link, using mass=1, localinertiadiagonal = 1,1,1, identity local inertial frame

b3Printf: base

b3Printf: No inert

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

dfImporter.cpp,126]:

b3Printf: base_link

b3Printf: No inertial data for link, using mass=1, localinertiadiagonal = 1,1,1, identity local inertial frame

b3Printf: base

b3Printf: No inertial data for link, using mass=1, localinertiadiagonal = 1,1,1, identity local inertial frame

b3Printf: base_link

b3Printf: No inertial data for link, using mass=1, localinertiadiagonal = 1,1,1, identity local inertial frame

b3Printf: base

b3Printf: No inertial data for link, using mass=1, localinertiadiagonal = 1,1,1, identity local inertial frame

b3Printf: base_link

b3Printf: No inertial data for link, using mass=1, localinertiadiagonal = 1,1,1, identity local inertial frame

b3Printf: base

b3Printf: No inertial data for link, using mass=1, localinertiadiagonal = 1,1,1, identity local inertial frame

b3Printf: base_link

b3Printf: No inertial data for link, using mass=1, localinertiadiagonal = 1,1,1, identity local inertial frame

b3Printf: base

b3Printf: No inertial data for link, using mas

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

`Trainer.fit` stopped: `max_epochs=20` reached.
