In [2]:
import numpy as np
import numpy.random as npr
import random

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import TensorDataset, DataLoader, random_split

import matplotlib.pyplot as plt

import torchvision
import torchvision.datasets as datasets

import itertools
import math
import string
import os

from macq import generate, extract
from macq.observation import IdentityObservation, AtomicPartialObservation

# First-order Methods

In [92]:
class Type:
    """A node in a single-inheritance type hierarchy.

    `name` is the label of the type and `parent` is the `Type` it derives
    from, or None when the type has no parent.
    """

    def __init__(self, name, parent):
        self.parent = parent
        self.name = name

    def is_child(self, another_type):
        """Return True when this type or one of its ancestors carries the
        same name as `another_type`, and False otherwise."""
        # Walk up the parent chain, starting with the type itself.
        node = self
        while node is not None:
            if node.name == another_type.name:
                return True
            node = node.parent
        return False

In [93]:
class Predicate:
    """A lifted predicate: a name plus, for each parameter type, the number
    of arguments of that type."""

    def __init__(self, name, params):
        self.name = name
        # params are dicts {Type: num}
        self.params = params
        # Parameter types in a fixed order (sorted by type name); every
        # grounding below follows this order.
        self.params_types = sorted(params.keys(), key=lambda x: x.name)

    def _pool(self, objects, params_type):
        # All objects whose declared type is `params_type` or a descendant of it,
        # in the iteration order of `objects`.
        pool = []
        for obj_type, members in objects.items():
            if obj_type.is_child(params_type):
                pool.extend(members)
        return pool

    def proposition(self, sorted_obj_lists):
        '''
        Build the proposition string for one grounding.
        sorted_obj_lists[i] holds the object names bound to self.params_types[i];
        every object name is preceded by its parameter type name.
        '''
        chunks = []
        for position, obj_names in enumerate(sorted_obj_lists):
            type_name = self.params_types[position].name
            chunks.append(f'{type_name} ' + f' {type_name} '.join(obj_names))
        return (self.name + ' ' + ' '.join(chunks)).strip()

    def ground(self, objects):
        '''
        Input a list of objects in the form {Type: []}
        Return all the propositions grounded from this predicates with the objects
        '''
        # One permutation iterator per parameter type; the cartesian product of
        # them enumerates every grounding.
        choices_per_type = [
            itertools.permutations(self._pool(objects, params_type), self.params[params_type])
            for params_type in self.params_types
        ]
        return [self.proposition(combo) for combo in itertools.product(*choices_per_type)]

    def ground_num(self, objects):
        '''
        Return how many propositions this predicate can ground on the objects
        '''
        total = 1
        for params_type in self.params_types:
            available = len(self._pool(objects, params_type))
            total *= math.perm(available, self.params[params_type])
        return total

In [94]:
class Action_Schema(nn.Module):
    """Learnable lifted action schema.

    For every proposition obtained by grounding a relevant predicate on the
    schema's own parameters, a small MLP outputs a distribution over four
    classes. `pretty_print` reads the argmax class as:
    1 = add effect, 2 = precondition, 3 = precondition that is also deleted;
    class 0 is not reported.

    Attributes:
        name: schema name.
        params: dict {Type: num} giving how many parameters of each type.
        params_types: the keys of `params`, sorted by type name.
        predicates: predicates relevant to this schema (set by `initialise`).
    """

    def __init__(self, name, params):
        super(Action_Schema, self).__init__()
        self.name = name
        # params are dicts {Type: num}
        self.params = params
        self.params_types = sorted(params.keys(), key=lambda x: x.name)
        # predicates that are relevant
        self.predicates = []

    def initialise(self, predicates, device):
        '''
        Input all predicates and generate the model for action schema.

        Selects the relevant predicates, counts the schema-level propositions
        (one feature row each) and creates the random input and the MLP on
        `device`. Calling it again rebuilds everything from scratch.
        '''
        # Reset first: without this a second call would append every relevant
        # predicate again and `predicates` would no longer match the feature rows.
        self.predicates = []
        n_features = 0
        for predicate in predicates:
            # A predicate is relevant to an action schema iff for each of its param type,
            # the number of objects required is leq the number of objects there is
            # for the same type or children type in the action schema
            is_relevant = True
            # Also calculate how many propositions there are when predicate is grounded on "variables"
            # e.g. on X Y; on Y X when X and Y are variables
            n_ground = 1
            for params_type in predicate.params_types:
                n_params = 0
                for model_params_type in self.params:
                    if model_params_type.is_child(params_type):
                        n_params += self.params[model_params_type]
                if predicate.params[params_type] > n_params:
                    is_relevant = False
                    break
                n_ground *= math.perm(n_params, predicate.params[params_type])
            if is_relevant:
                self.predicates.append(predicate)
                n_features += n_ground
        n_features = int(n_features)

        # Random input, one 128-d row per schema-level proposition.
        # NOTE(review): this is a plain tensor attribute, not an nn.Parameter,
        # so it is not returned by parameters() even though requires_grad=True
        # -- confirm that keeping it fixed during training is intended.
        self.randn = torch.randn(n_features, 128, device=device, requires_grad=True)
        self.mlp = nn.Sequential(
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, 16),
            nn.ReLU(),
            nn.Linear(16, 4),
            nn.Softmax(dim=1)
        )
        self.mlp.to(device)

    def forward(self):
        """Return the (n_features, 4) class probabilities, one row per
        schema-level proposition."""
        return self.mlp(self.randn)

    def _objects_by_param_type(self, objects):
        # For every parameter type of the schema, collect the objects whose
        # type is that type or a descendant of it.
        obj_lists_per_params = {params_type: [] for params_type in self.params_types}
        for params_type in self.params_types:
            for obj_type in objects.keys():
                if obj_type.is_child(params_type):
                    obj_lists_per_params[params_type].extend(objects[obj_type])
        return obj_lists_per_params

    def ground(self, objects, is_single_action=False):
        """Ground the relevant predicates.

        With `is_single_action=True`, `objects` ({Type: [names]}) is taken as
        the arguments of one action and a flat list of propositions is
        returned. Otherwise every grounded action over `objects` is enumerated
        and a list with one proposition list per grounded action is returned.
        """
        if is_single_action:
            propositions = []
            for predicate in self.predicates:
                propositions.extend(predicate.ground(objects))
            return propositions

        propositions = []
        obj_lists_per_params = self._objects_by_param_type(objects)
        for obj_list in itertools.product(*[
            itertools.permutations(obj_lists_per_params[params_type], self.params[params_type])
            for params_type in self.params_types
        ]):
            # Arguments of this grounded action, keyed by parameter type.
            objects_per_action = {}
            for i in range(len(self.params_types)):
                objects_per_action[self.params_types[i]] = obj_list[i]
            propositions_per_action = []
            for predicate in self.predicates:
                propositions_per_action.extend(predicate.ground(objects_per_action))
            propositions.append(propositions_per_action)
        return propositions

    def pretty_print(self):
        """Print the schema header, then its preconditions, add effects and
        delete effects (one comma-separated line each), using letters
        a, b, c, ... as parameter variables.

        Returns (precon_list, addeff_list, deleff_list, var) where `var` maps
        each parameter type to its variable letters.
        """
        # Assign consecutive lowercase letters to the parameters, type by type.
        var = {}
        n = 0
        for param_type in self.params_types:
            var[param_type] = list(string.ascii_lowercase)[
                n : n + self.params[param_type]
            ]
            n += self.params[param_type]
        print(
            f"{self.name}"
            + " "
            + " ".join([k.name + " " + v for k in var.keys() for v in var[k]])
        )
        # Same enumeration order as the feature rows counted in initialise.
        propositions = [
            p for predicate in self.predicates for p in predicate.ground(var)
        ]
        precon_list = []
        addeff_list = []
        deleff_list = []
        result = torch.argmax(self(), dim=1)
        for i in range(len(propositions)):
            if result[i] == 1:
                addeff_list.append(propositions[i])
            elif result[i] == 2:
                precon_list.append(propositions[i])
            elif result[i] == 3:
                # Deleted propositions are also preconditions.
                precon_list.append(propositions[i])
                deleff_list.append(propositions[i])
        print(", ".join(precon_list))
        print(", ".join(addeff_list))
        print(", ".join(deleff_list))
        return precon_list, addeff_list, deleff_list, var

In [95]:
class Domain_Model(nn.Module):
    """A set of predicates and learnable action schemas, grounded on a
    concrete set of objects.

    After `ground(objects)`:
        propositions: dict mapping each grounded proposition string to its column index.
        indices: for every grounded action, the proposition column index of
            each of its schema-level features (in feature order).
        action_to_schema: for every grounded action, the schema it comes from.
    """

    def __init__(self, predicates, action_schemas, device):
        super(Domain_Model, self).__init__()
        self.predicates = predicates
        # Kept as a plain list, so the schemas are not registered as
        # sub-modules of this module.
        self.action_schemas = action_schemas
        self.device = device
        for action_schema in action_schemas:
            action_schema.initialise(predicates, self.device)

    def ground(self, objects):
        """Ground all predicates and action schemas on `objects` ({Type: [names]})."""
        # Ground predicates to propositions
        # Record in a dictionary with values as indices, for later lookup
        self.propositions = {}
        for predicate in self.predicates:
            for proposition in predicate.ground(objects):
                self.propositions[proposition] = len(self.propositions)

        # For each action schema, ground to actions and then find the indices
        self.indices = []
        # Also need to know which action schema each action is from
        self.action_to_schema = []
        for action_schema in self.action_schemas:
            for propositions in action_schema.ground(objects):
                self.indices.append([self.propositions[p] for p in propositions])
                self.action_to_schema.append(action_schema)

    def build(self, actions):
        '''
        actions is a list of numbers (grounded-action indices).

        Returns three (len(actions), len(self.propositions)) tensors holding,
        for every listed action, the precondition, add-effect and
        delete-effect value of every grounded proposition.
        '''
        n_props = len(self.propositions)
        precon = torch.zeros((len(actions), n_props), device=self.device, requires_grad=False)
        addeff = torch.zeros((len(actions), n_props), device=self.device, requires_grad=False)
        deleff = torch.zeros((len(actions), n_props), device=self.device, requires_grad=False)

        # Class-selection vectors over the four schema output classes:
        # precondition = classes 2 and 3, add effect = class 1, delete effect = class 3.
        precon_classes = torch.tensor([0.0, 0.0, 1.0, 1.0], device=self.device)
        addeff_classes = torch.tensor([0.0, 1.0, 0.0, 0.0], device=self.device)
        deleff_classes = torch.tensor([0.0, 0.0, 0.0, 1.0], device=self.device)

        # A schema's output does not depend on the grounded action, so run each
        # schema's forward pass once per call instead of once per action.
        schema_outputs = {}

        for i in range(len(actions)):
            y_indices = self.indices[actions[i]]
            schema = self.action_to_schema[actions[i]]

            if id(schema) not in schema_outputs:
                schema_prams = schema()
                schema_outputs[id(schema)] = (
                    schema_prams @ precon_classes,
                    schema_prams @ addeff_classes,
                    schema_prams @ deleff_classes,
                )
            schema_precon, schema_addeff, schema_deleff = schema_outputs[id(schema)]

            if len(y_indices) > len(set(y_indices)):
                # There are duplicate indices in y_indices
                # Multiple predicates are grounded to one same proposition
                # We need to combine the contribution from different predicates to one proposition
                applied = set()
                # j is the feature position inside the schema output, y_idx the
                # proposition column. The schema vectors must be indexed by j:
                # indexing them by y_idx reads the wrong feature or runs out of range.
                for j, y_idx in enumerate(y_indices):
                    if y_idx not in applied:
                        precon[i, y_idx] += schema_precon[j]
                        addeff[i, y_idx] += schema_addeff[j]
                        deleff[i, y_idx] += schema_deleff[j]
                        applied.add(y_idx)
                    else:
                        # The multiple effects are combined with "or"
                        # p v q = not ((not p)^(not q))
                        precon[i, y_idx] = 1 - (1 - precon[i, y_idx]) * (1 - schema_precon[j])
                        addeff[i, y_idx] = 1 - (1 - addeff[i, y_idx]) * (1 - schema_addeff[j])
                        deleff[i, y_idx] = 1 - (1 - deleff[i, y_idx]) * (1 - schema_deleff[j])
            else:
                # No duplicates: scatter the whole feature vector into row i.
                x_indices = [i] * len(y_indices)
                precon[x_indices, y_indices] += schema_precon
                addeff[x_indices, y_indices] += schema_addeff
                deleff[x_indices, y_indices] += schema_deleff
        return precon, addeff, deleff

# Blocksworld

## Model

In [96]:
# Single root type (no parent); the only type used by the model and objects below.
obj = Type("object", None)

In [97]:
# Domain model with five predicates and four action schemas over the single
# type `obj`. Each params dict gives the number of arguments per type.
# Constructing the model also initialises every action schema on the CPU.
model = Domain_Model(
    [
        Predicate('arm-empty', {}),
        Predicate('clear', {obj:1}),
        Predicate('on-table', {obj:1}),
        Predicate('holding', {obj:1}),
        Predicate('on', {obj:2}),
    ],
    [
        Action_Schema('pickup', {obj:1}),
        Action_Schema('putdown', {obj:1}),
        Action_Schema('stack', {obj:2}),
        Action_Schema('unstack', {obj:2}),
    ]
, device="cpu")

In [98]:
# Objects of the problem instance, keyed by their type: five objects of type `obj`.
objects = {obj: ['a', 'b', 'c', 'd', 'e']}

In [99]:
# Ground predicates and action schemas on `objects`; this fills
# model.propositions, model.indices and model.action_to_schema.
model.ground(objects)

In [100]:
# Map the string key of every grounded action ("<schema> <type> <obj> ...")
# to a running index. The enumeration (schemas in model order, then the
# product of per-type permutations) is the same one Action_Schema.ground uses,
# so index k here refers to the same grounded action as model.indices[k].
all_grounded_actions = {}
for action_schema in model.action_schemas:
    # Objects usable for each parameter type (objects of child types included).
    obj_lists_per_params = {params_type: [] for params_type in action_schema.params_types}
    for params_type in action_schema.params_types:
        for obj_type in objects.keys():
            if obj_type.is_child(params_type):
                obj_lists_per_params[params_type].extend(objects[obj_type])
    for obj_list in itertools.product(*[
        itertools.permutations(obj_lists_per_params[params_type], action_schema.params[params_type])
        for params_type in action_schema.params_types
    ]):
        constructed = action_schema.name + ' ' + ' '.join([f'{action_schema.params_types[i].name} '
                                                           + f' {action_schema.params_types[i].name} '.join(obj_list[i])
                                                           for i in range(len(obj_list))])
        all_grounded_actions[constructed] = len(all_grounded_actions)

# Two grounded actions sharing a key would silently shift every later index.
assert len(all_grounded_actions) == len(model.indices), \
    "grounded-action keys are out of step with model.indices"

## Data

In [8]:
# Transition samples: state before, index of the executed action, state after.
# Needs `model` (already grounded) and `all_grounded_actions` from earlier cells.
steps_state1, steps_action, steps_state2 = [], [], []

traces = generate.pddl.VanillaSampling(
    dom='/Users/omarwattad/Documents/Action Model - Research/rosame/action_model_inference/rovers/StripsRover.pddl',
    prob='/Users/omarwattad/Documents/Action Model - Research/rosame/action_model_inference/rovers/pfile98.pddl',
    plan_len=10,
    num_traces=10,
).traces
for trace in traces:
    # The last step has no action, so only consecutive pairs are visited.
    for t in range(len(trace.steps) - 1):
        step_before = trace.steps[t]
        step_after = trace.steps[t + 1]
        # Fluents that are True, with the first and last character of their
        # serialized form dropped.
        fluents_in_state1 = {f._serialize()[1:-1] for f in step_before.state if step_before.state[f] is True}
        fluents_in_state2 = {f._serialize()[1:-1] for f in step_after.state if step_after.state[f] is True}
        # Binary vectors in the column order of model.propositions.
        state1 = [1 if p in fluents_in_state1 else 0 for p in model.propositions]
        state2 = [1 if p in fluents_in_state2 else 0 for p in model.propositions]

        # action parameter order may not be the same as our model,
        # so the parameters are re-sorted by their type before building the key
        action = step_before.action
        action_obj_params = sorted(list(action.obj_params), key=lambda o: o.obj_type)
        action_key = f"{action.name} {' '.join([o.details()for o in action_obj_params])}"

        steps_action.append(all_grounded_actions[action_key])
        steps_state1.append(state1)
        steps_state2.append(state2)

100%|██████████| 10/10 [00:00<00:00, 28.11it/s]


NameError: name 'model' is not defined

In [90]:
def _logistics_fluents(state):
    """Return the set of model-style proposition strings for the fluents that
    are True in `state`.

    Only fluents named 'at', 'in' and 'in-city' are kept. Tokens 2 and 4 of
    the space-split serialized fluent are used as the two object names.
    """
    converted = set()
    for f in state:
        if state[f] is not True:
            continue
        if f.name == 'at':
            tokens = f._serialize()[1:-1].split(' ')
            converted.add(f'at location {tokens[4]} movable {tokens[2]}')
        elif f.name == 'in':
            tokens = f._serialize()[1:-1].split(' ')
            converted.add(f'in obj {tokens[2]} transport {tokens[4]}')
        elif f.name == 'in-city':
            tokens = f._serialize()[1:-1].split(' ')
            converted.add(f'in-city city {tokens[4]} location {tokens[2]}')
    return converted


# Transition samples: state before, index of the executed action, state after.
steps_state1, steps_action, steps_state2 = [], [], []

traces = generate.pddl.VanillaSampling(
    dom='/Users/omarwattad/Documents/Action Model - Research/rosame/data/pddl/logistics/domain.pddl',
    prob='/Users/omarwattad/Documents/Action Model - Research/rosame/data/pddl/logistics/prob01.pddl',
    plan_len=400,
    num_traces=10,
).traces
for trace in traces:
    # last step no action
    for t in range(len(trace.steps) - 1):
        fluents_in_state1 = _logistics_fluents(trace.steps[t].state)
        fluents_in_state2 = _logistics_fluents(trace.steps[t + 1].state)
        # Binary vectors in the column order of model.propositions.
        state1 = [1 if p in fluents_in_state1 else 0 for p in model.propositions]
        state2 = [1 if p in fluents_in_state2 else 0 for p in model.propositions]

        # action parameter order may not be the same as our model
        action = trace.steps[t].action
        action_obj_params = []
        for o in action.obj_params:
            # For actions without 'airplane' in their name, airports are
            # relabelled as plain locations. This mutates the trace object.
            if 'airplane' not in action.name and o.obj_type == 'airport':
                o.obj_type = 'location'
            action_obj_params.append(o)
        action_obj_params.sort(key=lambda o: o.obj_type)

        action_key = f"{action.name} {' '.join([o.details()for o in action_obj_params])}"
        steps_action.append(all_grounded_actions[action_key])

        steps_state1.append(state1)
        steps_state2.append(state2)

100%|██████████| 10/10 [00:05<00:00,  1.83it/s]


In [75]:
def add_noise_to_trajectory(trajectory, epsilon, seed=123):
    """Turn binary state vectors into noisy values.

    Args:
        trajectory: iterable of state vectors (iterables of numbers).
        epsilon: upper bound of the uniform noise.
        seed: seed of the private random generator (default 123).

    Returns:
        A list of lists with the same layout as `trajectory`: every entry
        equal to 0 becomes a draw u from uniform(0, epsilon), every other
        entry becomes 1 - u.
    """
    # A private legacy generator yields the same draws as
    # np.random.seed(seed) followed by np.random.uniform, but leaves the
    # global NumPy random state untouched.
    rng = np.random.RandomState(seed)
    final = []
    for traj in trajectory:
        noisy_trajectory = []
        for f in traj:
            noise = rng.uniform(0, epsilon)
            noisy_trajectory.append(noise if f == 0 else 1 - noise)
        final.append(noisy_trajectory)
    return final

In [62]:
from models.rosame import *

# Directory holding one sub-directory per domain, each with a
# domain_model.json and an objects.json.
DOMAINS_DIR = "/Users/omarwattad/Documents/Action Model - Research/rosame/models/domains"


def get_domain_model(domain, device=torch.device("cpu"), domains_dir=DOMAINS_DIR):
    """Load the domain model for `domain` and ground it on its objects.

    Args:
        domain: name of the sub-directory of `domains_dir` to read.
        device: device passed on to `load_model`.
        domains_dir: directory containing the per-domain folders.

    Returns:
        The object returned by `load_model`, after `ground_from_json` has
        been called on it with the domain's objects.json.
    """
    # The old code joined os.path.dirname(...) with an absolute path;
    # os.path.join discards everything before an absolute component, so only
    # this directory was ever used.
    domain_dir = os.path.join(domains_dir, domain)
    domain_model = load_model(os.path.join(domain_dir, "domain_model.json"), device)
    domain_model.ground_from_json(os.path.join(domain_dir, "objects.json"))
    return domain_model

# Rebinds `model`; cells that run after this one see the 8-puzzle model.
model = get_domain_model("8-puzzle")

In [64]:
import json
import torch

  # for eps in [0.2,0.4,0.6,0.8,1]:
  #   print(data_count)
# ---------------------------------------------------------------------------
# PDDL export helpers. They are defined once here rather than being
# re-defined on every iteration of the data_count loop below.
# ---------------------------------------------------------------------------

def _pddl_atom(proposition):
    """Convert a pretty-printed proposition into a PDDL atom.

    The proposition is split on single spaces; the first token is the
    predicate name and every token at an even position after it is emitted
    as a variable ("?x"). A bare name becomes "(name)".
    """
    cond = proposition.split(" ")
    atom = f"({cond[0]}"
    for i in range(1, len(cond)):
        if i % 2 == 0:
            atom += f' ?{cond[i]}'
    return atom + ")"


def action_pddl(schema, pre, add, delete, params):
    """Return the PDDL text of one action.

    Args:
        schema: object with a `name` attribute.
        pre, add, delete: lists of proposition strings (preconditions, add
            effects, delete effects).
        params: dict mapping a type (with a `name` attribute) to its list of
            variable names.
    """
    action_str = f"(:action {schema.name}\n"
    param_str = ""
    for param_type, names in params.items():
        for param in names:
            param_str += f"?{param} - {param_type.name} "
    # [:-1] drops the trailing space left by the loop above.
    parameters = "  :parameters (" + param_str[:-1] + ")\n"

    preconditions = "  :precondition (and"
    preconditions += "".join(" " + _pddl_atom(p) for p in pre)
    preconditions += ")\n"

    effects = "  :effect (and"
    effects += "".join(" " + _pddl_atom(p) for p in add)
    effects += "".join(" (not " + _pddl_atom(p) + ")" for p in delete)
    # Closes both the (and ...) and the (:action ...) form.
    effects += "))\n\n"
    return action_str + parameters + preconditions + effects


def head_pddl(domain_name):
    """Return the PDDL domain header (define, requirements, types, predicates).

    NOTE(review): the types and predicates are hard-coded blocksworld ones,
    whatever `domain_name` is.
    """
    define = f"(define (domain {domain_name}-4ops)\n" # remove "-4ops"
    req_str = ""
    if domain_name == "blocksworld" or domain_name == 'hanoi':
        req_str = "(:requirements :strips :equality)\n"
    elif domain_name == "gripper-strips":
        req_str = "(:requirements :adl :equality)\n"

    types = "(:types block - object)"

    predicates = '(:predicates (arm-empty) (clear ?a - block) (on-table ?a - block) (holding ?a - block) (on ?a - block ?b - block))'
    predicates += "\n\n"
    return define + req_str + types + predicates


def to_pddl(write_pddl_to):
    """Print the learned schemas of the global `model`, assemble a PDDL
    domain from them and write it to `write_pddl_to`.

    NOTE(review): the header is requested for "blocksworld", while this cell
    trains on synth_8-puzzle.json and writes into a "slide" folder -- confirm
    the header matches the domain being exported.
    """
    pddl = head_pddl("blocksworld")
    for schema in model.action_schemas:
        pre, add, dell, params = schema.pretty_print()
        pddl += action_pddl(schema, pre, add, dell, params)
        print("----------------------------")
    pddl += ")"
    print(pddl)
    # create and write to pddl file
    with open(write_pddl_to, "w") as f:
        f.write(pddl)
    print(f"PDDL content successfully written to {write_pddl_to}")


# ---------------------------------------------------------------------------
# Load the labelled transitions: every entry is indexed as
# [state before, action, state after].
# ---------------------------------------------------------------------------
with open("/Users/omarwattad/Documents/Action Model - Research/rosame/json_files/labels/synth_8-puzzle.json", "r") as f:
    data = json.load(f)

# NOTE(review): the name `next` shadows the builtin for the rest of the
# notebook session; it is kept because other cells may read these names.
pre = []
next = []
actions = []
for triplet in data:
    pre.append(triplet[0])
    next.append(triplet[2])
    actions.append(triplet[1])

# ---------------------------------------------------------------------------
# Train on the first `data_count` transitions and export the result.
# NOTE(review): `model` is not re-created inside the loop, so every run
# continues from the weights left by the previous data_count.
# ---------------------------------------------------------------------------
for data_count in [1,2,3,5,7]:

    steps_action_ = actions[:data_count]
    steps_state1_ = pre[:data_count]
    steps_state2_ = next[:data_count]
    # pre = add_noise_to_trajectory(steps_state1_, eps)
    # next = add_noise_to_trajectory(steps_state2_, eps)
    print(len(steps_action_), len(steps_state1_), len(steps_state2_))
    steps_state1_tensor = torch.tensor(np.array(steps_state1_)).float()
    steps_action_tensor = torch.tensor(np.array(steps_action_))
    steps_state2_tensor = torch.tensor(np.array(steps_state2_)).float()

    batch_sz = 1000
    dataset = TensorDataset(steps_state1_tensor, steps_action_tensor, steps_state2_tensor)
    dataloader = DataLoader(dataset, batch_size=batch_sz, shuffle=False)

    #TRAINING
    # One Adam parameter group per action schema.
    parameters = []
    for schema in model.action_schemas:
        parameters.append({'params': schema.parameters(), 'lr': 1e-3})
    optimizer = optim.Adam(parameters)

    for epoch in range(100):
        loss_final = 0.0
        for state_1, executed_actions, state_2 in dataloader:
            # Reset gradients for every batch; doing it only once per epoch
            # would let gradients of earlier batches leak into later steps.
            optimizer.zero_grad()
            precon, addeff, deleff = model.build(executed_actions)
            # The result of applying a in s is (s\Del(a)) U Add(a)
            # We can simplify this to be:
            # ((p in state 1) ^ (not p in Del(a))) v ((not p in state 1) ^ (p in Add(a)))
            # Note we implicitly apply the constraint that add effects and preconditions
            # cannot intersect and only preconditions can be deleted.
            # The "or" is computed as 1 - (1-x)*(1-y).
            preds = 1 - (1 - state_1*(1-deleff)) * (1 - (1-state_1)*addeff)

            # state_2 is treated as the true target; squared error, summed.
            loss = F.mse_loss(preds, state_2, reduction='sum')
            # Add in validity constraint
            # Since executed actions are applicable in state_1
            # p in Pre(a) -> p in state_1 for all a in executed_actions
            # not ((p in Pre(a)) ^ (not p in state_1))
            validity_constraint = (1-state_1) * (precon)
            loss += F.mse_loss(validity_constraint, torch.zeros_like(validity_constraint), reduction='sum')
            # Pulls the precondition values towards 1, with weight 0.2.
            loss += 0.2*F.mse_loss(precon, torch.ones_like(precon), reduction='sum')
            # loss += model.constraint_loss()
            loss.backward()
            optimizer.step()
            # Scaled by the nominal batch size, not the actual number of samples.
            loss_final += loss.item() / batch_sz
        if epoch%10 == 0:
            print('Epoch {} RESULTS: Average loss: {:.10f}'.format(epoch, loss_final))

    for action_schema in model.action_schemas:
        action_schema.pretty_print()
        print()

    # TO PDDL
    to_pddl(f'/Users/omarwattad/Documents/Action Model - Research/rosame/final_domains/label/rosame/slide/{data_count}_domain.pddl')

1 1 1
Epoch 0 RESULTS: Average loss: 0.0706858826
Epoch 10 RESULTS: Average loss: 0.0699630508
Epoch 20 RESULTS: Average loss: 0.0687334747
Epoch 30 RESULTS: Average loss: 0.0669666977
Epoch 40 RESULTS: Average loss: 0.0661806564
Epoch 50 RESULTS: Average loss: 0.0653700562
Epoch 60 RESULTS: Average loss: 0.0649346619
Epoch 70 RESULTS: Average loss: 0.0648584595
Epoch 80 RESULTS: Average loss: 0.0648332596
Epoch 90 RESULTS: Average loss: 0.0648167419
move-up position a position b position c tile d

at position a position b tile d, at position a position c tile d, at position b position a tile d, at position b position c tile d, at position c position a tile d, at position c position b tile d, blank position a position b, blank position a position c, blank position b position a, blank position b position c, blank position c position a, blank position c position b, inc position a position b, inc position a position c, inc position b position a, inc position b position c, inc position c p

In [12]:
# Stack the collected transitions into tensors: the two state matrices are
# cast to float, the action indices keep the dtype inferred from the array.
steps_state1_tensor = torch.tensor(np.array(steps_state1)).float()
steps_action_tensor = torch.tensor(np.array(steps_action))
steps_state2_tensor = torch.tensor(np.array(steps_state2)).float()

In [13]:
# Loader over (state before, action index, state after) triples, in the
# original order (shuffle=False), with at most batch_sz samples per batch.
batch_sz = 1000
dataset = TensorDataset(steps_state1_tensor, steps_action_tensor, steps_state2_tensor)
dataloader = DataLoader(dataset, batch_size=batch_sz, shuffle=False)

## Training

In [14]:
# One Adam parameter group per action schema, each with learning rate 1e-3.
parameters = [
    {'params': schema.parameters(), 'lr': 1e-3}
    for schema in model.action_schemas
]
optimizer = optim.Adam(parameters)

In [15]:
for epoch in range(100):
    loss_final = 0.0
    for state_1, executed_actions, state_2 in dataloader:
        # Reset gradients for every batch (as the gripper training loop does);
        # resetting only once per epoch would accumulate gradients across
        # batches whenever the data spans more than one batch.
        optimizer.zero_grad()
        precon, addeff, deleff = model.build(executed_actions)
        # The result of applying a in s is (s\Del(a)) U Add(a)
        # We can simplify this to be:
        # ((p in state 1) ^ (not p in Del(a))) v ((not p in state 1) ^ (p in Add(a)))
        # Note we implicitly apply the constraint that add effects and preconditions
        # cannot intersect and only preconditions can be deleted.
        # The "or" is computed as 1 - (1-x)*(1-y).
        preds = 1 - (1 - state_1*(1-deleff)) * (1 - (1-state_1)*addeff)

        # state_2 is treated as the true target; squared error, summed.
        loss = F.mse_loss(preds, state_2, reduction='sum')
        # Add in validity constraint
        # Since executed actions are applicable in state_1
        # p in Pre(a) -> p in state_1 for all a in executed_actions
        # not ((p in Pre(a)) ^ (not p in state_1))
        validity_constraint = (1-state_1) * (precon)
        loss += F.mse_loss(validity_constraint, torch.zeros_like(validity_constraint), reduction='sum')
        # Pulls the precondition values towards 1, with weight 0.2.
        loss += 0.2*F.mse_loss(precon, torch.ones_like(precon), reduction='sum')
        # loss += model.constraint_loss()
        loss.backward()
        optimizer.step()
        # Scaled by the nominal batch size, not the actual number of samples.
        loss_final += loss.item() / batch_sz
    if epoch%10 == 0:
        print('Epoch {} RESULTS: Average loss: {:.10f}'.format(epoch, loss_final))

Epoch 0 RESULTS: Average loss: 0.8820755615
Epoch 10 RESULTS: Average loss: 0.8452726440
Epoch 20 RESULTS: Average loss: 0.7793428955
Epoch 30 RESULTS: Average loss: 0.6977715454
Epoch 40 RESULTS: Average loss: 0.6401904297
Epoch 50 RESULTS: Average loss: 0.6134895630
Epoch 60 RESULTS: Average loss: 0.6045328979
Epoch 70 RESULTS: Average loss: 0.5974949341
Epoch 80 RESULTS: Average loss: 0.5947376709
Epoch 90 RESULTS: Average loss: 0.5938486328


In [16]:
# Print every learned schema, with a blank line between schemas.
for action_schema in model.action_schemas:
    action_schema.pretty_print()
    print()

pickup object a
arm-empty, clear object a, on-table object a
holding object a
arm-empty, clear object a, on-table object a

putdown object a
holding object a
arm-empty, clear object a, on-table object a
holding object a

stack object a object b
clear object b, holding object a
arm-empty, clear object a, on object a object b
clear object b, holding object a

unstack object a object b
arm-empty, clear object a, on object a object b
clear object b, holding object a
arm-empty, clear object a, on object a object b



# Gripper

## Model (Problem Specific)

In [133]:
# Type hierarchy: room, ball and gripper all have the root type `base` as parent.
base = Type("object", None)
room = Type("room", base)
ball = Type("ball", base)
gripper = Type("gripper", base)

In [134]:
# Domain model with four predicates and three action schemas. Each params
# dict gives the number of arguments per type. Rebinds `model`.
model = Domain_Model([
                        Predicate('at-robby', {room:1}),
                        Predicate('at', {ball:1, room:1}),
                        Predicate('free', {gripper:1}),
                        Predicate('carry', {ball:1, gripper:1}),],
                     [
                        Action_Schema('move', {room:2}),
                        Action_Schema('pick', {ball:1, room:1, gripper:1}),
                        Action_Schema('drop', {ball:1, room:1, gripper:1}),
                     ], device='cpu')

In [135]:
# Objects of the problem instance, keyed by type: two rooms, six balls, two grippers.
objects = {
    room: ['rooma', 'roomb'],
    ball: ['ball1', 'ball2', 'ball3', 'ball4', 'ball5', 'ball6'],
    gripper: ['left', 'right']
}

In [136]:
# Ground predicates and action schemas on `objects`; this fills
# model.propositions, model.indices and model.action_to_schema.
model.ground(objects)

In [137]:
# Map the string key of every grounded action ("<schema> <type> <obj> ...")
# to a running index. The enumeration (schemas in model order, then the
# product of per-type permutations) is the same one Action_Schema.ground uses,
# so index k here refers to the same grounded action as model.indices[k].
all_grounded_actions = {}
for action_schema in model.action_schemas:
    # Objects usable for each parameter type (objects of child types included).
    obj_lists_per_params = {params_type: [] for params_type in action_schema.params_types}
    for params_type in action_schema.params_types:
        for obj_type in objects.keys():
            if obj_type.is_child(params_type):
                obj_lists_per_params[params_type].extend(objects[obj_type])
    for obj_list in itertools.product(*[
        itertools.permutations(obj_lists_per_params[params_type], action_schema.params[params_type])
        for params_type in action_schema.params_types
    ]):
        constructed = action_schema.name + ' ' + ' '.join([f'{action_schema.params_types[i].name} '
                                                           + f' {action_schema.params_types[i].name} '.join(obj_list[i])
                                                           for i in range(len(obj_list))])
        all_grounded_actions[constructed] = len(all_grounded_actions)

# Two grounded actions sharing a key would silently shift every later index.
assert len(all_grounded_actions) == len(model.indices), \
    "grounded-action keys are out of step with model.indices"

In [138]:
# Display the key -> index mapping of the grounded actions.
all_grounded_actions

{'move room rooma room roomb': 0,
 'move room roomb room rooma': 1,
 'pick ball ball1 gripper left room rooma': 2,
 'pick ball ball1 gripper left room roomb': 3,
 'pick ball ball1 gripper right room rooma': 4,
 'pick ball ball1 gripper right room roomb': 5,
 'pick ball ball2 gripper left room rooma': 6,
 'pick ball ball2 gripper left room roomb': 7,
 'pick ball ball2 gripper right room rooma': 8,
 'pick ball ball2 gripper right room roomb': 9,
 'pick ball ball3 gripper left room rooma': 10,
 'pick ball ball3 gripper left room roomb': 11,
 'pick ball ball3 gripper right room rooma': 12,
 'pick ball ball3 gripper right room roomb': 13,
 'pick ball ball4 gripper left room rooma': 14,
 'pick ball ball4 gripper left room roomb': 15,
 'pick ball ball4 gripper right room rooma': 16,
 'pick ball ball4 gripper right room roomb': 17,
 'pick ball ball5 gripper left room rooma': 18,
 'pick ball ball5 gripper left room roomb': 19,
 'pick ball ball5 gripper right room rooma': 20,
 'pick ball ball5 g

## Data

In [23]:
# Transition samples: state before, index of the executed action, state after.
# Needs `model` (already grounded) and `all_grounded_actions` from earlier cells.
steps_state1, steps_action, steps_state2 = [], [], []

traces = generate.pddl.VanillaSampling(
    dom='./gripper/domain.pddl',
    prob='./gripper/prob01.pddl',
    plan_len=10,
    num_traces=10,
).traces
for trace in traces:
    # The last step has no action, so only consecutive pairs are visited.
    for t in range(len(trace.steps) - 1):
        step_before = trace.steps[t]
        step_after = trace.steps[t + 1]
        # Fluents that are True, with the first and last character of their
        # serialized form dropped.
        fluents_in_state1 = {f._serialize()[1:-1] for f in step_before.state if step_before.state[f] is True}
        fluents_in_state2 = {f._serialize()[1:-1] for f in step_after.state if step_after.state[f] is True}
        # Binary vectors in the column order of model.propositions.
        state1 = [1 if p in fluents_in_state1 else 0 for p in model.propositions]
        state2 = [1 if p in fluents_in_state2 else 0 for p in model.propositions]

        # action parameter order may not be the same as our model,
        # so the parameters are re-sorted by their type before building the key
        action = step_before.action
        action_obj_params = sorted(list(action.obj_params), key=lambda o: o.obj_type)
        action_key = f"{action.name} {' '.join([o.details()for o in action_obj_params])}"

        steps_action.append(all_grounded_actions[action_key])
        steps_state1.append(state1)
        steps_state2.append(state2)

100%|██████████████████████████████████████████| 10/10 [00:00<00:00, 157.10it/s]


In [24]:
# Stack the collected transitions into tensors: the two state matrices are
# cast to float, the action indices keep the dtype inferred from the array.
steps_state1_tensor = torch.tensor(np.array(steps_state1)).float()
steps_action_tensor = torch.tensor(np.array(steps_action))
steps_state2_tensor = torch.tensor(np.array(steps_state2)).float()

In [25]:
# Loader over (state before, action index, state after) triples, in the
# original order (shuffle=False), with at most batch_sz samples per batch.
batch_sz = 1000
dataset = TensorDataset(steps_state1_tensor, steps_action_tensor, steps_state2_tensor)
dataloader = DataLoader(dataset, batch_size=batch_sz, shuffle=False)

## Training

In [26]:
# One Adam parameter group per action schema, each with learning rate 1e-3.
parameters = [
    {'params': schema.parameters(), 'lr': 1e-3}
    for schema in model.action_schemas
]
optimizer = optim.Adam(parameters)

In [27]:
for epoch in range(100):
    loss_final = 0.0
    for batch_idx, (state_1, executed_actions, state_2) in enumerate(dataloader):
        optimizer.zero_grad()
        precon, addeff, deleff = model.build(executed_actions)

        # Applying a in s gives (s \ Del(a)) U Add(a), i.e. for each proposition p:
        # (p in s and p not in Del(a)) or (p not in s and p in Add(a)).
        # Add effects and preconditions are implicitly disjoint and only
        # preconditions can be deleted, so the two cases exclude each other and
        # the "or" is written as a plain sum.
        preds = state_1*(1-deleff) + (1-state_1)*addeff

        # state_2 is the target of the predicted next state (summed squared error).
        transition_term = F.mse_loss(preds, state_2, reduction='sum')

        # Validity: executed actions were applicable in state_1, so a
        # precondition must not be set where state_1 is 0.
        validity_constraint = (1-state_1) * (precon)
        validity_term = F.mse_loss(
            validity_constraint,
            torch.zeros(validity_constraint.shape, dtype=validity_constraint.dtype),
            reduction='sum',
        )

        # Pull of the precondition values towards 1, weighted by 0.2 below.
        precon_term = F.mse_loss(
            precon,
            torch.ones(precon.shape, dtype=precon.dtype),
            reduction='sum',
        )

        loss = transition_term + validity_term + 0.2*precon_term
        loss.backward()
        optimizer.step()
        # Scaled by the nominal batch size.
        loss_final += loss.item() / batch_sz
    if epoch%10 == 0:
        print('Epoch {} RESULTS: Average loss: {:.10f}'.format(epoch, loss_final))

Epoch 0 RESULTS: Average loss: 0.6226490479
Epoch 10 RESULTS: Average loss: 0.5924045410
Epoch 20 RESULTS: Average loss: 0.5525383301
Epoch 30 RESULTS: Average loss: 0.5116485291
Epoch 40 RESULTS: Average loss: 0.4767666321
Epoch 50 RESULTS: Average loss: 0.4681913452
Epoch 60 RESULTS: Average loss: 0.4654275513
Epoch 70 RESULTS: Average loss: 0.4632035828
Epoch 80 RESULTS: Average loss: 0.4623683167
Epoch 90 RESULTS: Average loss: 0.4620728760


In [28]:
# Print each learned action schema, with a blank line between schemas.
for learned_schema in model.action_schemas:
    learned_schema.pretty_print()
    print()

move room a room b
at-robby room a
at-robby room b
at-robby room a

pick ball a gripper b room c
at-robby room c, at ball a room c, free gripper b
carry ball a gripper b
at ball a room c, free gripper b

drop ball a gripper b room c
at-robby room c, carry ball a gripper b
at ball a room c, free gripper b
carry ball a gripper b



# Logistics

## Model

In [109]:
# Logistics type hierarchy; every type ultimately derives from "object".
base = Type("object", None)

# Movable things: packages ("obj") and the two kinds of transport.
movable = Type("movable", base)
obj = Type("obj", movable)
transport = Type("transport", movable)
truck = Type("truck", transport)
airplane = Type("airplane", transport)

# Places: an airport is a specialised location.
location = Type("location", base)
airport = Type("airport", location)

city = Type("city", base)

In [74]:
# Predicates and action schemas map each parameter type to the number of
# parameters of that type.
logistics_predicates = [
    Predicate('at', {movable: 1, location: 1}),
    Predicate('in', {obj: 1, transport: 1}),
    Predicate('in-city', {location: 1, city: 1}),
]
logistics_action_schemas = [
    Action_Schema('load-truck', {obj: 1, truck: 1, location: 1}),
    Action_Schema('load-airplane', {obj: 1, airplane: 1, airport: 1}),
    Action_Schema('unload-truck', {obj: 1, truck: 1, location: 1}),
    Action_Schema('unload-airplane', {obj: 1, airplane: 1, airport: 1}),
    Action_Schema('drive-truck', {truck: 1, location: 2, city: 1}),
    Action_Schema('fly-airplane', {airplane: 1, airport: 2}),
]
model = Domain_Model(logistics_predicates, logistics_action_schemas, device="cpu")

In [75]:
# Objects of the logistics problem, keyed by their most specific type.
# The key order is kept as-is because the grounding loops iterate over it.
objects = {
    location: ["city1-1", "city2-1"],
    city: ["city1", "city2"],
    obj: [f"package{number}" for number in range(1, 7)],
    truck: ["truckred", "trucklime"],
    airplane: ["planeblue", "planeyellow"],
    airport: ["city1-2", "city2-2"],
}


In [76]:
# Ground the lifted model with the concrete objects defined above.
# NOTE(review): presumably this is what populates `model.propositions` used by the data cells — confirm in Domain_Model.
model.ground(objects)

In [77]:
# Assign a unique integer index to every grounded action name of the form
# "<schema> <type> <object> <type> <object> ...", with parameter types taken
# in the order of `action_schema.params_types`.
all_grounded_actions = {}
for action_schema in model.action_schemas:
    # Candidate objects per parameter type, including objects of its subtypes.
    obj_lists_per_params = {
        params_type: [
            candidate
            for obj_type in objects
            if obj_type.is_child(params_type)
            for candidate in objects[obj_type]
        ]
        for params_type in action_schema.params_types
    }
    # For each type choose an ordered tuple of distinct objects, then combine across types.
    choices_per_type = [
        itertools.permutations(obj_lists_per_params[params_type], action_schema.params[params_type])
        for params_type in action_schema.params_types
    ]
    for obj_list in itertools.product(*choices_per_type):
        constructed = action_schema.name + ' ' + ' '.join(
            f'{params_type.name} ' + f' {params_type.name} '.join(chosen)
            for params_type, chosen in zip(action_schema.params_types, obj_list)
        )
        all_grounded_actions[constructed] = len(all_grounded_actions)

In [78]:
# Display the grounded-action name -> index mapping (the whole dict is shown).
all_grounded_actions

{'load-truck location city1-1 obj package1 truck truckred': 0,
 'load-truck location city1-1 obj package1 truck trucklime': 1,
 'load-truck location city1-1 obj package2 truck truckred': 2,
 'load-truck location city1-1 obj package2 truck trucklime': 3,
 'load-truck location city1-1 obj package3 truck truckred': 4,
 'load-truck location city1-1 obj package3 truck trucklime': 5,
 'load-truck location city1-1 obj package4 truck truckred': 6,
 'load-truck location city1-1 obj package4 truck trucklime': 7,
 'load-truck location city1-1 obj package5 truck truckred': 8,
 'load-truck location city1-1 obj package5 truck trucklime': 9,
 'load-truck location city1-1 obj package6 truck truckred': 10,
 'load-truck location city1-1 obj package6 truck trucklime': 11,
 'load-truck location city2-1 obj package1 truck truckred': 12,
 'load-truck location city2-1 obj package1 truck trucklime': 13,
 'load-truck location city2-1 obj package2 truck truckred': 14,
 'load-truck location city2-1 obj package2 

## Data

In [34]:
def logistics_fluents(state):
    """Return the true fluents of a trace state, renamed to the model's proposition format.

    Each serialized fluent (first and last character stripped) is split on
    spaces; the tokens at positions 2 and 4 are taken as the two object names.
    Only 'at', 'in' and 'in-city' fluents whose value is exactly True are kept;
    every other fluent is ignored.
    """
    templates = {
        'at': 'at location {second} movable {first}',
        'in': 'in obj {first} transport {second}',
        'in-city': 'in-city city {second} location {first}',
    }
    fluents = set()
    for f in state:
        if state[f] is True and f.name in templates:
            serialized_list = f._serialize()[1:-1].split(' ')
            fluents.add(templates[f.name].format(first=serialized_list[2], second=serialized_list[4]))
    return fluents


# Single place to change when the PDDL files live somewhere else.
logistics_pddl_dir = '/Users/omarwattad/Documents/Action Model - Research/rosame/data/pddl/logistics'

steps_state1 = []
steps_action = []
steps_state2 = []
traces = generate.pddl.VanillaSampling(
    dom=f'{logistics_pddl_dir}/domain.pddl',
    prob=f'{logistics_pddl_dir}/prob01.pddl',
    plan_len=40,
    num_traces=10,
).traces

for trace in traces:
    # The last step has no action, so every step is paired with its successor.
    for t in range(len(trace.steps)-1):
        fluents_in_state1 = logistics_fluents(trace.steps[t].state)
        fluents_in_state2 = logistics_fluents(trace.steps[t+1].state)
        state1 = [1 if p in fluents_in_state1 else 0 for p in model.propositions]
        state2 = [1 if p in fluents_in_state2 else 0 for p in model.propositions]

        # The trace's parameter order may differ from the model's.
        action = trace.steps[t].action
        action_obj_params = []
        for o in action.obj_params:
            # Non-airplane schemas are declared over plain locations, so an
            # airport argument is relabelled before the key is built.
            # NOTE: this changes the trace's object in place.
            if 'airplane' not in action.name and o.obj_type=='airport':
                o.obj_type = 'location'
            action_obj_params.append(o)
        # Order the arguments by type name.
        action_obj_params = sorted(action_obj_params, key=lambda o:o.obj_type)

        steps_action.append(all_grounded_actions[f"{action.name} {' '.join([o.details()for o in action_obj_params])}"])

        steps_state1.append(state1)
        steps_state2.append(state2)

100%|██████████| 10/10 [00:00<00:00, 18.67it/s]


In [35]:
# Proposition columns that are forced to 1 in both the pre- and post-state.
# NOTE(review): presumably the static in-city facts — confirm against model.propositions.
always_true_columns = [64, 66, 69, 71]

steps_state1_tensor = torch.tensor(np.array(steps_state1)).float()
steps_state2_tensor = torch.tensor(np.array(steps_state2)).float()
for state_tensor in (steps_state1_tensor, steps_state2_tensor):
    state_tensor[:, always_true_columns] = 1.0
steps_action_tensor = torch.tensor(np.array(steps_action))

In [36]:
import json
def tojson(steps_state1_tensor, steps_action_tensor, steps_state2_tensor,
           output_path="/Users/omarwattad/Documents/Action Model - Research/rosame/json_files/labels/logistics.json"):
    """Write aligned (state, action, next state) triplets to a JSON file.

    Args:
        steps_state1_tensor: pre-states; anything with ``tolist()`` returning a list.
        steps_action_tensor: executed actions, aligned with the states.
        steps_state2_tensor: post-states, aligned with the pre-states.
        output_path: destination file; defaults to the original logistics labels path.

    Raises:
        ValueError: if the three inputs do not have the same number of entries.
    """
    steps_state1 = steps_state1_tensor.tolist()
    steps_action = steps_action_tensor.tolist()
    steps_state2 = steps_state2_tensor.tolist()

    # Misaligned inputs would otherwise yield truncated or shifted triplets.
    if not (len(steps_state1) == len(steps_action) == len(steps_state2)):
        raise ValueError(
            "state1, action and state2 must have the same length, got "
            f"{len(steps_state1)}, {len(steps_action)}, {len(steps_state2)}"
        )

    # Each triplet is serialized as a JSON array [state1, action, state2].
    triplets = list(zip(steps_state1, steps_action, steps_state2))

    with open(output_path, "w") as f:
        json.dump(triplets, f)

    print(f"Data successfully written to {output_path}")

# Persist the logistics transitions so later experiments can reload them from JSON.
tojson(steps_state1_tensor,steps_action_tensor,steps_state2_tensor)


Data successfully written to /Users/omarwattad/Documents/Action Model - Research/rosame/json_files/labels/logistics.json


In [37]:
# Transitions are served in their original order, up to `batch_sz` per batch.
batch_sz = 1000
dataset = TensorDataset(
    steps_state1_tensor,
    steps_action_tensor,
    steps_state2_tensor,
)
dataloader = DataLoader(dataset, shuffle=False, batch_size=batch_sz)

In [139]:
# Train on the first `data_count` stored transitions.
# NOTE: the iterable is empty, so this body never runs as written; list the
# dataset sizes to sweep here to enable the experiment.
# NOTE(review): this cell sits in the Logistics section but loads the gripper labels — confirm intended.
for data_count in []:
        # Optional noise injection, currently disabled:
        # for eps in [0.2]:
        #     steps_state1 = add_noise_to_trajectory(steps_state1,eps)
        #     steps_state2 = add_noise_to_trajectory(steps_state2,eps)
        with open("/Users/omarwattad/Documents/Action Model - Research/rosame/json_files/labels/grid_gripper.json", 'r') as f:
            data = json.load(f)

        # Each stored triplet is [state1, action, state2]; keep the first `data_count`.
        selected = data[:data_count]
        steps_action_tensor = torch.tensor([triplet[1] for triplet in selected])
        steps_state2_tensor = torch.tensor([triplet[2] for triplet in selected]).float()
        steps_state1_tensor = torch.tensor([triplet[0] for triplet in selected]).float()
        print(len(steps_action_tensor))
        batch_sz = 1000
        dataset = TensorDataset(steps_state1_tensor, steps_action_tensor, steps_state2_tensor)
        dataloader = DataLoader(dataset, batch_size=batch_sz, shuffle=False)

        #TRAINING
        # One Adam parameter group per action schema.
        parameters = [
            {'params': schema.parameters(), 'lr': 1e-3}
            for schema in model.action_schemas
        ]
        optimizer = optim.Adam(parameters)

        for epoch in range(100):
            loss_final = 0.0
            for i, (state_1, executed_actions, state_2) in enumerate(dataloader):
                # Reset per batch; resetting once per epoch would let gradients
                # accumulate across batches whenever there is more than one.
                optimizer.zero_grad()
                precon, addeff, deleff = model.build(executed_actions)
                # Applying a in s gives (s \ Del(a)) U Add(a). Per proposition p:
                #   (p in s and p not in Del(a)) or (p not in s and p in Add(a)).
                # Implicitly assumes add effects and preconditions do not
                # intersect and that only preconditions can be deleted.
                # The "or" is relaxed here as 1 - (1 - x)(1 - y).
                # Alternative formulation: preds = addeff + (1-addeff)*state_1*(1-deleff)
                preds = 1- (1-state_1*(1-deleff)) * (1-(1-state_1)*addeff)

                # state_2 is treated as the ground-truth target; summed squared error.
                loss = F.mse_loss(preds, state_2, reduction='sum')
                # Validity constraint: executed actions were applicable in state_1, so
                #   p in Pre(a) -> p in state_1, i.e. not (p in Pre(a) and p not in state_1).
                validity_constraint = (1-state_1) * precon
                # *_like keeps the targets on the same device/dtype as the model outputs.
                loss += F.mse_loss(validity_constraint, torch.zeros_like(validity_constraint), reduction='sum')
                # Weak (0.2) pull of every precondition entry towards 1.
                loss += 0.2*F.mse_loss(precon, torch.ones_like(precon), reduction='sum')
                # Optional extra term, currently disabled: loss += model.constraint_loss()
                loss.backward()
                optimizer.step()
                # NOTE: normalised by the nominal batch size, not by the number of samples seen.
                loss_final += loss.item() / batch_sz
            if epoch%10 == 0:
                print('Epoch {} RESULTS: Average loss: {:.10f}'.format(epoch, loss_final))

        # Print the learned schemas, separated by blank lines.
        for action_schema in model.action_schemas:
            action_schema.pretty_print()
            print()

        # TO PDDL



        def to_pddl(write_pddl_to):
            """Assemble the learned domain as PDDL text, print it and write it to a file.

            The text is the header from ``head_pddl("blocksworld")``, one
            ``action_pddl`` block per schema in ``model.action_schemas`` and a
            closing parenthesis. ``pretty_print`` is called on each schema, and
            a dashed separator line is printed after each one.
            NOTE(review): the header domain is hard-coded to "blocksworld" — confirm
            it matches the model being exported.

            Args:
                write_pddl_to: path of the PDDL file to create or overwrite.
            """
            pieces = [head_pddl("blocksworld")]
            for schema in model.action_schemas:
                pre, add, dell, params = schema.pretty_print()
                pieces.append(action_pddl(schema, pre, add, dell, params))
                print("----------------------------")
            pieces.append(")")
            pddl = "".join(pieces)
            print(pddl)
            # Create (or overwrite) the target file with the assembled domain.
            with open(write_pddl_to, "w") as f:
                f.write(pddl)
            print(f"PDDL content successfully written to {write_pddl_to}")

        def action_pddl(schema,pre,add,delete,params):
            """Render one action schema as a PDDL ``(:action ...)`` block.

            Args:
                schema: object whose ``name`` attribute is the action name.
                pre: iterable of lifted precondition atoms.
                add: iterable of lifted add-effect atoms.
                delete: iterable of lifted delete-effect atoms.
                params: mapping from a type object (with a ``name`` attribute)
                    to the variable names of that type.

            Atoms are space-separated strings "predicate type var type var ...";
            only the predicate name and every second token from position 2
            onwards (the variables) are emitted.

            Returns:
                The action definition as a string terminated by a blank line.
            """
            def atom(lifted):
                # "at ball a room c" -> "(at ?a ?c)"; a bare name -> "(name)".
                tokens = lifted.split(" ")
                return f"({tokens[0]}" + "".join(f" ?{var}" for var in tokens[2::2]) + ")"

            declarations = " ".join(
                f"?{param} - {param_type.name}"
                for param_type, names in params.items()
                for param in names
            )
            preconditions = "".join(f" {atom(condition)}" for condition in pre)
            add_effects = "".join(f" {atom(effect)}" for effect in add)
            # Delete effects are the same atoms wrapped in (not ...).
            del_effects = "".join(f" (not {atom(effect)})" for effect in delete)
            return (
                f"(:action {schema.name}\n"
                f"  :parameters ({declarations})\n"
                f"  :precondition (and{preconditions})\n"
                f"  :effect (and{add_effects}{del_effects}))\n\n"
            )

        def head_pddl(domain_name):
            """Return the opening part of a PDDL domain: define line, requirements, types, predicates.

            The requirements line depends on ``domain_name`` and is omitted for
            names that are not listed. The types and predicates sections are
            fixed blocksworld declarations regardless of ``domain_name``.
            """
            requirements_by_domain = {
                "blocksworld": "(:requirements :strips :equality)\n",
                "hanoi": "(:requirements :strips :equality)\n",
                "gripper-strips": "(:requirements :adl :equality)\n",
            }
            sections = [
                f"(define (domain {domain_name}-4ops)\n",  # remove "-4ops"
                requirements_by_domain.get(domain_name, ""),
                "(:types block - object)",
                '(:predicates (arm-empty) (clear ?a - block) (on-table ?a - block) (holding ?a - block) (on ?a - block ?b - block))',
                "\n\n",
            ]
            return "".join(sections)

        # Write the learned domain for this dataset size; the file name is prefixed with `data_count`.
        to_pddl(f'/Users/omarwattad/Documents/Action Model - Research/rosame/final_domains/label/rosame/gripper/{data_count}_domain.pddl')

## Training

In [107]:
# One Adam parameter group per action schema, all with the same learning rate.
parameters = [
    {'params': schema.parameters(), 'lr': 1e-3}
    for schema in model.action_schemas
]
optimizer = optim.Adam(parameters)

In [108]:
# Fit the action schemas to the observed (state, action, next state) triples.
for epoch in range(100):
    loss_final = 0.0
    for i, (state_1, executed_actions, state_2) in enumerate(dataloader):
        # Reset per batch; resetting once per epoch would let gradients
        # accumulate across batches whenever there is more than one.
        optimizer.zero_grad()
        precon, addeff, deleff = model.build(executed_actions)
        # Applying a in s gives (s \ Del(a)) U Add(a). Per proposition p this is
        #   (p in s and p not in Del(a)) or (p not in s and p in Add(a)).
        # Implicitly assumes add effects and preconditions do not intersect
        # and that only preconditions can be deleted.
        # The "or" is relaxed here as 1 - (1 - x)(1 - y).
        # Alternative formulation: preds = addeff + (1-addeff)*state_1*(1-deleff)
        preds = 1- (1-state_1*(1-deleff)) * (1-(1-state_1)*addeff)

        # state_2 is treated as the ground-truth target; the error is the summed
        # squared difference. (Binary cross-entropy, or KL divergence if state_2
        # were itself predicted, would be alternatives.)
        loss = F.mse_loss(preds, state_2, reduction='sum')
        # Validity constraint: executed actions were applicable in state_1, so
        #   p in Pre(a) -> p in state_1, i.e. not (p in Pre(a) and p not in state_1).
        validity_constraint = (1-state_1) * precon
        # *_like keeps the targets on the same device/dtype as the model outputs.
        loss += F.mse_loss(validity_constraint, torch.zeros_like(validity_constraint), reduction='sum')
        # Optional extra term, currently disabled: loss += model.constraint_loss()
        # Weak (0.2) pull of every precondition entry towards 1.
        loss += 0.2*F.mse_loss(precon, torch.ones_like(precon), reduction='sum')
        loss.backward()
        optimizer.step()
        # NOTE: normalised by the nominal batch size, not by the number of samples seen.
        loss_final += loss.item() / batch_sz
    if epoch%10 == 0:
        print('Epoch {} RESULTS: Average loss: {:.10f}'.format(epoch, loss_final))

Epoch 0 RESULTS: Average loss: 1.3818857422
Epoch 10 RESULTS: Average loss: 1.3575368652
Epoch 20 RESULTS: Average loss: 1.3199255371
Epoch 30 RESULTS: Average loss: 1.2863734131
Epoch 40 RESULTS: Average loss: 1.2698570557
Epoch 50 RESULTS: Average loss: 1.2647332764
Epoch 60 RESULTS: Average loss: 1.2626745605
Epoch 70 RESULTS: Average loss: 1.2613405762
Epoch 80 RESULTS: Average loss: 1.2604610596
Epoch 90 RESULTS: Average loss: 1.2601466064


In [140]:
# Print each learned action schema, with a blank line between schemas.
for learned_schema in model.action_schemas:
    learned_schema.pretty_print()
    print()

move room a room b
at-robby room a, at-robby room b



pick ball a gripper b room c

at-robby room c, at ball a room c, free gripper b, carry ball a gripper b


drop ball a gripper b room c
at-robby room c, at ball a room c, free gripper b, carry ball a gripper b

at-robby room c, at ball a room c, free gripper b, carry ball a gripper b

