# TODO: VirtualHome (VH) setup

# Setup

In [1]:
!git clone https://github.com/szy4806384/lm_on_decison_making
%cd ./lm_on_decison_making 
!pip install transformers

Cloning into 'lm_on_decison_making'...
remote: Enumerating objects: 200, done.[K
remote: Counting objects: 100% (65/65), done.[K
remote: Compressing objects: 100% (51/51), done.[K
remote: Total 200 (delta 28), reused 31 (delta 12), pack-reused 135[K
Receiving objects: 100% (200/200), 30.89 MiB | 11.77 MiB/s, done.
Resolving deltas: 100% (93/93), done.
/content/lm_on_decison_making
Collecting transformers
  Downloading transformers-4.18.0-py3-none-any.whl (4.0 MB)
[K     |████████████████████████████████| 4.0 MB 12.2 MB/s 
Collecting pyyaml>=5.1
  Downloading PyYAML-6.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (596 kB)
[K     |████████████████████████████████| 596 kB 41.5 MB/s 
[?25hCollecting sacremoses
  Downloading sacremoses-0.0.49-py3-none-any.whl (895 kB)
[K     |████████████████████████████████| 895 kB 45.9 MB/s 
[?25hCollecting tokenizers!=0.11.3,<0.13,>=0.11.1
  Downloading tokenizers-0.12.1-cp37-cp37m-manylinux_2

In [94]:
from google.colab import drive
import os
import torch
import numpy as np
from torch.utils.data import Dataset, DataLoader, IterableDataset
from transformers import GPT2Tokenizer, GPT2Model
from train_utils_mlp import *
import collections
import warnings

# Suppress every Python warning for the rest of the session.
warnings.filterwarnings('ignore')

# Mount Google Drive under /content/drive (requires the google.colab runtime).
drive.mount('/content/drive')
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [95]:
#%matplotlib notebook
#%cd demo
import IPython.display
import glob
from virtualhome.demo.utils_demo import *
from sys import platform
import sys
from PIL import Image
import matplotlib.pyplot as plt
from tqdm import tqdm
# sys.path.append('../simulation')
from virtualhome.simulation.unity_simulator.comm_unity import UnityCommunication
from virtualhome.simulation.unity_simulator import utils_viz

In [96]:
def create_character(character, room_name):
    """Add a character to the simulator and return its graph node id.

    Args:
        character: Character resource name forwarded to ``comm.add_character``.
        room_name: Forwarded as ``initial_room`` to ``comm.add_character``.

    Returns:
        The ``id`` of the last node in the environment graph whose
        ``class_name`` is ``'character'`` (presumably the one just added).

    Raises:
        IndexError: If the graph contains no character node.
    """
    comm.add_character(character, initial_room=room_name)
    _, graph = comm.environment_graph()
    character_ids = []
    for node in graph['nodes']:
        if node['class_name'] == 'character':
            character_ids.append(node['id'])
    return character_ids[-1]

def get_rooms_id(env_graph):
    """Return the set of ids of all graph nodes whose category is ``'Rooms'``."""
    return {node['id'] for node in env_graph['nodes'] if node['category'] == 'Rooms'}

def get_visible_objects_id(character_index , env_graph):
    """Return the ids of the objects that share the character's room.

    The character's room is the target of its last ``INSIDE`` edge. Every
    other node with an ``INSIDE`` edge to that room counts as visible; the
    character itself is excluded.

    Raises:
        IndexError: If the character has no ``INSIDE`` edge.
    """
    edges = env_graph['edges']
    containers_of_character = [
        e['to_id'] for e in edges
        if e['from_id'] == character_index and e['relation_type'] == 'INSIDE'
    ]
    current_room = containers_of_character[-1]
    return {
        e['from_id'] for e in edges
        if e['to_id'] == current_room
        and e['relation_type'] == 'INSIDE'
        and e['from_id'] != character_index
    }

def get_observations(character_index, visible_objects_id_set, env_graph):
    """Build fixed-size observation lists for the visible objects.

    For every graph node whose id is in ``visible_objects_id_set`` this records:

    * its id and class name,
    * a 6-slot state vector: slots are indexed through the global
      ``state_index``; the last slot is set when the node has no states,
    * a 6-value position: the node's absolute position followed by its offset
      from the character's position.

    Each list is then padded to, and truncated at, 256 entries. Padding entries
    are ``-1`` / ``"None"`` / all-zero vectors.

    Returns:
        dict with keys ``'ids'``, ``'names'``, ``'states'``, ``'positions'``.
    """
    character_node = [n for n in env_graph['nodes'] if n['id'] == character_index][0]
    character_position = character_node['obj_transform']['position']

    ids, names, states, positions = [], [], [], []
    for node in env_graph['nodes']:
        if node['id'] not in visible_objects_id_set:
            continue

        state_vector = [0] * 6
        if not node['states']:
            # No state at all is encoded in the last slot.
            state_vector[-1] = 1
        for state_name in node['states']:
            state_vector[state_index[state_name]] += 1

        absolute = node['obj_transform']['position']
        relative = np.array(absolute) - np.array(character_position)

        ids.append(node['id'])
        names.append(node['class_name'])
        states.append(state_vector)
        positions.append(absolute + list(relative))

    # Pad to 256 (at most 255 nodes per observation, with index 0 at training
    # time being the null object).
    missing = max(0, 256 - len(ids))
    ids.extend([-1] * missing)
    names.extend(["None"] * missing)
    states.extend([0, 0, 0, 0, 0, 0] for _ in range(missing))
    positions.extend([0, 0, 0, 0, 0, 0] for _ in range(missing))

    return {
        'ids': ids[:256],
        'names': names[:256],
        'states': states[:256],
        'positions': positions[:256],
    }


def get_relationships(character_index, observations, env_graph):
    """Collect the relationships that constrain which actions are valid.

    Args:
        character_index: Graph id of the acting character.
        observations: Dict as returned by ``get_observations``; the ``'ids'``,
            ``'names'`` and ``'states'`` sequences are read and must be aligned.
        env_graph: Environment graph with an ``'edges'`` list.

    Returns:
        dict of lists of object ids:

        * ``'sitting'`` / ``'close'`` / ``'hold'``: observed objects the
          character has a SITTING / CLOSE / HOLDS_RH or HOLDS_LH edge to,
        * ``'inside'``: observed objects that are INSIDE an observed object
          (not the character's room) which has the ``CONTAINERS`` property and
          whose CLOSED state slot (index 1) is set.

    Raises:
        IndexError: If the character has no ``INSIDE`` edge.
    """
    room_id = [edge['to_id'] for edge in env_graph['edges'] if edge['from_id'] == character_index and edge['relation_type'] == 'INSIDE'][-1]

    # Map every observed id to its first slot. The previous implementation
    # tested membership against the dict keys and used np.where on a Python
    # list, so the 'inside' branch could never fire.
    slot_of = {}
    for slot, object_id in enumerate(observations['ids']):
        slot_of.setdefault(object_id, slot)

    sitting_edges = set()
    close_edges = set()
    inside_edges = set()
    hold_edges = set()
    for edge in env_graph['edges']:
        from_id, to_id = edge['from_id'], edge['to_id']
        relation = edge['relation_type']

        if from_id == character_index and to_id in slot_of:
            if relation == 'SITTING':
                sitting_edges.add(to_id)
            elif relation == 'CLOSE':
                close_edges.add(to_id)
            elif relation in ('HOLDS_RH', 'HOLDS_LH'):
                hold_edges.add(to_id)

        if (relation == 'INSIDE' and to_id != room_id
                and from_id in slot_of and to_id in slot_of):
            container_slot = slot_of[to_id]
            container_name = observations['names'][container_slot]
            # Names without a properties entry are treated as non-containers.
            is_container = 'CONTAINERS' in properties.get(container_name, [])
            is_closed = observations['states'][container_slot][1] == 1
            if is_container and is_closed:
                inside_edges.add(from_id)

    return {'sitting': list(sitting_edges),
            'close': list(close_edges),
            'inside': list(inside_edges),
            'hold': list(hold_edges)}

In [97]:
# 'auto' hands UnityCommunication a platform-specific executable path
# (presumably so it starts the simulator itself); any other mode builds the
# client with its default arguments.
mode = 'manual'
if mode != 'auto':
    comm = UnityCommunication()
else:
    exec_file = ('../simulation/macos_exec' if platform == 'darwin'
                 else '../simulation/exec_linux.x86_64')
    comm = UnityCommunication(file_name=exec_file)

In [98]:
import json

# Slot of each object state inside the 6-element state vector built by
# get_observations (the sixth slot is used there for "no state").
state_index = {'OPEN': 0, 'CLOSED': 1, 'ON': 2, 'OFF': 3, 'CLEAN':4}
room = {'bathroom', 'kitchen', 'livingroom', 'bedroom'}

# NOTE(review): absolute local path - adjust when running on another machine.
# The context manager closes the file once it has been parsed.
with open('D:/code/virtualhome/resources/properties_data_all.json') as properties_file:
    properties = json.load(properties_file)
# Rooms get an empty property list so that lookups by room name succeed.
properties.update({room_name: [] for room_name in room})

In [99]:
#define tokenizer and embedders
# get_tokenizer / get_word_embeddings are not defined in this notebook
# (presumably provided by the star import of train_utils_mlp - TODO confirm).
pretrained_lm = 'gpt2'
tokenizer = get_tokenizer(pretrained_lm)
# Reuse the end-of-sequence token as the padding token.
tokenizer.pad_token = tokenizer.eos_token
# Keep the embedding module on the device selected above.
word_embeddings = get_word_embeddings(pretrained_lm).to(device)

# Test

In [102]:
# Build the decision model with its default constructor arguments
# (LMDecisionMaker is presumably provided by train_utils_mlp - TODO confirm).
model = LMDecisionMaker()

In [103]:
# Checkpoint location (absolute path on the author's machine).
model_path = 'D:/code/github_repo/lm_on_decison_making/model_4_mlp.p'
# NOTE(review): the weights are mapped to the CPU and the model is never moved
# to `device`, while word_embeddings is - confirm both end up on the same
# device when CUDA is available.
model.load_state_dict(torch.load(model_path, map_location=torch.device('cpu')))
# Switch to inference mode; as the last expression this also displays the model.
model.eval()

LMDecisionMaker(
  (state_fc): Linear(in_features=6, out_features=32, bias=True)
  (position_fc1): Linear(in_features=6, out_features=16, bias=True)
  (relu): ReLU(inplace=True)
  (position_fc2): Linear(in_features=16, out_features=32, bias=True)
  (name_fc): Linear(in_features=768, out_features=32, bias=True)
  (observation_fc): Linear(in_features=96, out_features=768, bias=True)
  (action_fc): Linear(in_features=128, out_features=8, bias=True)
  (char_fc): Linear(in_features=128, out_features=2, bias=True)
  (object1_fc): Linear(in_features=640, out_features=512, bias=True)
  (object2_fc): Linear(in_features=640, out_features=512, bias=True)
  (lm): GPT2Model(
    (wte): Embedding(50257, 768)
    (wpe): Embedding(1024, 768)
    (drop): Dropout(p=0.1, inplace=False)
    (h): ModuleList(
      (0): GPT2Block(
        (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (attn): GPT2Attention(
          (c_attn): Conv1D()
          (c_proj): Conv1D()
          (attn_drop

In [104]:
# Directory handed to VHDataloader (absolute path on the author's machine).
data_path = 'D:/code/github_repo/lm_on_decison_making/train_data'
data_loader = VHDataloader(data_path, batch_size = 1)
# Sanity check: print the 'relationships' entry of the first three batches.
for index, data in zip(np.arange(3), data_loader):
    print(data['relationships'])

[[{'sitting': [], 'close': [256, 257, 262, 263, 264, 266, 269, 271, 272, 273, 274, 277, 278, 279, 280, 281, 282, 283, 284, 286, 319, 320, 231, 232, 233, 253, 254, 255], 'inside': [], 'hold': []}, {'sitting': [], 'close': [449, 453, 391, 392, 393, 427, 397, 398, 399, 400, 370, 373, 409, 410], 'inside': [], 'hold': []}]]
[[{'sitting': [], 'close': [199, 200, 201, 204, 113, 179, 183, 184, 190], 'inside': [], 'hold': []}, {'sitting': [], 'close': [449, 453, 391, 392, 393, 427, 397, 398, 399, 400, 370, 373, 409, 410], 'inside': [], 'hold': []}]]
[[{'sitting': [], 'close': [128, 132, 133, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 149, 151, 156, 158, 159, 160, 161, 162, 166, 168, 169, 170, 179, 202, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127], 'inside': [], 'hold': []}, {'sitting': [], 'close': [449, 453, 391, 392, 393, 427, 397, 398, 399, 400, 370, 373, 409, 410], 'inside': [], 'hold': []}]]


In [105]:
# Past-tense wording for every action verb. Two-object verbs map to a
# [verb, preposition] pair.
verb_trans = {
    'walk': 'walked to',
    'grab': 'grabbed',
    'open': 'opened',
    'close': 'closed',
    'switchon': 'switched on',
    'switchoff': 'switched off',
    'put': ['put', 'on'],
    'putin': ['put', 'inside'],
}


def history_translate(raw_history):
    """Turn raw action predicates into an English history string.

    Each entry looks like ``'<char0> [grab] <coffeepot> (161)'`` and is split
    on single spaces. Entries with 2, 4 or 6 tokens become one sentence each;
    entries of any other length are skipped. The agent number is the
    second-to-last character of the first token.

    Returns:
        The sentences separated by single spaces, or ``''`` for an empty or
        missing history.
    """
    if not raw_history:
        return ''

    sentences = []
    for entry in raw_history:
        tokens = entry.split(' ')
        token_count = len(tokens)
        if token_count not in (2, 4, 6):
            continue
        agent = tokens[0][-2]
        wording = verb_trans[tokens[1][1: -1]]
        if token_count == 2:
            sentences.append('Agent{} {}. '.format(agent, wording))
        elif token_count == 4:
            sentences.append('Agent{} {} the {}. '.format(agent, wording, tokens[2][1: -1]))
        else:
            sentences.append('Agent{} {} the {} {} the {}. '.format(
                agent, wording[0], tokens[2][1: -1], wording[1], tokens[4][1: -1]))

    # Every sentence ends with '. '; drop the final trailing space.
    return ''.join(sentences)[: -1]

history = ['<char0> [walk] <coffeepot> (161)', '<char0> [grab] <coffeepot> (161)']
history_translate(history)

'Agent0 walked to the coffeepot. Agent0 grabbed the coffeepot.'

In [106]:
# define the functions to add constraints to the output
# Maps an action id to its verb: ids 0-5 take one object, ids 6-7 take two
# (see how parse_prediction branches on the id).
action_space = {0: 'walk', 1:'grab', 2:'open', 3:'close', 4:'switchon', 5:'switchoff', 6:'put', 7: 'putin'}

def judge_walk(sitting_edges, inside_edges, hold_edges, obj, index):
    """Return True when walking to object ``index`` (named ``obj``) is allowed.

    Walking is allowed when nothing is being sat on, the target is not held,
    the target is a real object (neither ``'character'`` nor the ``'None'``
    padding entry), and the target is either not listed in ``inside_edges``
    or is a room.
    """
    nothing_sat_on = not len(sitting_edges)
    not_enclosed = index not in inside_edges
    is_room = obj in room
    not_held = index not in hold_edges
    real_target = obj not in ('character', 'None')
    return nothing_sat_on and (not_enclosed or is_room) and not_held and real_target

def judge_grab(close_edges, inside_edges, hold_edges, obj, index):
    """Return True when object ``index`` (named ``obj``) may be grabbed.

    Requires a ``GRABBABLE`` entry in ``properties``, a name other than
    ``'water'``, the object being close, not listed in ``inside_edges``, not
    already held, and fewer than four objects held in total.
    """
    checks = (
        obj in properties and 'GRABBABLE' in properties[obj],
        obj != 'water',
        index in close_edges,
        index not in inside_edges,
        index not in hold_edges,
        len(hold_edges) < 4,
    )
    return all(checks)

def judge_open(obs_states, close_edges, inside_edges, hold_edges, obj, index, loc):
    """Return True when object ``index`` (named ``obj``) may be opened.

    Args:
        obs_states: Per-object state vectors; ``obs_states[loc][1]`` is the
            CLOSED slot.
        close_edges, inside_edges, hold_edges: Relationship id collections.
        obj: Class name, looked up in the global ``properties``.
        index: Graph id of the object.
        loc: Position of the object inside ``obs_states``.

    The object must have ``CAN_OPEN``, currently be closed, be close to the
    agent, not be listed in ``inside_edges``, not be held, and fewer than four
    objects may be held.
    """
    can_open = obj in properties and 'CAN_OPEN' in properties[obj]
    # Compare, do not assign: the old `= 1` overwrote the observed state and
    # made this condition always true.
    is_closed = obs_states[loc][1] == 1
    return bool(
        can_open
        and is_closed
        and index in close_edges
        and index not in inside_edges
        and index not in hold_edges
        and len(hold_edges) < 4
    )

def judge_close(obs_states, close_edges, inside_edges, hold_edges, obj, index, loc):
    """Return True when object ``index`` (named ``obj``) may be closed.

    Args:
        obs_states: Per-object state vectors; ``obs_states[loc][0]`` is the
            OPEN slot.
        close_edges, inside_edges, hold_edges: Relationship id collections.
        obj: Class name, looked up in the global ``properties``.
        index: Graph id of the object.
        loc: Position of the object inside ``obs_states``.

    The object must have ``CAN_OPEN``, currently be open, be close to the
    agent, not be listed in ``inside_edges``, not be held, and fewer than four
    objects may be held.
    """
    can_open = obj in properties and 'CAN_OPEN' in properties[obj]
    # Compare, do not assign: the old `= 1` overwrote the observed state and
    # made this condition always true.
    is_open = obs_states[loc][0] == 1
    return bool(
        can_open
        and is_open
        and index in close_edges
        and index not in inside_edges
        and index not in hold_edges
        and len(hold_edges) < 4
    )

def judge_switchon(obs_states, close_edges, obj, index, loc):
    """Return True when object ``index`` (named ``obj``) may be switched on.

    The object must have ``HAS_SWITCH`` in ``properties``, currently be off
    (``obs_states[loc][3]`` is the OFF slot) and be close to the agent.
    """
    has_switch = obj in properties and 'HAS_SWITCH' in properties[obj]
    # Compare, do not assign: the old `= 1` overwrote the observed state and
    # made this condition always true.
    is_off = obs_states[loc][3] == 1
    return bool(has_switch and is_off and index in close_edges)

def judge_switchoff(obs_states, close_edges, obj, index, loc):
    """Return True when object ``index`` (named ``obj``) may be switched off.

    The object must have ``HAS_SWITCH`` in ``properties``, currently be on
    (``obs_states[loc][2]`` is the ON slot) and be close to the agent.
    """
    has_switch = obj in properties and 'HAS_SWITCH' in properties[obj]
    # Compare, do not assign: the old `= 1` overwrote the observed state and
    # made this condition always true.
    is_on = obs_states[loc][2] == 1
    return bool(has_switch and is_on and index in close_edges)

def judge_put(hold_edges, close_edges, index1, index2):
    """Return True when held object ``index1`` can be put on nearby object ``index2``."""
    is_held = index1 in hold_edges
    target_is_close = index2 in close_edges
    return is_held and target_is_close

def judge_putin(obs_states, hold_edges, close_edges, index1, index2, loc):
    """Return True when held object ``index1`` can be put inside object ``index2``.

    ``index1`` must be held, ``index2`` must be close, and the state vector at
    ``obs_states[loc]`` must not have its CLOSED slot (index 1) set.
    """
    checks = (
        index1 in hold_edges,
        index2 in close_edges,
        obs_states[loc][1] != 1,
    )
    return all(checks)

def judge_1(action, obj, index, relationships, obs_states, loc):
    """Check a single-object action against the environment constraints.

    Args:
        action: One of 'walk', 'grab', 'open', 'close', 'switchon', 'switchoff'.
        obj: Class name of the target object.
        index: Graph id of the target object.
        relationships: Dict with 'sitting', 'close', 'inside' and 'hold' id
            collections.
        obs_states: Per-object state vectors.
        loc: Position of the target inside ``obs_states``.

    Returns:
        The verdict of the matching ``judge_*`` function, or False for an
        action this function does not handle.
    """
    sitting_edges = relationships['sitting']
    close_edges = relationships['close']
    inside_edges = relationships['inside']
    hold_edges = relationships['hold']
    if action == 'walk':
        return judge_walk(sitting_edges, inside_edges, hold_edges, obj, index)
    if action == 'grab':
        return judge_grab(close_edges, inside_edges, hold_edges, obj, index)
    if action == 'open':
        return judge_open(obs_states, close_edges, inside_edges, hold_edges, obj, index, loc)
    if action == 'close':
        # The verdict used to be dropped here (missing `return`), so 'close'
        # was never accepted.
        return judge_close(obs_states, close_edges, inside_edges, hold_edges, obj, index, loc)
    if action == 'switchon':
        return judge_switchon(obs_states, close_edges, obj, index, loc)
    if action == 'switchoff':
        return judge_switchoff(obs_states, close_edges, obj, index, loc)
    return False

def judge_2(action, index1, index2, relationships, obs_states, loc):
    """Check a two-object action ('put' / 'putin') against the constraints.

    Args:
        action: 'put' or 'putin'.
        index1: Graph id of the object being placed.
        index2: Graph id of the destination object.
        relationships: Dict with at least 'close' and 'hold' id collections.
        obs_states: Per-object state vectors.
        loc: Position of the destination object inside ``obs_states``.

    Returns:
        The verdict of ``judge_put`` / ``judge_putin``, or False for an action
        this function does not handle.
    """
    close_edges = relationships['close']
    hold_edges = relationships['hold']
    if action == 'put':
        return judge_put(hold_edges, close_edges, index1, index2)
    if action == 'putin':
        # The verdict used to be dropped here (missing `return`), so 'putin'
        # was never accepted.
        return judge_putin(obs_states, hold_edges, close_edges, index1, index2, loc)
    return False

In [107]:
# define the function to give the predicate that is valid with constraints
action_index = {'walk': 0, 'grab': 1, 'open': 2, 'close': 3, 'switchon': 4, 'switchoff': 5, 'put': 6, 'putin': 7}
def parse_prediction(data, prediction, last_predicate):
    """Return the first predicted predicate that satisfies the constraints.

    Candidate actions and objects are tried in the order they appear in
    ``prediction`` (presumably ranked best-first by the model - TODO confirm).
    A candidate is accepted when ``judge_1`` (action ids 0-5) or ``judge_2``
    (ids 6 and above) approves it and it differs from ``last_predicate``.

    Args:
        data: Batch dict; reads ``obs_names`` (space-joined names),
            ``obs_ids``, ``obs_states`` and the two per-character
            ``relationships`` dicts of the first sample.
        prediction: Dict with ``'char'``, ``'action'``, ``'object1'`` and
            ``'object2'`` tensors; only the first sample is used.
        last_predicate: Predicate chosen on the previous step; never repeated.

    Returns:
        A string such as ``'<char0> [grab] <apple> (11)'`` or
        ``'<char0> [put] <apple> (11) <table> (12)'``, or ``None`` when no
        candidate is valid. Callers must handle ``None``.
    """
    def as_int(value):
        # Scalar tensor -> Python int.
        return int(value.cpu().numpy())

    char_prediction = as_int(prediction['char'][0][0])
    action_prediction = prediction['action'][0]
    # Loop-invariant work is done once here instead of inside the nested loops.
    object_locs1 = [as_int(t) for t in prediction['object1'][0]]
    object_locs2 = [as_int(t) for t in prediction['object2'][0]]
    obs_names = data['obs_names'][0].split(' ')
    obs_ids = data['obs_ids'].cpu().numpy()[0]
    obs_states = data['obs_states'].cpu().numpy()[0]

    # Merge the relationships of both characters into one set per relation.
    relationships1 = data['relationships'][0][0]
    relationships2 = data['relationships'][0][1]
    relationships = {
        key: set(relationships1[key] + relationships2[key])
        for key in relationships1.keys()
    }

    for action_tensor in action_prediction:
        action_id = as_int(action_tensor)
        action = action_space[action_id]
        if 0 <= action_id <= 5:
            for loc in object_locs1:
                obj = obs_names[loc]
                index = obs_ids[loc]
                if judge_1(action, obj, index, relationships, obs_states, loc):
                    predicate = '<char{}> [{}] <{}> ({})'.format(char_prediction, action, obj, index)
                    if predicate != last_predicate:
                        return predicate

        elif action_id >= 6:
            for loc1 in object_locs1:
                obj1 = obs_names[loc1]
                index1 = obs_ids[loc1]
                for loc2 in object_locs2:
                    obj2 = obs_names[loc2]
                    index2 = obs_ids[loc2]
                    if judge_2(action, index1, index2, relationships, obs_states, loc2):
                        predicate = '<char{}> [{}] <{}> ({}) <{}> ({})'.format(char_prediction, action, obj1, index1, obj2, index2)
                        if predicate != last_predicate:
                            return predicate

    return None

In [108]:
# get the key action used to judge if the task is completed
def get_key_action(raw_goals):
    """Extract the actions whose execution marks the goal as achieved.

    ``raw_goals`` is a ``', '``-separated list of sub-goals of the forms
    ``'put X on the Y'``, ``'put X inside the Y'`` and ``'grab the X'``.

    Returns:
        A set of key-action strings: ``'[put] <X> <Y>'``, ``'[putin] <X> <Y>'``
        or ``'[grab] <X>'``. Sub-goals that match none of the forms are
        skipped. Previously they added an empty or stale entry, and an empty
        entry can never be matched by an executed action.
    """
    key_actions = set()
    for sub_goal in raw_goals.split(', '):
        words = sub_goal.split(' ')
        verb = words[0]
        # Length guards keep malformed sub-goals from raising IndexError.
        if verb == 'put' and len(words) >= 5 and words[2] == 'inside':
            key_actions.add('[putin] <{}> <{}>'.format(words[1], words[4]))
        elif verb == 'put' and len(words) >= 5 and words[2] == 'on':
            key_actions.add('[put] <{}> <{}>'.format(words[1], words[4]))
        elif verb == 'grab' and len(words) >= 3:
            key_actions.add('[grab] <{}>'.format(words[2]))

    return key_actions

goals = 'put clothespants on the towelrack, put clothesshirt inside the clothespile, grab the hanger, grab the plate'
get_key_action(goals)

{'[grab] <hanger>',
 '[grab] <plate>',
 '[put] <clothespants> <towelrack>',
 '[putin] <clothesshirt> <clothespile>'}

In [None]:
# Closed-loop evaluation: for each distinct goal from the data loader, reset
# the simulator, let the model act for at most `maximum_steps` steps, and
# count the case as solved once every key action of the goal was executed.
epoch = 10000          # upper bound on the number of batches drawn
success_rate = 0       # number of solved cases (turned into a rate when reported)
trajectory_loss = 0    # accumulated per-case loss (averaged when reported)
maximum_steps = 20

last_goal = ''
with torch.no_grad():
    case = 1
    for ind, data in zip(np.arange(epoch), data_loader):
        goal = data['goal'][0]
        # Skip consecutive batches with the same goal before touching the
        # simulator, so no reset is wasted on them.
        if goal == last_goal:
            continue
        print(goal)
        last_goal = goal
        key_actions = get_key_action(goal)

        history = []
        comm.reset(1)
        character_ids = [create_character('chars/Female2', 'kitchen'), create_character('chars/Male2', 'livingroom')]

        num = 1
        last_predicate = ''
        # Stop as soon as all key actions are done. Completion used to be
        # detected one iteration late, so a goal finished on the last allowed
        # step was reported as a failure.
        while key_actions and num <= maximum_steps:
            _, graph = comm.environment_graph()
            rooms_id = get_rooms_id(graph)

            total_partial_observations = collections.defaultdict(list)
            total_object_relationships = []
            for i in range(len(character_ids)): # iterate over both characters
                visible_objects_id = get_visible_objects_id(character_ids[i], graph).union(rooms_id)
                partial_observation = get_observations(character_ids[i], visible_objects_id, graph)
                for k in partial_observation:
                    total_partial_observations[k] += partial_observation[k]
                total_object_relationships += [get_relationships(character_ids[i], partial_observation, graph)]

            # to numpy (names stay a list of strings)
            for k in total_partial_observations:
                if k == 'names': continue
                total_partial_observations[k] = np.array(total_partial_observations[k])

            # Labels are unknown at test time; -1 is used as a placeholder.
            char_label = np.array([-1])
            action_label = np.array(([-1]))
            object_label = np.array([-1, -1])

            data['history'] = [history_translate(history)]
            for key in total_partial_observations:
                if key == 'names': continue
                data['obs_{}'.format(key)] = torch.tensor(np.expand_dims(total_partial_observations[key], 0))
            data['obs_names'] = [" ".join(list(total_partial_observations['names']))]
            data['relationships'] = [total_object_relationships]
            data['char_label'] = torch.tensor(np.expand_dims(char_label, 0))
            data['action_label'] = torch.tensor(np.expand_dims(action_label, 0))
            data['object_label'] = torch.tensor(np.expand_dims(object_label, 0))

            # The text fields are saved and restored around preprocess
            # (presumably because it overwrites them in place - TODO confirm).
            temp_goal = data['goal'].copy()
            temp_history = data['history'].copy()
            temp_obs_names = data['obs_names'].copy()

            sample = preprocess(data, tokenizer, word_embeddings)
            prediction = model(**sample, mode='test')

            data['goal'] = temp_goal
            data['history'] = temp_history
            data['obs_names'] = temp_obs_names
            next_predicate = parse_prediction(data, prediction, last_predicate)
            print(next_predicate)
            if next_predicate is None:
                # No candidate satisfies the constraints: give up on this case
                # instead of crashing on None.split().
                break
            last_predicate = next_predicate

            # Rebuild the key-action form, e.g. '[grab] <plate>' or
            # '[put] <a> <b>', from the predicate tokens.
            split_next_predicate = next_predicate.split(' ')
            if len(split_next_predicate) == 4:
                action = '{} {}'.format(split_next_predicate[1], split_next_predicate[2])
            else:
                action = '{} {} {}'.format(split_next_predicate[1], split_next_predicate[2], split_next_predicate[4])
            key_actions.discard(action)

            history.append(next_predicate)
            comm.render_script([next_predicate], recording=False, skip_animation=True)

            num += 1

        if not key_actions:
            print('{} case succeeds'.format(case))
            success_rate += 1
            # NOTE(review): 15 is an undocumented constant - confirm its meaning.
            trajectory_loss += (1 - 15 / num)
        else:
            print('{} case fails'.format(case))
            success_rate += 0
            trajectory_loss += 1

        case += 1

    # `case` started at 1, so the number of evaluated cases is case - 1.
    # Dividing by `case`, and reporting `epoch` as the case count, skewed the
    # summary.
    evaluated_cases = case - 1
    if evaluated_cases > 0:
        print('test case number: {}, success rate is {}, trajectory loss is {}'.format(evaluated_cases, success_rate / evaluated_cases, trajectory_loss / evaluated_cases))
    else:
        print('no test cases were evaluated')