In [4]:
"""Finetune the sentence transformer model on a custom dataset."""
from sentence_transformers import InputExample
from sentence_transformers import SentenceTransformer
from torch.utils.data import DataLoader
from sentence_transformers import losses
from datasets import load_dataset
import torch
import sys
import json, os
from pathlib import Path
import glob
import pandas as pd
    
# Fine-tune a sentence-transformer on (Task, Function) text pairs.
#
# NOTE: the model is loaded from the same directory that `model.fit` saves to
# below, so re-running this cell continues training from the previously saved
# weights. Swap in the commented-out base model to start from scratch.
# model = SentenceTransformer("embedding-data/distilroberta-base-sentence-transformer")
model = SentenceTransformer("./finetuned_model")

# to merge all the mini-datasets we created
csv_file_path = "combined_data.csv"
# csv_file_path = "data.csv"
dataset = load_dataset("csv", data_files=csv_file_path)

train_data = dataset["train"]
n_examples = train_data.num_rows  # can change number of examples for faster training

# One InputExample per CSV row, pairing the "Task" text with its "Function" text.
# Rows are visited once each via `select(...)` instead of being fetched twice by
# index (`train_data[i]["Task"]`, `train_data[i]["Function"]`).
train_examples = [
    InputExample(texts=[row["Task"], row["Function"]])
    for row in train_data.select(range(n_examples))
]

train_dataloader = DataLoader(train_examples, shuffle=True, batch_size=64)
train_loss = losses.MultipleNegativesRankingLoss(model=model)
num_epochs = 10
# len(train_dataloader) is the number of batches per epoch, so this is 10% of the
# total number of training steps (not 10% of the training data).
warmup_steps = int(len(train_dataloader) * num_epochs * 0.1)


model.fit(
    train_objectives=[(train_dataloader, train_loss)],
    epochs=num_epochs,
    warmup_steps=warmup_steps,
    output_path='finetuned_model'
)


Generating train split: 2850 examples [00:00, 237852.76 examples/s]
Iteration: 100%|██████████| 45/45 [00:00<00:00, 54.34it/s]
Iteration: 100%|██████████| 45/45 [00:01<00:00, 39.88it/s]
Iteration: 100%|██████████| 45/45 [00:00<00:00, 56.39it/s]
Iteration: 100%|██████████| 45/45 [00:00<00:00, 53.72it/s]
Iteration: 100%|██████████| 45/45 [00:00<00:00, 55.67it/s]
Iteration: 100%|██████████| 45/45 [00:00<00:00, 59.75it/s]
Iteration: 100%|██████████| 45/45 [00:00<00:00, 45.85it/s]
Iteration: 100%|██████████| 45/45 [00:00<00:00, 47.58it/s]
Iteration: 100%|██████████| 45/45 [00:00<00:00, 59.05it/s]
Iteration: 100%|██████████| 45/45 [00:00<00:00, 54.54it/s]
Epoch: 100%|██████████| 10/10 [00:08<00:00,  1.15it/s]


In [5]:
# Tab-separated object table; the helper functions below read its
# "Object Type" column and filter on per-capability columns equal to "yes".
df = pd.read_csv(
    "../objects_actions.tsv",
    delimiter="\t",
)


def _objects_with_property(column: str) -> list:
    """Return the "Object Type" of every row of ``df`` whose ``column`` equals "yes".

    Shared implementation for the ``get_*_objects`` helpers below, which differ
    only in the column they filter on. ``df`` is looked up at call time, so the
    helpers always reflect the currently loaded table.

    Raises:
        KeyError: if ``column`` (or "Object Type") is not a column of ``df``.
    """
    # Single .loc lookup (row mask + column label) instead of chained indexing.
    return df.loc[df[column] == "yes", "Object Type"].tolist()


def get_openable_objects():
    """Object types whose "Openable" column is "yes"."""
    return _objects_with_property("Openable")


def get_pickable_objects():
    """Object types whose "Pickupable" column is "yes"."""
    return _objects_with_property("Pickupable")


def get_toggleable_objects():
    """Object types whose "On/Off" column is "yes"."""
    return _objects_with_property("On/Off")


def get_receptacle_objects():
    """Object types whose "Receptacle" column is "yes"."""
    return _objects_with_property("Receptacle")


def get_fillable_objects():
    """Object types whose "Fillable" column is "yes"."""
    return _objects_with_property("Fillable")


def get_sliceable_objects():
    """Object types whose "Sliceable" column is "yes"."""
    return _objects_with_property("Sliceable")


def get_cookable_objects():
    """Object types whose "Cookable" column is "yes"."""
    return _objects_with_property("Cookable")


def get_breakable_objects():
    """Object types whose "Breakable" column is "yes"."""
    return _objects_with_property("Breakable")


def get_dirty_objects():
    """Object types whose "Dirty" column is "yes"."""
    return _objects_with_property("Dirty")


def get_usable_objects():
    """Object types whose "UsedUp" column is "yes" (note: the column is "UsedUp")."""
    return _objects_with_property("UsedUp")


# Build the fixed vocabulary of action strings and embed it once, so that
# free-form action text can later be matched against `embeddings` row by row
# (row i of `embeddings` corresponds to all_actions[i]).

# Each lookup is done once and reused for the paired Open/Close and On/Off actions.
openable = get_openable_objects()
toggleable = get_toggleable_objects()

# Named `open_actions` / `slice_actions` rather than `open` / `slice`: binding
# those names at module level would shadow the Python builtins for every later
# cell in the kernel.
open_actions = [f"OpenObject({obj})" for obj in openable]
close = [f"CloseObject({obj})" for obj in openable]
pick = [f"PickupObject({obj})" for obj in get_pickable_objects()]
put = [f"PutObject({obj})" for obj in get_receptacle_objects()]
toggle_on = [f"ToggleObjectOn({obj})" for obj in toggleable]
toggle_off = [f"ToggleObjectOff({obj})" for obj in toggleable]
# fill = [f"FillObject({obj})" for obj in get_fillable_objects()]
slice_actions = [f"SliceObject({obj})" for obj in get_sliceable_objects()]
clean = [f"CleanObject({obj})" for obj in get_dirty_objects()]
# cook = [f"CookObject({obj})" for obj in get_cookable_objects()]
navigate = [f"NavigateTo({obj})" for obj in df["Object Type"].tolist()]
rotate = [f"Rotate({obj})" for obj in ["Left", "Right"]]
lookup = [f"LookUp({obj})" for obj in [30, 60, 90, 120, 150, 180]]
lookdown = [f"LookDown({obj})" for obj in [30, 60, 90, 120, 150, 180]]
move = [f"Move({obj})" for obj in ["Ahead", "Back", "Left", "Right"]]
done = ["Done"]
idle = ["Idle"]

# Order matters: indices into `all_actions` must line up with rows of `embeddings`.
all_actions = [
    *pick,
    *put,
    *open_actions,
    *close,
    *toggle_on,
    *toggle_off,
    *slice_actions,
    *clean,
    *navigate,
    *rotate,
    *lookup,
    *lookdown,
    *move,
    *done,
    *idle,
]

embeddings = torch.FloatTensor(model.encode(all_actions))


In [6]:
def get_closest_feasible_action(action: str):
    """Return the entry of ``all_actions`` most similar to ``action``.

    Used to convert actions like RotateLeft to Rotate(Left): the input text is
    encoded with ``model``, compared by cosine similarity against every row of
    the precomputed ``embeddings``, and the action at the best-scoring row is
    returned.
    """
    query_vec = torch.FloatTensor(model.encode([action]))
    # (num_actions, dim) vs (1, dim) -> one similarity score per known action.
    similarity = torch.cosine_similarity(embeddings, query_vec)
    best_idx = similarity.max(dim=0).indices
    return all_actions[best_idx]

In [9]:
# Smoke test: map free-form phrasings onto the fixed action vocabulary.
action = [
    "look up by angle 30",
    "switch object off faucet",
]
# print(get_closest_feasible_action("look up by angle 90"))
print(
    get_closest_feasible_action(action[1])
)

ToggleObjectOff(Faucet)
