In [123]:
import ast
import json
import numpy as np
from pprint import pprint
from typing import Union

from task_decomposition.utils.plotting import visualize_trajectory_decompositions
from task_decomposition.paths import DATA_GT_TXT_PATH, GPT_OUTPUT_PATH

from transformers import BertTokenizer, BertModel
from scipy.spatial.distance import cosine

# Initialize BERT tokenizer and model
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
model = BertModel.from_pretrained('bert-base-uncased')

ROUND_DIGITS = 4

START_IDX = 0
END_IDX = 1
SUBTASK_NAME_IDX = 2

DEBUG = True

In [69]:
def extract_subtask_from_groundtruth_file(filepath: str) -> list:
    '''
    This function extracts the subtask from the groundtruth file
    The groundtruth file is a txt file with the following format:
    step	subtask	stage
    0	Align manipulator height with Door	0
    1	Align manipulator height with Door	0
    2	Align manipulator height with Door	0
    3	Align manipulator height with Door	0
    4	Align manipulator height with Door	0
    5	Align manipulator height with Door	0
    6	Align manipulator height with Door	0
    7	Get closer to Door	1
    8	Get closer to Door	1
    9	Get closer to Door	1
    10	Get closer to Door	1
    11	Get closer to Door	1
    12	Get closer to Door	1
    13	Get closer to Door	1
    14	Get closer to Door	1
    15	Get closer to Door	1
    16	Get closer to Door	1
    17	Get closer to Door	1
    18	Get closer to Door	1
    ...

    The function returns a list of subtasks that specifies the start and end step of each subtask
    [(<start_step>, <end_step>, <subtask_name>), ...]
    '''
    _step_idx = 0
    _subtask_idx = 1
    subtask_decomposition = []
    with open(filepath, 'r') as f:
        # read and remove the header
        lines = f.readlines()
        lines = lines[1:] if lines[0].startswith('step') else lines

        # set initial values
        current_subtask = lines[0].split('\t')[_subtask_idx]
        start_step = int(lines[0].split('\t')[_step_idx])

        for idx, line in enumerate(lines):
            subtask = line.split('\t')[_subtask_idx]
            if subtask != current_subtask:
                # log values
                end_step = int(line.split('\t')[_step_idx])-1
                subtask_decomposition.append((start_step, end_step, current_subtask))

                # reset values
                start_step = int(line.split('\t')[_step_idx])
                current_subtask = subtask
            elif idx == len(lines) - 1:
                end_step = int(line.split('\t')[_step_idx])
                subtask_decomposition.append((start_step, end_step, current_subtask))

            # is there a corner case here where we are missing the last element should there be a change?

    return subtask_decomposition

def extract_subtask_from_gpt_output_file(filepath: str) -> list:
    '''
    This function extracts the subtask from the gpt output file.
    The gpt output file is a json, with the field "response" containing the output of the gpt model.
    '''
    # read the json file and load as a dictionary
    with open(filepath, 'r') as f:
        data = json.load(f)

    response = data["response"]
    start = response.find('subtask_decomposition = [') + len('subtask_decomposition = [')
    end = response.find(']', start)
    list_str = response[start:end]

    # Converting string representation of list to actual Python list
    subtask_decomposition = ast.literal_eval('[' + list_str + ']')
    return subtask_decomposition


In [125]:
def intersection(subtask_A: tuple, subtask_B: tuple) -> bool:
    '''
    This function checks if two subtasks intersect.
    '''
    a1, a2 = subtask_A[START_IDX], subtask_A[END_IDX]
    b1, b2 = subtask_B[START_IDX], subtask_B[END_IDX]

    return a1 <= b2 and b1 <= a2

def get_IOU(subtask_A: tuple, subtask_B: tuple) -> float:
    a1, a2 = subtask_A[START_IDX], subtask_A[END_IDX]
    b1, b2 = subtask_B[START_IDX], subtask_B[END_IDX]

    # Calculate the intersection
    intersection = max(0, min(a2, b2) - max(a1, b1))

    # Calculate the union
    union = max(a2, b2) - min(a1, b1)

    # Avoid division by zero
    if union == 0:
        return 0

    # Calculate the IoU
    iou = intersection / union

    return round(iou, ROUND_DIGITS)

def bert_encode(text):
    """
    Encode the text using BERT
    """
    inputs = tokenizer(text, return_tensors='pt')
    outputs = model(**inputs)
    return outputs.last_hidden_state.mean(dim=1)[0].detach().numpy()

def compare_description_similarity(A: str, B:str) -> float:
    """
    Compare the similarity between two descriptions using BERT embeddings
    """
    embedding1 = bert_encode(A)
    embedding2 = bert_encode(B)
    similarity = 1 - cosine(embedding1, embedding2)
    return round(similarity, ROUND_DIGITS)

# def find_best_matches(subtask: tuple, subtask_decomp_B: list) -> Union[list, list]:
    """
    Find the best matches above a certain semantic threshold
    """
    THRESHOLD = 0.7
    best_matches = []
    BERT_scores = []
    for s in subtask_decomp_B:
        semantic_score = compare_description_similarity(subtask[2], s[2])
        if semantic_score > THRESHOLD:
            best_matches.append(s)
            BERT_scores.append(semantic_score)

    return best_matches, BERT_scores

def subtask_similarity(subtask_decomp_A: list, subtask_decomp_B: list) -> None:
    '''
    This function calculates the similarity between two subtasks.
    The metric follows the transitive property as sim(A, B) = sim(B, A)
    '''
    weight_temporal = 0.5
    weight_semantic = 0.5

    N = subtask_decomp_A[-1][END_IDX] + 1 # +1 because index starts at 0
    score = 0
    for subtask in subtask_decomp_A:

        subtask_temporal_relative_weight = (subtask[END_IDX]- subtask[START_IDX]+1)/ N

        # find subtasks that intersect temporally
        intersect_subtasks = [s for s in subtask_decomp_B if intersection(subtask, s)]

        # find semantic similarity with all intersecting subtasks
        # at the same time, combine the subtasks that are sequential
        combined_intersect_subtasks = []
        BERT_scores = []
        for intersect_subtask in intersect_subtasks:
            BERT_scores.append(compare_description_similarity(subtask[SUBTASK_NAME_IDX], intersect_subtask[SUBTASK_NAME_IDX]))
            if len(combined_intersect_subtasks) == 0:
                combined_intersect_subtasks.append(intersect_subtask)
            else:
                prev_subtask = combined_intersect_subtasks[-1]
                if prev_subtask[END_IDX] == intersect_subtask[START_IDX] - 1:
                    combined_intersect_subtasks[-1] = (prev_subtask[START_IDX], intersect_subtask[END_IDX], prev_subtask[SUBTASK_NAME_IDX])
                else:
                    combined_intersect_subtasks.append(intersect_subtask)


        # Need to combine sequential subtasks
        IOUs = np.sum([get_IOU(subtask, s) for s in combined_intersect_subtasks])
        relative_IOUs = IOUs * subtask_temporal_relative_weight

        mean_BERT_score = np.mean(BERT_scores)
        relative_BERT_score = mean_BERT_score * subtask_temporal_relative_weight

        if False:
            print(f"Subtask: {subtask[SUBTASK_NAME_IDX]}")
            print(f"(start, end): ({subtask[START_IDX]}, {subtask[END_IDX]})")
            print(f"Intersecting subtasks: {intersect_subtasks}")
            print(f"Combined intersecting subtasks: {combined_intersect_subtasks}")
            print(f"Relative IOU: {relative_IOUs}")
            print(f"IOU: {IOUs}")
            print(f"BERT scores: {BERT_scores}")
            print(N)
            print(" ")
            score += weight_temporal*relative_IOUs + weight_semantic*relative_BERT_score
        score += weight_temporal*relative_IOUs + weight_semantic*relative_BERT_score
    
    return score


In [126]:
filepath = DATA_GT_TXT_PATH + "/Lift_20240213-110117_5_gt.txt"
subtask_decomposition = extract_subtask_from_groundtruth_file(filepath)
print(subtask_decomposition)
filepath = GPT_OUTPUT_PATH + "/Lift_20240213-110117_5.json"
gpt_subtask_decomposition = extract_subtask_from_gpt_output_file(filepath)
print(gpt_subtask_decomposition)

print(" ")
score1 = subtask_similarity(subtask_decomposition, gpt_subtask_decomposition)
print("  ")
print(f"Score1: {score1}")

score2 = subtask_similarity(gpt_subtask_decomposition, subtask_decomposition)
print("  ")
print(f"Score2: {score2}")

[(0, 12, 'Move to cube'), (13, 14, 'Grasp Cube'), (15, 49, 'Lift Cube')]
[(0, 9, 'move end effector towards cube'), (10, 12, 'adjust gripper above cube'), (13, 14, 'close gripper to grasp cube'), (15, 21, 'lift cube upwards'), (22, 49, 'hold cube in lifted position')]
 
  
Score1: 0.8524385000000001
  
Score2: 0.669016
