In [1]:
from pathlib import Path
from tqdm import tqdm

In [2]:
input_data_file = Path("./input_corrected.txt")
output_data_file = Path("./output_corrected.txt")

In [3]:
input_data = input_data_file.open('r').readlines()
output_data = output_data_file.open('r').readlines()

In [4]:
input_data = [data.strip() for data in input_data]
output_data = [data.strip() for data in output_data]

# Task Completion

In [5]:
import nltk
nltk.download('punkt')

[nltk_data] Downloading package punkt to /home/ksmehrab/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

In [6]:
from scipy.stats import kendalltau

original_order = [0, 1, 2, 3]
generated_order = [0, 1, 1, 1]

# Calculate Kendall Tau distance
kendall_tau_distance, _ = kendalltau(original_order, generated_order)

print(f"Kendall Tau Distance: {kendall_tau_distance}")

Kendall Tau Distance: 0.7071067811865477


In [7]:
from scipy.stats import spearmanr

# Calculate Spearman's Rank Correlation Coefficient
correlation_coefficient, _ = spearmanr(original_order, generated_order)

print(f"Spearman's Rank Correlation Coefficient: {correlation_coefficient}")


Spearman's Rank Correlation Coefficient: 0.7745966692414834


In [8]:
def ordering_accuracy(original_order, generated_order):
    correct_pairs = sum(1 for i, j in zip(original_order, generated_order) if i == j)
    total_pairs = len(original_order)
    accuracy = correct_pairs / total_pairs
    return accuracy

# Calculate ordering accuracy
accuracy = ordering_accuracy(original_order, generated_order)

print(f"Ordering Accuracy: {accuracy * 100:.2f}%")


Ordering Accuracy: 50.00%


In [9]:
# Initialize word2vec model
from gensim.models import Word2Vec
from sklearn.metrics.pairwise import cosine_similarity
from nltk.tokenize import word_tokenize

tokens = []
for sequence in output_data + input_data:
    tokens.append(word_tokenize(sequence))

model = Word2Vec(tokens, vector_size=100, window=5, min_count=1, workers=4)

In [10]:
def get_embedding(sent):
    tokens = word_tokenize(sent)
    return sum([model.wv[token] for token in tokens]) / len(tokens)

In [11]:
def get_embedding_similarity_matched_index(sent, list_to_match):
    scores = []
    sent_emb = get_embedding(sent)
    for match in list_to_match:
        match_emb = get_embedding(match)
        similarity_score = cosine_similarity(sent_emb.reshape(1, -1), match_emb.reshape(1, -1))[0][0]
        scores.append(similarity_score)
    #print(scores)
    max_index = scores.index(max(scores))
    return max_index

In [65]:
kt_scores = []
pcc_scores = []
acc_scores = []
for inp, out in tqdm(zip(input_data, output_data)):
    inp_steps = inp.split('<->')
    inp_steps = [step for step in inp_steps if step != '']
    #print(inp_steps)
    
    out_steps = out.split('<->')
    out_steps = [step for step in out_steps if step != '']
    #print(out_steps)
    
    req_len = len(inp_steps) if len(inp_steps) < len(out_steps) else len(out_steps)
    #print(req_len)
    inp_steps = inp_steps[:req_len]
    out_steps = out_steps[:req_len]
    
    in_seq_order = list(range(req_len))
    out_seq_order = []
    err = False
    for inp_step in inp_steps:
        try:
            max_idx = get_embedding_similarity_matched_index(inp_step, out_steps)
            out_seq_order.append(max_idx)
        except:
            err = True
            continue
    if not err:
        kendall_tau_distance, _ = kendalltau(in_seq_order, out_seq_order)
        #print(kendall_tau_distance)
        kt_scores.append(kendall_tau_distance)
        correlation_coefficient, _ = spearmanr(original_order, generated_order)
        pcc_scores.append(correlation_coefficient)
        accuracy = ordering_accuracy(original_order, generated_order)
        acc_scores.append(accuracy)
    

11933it [06:02, 32.96it/s]


In [66]:
print(f'Kendall Tau Distance: {sum(kt_scores) / len(kt_scores)}')

Kendall Tau Distance: 0.3555586102835615


In [67]:
print(f'Spearman Corr Coeff: {sum(pcc_scores) / len(pcc_scores)}')

Spearman Corr Coeff: 0.7745966692416814


In [68]:
print(f'Ordering Accuracy Score: {sum(acc_scores) / len(acc_scores)}')

Ordering Accuracy Score: 0.5


# Exact Match Of Entire Sequences

In [18]:
input_data_filtered = [data.replace('<->', "").replace(",", "").replace(".", "").replace(" ", "").lower() for data in input_data]

output_data_filtered = [data.replace('<->', "").replace(",", "").replace(".", "").replace(" ", "").lower() for data in output_data]

output_data_filtered

count = 0
for inp, out in zip(input_data_filtered, output_data_filtered):
    if inp == out:
        count += 1

print(f'Exact string match: {count/len(input_data_filtered)}')

Exact string match: 0.009050532137769211


# Tianyu Code

In [19]:
input_lines = input_data
output_lines = output_data
for in_index in range(len(input_lines)):
    ind = input_lines[in_index].rfind("<->")
    input_lines[in_index] = input_lines[in_index][ind+4:].rstrip()
for out_index in range(len(output_lines)):
    ind = output_lines[out_index].rfind("<->")
    output_lines[out_index] = output_lines[out_index][:ind]
    ind = output_lines[out_index].rfind("<->")
    output_lines[out_index] = output_lines[out_index][ind+4:].rstrip()
print(input_lines[0] + "<<=>>" + output_lines[0])

Place the stack of items on the table.<<=>>Put the knife in the black mug.


In [20]:
len(input_lines)

11933

In [21]:
len(output_lines)

11933

In [30]:
input_lines[0]

'Place the stack of items on the table.'

In [28]:
input_data[0]

'Place a mug with a knife in it on a table. <-> Walk ahead a step and then left to the counter in front of the coffee maker. <-> Pick up the butter knife on the counter. <-> Turn left and walk to the sink. <-> Put the knife in the black mug. <-> Pick up the mug. <-> Turn around and face the green table. <-> Place the stack of items on the table.'

In [31]:
output_lines[0]

'Put the knife in the black mug.'

In [32]:
count = 0
for inp, out in zip(input_lines, output_lines):
    if inp == out:
        count += 1

In [34]:
count/len(input_lines)

0.15000419006117488

In [35]:
input_lines

['Place the stack of items on the table.',
 'Put plate on the counter.',
 'Place the credit card on the table to the right of the first credit card.',
 'Put the bowl in the left sink basin.',
 'Put the heated slice of tomato on the edge of the counter in front of the right side of the sink.',
 'Place the cellphone to the left edge of the sofa chair.',
 'put the dispenser on the table',
 'Put the sliced lettuce on the table',
 "Place the phone on the nightstand underneath the CD's.",
 'Turn on the lamp.',
 'Open the topmost right drawer and place the soap inside.',
 'Put the glass in the sink.',
 'Open the drawer put in the knife then close it',
 "Put the pan in the fridge on the middle shelf then close the door when you're done.",
 'Set the apple piece down on the rack to the right of the knife blade.',
 'Place the bowl on the coffee table to the left of the tissues.',
 'Place the tomato in the microwave above the sink.',
 'Put the cell phone on the bed to the right of the other one.',

In [46]:
output_lines[7]

'Grab a a slice of lettuce'

In [45]:
output_data[7]

'Putting cold sliced lettuce on the table <-> Grab the knife off of the table <-> Put the knife on the table <-> Cool the sliced lettuce in the fridge and then take it out <-> Turn around and go to the table <-> Grab a a slice of lettuce <-> Look down at the table'

In [42]:
input_data

['Place a mug with a knife in it on a table. <-> Walk ahead a step and then left to the counter in front of the coffee maker. <-> Pick up the butter knife on the counter. <-> Turn left and walk to the sink. <-> Put the knife in the black mug. <-> Pick up the mug. <-> Turn around and face the green table. <-> Place the stack of items on the table.',
 'Put a plate with a knife on the counter. <-> Move to the table ahead of you <-> Pick up knife from the table <-> Move to the stove to the right of you <-> Place knife on a plate on the stove <-> Pick up plate with knife from the stove <-> Move to the counter to the right of the sink <-> Put plate on the counter.',
 'To move two credit cards to the table. <-> Turn right and walk to the table to the right of the green chair on the left. <-> Pick up the credit card closest to the lamp on the table. <-> Turn left and walk to the table on the right. <-> Place the credit card on the table in front of the alarm clock. <-> Turn, walk to the other 