In [1]:
import os
import sys

import pandas as pd
import numpy as np
from sklearn.metrics import accuracy_score
import collections

import networkx as nx
import torch

from basicmemnet import memnet
from basicmemnet import plot_graph

from pykeen.triples import TriplesFactory, TriplesNumericLiteralsFactory, CoreTriplesFactory
from pykeen.pipeline import pipeline
from pykeen.models import Model

In [3]:
md_test = memnet.DSL()

md_test.import_gml("test_graph.gml")

test = md_test.get_graph()

def get_triplets(G):
    triples = []
    id_to_utterance = {}
    for u, v, data in G.edges(data=True):
        relation = data.get('link_type')
        head = G.nodes[u].get('utterances')[0]
        tail = G.nodes[v].get('utterances')[0]
        # triples.append([u, relation, v])
        triples.append([u, relation, v])
        id_to_utterance[u] = head
        id_to_utterance[v] = tail
    return np.array(triples), id_to_utterance

test_triplets, id_to_utterance = get_triplets(test)
print('Number of test triplets: ',len(test_triplets))
print(test_triplets[:3])

Number of test triplets:  6680
[['668f64fb8cc61c379809bfe8' 'has_actor' '668f64fb8cc61c379809bfe9']
 ['668f64fb8cc61c379809bfe8' 'has_element' '668f64fb8cc61c379809bfea']
 ['668f64fb8cc61c379809bfe8' 'has_element' '668f64fb8cc61c379809bfeb']]


### RotatE parentAction

In [12]:
# model_version = "parentAction_transe_word_350epochs"
model_version = "parentAction_RotatE_word_200epochs"
trained_model_path = f"doctests/{model_version}/trained_model.pkl"
model = torch.load(trained_model_path)

triples_factory = TriplesFactory.from_path_binary(f"doctests/{model_version}/training_triples")
entity_to_id = triples_factory.entity_to_id
entity_id_to_label = triples_factory.entity_id_to_label
relation_to_id = triples_factory.relation_to_id

test_set = np.array([triple for triple in test_triplets if 'has_element' in triple])
mapped_test_data = np.array([[entity_to_id[id_to_utterance[h]], relation_to_id[r], entity_to_id[id_to_utterance[t]]] for h, r, t in test_set])

In [13]:
true_labels = mapped_test_data[:, 0]
print("true_labels", true_labels)
rel_tail = mapped_test_data[:, 1:]
print("rel_tail", rel_tail)

all_scores = model.predict_h(rt_batch=torch.tensor(rel_tail))
predicted_heads= [scores.argmax().item() for scores in all_scores]

sum(predicted_heads == true_labels)/len(true_labels)*100

true_labels [29 29 29 ... 37 37 37]
rel_tail [[ 1 12]
 [ 1  0]
 [ 1 14]
 ...
 [ 1  0]
 [ 1 11]
 [ 1 17]]


15.335326783425888

In [14]:
df = []
for index in range (len(test_set)):
    parend_id = test_set[index][0]
    pred = entity_id_to_label[predicted_heads[index]]
    df.append((parend_id, pred))

df1 = pd.DataFrame(df).groupby(0).agg(list).reset_index()
df1.head()

df1[1] = df1[1].apply(lambda x: max(collections.Counter(x), key=collections.Counter(x).get))
df1

df1[0] = df1[0].apply(lambda x: id_to_utterance[x])

sum(df1[0] == df1[1])/len(df1[0])

0.1111111111111111

### RESCAL parent Action

In [62]:
# model_version = "parentAction_transe_word_350epochs"
model_version = "parentAction_RESCAL_word_350epochs"
trained_model_path = f"doctests/{model_version}/trained_model.pkl"
model = torch.load(trained_model_path)

triples_factory = TriplesFactory.from_path_binary(f"doctests/{model_version}/training_triples")
entity_to_id = triples_factory.entity_to_id
entity_id_to_label = triples_factory.entity_id_to_label
relation_to_id = triples_factory.relation_to_id

In [63]:
test_set = np.array([triple for triple in test_triplets if 'has_element' in triple])
mapped_test_data = np.array([[entity_to_id[id_to_utterance[h]], relation_to_id[r], entity_to_id[id_to_utterance[t]]] for h, r, t in test_set])

In [64]:
true_labels = mapped_test_data[:, 0]
print("true_labels", true_labels)
rel_tail = mapped_test_data[:, 1:]
print("rel_tail", rel_tail)

true_labels [29 29 29 ... 37 37 37]
rel_tail [[ 1 12]
 [ 1  0]
 [ 1 14]
 ...
 [ 1  0]
 [ 1 11]
 [ 1 17]]


In [65]:
all_scores = model.predict_h(rt_batch=torch.tensor(rel_tail))
predicted_heads= [scores.argmax().item() for scores in all_scores]

sum(predicted_heads == true_labels)/len(true_labels)*100

12.259718069201195

In [67]:
df = []
for index in range (len(test_set)):
    parend_id = test_set[index][0]
    pred = entity_id_to_label[predicted_heads[index]]
    df.append((parend_id, pred))
df

[('668f64fb8cc61c379809bfe8', 'task_2_k_cooking_with_bowls'),
 ('668f64fb8cc61c379809bfe8', 'task_2_k_cooking_with_bowls'),
 ('668f64fb8cc61c379809bfe8', 'task_2_k_cooking_with_bowls'),
 ('668f64fb8cc61c379809bfe8', 'task_2_k_cooking_with_bowls'),
 ('668f64fb8cc61c379809bfe8', 'task_2_k_cooking_with_bowls'),
 ('668f64fb8cc61c379809bfe8', 'task_2_k_cooking_with_bowls'),
 ('668f64fb8cc61c379809bfe8', 'task_2_k_cooking_with_bowls'),
 ('668f64fb8cc61c379809bfe8', 'task_2_k_cooking_with_bowls'),
 ('668f64fb8cc61c379809bfe8', 'task_2_k_cooking_with_bowls'),
 ('668f64fb8cc61c379809bfe8', 'task_2_k_cooking_with_bowls'),
 ('668f64fb8cc61c379809bfe8', 'task_2_k_cooking_with_bowls'),
 ('668f64fb8cc61c379809bfe8', 'task_2_k_cooking_with_bowls'),
 ('668f64fb8cc61c379809bfe8', 'task_2_k_cooking_with_bowls'),
 ('668f64fb8cc61c379809bfe8', 'task_2_k_cooking_with_bowls'),
 ('668f64fb8cc61c379809bfe8', 'task_2_k_cooking_with_bowls'),
 ('668f64fb8cc61c379809bfe8', 'task_2_k_cooking_with_bowls'),
 ('668f6

In [68]:
df1 = pd.DataFrame(df).groupby(0).agg(list).reset_index()
df1.head()

Unnamed: 0,0,1
0,668f64fb8cc61c379809bfe8,"[task_2_k_cooking_with_bowls, task_2_k_cooking..."
1,668f64fb8cc61c379809bfff,"[task_2_k_cooking_with_bowls, task_2_k_cooking..."
2,668f64fb8cc61c379809c0c7,"[task_2_k_cooking_with_bowls, task_2_k_cooking..."
3,668f64fb8cc61c379809c0dd,"[task_2_k_cooking_with_bowls, task_2_k_cooking..."
4,668f64fb8cc61c379809c182,"[task_2_k_cooking_with_bowls, task_2_k_cooking..."


In [69]:
df1[1] = df1[1].apply(lambda x: max(collections.Counter(x), key=collections.Counter(x).get))
df1

Unnamed: 0,0,1
0,668f64fb8cc61c379809bfe8,task_2_k_cooking_with_bowls
1,668f64fb8cc61c379809bfff,task_2_k_cooking_with_bowls
2,668f64fb8cc61c379809c0c7,task_2_k_cooking_with_bowls
3,668f64fb8cc61c379809c0dd,task_2_k_cooking_with_bowls
4,668f64fb8cc61c379809c182,task_2_k_cooking_with_bowls
...,...,...
103,668f64fc8cc61c379809f366,task_2_k_cooking_with_bowls
104,668f64fc8cc61c379809f4ae,task_2_k_cooking_with_bowls
105,668f64fc8cc61c379809f4d3,task_2_k_cooking_with_bowls
106,668f64fc8cc61c379809f574,task_2_k_cooking_with_bowls


In [70]:
df1[0] = df1[0].apply(lambda x: id_to_utterance[x])

sum(df1[0] == df1[1])/len(df1[0])

0.1111111111111111

In [71]:
df1

Unnamed: 0,0,1
0,task_1_k_cooking,task_2_k_cooking_with_bowls
1,task_1_k_cooking,task_2_k_cooking_with_bowls
2,task_2_k_cooking_with_bowls,task_2_k_cooking_with_bowls
3,task_2_k_cooking_with_bowls,task_2_k_cooking_with_bowls
4,task_3_k_pouring,task_2_k_cooking_with_bowls
...,...,...
103,task_7_w_free_hard_drive,task_2_k_cooking_with_bowls
104,task_8_w_hammering,task_2_k_cooking_with_bowls
105,task_8_w_hammering,task_2_k_cooking_with_bowls
106,task_9_w_sawing,task_2_k_cooking_with_bowls


In [73]:
preds = []
trues = []
for i, (pred, true) in enumerate(zip(predicted_heads, true_labels)):
    predicted_head_str = entity_id_to_label[pred]
    preds.append(predicted_head_str)
    true_head_str = entity_id_to_label[true]
    trues.append(true_head_str)
    print(f"Prediction {i+1}: Predicted head = {predicted_head_str}, True head = {true_head_str}")

Prediction 1: Predicted head = task_2_k_cooking_with_bowls, True head = task_1_k_cooking
Prediction 2: Predicted head = task_2_k_cooking_with_bowls, True head = task_1_k_cooking
Prediction 3: Predicted head = task_2_k_cooking_with_bowls, True head = task_1_k_cooking
Prediction 4: Predicted head = task_2_k_cooking_with_bowls, True head = task_1_k_cooking
Prediction 5: Predicted head = task_2_k_cooking_with_bowls, True head = task_1_k_cooking
Prediction 6: Predicted head = task_2_k_cooking_with_bowls, True head = task_1_k_cooking
Prediction 7: Predicted head = task_2_k_cooking_with_bowls, True head = task_1_k_cooking
Prediction 8: Predicted head = task_2_k_cooking_with_bowls, True head = task_1_k_cooking
Prediction 9: Predicted head = task_2_k_cooking_with_bowls, True head = task_1_k_cooking
Prediction 10: Predicted head = task_2_k_cooking_with_bowls, True head = task_1_k_cooking
Prediction 11: Predicted head = task_2_k_cooking_with_bowls, True head = task_1_k_cooking
Prediction 12: Pred

### TRANSE next action

In [94]:
# model_version = "parentAction_transe_word_350epochs"
model_version = "nextAction_RotatE_word_350epochs"
trained_model_path = f"doctests/{model_version}/trained_model.pkl"
model = torch.load(trained_model_path)

triples_factory = TriplesFactory.from_path_binary(f"doctests/{model_version}/training_triples")
entity_to_id = triples_factory.entity_to_id
entity_id_to_label = triples_factory.entity_id_to_label
relation_to_id = triples_factory.relation_to_id

In [95]:
test_set = np.array([triple for triple in test_triplets if 'has_next' in triple])
mapped_test_data = np.array([[entity_to_id[id_to_utterance[h]], relation_to_id[r], entity_to_id[id_to_utterance[t]]] for h, r, t in test_set])

In [122]:
# tetest_set = np.array([triple for triple in test_triplets if 'has_element' in triple])
# mapped_test_data = np.array([[entity_to_id[h], relation_to_id[r], entity_to_id[t]] for h, r, t in tetest_set])

In [83]:
len(test_set)

2126

In [96]:
true_labels = mapped_test_data[:, 2] #next action prediction
print("true_labels", true_labels)
head_rel = mapped_test_data[:, :2]
print("head_rel", head_rel)

all_scores = model.predict_t(hr_batch=torch.tensor(head_rel))
predicted_tails= [scores.argmax().item() for scores in all_scores]

print(accuracy_score(true_labels, np.array(predicted_tails)))

true_labels [ 0 14 22 ...  0 11 17]
head_rel [[12  2]
 [ 0  2]
 [14  2]
 ...
 [12  2]
 [ 0  2]
 [11  2]]
0.33113828786453436


In [97]:
preds = []
trues = []
for i, (pred, true) in enumerate(zip(predicted_tails, true_labels)):
    predicted_head_str = entity_id_to_label[pred]
    preds.append(predicted_head_str)
    true_head_str = entity_id_to_label[true]
    trues.append(true_head_str)
    print(f"Prediction {i+1}: Predicted next action = {predicted_head_str}, True = {true_head_str}")

Prediction 1: Predicted next action = approach, True = approach
Prediction 2: Predicted next action = retreat, True = lift
Prediction 3: Predicted next action = retreat, True = stir
Prediction 4: Predicted next action = place, True = place
Prediction 5: Predicted next action = retreat, True = retreat
Prediction 6: Predicted next action = approach, True = approach
Prediction 7: Predicted next action = retreat, True = hold
Prediction 8: Predicted next action = place, True = retreat
Prediction 9: Predicted next action = retreat, True = approach
Prediction 10: Predicted next action = retreat, True = lift
Prediction 11: Predicted next action = retreat, True = pour
Prediction 12: Predicted next action = place, True = place
Prediction 13: Predicted next action = retreat, True = retreat
Prediction 14: Predicted next action = retreat, True = approach
Prediction 15: Predicted next action = retreat, True = hold
Prediction 16: Predicted next action = place, True = retreat
Prediction 17: Predicted 

### TransE parent action

In [5]:
model_version = "parentAction_transe_word_350epochs"
# model_version = "parentAction_RotatE_word_200epochs"
trained_model_path = f"doctests/{model_version}/trained_model.pkl"
model = torch.load(trained_model_path)

triples_factory = TriplesFactory.from_path_binary(f"doctests/{model_version}/training_triples")
entity_to_id = triples_factory.entity_to_id
entity_id_to_label = triples_factory.entity_id_to_label
relation_to_id = triples_factory.relation_to_id

test_set = np.array([triple for triple in test_triplets if 'has_element' in triple])
mapped_test_data = np.array([[entity_to_id[id_to_utterance[h]], relation_to_id[r], entity_to_id[id_to_utterance[t]]] for h, r, t in test_set])

In [7]:
true_labels = mapped_test_data[:, 0]
print("true_labels", true_labels)
rel_tail = mapped_test_data[:, 1:]
print("rel_tail", rel_tail)

all_scores = model.predict_h(rt_batch=torch.tensor(rel_tail))
predicted_heads= [scores.argmax().item() for scores in all_scores]

sum(predicted_heads == true_labels)/len(true_labels)*100

true_labels [29 29 29 ... 37 37 37]
rel_tail [[ 1 12]
 [ 1  0]
 [ 1 14]
 ...
 [ 1  0]
 [ 1 11]
 [ 1 17]]


12.174284493806066

In [8]:
df = []
for index in range (len(test_set)):
    parend_id = test_set[index][0]
    pred = entity_id_to_label[predicted_heads[index]]
    df.append((parend_id, pred))

In [9]:
df1 = pd.DataFrame(df).groupby(0).agg(list).reset_index()
df1.head()

Unnamed: 0,0,1
0,668f64fb8cc61c379809bfe8,"[task_9_w_sawing, task_4_k_wiping, task_8_w_ha..."
1,668f64fb8cc61c379809bfff,"[task_9_w_sawing, task_4_k_wiping, task_8_w_ha..."
2,668f64fb8cc61c379809c0c7,"[task_9_w_sawing, task_4_k_wiping, task_8_w_ha..."
3,668f64fb8cc61c379809c0dd,"[task_9_w_sawing, task_4_k_wiping, task_8_w_ha..."
4,668f64fb8cc61c379809c182,"[task_9_w_sawing, task_4_k_wiping, task_8_w_ha..."


In [10]:
df1[1] = df1[1].apply(lambda x: max(collections.Counter(x), key=collections.Counter(x).get))
df1

Unnamed: 0,0,1
0,668f64fb8cc61c379809bfe8,task_4_k_wiping
1,668f64fb8cc61c379809bfff,task_4_k_wiping
2,668f64fb8cc61c379809c0c7,task_4_k_wiping
3,668f64fb8cc61c379809c0dd,task_4_k_wiping
4,668f64fb8cc61c379809c182,task_3_k_pouring
...,...,...
103,668f64fc8cc61c379809f366,task_4_k_wiping
104,668f64fc8cc61c379809f4ae,task_4_k_wiping
105,668f64fc8cc61c379809f4d3,task_8_w_hammering
106,668f64fc8cc61c379809f574,task_9_w_sawing


In [11]:
df1[0] = df1[0].apply(lambda x: id_to_utterance[x])

In [12]:
sum(df1[0] == df1[1])/len(df1[0])

0.28703703703703703

In [14]:
from sklearn.metrics import confusion_matrix
confusion_matrix(df1[0], df1[1])

array([[ 0,  0,  0, 12,  0,  0,  0,  0,  0],
       [ 0,  0,  0, 12,  0,  0,  0,  0,  0],
       [ 0,  0,  5,  0,  0,  0,  0,  7,  0],
       [ 0,  0,  0, 12,  0,  0,  0,  0,  0],
       [ 0,  0,  1, 11,  0,  0,  0,  0,  0],
       [ 0,  0,  0, 12,  0,  0,  0,  0,  0],
       [ 0,  0,  0, 12,  0,  0,  0,  0,  0],
       [ 0,  0,  1,  8,  0,  0,  0,  3,  0],
       [ 0,  0,  0,  1,  0,  0,  0,  0, 11]], dtype=int64)

In [13]:
(df1[0], df1[1])

0.28703703703703703

In [15]:
df1[0].value_counts()

0
task_1_k_cooking               12
task_2_k_cooking_with_bowls    12
task_3_k_pouring               12
task_4_k_wiping                12
task_5_k_cereals               12
task_6_w_hard_drive            12
task_7_w_free_hard_drive       12
task_8_w_hammering             12
task_9_w_sawing                12
Name: count, dtype: int64

In [16]:
df1[1].value_counts()

1
task_4_k_wiping       80
task_9_w_sawing       11
task_8_w_hammering    10
task_3_k_pouring       7
Name: count, dtype: int64

In [126]:
preds = []
trues = []
for i, (pred, true) in enumerate(zip(predicted_heads, true_labels)):
    predicted_head_str = entity_id_to_label[pred]
    preds.append(predicted_head_str)
    true_head_str = train_triples_factory.entity_id_to_label[true]
    trues.append(true_head_str)
    print(f"Prediction {i+1}: Predicted head = {predicted_head_str}, True head = {true_head_str}")


Prediction 1: Predicted head = task_9_w_sawing, True head = task_1_k_cooking
Prediction 2: Predicted head = task_4_k_wiping, True head = task_1_k_cooking
Prediction 3: Predicted head = task_8_w_hammering, True head = task_1_k_cooking
Prediction 4: Predicted head = task_6_w_hard_drive, True head = task_1_k_cooking
Prediction 5: Predicted head = task_7_w_free_hard_drive, True head = task_1_k_cooking
Prediction 6: Predicted head = task_3_k_pouring, True head = task_1_k_cooking
Prediction 7: Predicted head = task_9_w_sawing, True head = task_1_k_cooking
Prediction 8: Predicted head = task_4_k_wiping, True head = task_1_k_cooking
Prediction 9: Predicted head = task_5_k_cereals, True head = task_1_k_cooking
Prediction 10: Predicted head = task_3_k_pouring, True head = task_1_k_cooking
Prediction 11: Predicted head = task_4_k_wiping, True head = task_1_k_cooking
Prediction 12: Predicted head = task_8_w_hammering, True head = task_1_k_cooking
Prediction 13: Predicted head = task_2_k_cooking_wi

In [134]:
import collections

collections.Counter(preds)

Counter({'task_4_k_wiping': 599,
         'task_3_k_pouring': 531,
         'task_8_w_hammering': 319,
         'task_7_w_free_hard_drive': 278,
         'task_5_k_cereals': 267,
         'task_9_w_sawing': 251,
         'task_2_k_cooking_with_bowls': 71,
         'task_6_w_hard_drive': 25})

In [163]:
collections.Counter(trues)

Counter({'task_5_k_cereals': 415,
         'task_8_w_hammering': 376,
         'task_6_w_hard_drive': 316,
         'task_7_w_free_hard_drive': 265,
         'task_2_k_cooking_with_bowls': 220,
         'task_1_k_cooking': 216,
         'task_4_k_wiping': 181,
         'task_9_w_sawing': 178,
         'task_3_k_pouring': 174})

In [None]:
# parentAction_transe_word_350epochs  12%