In [1]:
import os
import sys

import pandas as pd
import numpy as np

import networkx as nx

from basicmemnet import memnet
from basicmemnet import plot_graph

from pykeen.triples import TriplesFactory
from pykeen.pipeline import pipeline
from pykeen.pipeline import plot_losses

In [3]:
# One DSL wrapper per data split.
md_train, md_test = memnet.DSL(), memnet.DSL()

# Load each split from its GML export (paths are relative to the working directory).
md_train.import_gml("train_with_wordnet.gml")
md_test.import_gml("test_with_wordnet.gml")

# Extract the graph objects; the cells below walk them through .edges(...) and .nodes[...].
test, train = md_test.get_graph(), md_train.get_graph()

### Id - link - id

In [None]:
# Prepare data: convert the graph into the labeled-triples array that PyKEEN consumes.
def get_triplets(G):
    """Return one ``[source_id, link_type, target_id]`` row per edge of ``G``.

    Parameters
    ----------
    G : graph object exposing ``edges(data=True)`` that yields
        ``(u, v, attribute_dict)`` tuples.

    Returns
    -------
    numpy.ndarray
        Array built from the list of rows. The relation entry is ``None``
        for any edge whose attribute dict has no ``'link_type'`` key.
    """
    rows = [
        [source, attrs.get('link_type'), target]
        for source, target, attrs in G.edges(data=True)
    ]
    return np.array(rows)

# Build the labeled (head, relation, tail) arrays for both splits.
train_triplets = get_triplets(train)
test_triplets = get_triplets(test)
print('Number of train triplets: ', len(train_triplets))
print('Number of test triplets: ',len(test_triplets))
print(test_triplets[:3])

train_triples_factory = TriplesFactory.from_labeled_triples(train_triplets)
# The test factory must reuse the training label->id mappings. Built on its own it
# would assign fresh ids, and the evaluator would then score test triples against
# embeddings that belong to different entities/relations.
# NOTE(review): test triples whose labels never occur in training cannot be mapped;
# recent PyKEEN versions drop them with a warning -- confirm for the installed version.
test_triples_factory = TriplesFactory.from_labeled_triples(
    test_triplets,
    entity_to_id=train_triples_factory.entity_to_id,
    relation_to_id=train_triples_factory.relation_to_id,
)

# The model is trained on the whole training set, but evaluation is restricted to the
# 'has_element' relation, because we want predictions for the parent actions.
results = pipeline(
    training=train_triples_factory,
    testing=test_triples_factory,
    model='TransE',
    epochs=350,
    evaluation_relation_whitelist={'has_element'},
    random_seed=42,  # fixed seed so a re-run reproduces the same metrics
)
results.save_to_directory('doctests/parentAction_transe_id_wordnet_350epochs')

In [None]:
# Display the 'head' side / 'realistic' rank-type entry of the evaluation metrics dict.
results.metric_results.to_dict()['head']['realistic']

### Word - link - word

In [None]:
def get_triplets(G):
    """Return one ``[head_word, link_type, tail_word]`` row per edge of ``G``.

    Each endpoint is represented by the first element of its ``'utterances'``
    node attribute instead of its node id.

    Parameters
    ----------
    G : graph object exposing ``edges(data=True)`` and a ``nodes`` mapping
        from node id to attribute dict.

    Returns
    -------
    numpy.ndarray
        Array built from the list of rows. The relation entry is ``None``
        for any edge without a ``'link_type'`` attribute.
    """
    def first_utterance(node):
        # Index 0 of the node's 'utterances' attribute is used as its label.
        return G.nodes[node].get('utterances')[0]

    rows = []
    for source, target, attrs in G.edges(data=True):
        link = attrs.get('link_type')
        source_word = first_utterance(source)
        target_word = first_utterance(target)
        rows.append([source_word, link, target_word])
    return np.array(rows)

# Build the labeled (head, relation, tail) arrays for both splits.
train_triplets = get_triplets(train)
test_triplets = get_triplets(test)
print('Number of train triplets: ', len(train_triplets))
print('Number of test triplets: ',len(test_triplets))
print(test_triplets[:3])

train_triples_factory = TriplesFactory.from_labeled_triples(train_triplets)
# The test factory must reuse the training label->id mappings. Built on its own it
# would assign fresh ids, and the evaluator would then score test triples against
# embeddings that belong to different entities/relations.
# NOTE(review): test triples whose labels never occur in training cannot be mapped;
# recent PyKEEN versions drop them with a warning -- confirm for the installed version.
test_triples_factory = TriplesFactory.from_labeled_triples(
    test_triplets,
    entity_to_id=train_triples_factory.entity_to_id,
    relation_to_id=train_triples_factory.relation_to_id,
)

# The model is trained on the whole training set, but evaluation is restricted to the
# 'has_element' relation, because we want predictions for the parent actions.
results_1 = pipeline(
    training=train_triples_factory,
    testing=test_triples_factory,
    model='TransE',
    epochs=350,
    evaluation_relation_whitelist={'has_element'},
    random_seed=42,  # fixed seed so a re-run reproduces the same metrics
)
results_1.save_to_directory('doctests/parentAction_transe_word_350epochs')

In [None]:
# Display the 'head' side / 'realistic' rank-type entry of the evaluation metrics dict.
results_1.metric_results.to_dict()['head']['realistic']

### id - link - id, id - has_utterance - word

In [None]:
# Prepare data: convert the graph into the labeled-triples array that PyKEEN consumes.
def get_triplets(G):
    """Return id-level edge triples plus ``has_utterance`` triples for both endpoints.

    For every edge ``(u, v)`` three rows are emitted, in this order:
    ``[u, link_type, v]``, ``[u, 'has_utterance', word_u]`` and
    ``[v, 'has_utterance', word_v]``, where ``word_x`` is the first element
    of node ``x``'s ``'utterances'`` attribute. A node that takes part in
    several edges therefore contributes repeated ``has_utterance`` rows.

    Parameters
    ----------
    G : graph object exposing ``edges(data=True)`` and a ``nodes`` mapping
        from node id to attribute dict.

    Returns
    -------
    numpy.ndarray
        Array built from the list of rows.
    """
    def first_utterance(node):
        return G.nodes[node].get('utterances')[0]

    rows = []
    for source, target, attrs in G.edges(data=True):
        link = attrs.get('link_type')
        rows.append([source, link, target])

        source_word = first_utterance(source)
        target_word = first_utterance(target)
        rows.extend([
            [source, 'has_utterance', source_word],
            [target, 'has_utterance', target_word],
        ])
    return np.array(rows)

# Build the labeled (head, relation, tail) arrays for both splits.
train_triplets = get_triplets(train)
test_triplets = get_triplets(test)
print('Number of train triplets: ', len(train_triplets))
print('Number of test triplets: ',len(test_triplets))
print(test_triplets[:3])

train_triples_factory = TriplesFactory.from_labeled_triples(train_triplets)
# The test factory must reuse the training label->id mappings. Built on its own it
# would assign fresh ids, and the evaluator would then score test triples against
# embeddings that belong to different entities/relations.
# NOTE(review): test triples whose labels never occur in training cannot be mapped;
# recent PyKEEN versions drop them with a warning -- confirm for the installed version.
test_triples_factory = TriplesFactory.from_labeled_triples(
    test_triplets,
    entity_to_id=train_triples_factory.entity_to_id,
    relation_to_id=train_triples_factory.relation_to_id,
)

# The model is trained on the whole training set, but evaluation is restricted to the
# 'has_utterance' relation (the whitelist below), i.e. the id -> word triples.
results = pipeline(
    training=train_triples_factory,
    testing=test_triples_factory,
    model='TransE',
    epochs=250,
    evaluation_relation_whitelist={'has_utterance'},
    random_seed=42,  # fixed seed so a re-run reproduces the same metrics
)
results.save_to_directory('doctests/parentAction_transe_id_word_250epochs')

In [None]:
# Display the 'tail' side / 'realistic' rank-type entry of the evaluation metrics dict.
results.metric_results.to_dict()['tail']['realistic']

### action+object - link - parent action

In [None]:
# Utterances that identify top-level (parent) actions; these are never extended with objects.
parent_actions = ['task_1_k_cooking', 'task_2_k_cooking_with_bowls', 'task_3_k_pouring', 'task_4_k_wiping',
                  'task_5_k_cereals', 'task_6_w_hard_drive', 'task_7_w_free_hard_drive', 'task_8_w_hammering',
                  'task_9_w_sawing']

def get_triplets(G):
    """Return one ``[head_label, link_type, tail_label]`` row per edge of ``G``.

    A node's label is the first element of its ``'utterances'`` attribute.
    For sub-actions (nodes of type ``'action'`` whose utterance is not listed
    in ``parent_actions``) the utterance of every object reached through a
    ``'has_object'`` edge is appended, separated by a space, e.g. ``'pour water'``.

    Fixes relative to the previous version:
    * the node type was compared against ``'acion'``, so objects were never
      appended; it is now compared against ``'action'``
      (NOTE(review): confirm ``'action'`` is the type value stored in the graph);
    * objects of the *tail* node were appended to the head label instead of
      the tail label;
    * a no-op ``if relation == 'has_object': pass`` was removed --
      ``'has_object'`` edges are still emitted as triples, exactly as before.

    Parameters
    ----------
    G : graph object exposing ``edges(data=True)``, ``edges(node, data=True)``
        and a ``nodes`` mapping from node id to attribute dict.

    Returns
    -------
    numpy.ndarray
        Array built from the list of rows.
    """
    def node_label(node):
        # Start from the node's first utterance.
        label = G.nodes[node].get('utterances')[0]
        # Only sub-actions get their objects attached; parent actions stay as they are.
        if G.nodes[node].get('type') == 'action' and label not in parent_actions:
            for _, neighbour, link in G.edges(node, data=True):
                if link.get('link_type') == 'has_object':
                    label = label + ' ' + G.nodes[neighbour].get('utterances')[0]
        return label

    triples = []
    for u, v, data in G.edges(data=True):
        relation = data.get('link_type')
        triples.append([node_label(u), relation, node_label(v)])
    return np.array(triples)

# Build the labeled (head, relation, tail) arrays for both splits.
train_triplets = get_triplets(train)
test_triplets = get_triplets(test)
print('Number of train triplets: ', len(train_triplets))
print('Number of test triplets: ',len(test_triplets))
print(test_triplets[:3])

train_triples_factory = TriplesFactory.from_labeled_triples(train_triplets)
# The test factory must reuse the training label->id mappings. Built on its own it
# would assign fresh ids, and the evaluator would then score test triples against
# embeddings that belong to different entities/relations.
# NOTE(review): test triples whose labels never occur in training cannot be mapped;
# recent PyKEEN versions drop them with a warning -- confirm for the installed version.
test_triples_factory = TriplesFactory.from_labeled_triples(
    test_triplets,
    entity_to_id=train_triples_factory.entity_to_id,
    relation_to_id=train_triples_factory.relation_to_id,
)

# The model is trained on the whole training set, but evaluation is restricted to the
# 'has_element' relation, because we want predictions for the parent actions.
results_1 = pipeline(
    training=train_triples_factory,
    testing=test_triples_factory,
    model='TransE',
    epochs=350,
    evaluation_relation_whitelist={'has_element'},
    random_seed=42,  # fixed seed so a re-run reproduces the same metrics
)
# The directory name now matches the 350 epochs trained above (it previously said 250).
results_1.save_to_directory('doctests/parentAction_transe_word+object_350epochs')

In [None]:
# Save the run under a directory name that matches the 350 epochs used for training.
results_1.save_to_directory('doctests/parentAction_transe_word+object_350epochs')

In [None]:
# Display the 'head' side / 'realistic' rank-type entry of the evaluation metrics dict.
results_1.metric_results.to_dict()['head']['realistic']