In [2]:
import os
import sys

import pandas as pd
import numpy as np

import networkx as nx

from basicmemnet import memnet
from basicmemnet import plot_graph

from pykeen.triples import TriplesFactory, TriplesNumericLiteralsFactory
from pykeen.pipeline import pipeline
from pykeen.pipeline import plot_losses

In [3]:
# Two independent memnet DSL wrappers: one per GML export.
md_train, md_test = memnet.DSL(), memnet.DSL()

# Populate each wrapper from its file on disk (paths are relative to the notebook).
md_train.import_gml("train_graph.gml")
md_test.import_gml("test_graph.gml")

# Graph objects used by every experiment below; they are iterated with
# .edges(data=True) / .nodes(data=True) and combined with nx.union.
test, train = md_test.get_graph(), md_train.get_graph()

### Only 'has_element' triples in the test set

In [None]:
def get_triplets(G):
    """Convert every edge of ``G`` into a ``[head, relation, tail]`` triple.

    Args:
        G: Graph object whose ``edges(data=True)`` yields ``(u, v, attrs)``
            tuples, where ``attrs`` is the edge attribute dict.

    Returns:
        A list with one ``[u, link_type, v]`` list per edge, in iteration
        order. ``link_type`` is ``None`` for an edge without that attribute.
    """
    return [
        [source, attrs.get('link_type'), target]
        for source, target, attrs in G.edges(data=True)
    ]

def literal_to_id(G):
    """Give each distinct first utterance in ``G`` a consecutive integer id.

    Only the first entry of every node's ``'utterances'`` attribute is used;
    the node ``'type'`` attribute is not encoded.

    Args:
        G: Graph object whose ``nodes(data=True)`` yields ``(node, attrs)``
            pairs; every ``attrs`` must hold a non-empty ``'utterances'``
            sequence.

    Returns:
        Dict mapping utterance -> id, with ids ``0, 1, 2, ...`` handed out in
        order of first appearance.
    """
    ids_by_utterance = {}
    for _, attrs in G.nodes(data=True):
        first_utterance = attrs.get('utterances')[0]
        # len() is evaluated before the insert, so a new key receives the next
        # free id, while an already known key keeps the id it has.
        ids_by_utterance.setdefault(first_utterance, len(ids_by_utterance))
    return ids_by_utterance

def get_numeric_triples(G, literal_to_id):
    """Build one ``[node, 'utterance', literal id]`` row per node of ``G``.

    The previous version appended the identical row twice per node, doubling
    the array without adding information; each node now yields exactly one row.

    Args:
        G: Graph object whose ``nodes(data=True)`` yields ``(node, attrs)``
            pairs; every ``attrs`` must hold a non-empty ``'utterances'``
            sequence.
        literal_to_id: Dict mapping an utterance to its integer id.

    Returns:
        ``np.ndarray`` built from the list of rows, in node iteration order.
        An utterance missing from ``literal_to_id`` yields ``None`` in the
        third column, because the lookup uses ``dict.get``.
    """
    numeric_triples = [
        [node, 'utterance', literal_to_id.get(attrs.get('utterances')[0])]
        for node, attrs in G.nodes(data=True)
    ]
    return np.array(numeric_triples)

# Utterance vocabulary over both graphs, so every train and test node has a literal id.
literals_to_id = literal_to_id(nx.union(train, test))

train_triplets = get_triplets(train)
test_set = get_triplets(test)
test_triplets = []

# Hold out only the 'has_element' edges of the test graph for evaluation;
# every other test-graph edge is added to the training triples.
for triplet in test_set:
    if triplet[1] == 'has_element':
        test_triplets.append(triplet)
    else:
        train_triplets.append(triplet)

test_numeric_triples = get_numeric_triples(test, literals_to_id)
train_numeric_triples = get_numeric_triples(train, literals_to_id)
# NOTE(review): train_numeric_triples only covers nodes of the train graph, while
# train_triplets now also contains test-graph edges -- confirm whether the literals
# of those test-graph nodes should be handed to the training factory as well.

print('Number of train triplets: ', len(train_triplets))
print('Number of test triplets: ', len(test_triplets))
print(test_triplets[:3])

train_triples_factory = TriplesNumericLiteralsFactory.from_labeled_triples(
    np.array(train_triplets),
    numeric_triples=train_numeric_triples,
)
# The test factory must reuse the id mappings of the training factory. Without them
# it builds its own label -> id mapping, and the ids in the test triples no longer
# refer to the entities/relations the model was trained on.
# NOTE(review): with shared mappings PyKEEN is expected to exclude (with a warning)
# test triples whose entities or relations never occur in the training triples --
# check the reported number of evaluation triples.
test_triples_factory = TriplesNumericLiteralsFactory.from_labeled_triples(
    np.array(test_triplets),
    numeric_triples=test_numeric_triples,
    entity_to_id=train_triples_factory.entity_to_id,
    relation_to_id=train_triples_factory.relation_to_id,
)

# The model is trained on the train graph plus the non-'has_element' edges of the test
# graph. Evaluation uses the held-out 'has_element' triples only, because we want
# predictions for the parent actions. No evaluation_relation_whitelist is needed here:
# the test factory contains nothing but 'has_element' triples.
results = pipeline(
    training=train_triples_factory,
    testing=test_triples_factory,
    model='ComplExLiteral',
    epochs=350,
    random_seed=42,  # fixed seed so the reported metrics can be reproduced
)
results.save_to_directory('doctests/parentAction_ComplExLiteral_hasElement_350epochs')

In [None]:
# Show the 'head' / 'realistic' slice of the evaluation metrics of the run above.
results.metric_results.to_dict()['head']['realistic']

In [None]:
# Preview only: displaying the whole collection bloats the saved notebook output.
test_triplets[:10]

In [None]:
# Preview only: displaying the whole collection bloats the saved notebook output.
train_triplets[:10]

### Id - link - id

In [None]:
### Prepare data - put it into appropriate format for Pykeen
# Utterance labels of the parent actions. get_triplets uses this list to decide
# which nodes are written into the triples by label instead of by node id.
parent_actions = [
    'task_1_k_cooking',
    'task_2_k_cooking_with_bowls',
    'task_3_k_pouring',
    'task_4_k_wiping',
    'task_5_k_cereals',
    'task_6_w_hard_drive',
    'task_7_w_free_hard_drive',
    'task_8_w_hammering',
    'task_9_w_sawing',
]

def get_triplets(G, parent_action_labels=None):
    """Convert the edges of ``G`` into triples, naming parent actions by label.

    For every edge, the first utterance of each endpoint is looked up. An
    endpoint whose first utterance is one of the parent-action labels is
    written into the triple as that label; any other endpoint is written as
    its node id.

    Args:
        G: Graph object providing ``edges(data=True)`` and ``nodes[node]``
            (the node attribute dict); every node must hold a non-empty
            ``'utterances'`` sequence.
        parent_action_labels: Collection of labels treated as parent actions.
            Defaults to the notebook-level ``parent_actions`` list, which is
            what the function used unconditionally before.

    Returns:
        ``np.ndarray`` built from the ``[head, link_type, tail]`` rows, in
        edge iteration order.
    """
    if parent_action_labels is None:
        # Backward-compatible default: the list defined at notebook level.
        parent_action_labels = parent_actions

    triples = []
    for u, v, data in G.edges(data=True):
        head_label = G.nodes[u].get('utterances')[0]
        tail_label = G.nodes[v].get('utterances')[0]
        head = head_label if head_label in parent_action_labels else u
        tail = tail_label if tail_label in parent_action_labels else v
        triples.append([head, data.get('link_type'), tail])
    return np.array(triples)

def literal_to_id(G):
    """Map every distinct first utterance of ``G`` to a dense integer id.

    Only the first entry of each node's ``'utterances'`` attribute is
    considered; the node ``'type'`` attribute is not part of the vocabulary.

    Args:
        G: Graph object whose ``nodes(data=True)`` yields ``(node, attrs)``
            pairs; every ``attrs`` must hold a non-empty ``'utterances'``
            sequence.

    Returns:
        Dict utterance -> id; ids start at 0 and follow first-seen order.
    """
    first_utterances = (attrs.get('utterances')[0] for _, attrs in G.nodes(data=True))
    # dict.fromkeys drops repeats while keeping the order of first appearance.
    distinct_in_order = dict.fromkeys(first_utterances)
    return {utterance: index for index, utterance in enumerate(distinct_in_order)}

def get_numeric_triples(G, literal_to_id):
    """Return the ``[node, 'utterance', literal id]`` rows for all nodes of ``G``.

    Each node contributes exactly one row. The earlier version appended the
    same row twice per node, which only duplicated data.

    Args:
        G: Graph object whose ``nodes(data=True)`` yields ``(node, attrs)``
            pairs; every ``attrs`` must hold a non-empty ``'utterances'``
            sequence.
        literal_to_id: Dict mapping an utterance to its integer id.

    Returns:
        ``np.ndarray`` built from the rows, in node iteration order. The id
        column is ``None`` for an utterance that is not a key of
        ``literal_to_id`` (the lookup uses ``dict.get``).
    """
    numeric_triples = []
    for node, attrs in G.nodes(data=True):
        first_utterance = attrs.get('utterances')[0]
        numeric_triples.append([node, 'utterance', literal_to_id.get(first_utterance)])
    return np.array(numeric_triples)

# Utterance vocabulary over both graphs. Building it from `train` alone leaves test
# utterances that never occur in training without an id (the lookup then yields None).
# The result is bound to its own name so the literal_to_id function is not overwritten.
literals_to_id = literal_to_id(nx.union(train, test))

train_triplets = get_triplets(train)
test_triplets = get_triplets(test)
train_numeric_triples = get_numeric_triples(train, literals_to_id)
test_numeric_triples = get_numeric_triples(test, literals_to_id)

print('Number of train triplets: ', len(train_triplets))
print('Number of test triplets: ', len(test_triplets))
print(test_triplets[:3])

train_triples_factory = TriplesNumericLiteralsFactory.from_labeled_triples(
    train_triplets,
    numeric_triples=train_numeric_triples,
)
# The test factory must reuse the id mappings of the training factory. Without them
# it builds its own label -> id mapping, and the ids in the test triples no longer
# refer to the entities/relations the model was trained on.
# NOTE(review): with shared mappings PyKEEN is expected to exclude (with a warning)
# test triples whose entities or relations never occur in the training triples --
# verify that the nodes of the test graph are actually covered by training.
test_triples_factory = TriplesNumericLiteralsFactory.from_labeled_triples(
    test_triplets,
    numeric_triples=test_numeric_triples,
    entity_to_id=train_triples_factory.entity_to_id,
    relation_to_id=train_triples_factory.relation_to_id,
)

# The model is trained on all triples of the train graph; evaluation is restricted to
# the 'has_element' link, because we want predictions for the parent actions.
results = pipeline(
    training=train_triples_factory,
    testing=test_triples_factory,
    model='ComplExLiteral',
    epochs=350,
    random_seed=42,  # fixed seed so the reported metrics can be reproduced
    evaluation_relation_whitelist={'has_element'},
)
results.save_to_directory('doctests/parentAction_ComplExLiteral_350epochs')

In [None]:
# Show the 'head' / 'realistic' slice of the evaluation metrics of the run above.
results.metric_results.to_dict()['head']['realistic']

### 1. DistMultLiteral

In [None]:
### Prepare data - put it into appropriate format for Pykeen
def get_triplets(G):
    """Return the edges of ``G`` as an array of ``[head, relation, tail]`` rows.

    Args:
        G: Graph object whose ``edges(data=True)`` yields ``(u, v, attrs)``
            tuples, where ``attrs`` is the edge attribute dict.

    Returns:
        ``np.ndarray`` built from one ``[u, link_type, v]`` row per edge, in
        iteration order.
    """
    rows = [
        [source, attrs.get('link_type'), target]
        for source, target, attrs in G.edges(data=True)
    ]
    return np.array(rows)

def literal_to_id(G):
    """Number the distinct first utterances of ``G`` in first-seen order.

    Args:
        G: Graph object whose ``nodes(data=True)`` yields ``(node, attrs)``
            pairs; every ``attrs`` must hold a non-empty ``'utterances'``
            sequence.

    Returns:
        Dict mapping each distinct first utterance to an id; ids are
        ``0, 1, 2, ...`` in the order the utterances are first met.
    """
    mapping = {}
    next_id = 0
    for _, attrs in G.nodes(data=True):
        key = attrs.get('utterances')[0]
        if key in mapping:
            continue  # already numbered, keep its id
        mapping[key] = next_id
        next_id += 1
    return mapping

def get_numeric_triples(G, literal_to_id):
    """Build one ``[node, 'utterance', literal id]`` row per node of ``G``.

    Args:
        G: Graph object whose ``nodes(data=True)`` yields ``(node, attrs)``
            pairs; every ``attrs`` must hold a non-empty ``'utterances'``
            sequence.
        literal_to_id: Dict mapping an utterance to its integer id.

    Returns:
        ``np.ndarray`` built from the rows, in node iteration order. The id
        is ``None`` when the utterance is not a key of ``literal_to_id``.
    """
    rows = [
        [node, 'utterance', literal_to_id.get(attrs.get('utterances')[0])]
        for node, attrs in G.nodes(data=True)
    ]
    return np.array(rows)

# No data preparation in this cell: train_triples_factory and test_triples_factory are
# the objects built in an earlier cell, so this model is trained and evaluated on the
# same data as the run that created them.

# The model is trained on the training factory; evaluation is restricted to the
# 'has_element' link, because we want predictions for the parent actions.
results = pipeline(
    training=train_triples_factory,
    testing=test_triples_factory,
    model='DistMultLiteral',
    epochs=350,
    random_seed=42,  # fixed seed so the reported metrics can be reproduced
    evaluation_relation_whitelist={'has_element'},
)
results.save_to_directory('doctests/parentAction_DistMultLiteral_350epochs')

In [None]:
# Show the 'head' / 'realistic' slice of the evaluation metrics of the run above.
results.metric_results.to_dict()['head']['realistic']

### 2. ComplEx

In [None]:
### Prepare data - put it into appropriate format for Pykeen
def get_triplets(G):
    """Collect ``[head, relation, tail]`` rows for all edges of ``G``.

    Args:
        G: Graph object whose ``edges(data=True)`` yields ``(u, v, attrs)``
            tuples, where ``attrs`` is the edge attribute dict.

    Returns:
        ``np.ndarray`` built from one ``[u, link_type, v]`` row per edge, in
        iteration order.
    """
    def as_row(edge):
        head, tail, attrs = edge
        return [head, attrs.get('link_type'), tail]

    return np.array(list(map(as_row, G.edges(data=True))))

def literal_to_id(G):
    """Build the utterance -> integer id vocabulary of ``G``.

    Args:
        G: Graph object whose ``nodes(data=True)`` yields ``(node, attrs)``
            pairs; every ``attrs`` must hold a non-empty ``'utterances'``
            sequence (only its first entry is used).

    Returns:
        Dict whose keys are the distinct first utterances and whose values
        are ids ``0, 1, 2, ...`` assigned in first-seen order.
    """
    vocabulary = {}
    for _, attrs in G.nodes(data=True):
        key = attrs.get('utterances')[0]
        # A known key is rewritten with its current id; a new key gets the
        # current size of the vocabulary, i.e. the next unused id.
        vocabulary[key] = vocabulary.get(key, len(vocabulary))
    return vocabulary

def get_numeric_triples(G, literal_to_id):
    """Return the ``[node, 'utterance', literal id]`` rows for the nodes of ``G``.

    Args:
        G: Graph object whose ``nodes(data=True)`` yields ``(node, attrs)``
            pairs; every ``attrs`` must hold a non-empty ``'utterances'``
            sequence.
        literal_to_id: Dict mapping an utterance to its integer id.

    Returns:
        ``np.ndarray`` built from one row per node, in node iteration order.
        The id is ``None`` when the utterance is not a key of
        ``literal_to_id``.
    """
    def as_row(node, attrs):
        first_utterance = attrs.get('utterances')[0]
        return [node, 'utterance', literal_to_id.get(first_utterance)]

    return np.array([as_row(*item) for item in G.nodes(data=True)])

# No data preparation in this cell: train_triples_factory and test_triples_factory are
# the objects built in an earlier cell, so this model is trained and evaluated on the
# same data as the run that created them.

# The model is trained on the training factory; evaluation is restricted to the
# 'has_element' link, because we want predictions for the parent actions.
results = pipeline(
    training=train_triples_factory,
    testing=test_triples_factory,
    model='ComplEx',
    epochs=350,
    random_seed=42,  # fixed seed so the reported metrics can be reproduced
    evaluation_relation_whitelist={'has_element'},
)
results.save_to_directory('doctests/parentAction_ComplEx_350epochs')

In [None]:
# Show the 'head' / 'realistic' slice of the evaluation metrics of the run above.
results.metric_results.to_dict()['head']['realistic']