In [1]:
import os
import sys

import pandas as pd
import numpy as np

import networkx as nx

from basicmemnet import memnet
from basicmemnet import plot_graph

from pykeen.triples import TriplesFactory
from pykeen.pipeline import pipeline
from pykeen.pipeline import plot_losses

In [3]:
# One DSL wrapper per split; each one imports its own GML file.
md_train, md_test = memnet.DSL(), memnet.DSL()

md_train.import_gml("train_graph.gml")
md_test.import_gml("test_graph.gml")

# Graph objects handed to the triple-extraction helpers in the cells below.
test, train = md_test.get_graph(), md_train.get_graph()

### Id - link - id

In [None]:
### Prepare data - put it into appropriate format for Pykeen
def get_triplets(G):
    """Turn every edge of ``G`` into a ``[head, relation, tail]`` row.

    Head and tail are the edge's end nodes; the relation is the edge's
    ``'link_type'`` attribute (``None`` when the attribute is absent).
    The rows are returned as a NumPy array in ``G.edges(data=True)`` order.
    """
    rows = [
        [src, attrs.get('link_type'), dst]
        for src, dst, attrs in G.edges(data=True)
    ]
    return np.array(rows)

train_triplets = get_triplets(train)
test_triplets = get_triplets(test)
print('Number of train triplets: ', len(train_triplets))
print('Number of test triplets: ',len(test_triplets))
print(test_triplets[:3])

train_triples_factory = TriplesFactory.from_labeled_triples(train_triplets)
# Reuse the training label-to-id mappings. A test factory built on its own
# assigns its own ids, so its triples would not refer to the entities and
# relations the model was trained on and the evaluation would be meaningless.
# NOTE(review): test triples whose labels never occur in the training graph
# cannot be mapped; how PyKEEN treats them depends on the installed version.
test_triples_factory = TriplesFactory.from_labeled_triples(
    test_triplets,
    entity_to_id=train_triples_factory.entity_to_id,
    relation_to_id=train_triples_factory.relation_to_id,
)

# The model is trained on every triple of the training graph, but evaluation
# is restricted to the 'has_next' relation, because this notebook targets
# next-action prediction.
results = pipeline(
    training=train_triples_factory,
    testing=test_triples_factory,
    model='TransE',
    epochs=350,
    evaluation_relation_whitelist={'has_next'},
)
results.save_to_directory('doctests/nextAction_transe_id_350epochs')

In [None]:
# Display the evaluation metrics stored under the 'tail' -> 'realistic' keys.
(
    results
    .metric_results
    .to_dict()['tail']['realistic']
)

### Word - link - word

In [None]:
def get_triplets(G):
    """Turn every edge of ``G`` into a row of utterances joined by its link type.

    Head and tail are the first entry of the ``'utterances'`` attribute of
    the edge's source and target node; the relation is the edge's
    ``'link_type'`` attribute. The rows are returned as a NumPy array.
    """
    def first_utterance(node):
        # First element of the node's 'utterances' attribute.
        return G.nodes[node].get('utterances')[0]

    rows = []
    for src, dst, attrs in G.edges(data=True):
        link = attrs.get('link_type')
        rows.append([first_utterance(src), link, first_utterance(dst)])
    return np.array(rows)

train_triplets = get_triplets(train)
test_triplets = get_triplets(test)
print('Number of train triplets: ', len(train_triplets))
print('Number of test triplets: ',len(test_triplets))
print(test_triplets[:3])

train_triples_factory = TriplesFactory.from_labeled_triples(train_triplets)
# Reuse the training label-to-id mappings. A test factory built on its own
# assigns its own ids, so its triples would not refer to the entities and
# relations the model was trained on and the evaluation would be meaningless.
# NOTE(review): test triples whose labels never occur in the training graph
# cannot be mapped; how PyKEEN treats them depends on the installed version.
test_triples_factory = TriplesFactory.from_labeled_triples(
    test_triplets,
    entity_to_id=train_triples_factory.entity_to_id,
    relation_to_id=train_triples_factory.relation_to_id,
)

# The model is trained on every triple of the training graph, but evaluation
# is restricted to the 'has_next' relation, because this notebook targets
# next-action prediction.
results_1 = pipeline(
    training=train_triples_factory,
    testing=test_triples_factory,
    model='TransE',
    epochs=350,
    evaluation_relation_whitelist={'has_next'},
)
results_1.save_to_directory('doctests/nextAction_transe_word_350epochs')

In [None]:
# Display the evaluation metrics stored under the 'tail' -> 'realistic' keys.
(
    results_1
    .metric_results
    .to_dict()['tail']['realistic']
)

### 1. RotatE (id triples, then word triples)

In [None]:
### Prepare data - put it into appropriate format for Pykeen
def get_triplets(G):
    """Return the edges of ``G`` as an array of ``[head, relation, tail]`` rows.

    Head and tail are the node identifiers of each edge; the relation is the
    value of the edge's ``'link_type'`` attribute, or ``None`` if it is unset.
    """
    def as_row(edge):
        # Each item yielded by edges(data=True) is (source, target, attributes).
        source, target, attributes = edge
        return [source, attributes.get('link_type'), target]

    return np.array(list(map(as_row, G.edges(data=True))))

train_triplets = get_triplets(train)
test_triplets = get_triplets(test)
print('Number of train triplets: ', len(train_triplets))
print('Number of test triplets: ',len(test_triplets))
print(test_triplets[:3])

train_triples_factory = TriplesFactory.from_labeled_triples(train_triplets)
# Reuse the training label-to-id mappings. A test factory built on its own
# assigns its own ids, so its triples would not refer to the entities and
# relations the model was trained on and the evaluation would be meaningless.
# NOTE(review): test triples whose labels never occur in the training graph
# cannot be mapped; how PyKEEN treats them depends on the installed version.
test_triples_factory = TriplesFactory.from_labeled_triples(
    test_triplets,
    entity_to_id=train_triples_factory.entity_to_id,
    relation_to_id=train_triples_factory.relation_to_id,
)

# The model is trained on every triple of the training graph, but evaluation
# is restricted to the 'has_next' relation, because this notebook targets
# next-action prediction.
results = pipeline(
    training=train_triples_factory,
    testing=test_triples_factory,
    model='RotatE',
    epochs=350,
    evaluation_relation_whitelist={'has_next'},
)
results.save_to_directory('doctests/nextAction_RotatE_id_350epochs')

In [None]:
# Display the evaluation metrics stored under the 'tail' -> 'realistic' keys.
(
    results
    .metric_results
    .to_dict()['tail']['realistic']
)

In [None]:
def get_triplets(G):
    """Return the edges of ``G`` as ``[utterance, relation, utterance]`` rows.

    Each node is represented by the first element of its ``'utterances'``
    attribute; the relation is the edge's ``'link_type'`` attribute.
    The result is a NumPy array with one row per edge.
    """
    node_attrs = G.nodes
    rows = []
    for source, target, attributes in G.edges(data=True):
        link = attributes.get('link_type')
        source_text = node_attrs[source].get('utterances')[0]
        target_text = node_attrs[target].get('utterances')[0]
        rows.append([source_text, link, target_text])
    return np.array(rows)

train_triplets = get_triplets(train)
test_triplets = get_triplets(test)
print('Number of train triplets: ', len(train_triplets))
print('Number of test triplets: ',len(test_triplets))
print(test_triplets[:3])

train_triples_factory = TriplesFactory.from_labeled_triples(train_triplets)
# Reuse the training label-to-id mappings. A test factory built on its own
# assigns its own ids, so its triples would not refer to the entities and
# relations the model was trained on and the evaluation would be meaningless.
# NOTE(review): test triples whose labels never occur in the training graph
# cannot be mapped; how PyKEEN treats them depends on the installed version.
test_triples_factory = TriplesFactory.from_labeled_triples(
    test_triplets,
    entity_to_id=train_triples_factory.entity_to_id,
    relation_to_id=train_triples_factory.relation_to_id,
)

# The model is trained on every triple of the training graph, but evaluation
# is restricted to the 'has_next' relation, because this notebook targets
# next-action prediction.
results_1 = pipeline(
    training=train_triples_factory,
    testing=test_triples_factory,
    model='RotatE',
    epochs=350,
    evaluation_relation_whitelist={'has_next'},
)
results_1.save_to_directory('doctests/nextAction_RotatE_word_350epochs')

In [None]:
# The word-level RotatE run from the previous cell is stored in `results_1`;
# `results` still holds the earlier id-level run, so it must not be shown here.
results_1.metric_results.to_dict()['tail']['realistic']

### 2. TransR (id triples, then word triples)

In [None]:
### Prepare data - put it into appropriate format for Pykeen
def get_triplets(G):
    """Build the ``[head, relation, tail]`` array for all edges of ``G``.

    Nodes are used as head/tail as-is; the relation comes from the edge's
    ``'link_type'`` attribute and is ``None`` when that attribute is missing.
    """
    return np.array([
        [head, edge_attrs.get('link_type'), tail]
        for head, tail, edge_attrs in G.edges(data=True)
    ])

train_triplets = get_triplets(train)
test_triplets = get_triplets(test)
print('Number of train triplets: ', len(train_triplets))
print('Number of test triplets: ',len(test_triplets))
print(test_triplets[:3])

train_triples_factory = TriplesFactory.from_labeled_triples(train_triplets)
# Reuse the training label-to-id mappings. A test factory built on its own
# assigns its own ids, so its triples would not refer to the entities and
# relations the model was trained on and the evaluation would be meaningless.
# NOTE(review): test triples whose labels never occur in the training graph
# cannot be mapped; how PyKEEN treats them depends on the installed version.
test_triples_factory = TriplesFactory.from_labeled_triples(
    test_triplets,
    entity_to_id=train_triples_factory.entity_to_id,
    relation_to_id=train_triples_factory.relation_to_id,
)

# The model is trained on every triple of the training graph, but evaluation
# is restricted to the 'has_next' relation, because this notebook targets
# next-action prediction.
results = pipeline(
    training=train_triples_factory,
    testing=test_triples_factory,
    model='TransR',
    epochs=350,
    evaluation_relation_whitelist={'has_next'},
)
results.save_to_directory('doctests/nextAction_TransR_id_350epochs')

In [None]:
# Display the evaluation metrics stored under the 'tail' -> 'realistic' keys.
(
    results
    .metric_results
    .to_dict()['tail']['realistic']
)

In [None]:
def get_triplets(G):
    """Build ``[head utterance, relation, tail utterance]`` rows for ``G``.

    For each edge the source and target node are replaced by the first item
    of their ``'utterances'`` attribute, and the relation is taken from the
    edge's ``'link_type'`` attribute. Returns a NumPy array of the rows.
    """
    return np.array([
        [
            G.nodes[start].get('utterances')[0],
            props.get('link_type'),
            G.nodes[end].get('utterances')[0],
        ]
        for start, end, props in G.edges(data=True)
    ])

train_triplets = get_triplets(train)
test_triplets = get_triplets(test)
print('Number of train triplets: ', len(train_triplets))
print('Number of test triplets: ',len(test_triplets))
print(test_triplets[:3])

train_triples_factory = TriplesFactory.from_labeled_triples(train_triplets)
# Reuse the training label-to-id mappings. A test factory built on its own
# assigns its own ids, so its triples would not refer to the entities and
# relations the model was trained on and the evaluation would be meaningless.
# NOTE(review): test triples whose labels never occur in the training graph
# cannot be mapped; how PyKEEN treats them depends on the installed version.
test_triples_factory = TriplesFactory.from_labeled_triples(
    test_triplets,
    entity_to_id=train_triples_factory.entity_to_id,
    relation_to_id=train_triples_factory.relation_to_id,
)

# The model is trained on every triple of the training graph, but evaluation
# is restricted to the 'has_next' relation, because this notebook targets
# next-action prediction.
results_1 = pipeline(
    training=train_triples_factory,
    testing=test_triples_factory,
    model='TransR',
    epochs=350,
    evaluation_relation_whitelist={'has_next'},
)
results_1.save_to_directory('doctests/nextAction_TransR_word_350epochs')

In [None]:
# The word-level TransR run from the previous cell is stored in `results_1`;
# `results` still holds the earlier id-level run, so it must not be shown here.
results_1.metric_results.to_dict()['tail']['realistic']

### 3. RESCAL (id triples) and RotatE (id + utterance triples)

In [None]:
### Prepare data - put it into appropriate format for Pykeen
def get_triplets(G):
    """Convert the edges of ``G`` into ``[head, relation, tail]`` rows.

    The edge's end nodes become head and tail, and the edge's ``'link_type'``
    attribute (``None`` if absent) becomes the relation. The rows are packed
    into a NumPy array in the order the edges are iterated.
    """
    rows = []
    for edge in G.edges(data=True):
        start, end, props = edge
        rows.append([start, props.get('link_type'), end])
    return np.array(rows)

train_triplets = get_triplets(train)
test_triplets = get_triplets(test)
print('Number of train triplets: ', len(train_triplets))
print('Number of test triplets: ',len(test_triplets))
print(test_triplets[:3])

train_triples_factory = TriplesFactory.from_labeled_triples(train_triplets)
# Reuse the training label-to-id mappings. A test factory built on its own
# assigns its own ids, so its triples would not refer to the entities and
# relations the model was trained on and the evaluation would be meaningless.
# NOTE(review): test triples whose labels never occur in the training graph
# cannot be mapped; how PyKEEN treats them depends on the installed version.
test_triples_factory = TriplesFactory.from_labeled_triples(
    test_triplets,
    entity_to_id=train_triples_factory.entity_to_id,
    relation_to_id=train_triples_factory.relation_to_id,
)

# The model is trained on every triple of the training graph, but evaluation
# is restricted to the 'has_next' relation, because this notebook targets
# next-action prediction.
results = pipeline(
    training=train_triples_factory,
    testing=test_triples_factory,
    model='RESCAL',
    epochs=350,
    evaluation_relation_whitelist={'has_next'},
)
results.save_to_directory('doctests/nextAction_RESCAL_id_350epochs')

In [None]:
# Display the evaluation metrics stored under the 'tail' -> 'realistic' keys.
(
    results
    .metric_results
    .to_dict()['tail']['realistic']
)

In [None]:
### Prepare data - put it into appropriate format for Pykeen
def get_triplets(G):
    """Build triples that connect node ids to each other and to their utterances.

    For every edge ``(u, v)`` of ``G`` one ``[u, link_type, v]`` row is
    emitted, where ``link_type`` is the edge's ``'link_type'`` attribute.
    Every node that takes part in an edge also gets a
    ``[node, 'has_utterance', utterance]`` row, where ``utterance`` is the
    first element of the node's ``'utterances'`` attribute.

    The ``'has_utterance'`` row is emitted only the first time a node is
    encountered. Emitting it for both endpoints of every edge would repeat
    the identical triple once per incident edge, inflating the triple count
    and over-weighting high-degree nodes during training.

    Returns:
        NumPy array with one ``[head, relation, tail]`` row per triple.
    """
    triples = []
    described = set()  # nodes whose 'has_utterance' row was already emitted
    for u, v, data in G.edges(data=True):
        triples.append([u, data.get('link_type'), v])

        for node in (u, v):
            if node in described:
                continue
            described.add(node)
            utterance = G.nodes[node].get('utterances')[0]
            triples.append([node, 'has_utterance', utterance])
    return np.array(triples)

train_triplets = get_triplets(train)
test_triplets = get_triplets(test)
print('Number of train triplets: ', len(train_triplets))
print('Number of test triplets: ',len(test_triplets))
print(test_triplets[:3])

train_triples_factory = TriplesFactory.from_labeled_triples(train_triplets)
# Reuse the training label-to-id mappings. A test factory built on its own
# assigns its own ids, so its triples would not refer to the entities and
# relations the model was trained on and the evaluation would be meaningless.
# NOTE(review): test triples whose labels never occur in the training graph
# cannot be mapped; how PyKEEN treats them depends on the installed version.
test_triples_factory = TriplesFactory.from_labeled_triples(
    test_triplets,
    entity_to_id=train_triples_factory.entity_to_id,
    relation_to_id=train_triples_factory.relation_to_id,
)

# The model is trained on every triple of the training graph (link triples
# and 'has_utterance' triples), but evaluation is restricted to the
# 'has_next' relation, because this notebook targets next-action prediction.
results = pipeline(
    training=train_triples_factory,
    testing=test_triples_factory,
    model='RotatE',
    epochs=350,
    evaluation_relation_whitelist={'has_next'},
)
results.save_to_directory('doctests/nextAction_RotatE_id_word_350epochs')

In [None]:
# Display the evaluation metrics stored under the 'tail' -> 'realistic' keys.
(
    results
    .metric_results
    .to_dict()['tail']['realistic']
)