In [72]:
import math

import numpy as np
import pandas as pd
from IPython.display import HTML

from stellargraph import datasets, utils
from stellargraph import StellarGraph
from stellargraph import IndexedArray
from stellargraph.mapper import KGTripleGenerator
from stellargraph.layer import DistMult

from tensorflow.keras import callbacks, optimizers, losses, metrics, regularizers, Model

import torch

In [84]:
# Training hyper-parameters shared by the cells below.
negative_samples = 2       # forwarded as `negative_samples` to KGTripleGenerator.flow
embedding_dimension = 100  # forwarded as `embedding_dimension` to DistMult
epochs = 30                # forwarded as `epochs` to Model.fit

In [59]:
def get_v(matrix, i, j):
    """Return the reciprocal of the mean diagonal-normalised cross score of ``i`` and ``j``.

    Two ratios are averaged:

    * ``matrix[j, i] / matrix[i, i]`` -- the (j, i) entry relative to i's diagonal entry
    * ``matrix[i, j] / matrix[j, j]`` -- the (i, j) entry relative to j's diagonal entry

    and the function returns ``1 / mean``. The value is the same for ``(i, j)``
    and ``(j, i)``, and is 1 when ``i == j`` (for a non-zero diagonal entry).

    NOTE(review): presumably ``matrix[a, b]`` is the score of model ``a`` on
    data ``b`` -- confirm against whatever produced the matrix.

    Args:
        matrix: 2-D array supporting ``matrix[row, col]`` indexing.
        i: index of the first node.
        j: index of the second node.

    Returns:
        The reciprocal of the averaged ratio.
    """
    ratio_vs_i = matrix[j, i] / matrix[i, i]
    ratio_vs_j = matrix[i, j] / matrix[j, j]
    mean_ratio = (ratio_vs_i + ratio_vs_j) / 2
    return 1 / mean_ratio

In [67]:
# Load the pairwise score matrix and keep only magnitudes (np.abs drops the sign).
matrix = np.abs(np.load('transfer_graph.npy'))

# One node name per (environment, index) pair: "Ant_0" ... "Spindra_99",
# i.e. 4 environments * 100 indices = 400 names. Row/column k of `matrix`
# is addressed by symbols[k] when the edge lists are built.
envs = ['Ant', 'Crawler', 'Dog', 'Spindra']
symbols = [f"{env}_{i}" for env in envs for i in range(100)]

In [90]:
# --- Build thresholded edge lists from the pairwise matrix -------------------
# Every ordered pair (i, j) -- including i == j -- is scored with get_v and then
# routed into one of two groups:
#   * w >  thresh -> source / target / weight / edge_label     (edges kept for the graph)
#   * w <= thresh -> t_source / t_target / t_weight / t_label  (all remaining pairs)

# Width of one label bucket: a weight w gets the integer label ceil(w / normalising_edge).
normalising_edge = 0.15

n_nodes = len(matrix)

# Score each ordered pair exactly once, in row-major order (i outer, j inner),
# and reuse the values for both the threshold and the edge lists.
pair_weights = [get_v(matrix, i, j) for i in range(n_nodes) for j in range(n_nodes)]

# Pairs scoring above the mean of all pair weights are kept as edges.
# Other candidate thresholds left by the author: 0, the median,
# mean + one standard deviation, and 1.
thresh = np.mean(pair_weights)

source, target, weight, edge_label = [], [], [], []
t_source, t_target, t_weight, t_label = [], [], [], []

for i in range(n_nodes):
    s = symbols[i]
    row_weights = pair_weights[i * n_nodes:(i + 1) * n_nodes]
    for j, w in enumerate(row_weights):
        t = symbols[j]
        bucket = math.ceil(w / normalising_edge)
        if w > thresh:
            source.append(s)
            target.append(t)
            weight.append(w)
            edge_label.append(bucket)
        else:
            t_source.append(s)
            t_target.append(t)
            t_weight.append(w)
            t_label.append(bucket)

# Set of bucket labels that occur among the kept edges.
distinct_labels = set(edge_label)

# Integer bucket label for each kept edge; shared by the two frames that need it.
kept_labels = [math.ceil(wt / normalising_edge) for wt in weight]

# Kept edges (weight above the threshold) with their weight and bucket label.
weighted_edges = pd.DataFrame(
    {"source": source, "target": target, "weight": weight, "label": kept_labels}
)

# Pairs whose weight was at or below the threshold.
test_edges = pd.DataFrame(
    {"source": t_source, "target": t_target, "weight": t_weight, "label": t_label}
)

# (source, target, label) triples of the kept edges -- same rows as
# weighted_edges, without the weight column.
G_train = pd.DataFrame({"source": source, "target": target, "label": kept_labels})

# Graph over the kept edges; the "label" column supplies each edge's type.
G = StellarGraph(edges=weighted_edges, edge_type_column="label")

print(G.info())

StellarGraph: Undirected multigraph
 Nodes: 400, Edges: 42370

 Node types:
  default: [400]
    Features: none
    Edge types: default-4->default, default-6->default, default-7->default, default-8->default

 Edge types:
    default-7->default: [39038]
        Weights: range=[0.900211, 1.04994], mean=0.98649, std=0.0257155
        Features: none
    default-4->default: [2370]
        Weights: range=[0.49901, 0.558801], mean=0.513076, std=0.0113365
        Features: none
    default-8->default: [552]
        Weights: range=[1.05005, 1.13567], mean=1.065, std=0.0152321
        Features: none
    default-6->default: [410]
        Weights: range=[0.822431, 0.899924], mean=0.880018, std=0.0172948
        Features: none


In [76]:
# Triple generator over G. batch_size is the number of edges per batch, so with
# batch_size=10 an epoch is roughly len(edges) / 10 batches -- not ~10 batches.
G_gen = KGTripleGenerator(G, batch_size=10)

# DistMult scoring model with lightly L2-regularised embeddings.
G_distmult = DistMult(
    G_gen,
    embedding_dimension=embedding_dimension,
    embeddings_regularizer=regularizers.l2(1e-7),
)

# Wrap the DistMult input/output tensors in a Keras model.
G_inp, G_out = G_distmult.in_out_tensors()
G_model = Model(inputs=G_inp, outputs=G_out)

G_model.compile(
    # `learning_rate` is the supported argument name; `lr` is a deprecated alias.
    optimizer=optimizers.Adam(learning_rate=0.001),
    # The loss is told the outputs are logits (from_logits=True), so the accuracy
    # threshold of 0.0 on the logit corresponds to a probability of 0.5.
    loss=losses.BinaryCrossentropy(from_logits=True),
    metrics=[metrics.BinaryAccuracy(threshold=0.0)],
)

In [82]:
# Pick the torch device: CUDA when available, otherwise the CPU.
# NOTE(review): none of the visible cells use `device`; the DistMult model in
# this notebook is built with tensorflow.keras. Confirm this cell is still needed.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [77]:
# Training sequence over the kept edges, requesting shuffling and
# `negative_samples` negatives (see the KGTripleGenerator.flow documentation).
G_train_gen = G_gen.flow(
    weighted_edges,
    negative_samples=negative_samples,
    shuffle=True,
)


In [85]:
# Stop early when the training loss stops improving.
# A fixed patience larger than `epochs` can never trigger, so the patience is
# scaled with `epochs` instead (one sixth of the run, at least 1 epoch;
# epochs=300 yields a patience of 50).
G_es = callbacks.EarlyStopping(monitor="loss", patience=max(1, epochs // 6))

# Train on the shuffled training sequence and keep the Keras History object.
G_history = G_model.fit(
    G_train_gen,
    epochs=epochs,
    callbacks=[G_es],
    verbose=1,
)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


In [95]:
# Rank edges against all nodes of G, producing raw and filtered ranks.
# NOTE(review): the edges ranked here are `weighted_edges`, the same frame the
# model was trained on, so these ranks are not a held-out evaluation.
G_rank_gen = G_gen.flow(weighted_edges)
G_raw_ranks, G_filtered_ranks = G_distmult.rank_edges_against_all_nodes(G_rank_gen, G)

In [97]:
# Show the first ten rows of the filtered ranks (two rank values per edge).
G_filtered_ranks[:10]

array([[2, 2],
       [1, 1],
       [1, 1],
       [2, 2],
       [2, 1],
       [1, 1],
       [2, 1],
       [1, 1],
       [2, 1],
       [1, 1]])

In [12]:
# Load the WN18 dataset: render its description, then unpack the graph and
# the train / test / validation splits returned by load().
wn18 = datasets.WN18()
wn18_description = HTML(wn18.description)
display(wn18_description)
wn18_graph, wn18_train, wn18_test, wn18_valid = wn18.load()

In [86]:
# Peek at the first five rows of the WN18 test split (source, label, target).
wn18_test[:5]

Unnamed: 0,source,label,target
0,6845599,_member_of_domain_usage,3754979
1,789448,_verb_group,1062739
2,10217831,_hyponym,10682169
3,8860123,_member_of_domain_region,5688486
4,2233096,_member_meronym,2233338


In [43]:
# Size the batches so that one pass over the training split is about 10 batches.
wn18_batch_size = len(wn18_train) // 10
wn18_gen = KGTripleGenerator(wn18_graph, batch_size=wn18_batch_size)

In [44]:
# Training sequence over the WN18 training split, requesting shuffling and
# `negative_samples` negatives (see the KGTripleGenerator.flow documentation).
wn18_train_gen = wn18_gen.flow(
    wn18_train,
    negative_samples=negative_samples,
    shuffle=True,
)

In [53]:
# Print the text summary of the WN18 graph (node/edge counts and edge types).
print(wn18_graph.info())

StellarDiGraph: Directed multigraph
 Nodes: 40943, Edges: 151442

 Node types:
  default: [40943]
    Features: none
    Edge types: default-_also_see->default, default-_derivationally_related_form->default, default-_has_part->default, default-_hypernym->default, default-_hyponym->default, ... (13 more)

 Edge types:
    default-_hyponym->default: [37221]
        Weights: all 1 (default)
        Features: none
    default-_hypernym->default: [37221]
        Weights: all 1 (default)
        Features: none
    default-_derivationally_related_form->default: [31867]
        Weights: all 1 (default)
        Features: none
    default-_member_meronym->default: [7928]
        Weights: all 1 (default)
        Features: none
    default-_member_holonym->default: [7928]
        Weights: all 1 (default)
        Features: none
    default-_part_of->default: [5148]
        Weights: all 1 (default)
        Features: none
    default-_has_part->default: [5142]
        Weights: all 1 (default)
       

In [54]:
# Print the text summary of G.
# NOTE(review): the output recorded under this cell (a single "default" edge type
# with a length-1 feature vector) does not match the G.info() output recorded
# where G is built from weighted_edges (edge types 4/6/7/8). It looks like it
# comes from a different definition of G -- re-check after Restart & Run All.
print(G.info())

StellarGraph: Undirected multigraph
 Nodes: 400, Edges: 42370

 Node types:
  default: [400]
    Features: none
    Edge types: default-default->default

 Edge types:
    default-default->default: [42370]
        Weights: range=[0.49901, 1.13567], mean=0.960002, std=0.112461
        Features: float32 vector, length 1
