# Query Model Repository for best node embeddings model

In [75]:
from hops import model
from hops.model import Metric
# Name under which the model is registered in the model repository.
MODEL_NAME = "NodeEmbeddings"
# Metric key used to rank the registered versions of that model.
EVALUATION_METRIC = "accuracy"

In [76]:
# Ask the model repository for the version of MODEL_NAME that ranks best on
# EVALUATION_METRIC; Metric.MAX is passed as the optimisation direction.
best_model = model.get_best_model(
    MODEL_NAME,
    EVALUATION_METRIC,
    Metric.MAX,
)

In [77]:
# Show the id of the experiment that produced the selected model; the same
# value is used further down to build the checkpoint directory path.
best_model["experimentId"]

'application_1607211657348_0055_1'

# Send Prediction Requests to the Served Model using Hopsworks REST API

In [78]:
import tensorflow as tf
from tensorflow import keras  

import pandas as pd
from stellargraph import StellarDiGraph
from stellargraph.mapper import Node2VecLinkGenerator, Node2VecNodeGenerator
from stellargraph.data import UnsupervisedSampler, BiasedRandomWalk
from stellargraph.layer import Node2Vec
import pydoop.hdfs as pydoop
import hsfs

In [79]:
# Open an hsfs connection (called without arguments here).
connection = hsfs.connection()

# Handle to the feature store returned by default for this connection;
# every training-dataset lookup below goes through it.
fs = connection.get_feature_store()

Connected. Call `.close()` to terminate connection gracefully.

In [80]:
# Look up version 1 of the node and edge training datasets by name.
node_td, edge_td = (
    fs.get_training_dataset("node_td", 1),
    fs.get_training_dataset("edges_td", 1),
)

In [81]:
# Materialise both training datasets as pandas DataFrames.
node_pdf = node_td.read().toPandas()

# The transaction timestamp column is dropped before the edges are converted,
# so it never reaches the pandas frame that is handed to the graph.
edge_sdf = edge_td.read()
edge_pdf = edge_sdf.drop("tran_timestamp").toPandas()

In [82]:
# Node feature table for the graph: one row per node, indexed by the node id.
#
# set_index('id') relabels each row with its own id. The previous form,
# pd.DataFrame(<frame>, index=node_pdf['id']), instead *reindexed* the frame by
# label, which only yields the right features when every id happens to equal
# the row's positional label; otherwise rows come back as NaN or attached to
# the wrong id.
node_feature_columns = ['tx_behavior_id', 'prior_sar', 'initial_deposit']
node_data = node_pdf.set_index('id')[node_feature_columns]

print('Defining StellarDiGraph')
# Directed graph: node features come from node_data, edges from edge_pdf, and
# the edge type is read from the "tx_type" column of the edge frame.
G = StellarDiGraph(
    node_data,
    edges=edge_pdf,
    edge_type_column="tx_type",
)


Defining StellarDiGraph

In [83]:
# Random-walk and model hyperparameters.
walk_number = 2  # passed to BiasedRandomWalk as n
walk_length = 2  # passed to BiasedRandomWalk as length
batch_size = 1   # batch size given to the Node2Vec generators
emb_size = 16    # size of each node embedding vector

# Extracting node embeddings
walker = BiasedRandomWalk(
    G,
    n=walk_number,
    length=walk_length,
    p=0.5,  # defines probability, 1/p, of returning to source node
    q=2.0,  # defines probability, 1/q, for moving to a node away from the source node
)

# Sampler over every node of G driven by the walker above.
# NOTE(review): not referenced by any other cell visible in this chunk.
unsupervised_samples = UnsupervisedSampler(
    G,
    nodes=list(G.nodes()),
    walker=walker,
)
generator = Node2VecLinkGenerator(G, batch_size)

# Build a Node2Vec model of the configured embedding size and expose its
# input/output tensors.
node2vec = Node2Vec(emb_size, generator=generator)
x_inp, x_out = node2vec.in_out_tensors()

# Only the first input/output tensor pair is wrapped into the Keras model
# (presumably the source-node branch, judging by the variable names).
x_inp_src, x_out_src = x_inp[0], x_out[0]
embedding_model = keras.Model(inputs=x_inp_src, outputs=x_out_src)

In [87]:
# Directory of the experiment that produced the best model; the experiment id
# comes from the model-repository lookup above.
checkpoint_dir = "hdfs:///Projects/amlsim2/Experiments/" + best_model['experimentId']

# latest_checkpoint returns None when the directory holds no checkpoint.
# Fail with an explicit message here instead of passing None to load_weights.
latest = tf.train.latest_checkpoint(checkpoint_dir)
if latest is None:
    raise FileNotFoundError("No TensorFlow checkpoint found in " + checkpoint_dir)

# Restore the trained weights into the rebuilt embedding model. Kept as the
# last expression so the returned load status is still shown as cell output.
embedding_model.load_weights(latest)

<tensorflow.python.training.tracking.util.CheckpointLoadStatus object at 0x7f50021bdd50>

In [88]:
# Run every node of the graph through the restored embedding model.
nodes = list(G.nodes())
node_generator = Node2VecNodeGenerator(G, batch_size)
node_gen = node_generator.flow(nodes)
node_embeddings = embedding_model.predict(node_gen)

In [89]:
# Preview the first predicted embedding as a plain Python list.
first_embedding = node_embeddings[0]
first_embedding.tolist()

[0.9172478914260864, -0.7969544529914856, 0.6742742657661438, -0.44710245728492737, -0.5235152840614319, 0.11767943948507309, -0.14475393295288086, 0.43419796228408813, -0.7660918831825256, -0.018454356119036674, 0.8042338490486145, -0.7543022632598877, -0.1988595575094223, 0.8307299017906189, -0.8374922871589661, -0.10913538932800293]