In [1]:
# import necessary stuff and python-wrapper of verse
import os
import pprint
import numpy as np
import pandas as pd
import networkx as nx
import tensorflow as tf
import pickle
from tensorflow.contrib.tensorboard.plugins import projector

from verse.python.wrapper import VERSE
from multi_class_classification import MultiClassClassification
from multi_label_classification import MultiLabelClassification
from clustering import Clustering
from link_prediction import LinkPrediction
from experiment import Experiment

  return f(*args, **kwds)


In [2]:
# Shared pretty printer for inspecting nested results: four spaces per
# nesting level, structures deeper than eight levels are elided.
pp = pprint.PrettyPrinter(depth=8, indent=4)

In [3]:
# configure telegram notifier bot
# Credentials are read from the environment instead of being hard-coded, so
# the bot token is never stored in (or shared with) the notebook. Export
# TELEGRAM_BOT_TOKEN and TELEGRAM_CHAT_ID before starting the kernel; both
# fall back to an empty string when unset.
# NOTE(review): the token that used to be committed here should be revoked.
my_telegram_config = {
    "telegram": {
        "token": os.environ.get("TELEGRAM_BOT_TOKEN", ""),
        "chat_id": os.environ.get("TELEGRAM_CHAT_ID", ""),
        "verbose": 1
    }
}

In [4]:
# define hyper-parameters
# n_hidden: embedding dimensionality; the flat float32 embedding files are
# reshaped to (num_of_nodes, n_hidden) further down.
n_hidden = 128
# When True, the projector cells create TensorFlow variables/checkpoints for
# the embeddings.
LEARN_EMBEDDINGS_PROJECTION = True
# When True, the node-label metadata file is (re)written to LOG_DIR.
EXPORT_NODE_LABEL_META_DATA = False

In [5]:
# read *.bin file with precomputed verse-ppr embeddings
# The file is read as a flat sequence of float32 values and viewed as a
# (num_of_nodes, n_hidden) matrix.
verse_ppr_embeddings_file_path = 'results/blogcatalog/blogcatalog_verse_embeddings.bin'
# Binary mode avoids any text-layer translation; the context manager closes
# the handle even if reading fails.
with open(verse_ppr_embeddings_file_path, "rb") as verse_ppr_embeddings_file:
    verse_ppr_embeddings_file_content = np.fromfile(verse_ppr_embeddings_file, dtype=np.float32)
# -1 lets numpy derive the row count; reshape raises ValueError when the
# number of values is not a multiple of n_hidden.
verse_ppr_embeddings = verse_ppr_embeddings_file_content.reshape((-1, n_hidden))
num_of_nodes = verse_ppr_embeddings.shape[0]

In [6]:
# read *.emb file with precomputed hete-verse embeddings
# The file is read as a flat sequence of float32 values and viewed as a
# (num_of_nodes, n_hidden) matrix.
hete_verse_embeddings_file_path = 'results/blogcatalog/blogcatalog_heteverse_embeddings_v1.emb'
# Binary mode avoids any text-layer translation; the context manager closes
# the handle even if reading fails.
with open(hete_verse_embeddings_file_path, "rb") as hete_verse_embeddings_file:
    hete_verse_embeddings_file_content = np.fromfile(hete_verse_embeddings_file, dtype=np.float32)
# -1 lets numpy derive the row count; reshape raises ValueError when the
# number of values is not a multiple of n_hidden.
hete_verse_embeddings = hete_verse_embeddings_file_content.reshape((-1, n_hidden))
num_of_nodes = hete_verse_embeddings.shape[0]

In [7]:
# sanity check: (rows, columns) of the verse-ppr embedding matrix
verse_ppr_embeddings.shape

(10351, 128)

In [8]:
# sanity check: (rows, columns) of the hete-verse embedding matrix
hete_verse_embeddings.shape

(10351, 128)

In [9]:
# locations of the BlogCatalog csv files, all relative to dataset_path
dataset_path = 'data/BlogCatalog-dataset/data/'
friend_edges_csv_path = '{}{}'.format(dataset_path, 'edges.csv')
group_edges_csv_path = '{}{}'.format(dataset_path, 'group-edges.csv')
groups_csv_path = '{}{}'.format(dataset_path, 'groups.csv')
bloggers_csv_path = '{}{}'.format(dataset_path, 'nodes.csv')

In [10]:
# store csv contents in dataframes
# header=None makes pandas treat the first row as data (columns are named
# 0, 1, ...); ids are forced to str so they can be prefixed later on.
two_id_columns_as_str = {0: str, 1: str}
one_id_column_as_str = {0: str}
friend_edges_df = pd.read_csv(friend_edges_csv_path, header=None, dtype=two_id_columns_as_str)
group_edges_df = pd.read_csv(group_edges_csv_path, header=None, dtype=two_id_columns_as_str)
groups_df = pd.read_csv(groups_csv_path, header=None, dtype=one_id_column_as_str)
bloggers_df = pd.read_csv(bloggers_csv_path, header=None, dtype=one_id_column_as_str)

In [11]:
# give bloggers and groups unique node-ids by prefixing the raw ids
# NOTE: this cell rewrites the dataframes it reads, so running it a second
# time without reloading the csv files prepends the prefixes again.
blogger_prefix = 'b'
group_prefix = 'g'

# blogger ids: node list, both endpoints of friend edges, column 0 of group edges
bloggers_df[0] = blogger_prefix + bloggers_df[0]
friend_edges_df = blogger_prefix + friend_edges_df
group_edges_df[0] = blogger_prefix + group_edges_df[0]

# group ids: node list and column 1 of group edges
groups_df[0] = group_prefix + groups_df[0]
group_edges_df[1] = group_prefix + group_edges_df[1]

In [12]:
# define networkx graph
# Starts out empty; blogger/group nodes and the two edge types are added
# in the cells below.
blog_catalog_graph = nx.Graph()

In [13]:
# values stored in the 'label' attribute of graph edges ...
IS_MEMBER_OF, IS_FRIEND_WITH = 'is_member_of', 'is_friend_with'
# ... and of graph nodes
BLOGGER, GROUP = 'blogger', 'group'

In [14]:
# add blogger nodes, then group nodes, reporting the running node count
# after each step
for id_column, kind_of_node in ((bloggers_df[0], BLOGGER), (groups_df[0], GROUP)):
    blog_catalog_graph.add_nodes_from(id_column.tolist(), label=kind_of_node)
    print("{} nodes in graph".format(blog_catalog_graph.number_of_nodes()))

10312 nodes in graph
10351 nodes in graph


In [15]:
# pair up column 0 and column 1 of each edge dataframe into
# (source, target) tuples
group_edges = list(zip(group_edges_df[0], group_edges_df[1]))
friend_edges = list(zip(friend_edges_df[0], friend_edges_df[1]))

In [16]:
# add (blogger)-[is_member_of]-(group) edges to graph and report its size
blog_catalog_graph.add_edges_from(group_edges, label=IS_MEMBER_OF)
edge_count = blog_catalog_graph.number_of_edges()
node_count = blog_catalog_graph.number_of_nodes()
print("{} edges in graph".format(edge_count))
print("{} nodes in graph".format(node_count))

14476 edges in graph
10351 nodes in graph


In [17]:
# add (blogger)-[is_friend_with]-(blogger) edges to graph and report its size
blog_catalog_graph.add_edges_from(friend_edges, label=IS_FRIEND_WITH)
print(
    "{} edges in graph".format(blog_catalog_graph.number_of_edges()),
    "{} nodes in graph".format(blog_catalog_graph.number_of_nodes()),
    sep="\n",
)

348459 edges in graph
10351 nodes in graph


In [18]:
# compute average degree over every node in the graph
degree_by_node = dict(blog_catalog_graph.degree())
node_degrees = np.fromiter(degree_by_node.values(), dtype=np.int64, count=len(degree_by_node))
avg_node_degree = node_degrees.mean()
print("The avg. node degree is {}".format(np.round(avg_node_degree, decimals=2)))

The avg. node degree is 67.33


In [19]:
# load id-to-node mapping of verse embeddings
# id_2_node is indexed with embedding row numbers further down and yields
# the graph node id for that row.
id2node_filepath = 'data/BlogCatalog-dataset/data/blogcatalog_mapping_ids_to_nodes.p'
# NOTE(security): pickle.load can execute arbitrary code; only load mapping
# files that were produced by a trusted run of this project.
# No empty-dict placeholder is assigned first: if loading fails, the name
# stays undefined instead of silently looking like an empty mapping.
with open(id2node_filepath, 'rb') as id_2_node_file:
    id_2_node = pickle.load(id_2_node_file)

In [20]:
# load node-to-id mapping of verse embeddings
# presumably the inverse of the id-to-node mapping - TODO confirm
node2id_filepath = 'data/BlogCatalog-dataset/data/blogcatalog_mapping_nodes_to_ids.p'
# NOTE(security): pickle.load can execute arbitrary code; only load mapping
# files that were produced by a trusted run of this project.
# No empty-dict placeholder is assigned first: if loading fails, the name
# stays undefined instead of silently looking like an empty mapping.
with open(node2id_filepath, 'rb') as node_2_id_file:
    node_2_id = pickle.load(node_2_id_file)

In [21]:
# define tensorflow log-directory
# Resolve the visualizations folder to a real path (this drops the trailing
# slash), take its parent and re-append the folder name so LOG_DIR ends
# with a separator.
resolved_visualizations_dir = os.path.realpath('results/blogcatalog/visualizations/')
LOG_DIR = os.path.dirname(resolved_visualizations_dir) + '/visualizations/'

In [22]:
# node label values (same values as the BLOGGER/GROUP constants assigned
# before the graph was built)
BLOGGER, GROUP = 'blogger', 'group'

In [23]:
# collect the 'label' attribute of every embedded node, in embedding row
# order (row index -> node id via id_2_node -> graph node attributes)
embedded_node_count = verse_ppr_embeddings.shape[0]
node_labels = []
for row_index in range(embedded_node_count):
    node_labels.append(blog_catalog_graph.nodes[id_2_node[row_index]]['label'])

In [24]:
# write node label list to file (one label per line, in embedding row order)
# The path is always defined because the projector cells reference it even
# when the export itself is skipped.
metadata_file_path = LOG_DIR + 'metadata_verse_node_labels.csv'

if EXPORT_NODE_LABEL_META_DATA:
    # the context manager closes the file even if a write fails
    with open(metadata_file_path, 'w') as metadata_file:
        for node_label in node_labels:
            metadata_file.write("{}\n".format(node_label))

In [None]:
# export verse-ppr embeddings for the tensorboard embedding projector
# (nothing is trained here: the precomputed matrix is wrapped in a variable,
# registered with the projector config and saved as a checkpoint)
if LEARN_EMBEDDINGS_PROJECTION:
    verse_ppr_viz_dir = LOG_DIR + 'verse_ppr_viz_models/'
    verse_ppr_embedding_var = tf.Variable(verse_ppr_embeddings, name='blogcatalog_verse_ppr_embeddings')

    with tf.Session() as sess:
        sess.run(verse_ppr_embedding_var.initializer)

        writer = tf.summary.FileWriter(verse_ppr_viz_dir, sess.graph)
        try:
            config = projector.ProjectorConfig()
            embedding = config.embeddings.add()
            embedding.tensor_name = verse_ppr_embedding_var.name
            # node labels shown next to each point in the projector
            embedding.metadata_path = metadata_file_path

            projector.visualize_embeddings(writer, config)

            saver_embed = tf.train.Saver([verse_ppr_embedding_var])
            # trailing 1 is the global step passed to Saver.save
            saver_embed.save(sess, verse_ppr_viz_dir + 'blogcatalog_verse_ppr_embeddings_viz.ckpt', 1)
        finally:
            # flush pending events and release the event file
            writer.close()

In [25]:
# export hete-verse embeddings for the tensorboard embedding projector
# (nothing is trained here: the precomputed matrix is wrapped in a variable,
# registered with the projector config and saved as a checkpoint)
if LEARN_EMBEDDINGS_PROJECTION:
    hete_verse_viz_dir = LOG_DIR + 'hete_verse_viz_models/'
    hete_verse_embedding_var = tf.Variable(hete_verse_embeddings, name='blogcatalog_hete_verse_embeddings')

    with tf.Session() as sess:
        sess.run(hete_verse_embedding_var.initializer)

        writer = tf.summary.FileWriter(hete_verse_viz_dir, sess.graph)
        try:
            config = projector.ProjectorConfig()
            embedding = config.embeddings.add()
            embedding.tensor_name = hete_verse_embedding_var.name
            # node labels shown next to each point in the projector
            embedding.metadata_path = metadata_file_path

            projector.visualize_embeddings(writer, config)

            saver_embed = tf.train.Saver([hete_verse_embedding_var])
            # trailing 1 is the global step passed to Saver.save
            saver_embed.save(sess, hete_verse_viz_dir + 'blogcatalog_hete_verse_embeddings_viz.ckpt', 1)
        finally:
            # flush pending events and release the event file
            writer.close()