- Calculation of centrality measures and their distribution
- Relation between these measures
- Creation of subgraphs based on filters like degree or largest component.
- Running different cluster algorithms
- Comparing solutions of distinct community detections
- Ranking possible edges based on parameters like preferential attachment
- Setting up a pipeline for graph embeddings with training, test and validation sets

In [3]:
# Import of required libraries and packages
import igraph as ig
import easygui
from matplotlib import pyplot as plt
import numpy as np
from datetime import datetime
import scipy
import seaborn as sns
import statistics
import pandas as pd
print(ig.__version__)

0.9.11


In [2]:
from stellargraph import __version__

In [2]:
__version__

'1.3.0b'

## 5

In [1]:
import matplotlib.pyplot as plt
from math import isclose
from sklearn.decomposition import PCA
import os
import networkx as nx
import numpy as np
import pandas as pd
from stellargraph import StellarGraph, datasets
from stellargraph.data import EdgeSplitter
from collections import Counter
import multiprocessing
from IPython.display import display, HTML
from sklearn.model_selection import train_test_split

%matplotlib inline

In [2]:
# Load the Cora dataset, restricted to its largest connected component and
# with node ids as strings; the second value returned by load() is discarded.
dataset = datasets.Cora()
graph, _ = dataset.load(largest_connected_component_only=True, str_node_ids=True)

# Print the summary that StellarGraph reports for the loaded graph.
print(graph.info())

StellarGraph: Undirected multigraph
 Nodes: 2485, Edges: 5209

 Node types:
  paper: [2485]
    Features: float32 vector, length 1433
    Edge types: paper-cites->paper

 Edge types:
    paper-cites->paper: [5209]
        Weights: all 1 (default)
        Features: none


In [4]:
def edge_splitter_train_test(
    graph, p=0.1, method="global", keep_connected=False, seed=None
):
    """Sample positive and negative edge examples from ``graph``.

    Thin wrapper around ``EdgeSplitter(graph).train_test_split``. The
    defaults reproduce the previously hard-coded call (``p=0.1``,
    ``method='global'``), so existing one-argument callers are unaffected.

    Args:
        graph: Graph handed to ``EdgeSplitter``.
        p: Forwarded as ``p``; with the default 0.1 the notebook samples
            about 10% of the graph's edges as positive examples.
        method: Forwarded as ``method`` (negative-edge sampling strategy).
        keep_connected: Forwarded as ``keep_connected``; ``False`` is the
            library default, i.e. what the original call used implicitly.
        seed: Forwarded as ``seed``; pass an int for a reproducible split.

    Returns:
        The value of ``EdgeSplitter.train_test_split``, which callers in this
        notebook unpack as ``(reduced_graph, examples, labels)``.
    """
    edge_splitter = EdgeSplitter(graph)
    return edge_splitter.train_test_split(
        p=p, method=method, keep_connected=keep_connected, seed=seed
    )

In [5]:
# First split, applied to the full graph: the reduced graph, the sampled
# edge examples and their labels are kept under the *_test names.
graph_test, examples_test, labels_test = edge_splitter_train_test(
    graph
)

** Sampled 520 positive and 520 negative edges. **


In [6]:
# Second split, applied to graph_test (the already reduced graph) rather than
# to the full graph, so these examples are sampled from the edges that remain
# after the first split.
graph_train, examples, labels = edge_splitter_train_test(
    graph_test
)

** Sampled 468 positive and 468 negative edges. **


In [7]:
# Partition the second-split examples and labels with sklearn's
# train_test_split: 25% go to the *_model_selection pair, the rest to the
# *_train pair.
(
    examples_train,
    examples_model_selection,
    labels_train,
    labels_model_selection,
) = train_test_split(examples, labels, test_size=0.25)

In [8]:
from stellargraph.data import BiasedRandomWalk

def create_biased_random_walker(graph, walk_num, walk_length, p=1.0, q=1.0):
    """Build a ``BiasedRandomWalk`` over ``graph``.

    ``p`` and ``q`` used to be fixed at 1.0 inside the function; they are now
    keyword parameters with the same defaults, so existing three-argument
    calls behave exactly as before.

    Args:
        graph: Graph the walker operates on.
        walk_num: Forwarded to ``BiasedRandomWalk`` as ``n``.
        walk_length: Forwarded to ``BiasedRandomWalk`` as ``length``.
        p: Forwarded to ``BiasedRandomWalk`` as ``p``.
        q: Forwarded to ``BiasedRandomWalk`` as ``q``.

    Returns:
        The configured ``BiasedRandomWalk`` instance.
    """
    return BiasedRandomWalk(graph, n=walk_num, length=walk_length, p=p, q=q)

In [9]:
# Module-level settings read by node2vec_embedding:
walk_length = 5  # passed to the random walker as the walk length
epochs = 6  # passed to model.fit as the number of epochs
batch_size = 50  # passed to the Node2Vec link and node generators

learning

In [12]:
# Materialise the node ids of graph_train as a list.
graph_node_list = list(graph_train.nodes())

In [15]:
# Random walker over graph_train with walk_num=20 and the module-level walk_length.
walker = create_biased_random_walker(graph_train, 20, walk_length)

In [17]:
# Sampler built from graph_train, its node list and the walker.
# NOTE(review): UnsupervisedSampler has no import in the visible part of this
# notebook -- confirm the import cell exists.
unsupervised_samples = UnsupervisedSampler(
    graph_train, graph_node_list, walker=walker
)

In [19]:
# Node2Vec link generator over graph_train using the module-level batch_size.
generator = Node2VecLinkGenerator(graph_train, batch_size)

In [20]:
# Node2Vec model with 128 as its first argument (the embedding dimension in
# node2vec_embedding), tied to the generator.
node2vec = Node2Vec(128, generator)

In [21]:
# Input and output tensors of the Node2Vec model, used to assemble a Keras model.
x_inp, x_out = node2vec.in_out_tensors()

In [10]:
def node2vec_embedding(graph, name, dimension=128, walk_number=20):
    """Train a Node2Vec model on ``graph`` and return an embedding lookup.

    Reads ``walk_length``, ``epochs`` and ``batch_size`` from module scope.

    Args:
        graph: Graph to run random walks on and to embed.
        name: Label used only in the progress message printed before training.
        dimension: Embedding dimension passed to ``Node2Vec`` (default 128,
            the value previously hard-coded).
        walk_number: Value passed to the random walker as ``walk_num``
            (default 20, the value previously hard-coded).

    Returns:
        A function ``get_embedding(u)`` that returns the embedding row of
        node id ``u``.

    Raises:
        ValueError: From ``get_embedding`` when ``u`` is not a node of
            ``graph`` (same exception type as the former ``list.index``
            lookup).
    """
    print(f"Training Node2Vec for '{name}':")

    graph_node_list = list(graph.nodes())

    # Create the biased random walker to generate random walks
    walker = create_biased_random_walker(graph, walk_number, walk_length)

    # Create the unsupervised sampler to sample (target, context) pairs from random walks
    unsupervised_samples = UnsupervisedSampler(
        graph, nodes=graph_node_list, walker=walker
    )

    # Define a Node2Vec training generator, which generates batches of training pairs
    generator = Node2VecLinkGenerator(graph, batch_size)

    # Create the Node2Vec model
    node2vec = Node2Vec(dimension, generator=generator)

    # Build the model and expose input and output sockets of Node2Vec, for node pair inputs
    x_inp, x_out = node2vec.in_out_tensors()

    # Use the link_classification function to generate the output of the Node2Vec model
    prediction = link_classification(
        output_dim=1, output_act="sigmoid", edge_embedding_method="dot"
    )(x_out)

    # Stack the Node2Vec encoder and prediction layer into a Keras model, and specify the loss
    model = keras.Model(inputs=x_inp, outputs=prediction)
    model.compile(
        optimizer=keras.optimizers.Adam(learning_rate=1e-3),
        loss=keras.losses.binary_crossentropy,
        metrics=[keras.metrics.binary_accuracy],
    )

    # Train the model
    model.fit(
        generator.flow(unsupervised_samples),
        epochs=epochs,
        verbose=2,
        use_multiprocessing=False,
        workers=4,
        shuffle=True,
    )

    # Build the model to predict node representations from node ids with the learned Node2Vec model parameters
    x_inp_src = x_inp[0]
    x_out_src = x_out[0]
    embedding_model = keras.Model(inputs=x_inp_src, outputs=x_out_src)

    # Get representations for all nodes in ``graph``
    node_gen = Node2VecNodeGenerator(graph, batch_size).flow(graph_node_list)
    node_embeddings = embedding_model.predict(node_gen, workers=1, verbose=0)

    # Map each node id to its row once, so a lookup no longer scans the whole
    # node list. setdefault keeps the first occurrence, matching list.index.
    node_row = {}
    for row, node in enumerate(graph_node_list):
        node_row.setdefault(node, row)

    def get_embedding(u):
        """Return the embedding row for node id ``u``."""
        try:
            u_index = node_row[u]
        except KeyError:
            # Keep the exception type the list.index-based lookup raised.
            raise ValueError(f"{u!r} is not in list") from None
        return node_embeddings[u_index]

    return get_embedding

In [None]:
def run_link_prediction(binary_operator, embedding_train):
    clf = train_link_prediction_model(
        examples_train, labels_train,
        embedding_train, binary_operator
    )
    score = evaluate_link_prediction_model(
        clf,
        examples_model_selection,
        labels_model_selection,
        embedding_train,
        binary_operator,
    )