Berner Fachhochschule BFH - MAS Data Science - Graph Machine Learning - Master Thesis FS/2022 Thomas Iten

# 16. Performance-Messung GNN

In [5]:
import numpy as np
import networkx as nx
from tensorflow import keras
from stellargraph import StellarGraph
from stellargraph.data import EdgeSplitter
from stellargraph.mapper import GraphSAGELinkGenerator
from stellargraph.layer import GraphSAGE, link_classification
from gml.graph.data_factory import DataFactory
from gml.measure.system_meter import SystemMeter, CpuSystemMeter, MemorySystemMeter, TimeSystemMeter

## 16.1 Messung von Graph-Erstellung, Training und Link Prediction

In [6]:
# Performance measurement: for every graph size in `n_graphs` the cell builds a
# graph, splits its edges into test/train samples, trains a GraphSAGE link
# classifier and runs a prediction, while the system meters record time,
# memory and CPU for the whole sequence.

# --- configuration (constant across all measurements) -----------------------
SEED          = 24        # shared by the edge splitters and the link flows
node_features = "idm"     # node attribute name that holds the feature vector
batch_size    = 64        # passed to GraphSAGELinkGenerator
num_samples   = [4, 4]    # passed to GraphSAGELinkGenerator
layer_sizes   = [20, 20]  # passed to GraphSAGE
epochs        = 12        # passed to model.fit


def add_identity_features(nx_graph, attr_name):
    """Give every node of ``nx_graph`` one row of an identity matrix as feature.

    The i-th node in ``nx_graph.nodes()`` iteration order receives row ``i`` of
    an ``n x n`` identity matrix (``n`` = number of nodes), stored under the
    node attribute ``attr_name``.

    Note: ``np.eye(n)`` is a dense ``n x n`` array, so the memory needed for
    these features grows quadratically with the number of nodes.

    Args:
        nx_graph: graph object accepted by ``nx.set_node_attributes``.
        attr_name: name of the node attribute to write.

    Returns:
        None. ``nx_graph`` is modified in place.
    """
    eye = np.eye(nx_graph.number_of_nodes())
    idm = {node: eye[i] for i, node in enumerate(nx_graph.nodes())}
    nx.set_node_attributes(nx_graph, idm, attr_name)


# init system meters
meters = [TimeSystemMeter(), MemorySystemMeter(), CpuSystemMeter()]

# init data
labels   = ["Measure A", "Measure B", "Measure C", "Measure D"]
n_graphs = [100, 200, 3000, 6000]
measures = []

# One measure is recorded per entry of n_graphs and the labels are later handed
# to SystemMeter.create_df together with the measures (presumably one label per
# measure). Check the pairing now instead of after the long-running loop.
assert len(labels) == len(n_graphs), "need exactly one label per graph size"

print("Start performance measures:")
for n_graph in n_graphs:

    # A new Keras model is built in every iteration. Clear Keras' global state
    # first so models of earlier iterations do not pile up in memory and leak
    # into the measurement of the current graph size.
    keras.backend.clear_session()

    # start all meters: everything up to meter.stop() below is measured
    for meter in meters:
        meter.start()

    # create graph
    graph = DataFactory().create_graph(n=n_graph, add_dc=True, connected=True)

    # Test data: train_test_split returns the reduced graph together with the
    # sampled edge pairs and their labels. Seeded so the split is repeatable.
    test_splitter = EdgeSplitter(graph.graph)
    test_graph, test_samples, test_labels = test_splitter.train_test_split(
        p=0.1, method="global", keep_connected=True, seed=SEED)

    # Training data: split again, starting from the reduced test graph
    train_splitter = EdgeSplitter(test_graph, graph.graph)
    train_graph, train_samples, train_labels = train_splitter.train_test_split(
        p=0.1, method="global", keep_connected=True, seed=SEED)

    # Add node features: identity matrix (idm) rows, see add_identity_features
    add_identity_features(test_graph, node_features)
    add_identity_features(train_graph, node_features)

    # Define the link generators / flows
    train_sg   = StellarGraph.from_networkx(train_graph, node_features=node_features)
    train_gen  = GraphSAGELinkGenerator(train_sg, batch_size, num_samples)
    train_flow = train_gen.flow(train_samples, train_labels, shuffle=True, seed=SEED)

    test_sg    = StellarGraph.from_networkx(test_graph, node_features=node_features)
    test_gen   = GraphSAGELinkGenerator(test_sg, batch_size, num_samples)
    test_flow  = test_gen.flow(test_samples, test_labels, seed=SEED)

    # Build the model
    graphsage = GraphSAGE(
        layer_sizes=layer_sizes,
        generator=train_gen,
        bias=True,
        dropout=0.3)

    x_inp, x_out = graphsage.in_out_tensors()
    prediction   = link_classification(output_dim=1, output_act="sigmoid", edge_embedding_method="ip")(x_out)

    model = keras.Model(inputs=x_inp, outputs=prediction)
    # NOTE(review): MSE is used as loss on a sigmoid output; binary
    # cross-entropy is the more common choice for a binary link classifier.
    # Left unchanged so the measured workload stays the same - confirm intent.
    model.compile(
        optimizer=keras.optimizers.Adam(learning_rate=1e-3),
        loss=keras.losses.mse,
        metrics=["acc"],
    )

    # Train the model (the returned history is not needed for the measurement)
    model.fit(train_flow, epochs=epochs, validation_data=test_flow)

    # Prediction: rounded to 0/1; only computed so that it is part of the
    # measured work, the values themselves are not evaluated in this cell
    y_pred = np.round(model.predict(test_flow)).flatten()

    # summarize measure results
    measure = {
        "graphs": n_graph,
        "nodes": graph.number_of_nodes(),
        "edges": graph.number_of_edges()
    }

    # stop system meters and add them to the measure
    for meter in meters:
        meter.stop()
        measure[meter.name] = meter.result()

    # save measure
    measures.append(measure)
    print(measure)

Start performance measures:
** Sampled 90 positive and 90 negative edges. **
** Sampled 81 positive and 81 negative edges. **
link_classification: using 'ip' method to combine node embeddings into edge embeddings
Epoch 1/12
Epoch 2/12
Epoch 3/12
Epoch 4/12
Epoch 5/12
Epoch 6/12
Epoch 7/12
Epoch 8/12
Epoch 9/12
Epoch 10/12
Epoch 11/12
Epoch 12/12
{'graphs': 100, 'nodes': 603, 'edges': 902, 'Time [min]': 0.13, 'Memory [MB]': 30.03, 'CPU [%]': 12.88}
** Sampled 180 positive and 180 negative edges. **
** Sampled 162 positive and 162 negative edges. **
link_classification: using 'ip' method to combine node embeddings into edge embeddings
Epoch 1/12
Epoch 2/12
Epoch 3/12
Epoch 4/12
Epoch 5/12
Epoch 6/12
Epoch 7/12
Epoch 8/12
Epoch 9/12
Epoch 10/12
Epoch 11/12
Epoch 12/12
{'graphs': 200, 'nodes': 1203, 'edges': 1802, 'Time [min]': 0.2, 'Memory [MB]': 71.96, 'CPU [%]': 18.84}
** Sampled 2700 positive and 2700 negative edges. **
** Sampled 2430 positive and 2430 negative edges. **
link_classifi

## 16.2 Anzeige der Resultate

In [7]:
# Combine the collected measures into one table, using `labels` to name the
# individual measures (they appear as column headers in the output).
df = SystemMeter.create_df(measures, labels)

# A bare last expression lets the notebook render the table with its rich
# display instead of the plain-text output of print().
df

             Measure A  Measure B  Measure C  Measure D
Graphs          100.00     200.00    3000.00    6000.00
Nodes           603.00    1203.00   18003.00   36003.00
Edges           902.00    1802.00   27002.00   54002.00
Time [min]        0.13       0.20       9.76      36.21
Memory [mb]      30.03      71.96   10450.18   41634.49
Cpu [%]          12.88      18.84      54.77      58.43


---
_The end._