Berner Fachhochschule BFH - MAS Data Science - Graph Machine Learning - Master Thesis FS/2022 Thomas Iten

# 15. Performance-Messung

In [1]:
from gml.graph.data_factory import DataFactory, MatchingCluster, Employee, DataCollection
from gml.graph.graph_link import GraphLink, Algorithm
from gml.measure.system_meter import SystemMeter, CpuSystemMeter, MemorySystemMeter, TimeSystemMeter

## 15.1 Messung der Graph-Erstellung und der Link Prediction

In [4]:
# Benchmark cell: for each graph size, build the test graph, run four link
# prediction algorithms on a fixed set of candidate edges, and record graph
# statistics together with the readings of every system meter.

# init system meters; each meter reports its reading under its own `name`
meters = [TimeSystemMeter(), MemorySystemMeter(), CpuSystemMeter()]

# init data
# graph sizes to benchmark (value passed as `n` to DataFactory.create_graph)
n_graphs = [1000, 2000, 3000, 6000, 12000, 24000]
# one label per run ("Measure A", "Measure B", ...), derived from n_graphs so the
# two lists cannot drift apart when a size is added or removed
# (letter-based naming: meaningful for up to 26 runs)
labels = ["Measure " + chr(ord("A") + idx) for idx in range(len(n_graphs))]
measures = []

print("Start performance measures:")
for n_graph in n_graphs:

    # start meters; everything up to the matching stop() below is measured,
    # i.e. graph creation, candidate edge creation and the link predictions
    for meter in meters:
        meter.start()

    # create test graph with n_graph sub graphs
    graph = DataFactory().create_graph(n=n_graph, add_dc=True, connected=True)

    # create possible edges between employee - data collection and
    # employee - matching cluster (two candidate edges per sub graph)
    possible_edges = []
    for i in range(n_graph):
        dc = DataCollection.create_name(i)
        mc = MatchingCluster.create_name(i)
        em = Employee.create_name(i)
        possible_edges.append((em, dc))
        possible_edges.append((em, mc))

    # run link predictions; only the cost is of interest here, so the result
    # of the chained calls is intentionally discarded
    (
        GraphLink(graph, possible_edges)
        .predict(Algorithm.RESOURCE_ALLOCATION_INDEX)
        .predict(Algorithm.JACCARD_COEFFICIENT)
        .predict(Algorithm.ADAMIC_ADAR_INDEX)
        .predict(Algorithm.PREFERENTIAL_ATTACHMENT)
    )

    # summarize measure results
    measure = {
        "graphs": n_graph,
        "nodes": graph.number_of_nodes(),
        "edges": graph.number_of_edges(),
        "possible edges": len(possible_edges)
    }

    # stop system meters and add them to the measure (keyed by meter name)
    for meter in meters:
        meter.stop()
        measure[meter.name] = meter.result()

    # save measure
    measures.append(measure)
    print(measure)

Start performance measures:
{'graphs': 1000, 'nodes': 6003, 'edges': 9002, 'possible edges': 2000, 'Time [min]': 0.07, 'Memory [MB]': 7.59, 'CPU [%]': 8.28}
{'graphs': 2000, 'nodes': 12003, 'edges': 18002, 'possible edges': 4000, 'Time [min]': 0.27, 'Memory [MB]': 14.92, 'CPU [%]': 8.33}
{'graphs': 3000, 'nodes': 18003, 'edges': 27002, 'possible edges': 6000, 'Time [min]': 0.58, 'Memory [MB]': 21.87, 'CPU [%]': 8.33}
{'graphs': 6000, 'nodes': 36003, 'edges': 54002, 'possible edges': 12000, 'Time [min]': 2.42, 'Memory [MB]': 44.06, 'CPU [%]': 8.33}
{'graphs': 12000, 'nodes': 72003, 'edges': 108002, 'possible edges': 24000, 'Time [min]': 10.39, 'Memory [MB]': 88.08, 'CPU [%]': 8.33}
{'graphs': 24000, 'nodes': 144003, 'edges': 216002, 'possible edges': 48000, 'Time [min]': 48.87, 'Memory [MB]': 176.37, 'CPU [%]': 8.33}


## 15.2 Anzeige der Resultate

In [5]:
# Turn the collected measures into a summary table. Judging by the recorded
# output, the table has one column per label and one row per recorded metric.
df = SystemMeter.create_df(measures, labels)
# Plain-text print of the table; in the recorded output the wide table wraps
# into two column groups.
print(df)

                Measure A  Measure B  Measure C  Measure D  Measure E  \
Graphs            1000.00    2000.00    3000.00    6000.00   12000.00   
Nodes             6003.00   12003.00   18003.00   36003.00   72003.00   
Edges             9002.00   18002.00   27002.00   54002.00  108002.00   
Possible edges    2000.00    4000.00    6000.00   12000.00   24000.00   
Time [min]           0.07       0.27       0.58       2.42      10.39   
Memory [mb]          7.59      14.92      21.87      44.06      88.08   
Cpu [%]              8.28       8.33       8.33       8.33       8.33   

                Measure F  
Graphs           24000.00  
Nodes           144003.00  
Edges           216002.00  
Possible edges   48000.00  
Time [min]          48.87  
Memory [mb]        176.37  
Cpu [%]              8.33  


---
_The end._