# Benchmarking Aura features

In [None]:
import os
import timeit

import numpy as np
import seaborn as sns
from graphdatascience.aura_graph_data_science import AuraGraphDataScience
from graphdatascience.aura_sessions import AuraSessions
from graphdatascience.query_runner.aura_db_arrow_query_runner import AuraDbConnectionInfo

In [None]:
# Aura API credentials come from the environment so they never end up in the notebook.
CLIENT_ID = os.environ["CLIENT_ID"]
CLIENT_SECRET = os.environ["CLIENT_SECRET"]
# The database password is read from the environment as well; when DB_PASSWORD is
# not set this falls back to the previous empty placeholder.
DB_PASSWORD = os.environ.get("DB_PASSWORD", "")
DB_ID = "79ff87d0"
# NOTE(review): AURA_ENV is presumably also read by the graphdatascience client to
# pick the Aura environment -- confirm before removing this assignment.
os.environ["AURA_ENV"] = "devstrawberry"

# Connection details for the AuraDB instance the benchmarks project from / write to.
db_connection_info = AuraDbConnectionInfo(
    f"neo4j+s://{DB_ID}-{os.environ['AURA_ENV']}.databases.neo4j-dev.io",
    ("neo4j", DB_PASSWORD),
)

In [None]:
# Open the GDS session used by every benchmark below: reconnect when a session
# with this name already exists, otherwise create it.
session_name = "benchmark-session"
session_pw = "my-password"
sessions = AuraSessions(db_connection_info, (CLIENT_ID, CLIENT_SECRET))

print("Starting GDS session")
existing_session_names = {session.name for session in sessions.list_sessions()}
if session_name not in existing_session_names:
    gds = sessions.create_gds(session_name, session_pw)
else:
    gds = sessions.connect(session_name, session_pw)

In [None]:
def measure(func, setup, iterations, warmup_iterations):
    """Time ``func`` with ``timeit`` after discarding some warm-up rounds.

    Every round calls ``setup`` once (untimed) and then ``func`` exactly once.

    Args:
        func: Zero-argument callable whose runtime is measured.
        setup: Zero-argument callable run before each round, outside the timing.
        iterations: Number of measured rounds.
        warmup_iterations: Number of rounds run up front whose timings are thrown away.

    Returns:
        A dict holding the per-round timings under ``"iterations"`` and their
        arithmetic mean under both ``"mean"`` and ``"avg"``.
    """
    timer = timeit.Timer(func, setup=setup)
    # Warm-up rounds are executed for their side effects only.
    timer.repeat(repeat=warmup_iterations, number=1)
    timings = timer.repeat(repeat=iterations, number=1)
    return {
        "iterations": timings,
        "mean": np.mean(timings),
        "avg": np.average(timings),
    }

## Projection

In [None]:
def run_remote_projection(query, concurrency):
    """Run a remote projection into a graph named "graph" and leave its context again.

    Args:
        query: Cypher projection query passed to ``gds.graph.project.remoteDb``.
        concurrency: Value forwarded as the ``concurrency`` argument of the projection.
    """
    projection = gds.graph.project.remoteDb("graph", query, concurrency=concurrency)
    # Nothing is done with the projected graph; the context is entered and left right away.
    # NOTE(review): presumably leaving the context drops the graph -- confirm against the client.
    with projection:
        pass

### Project entire graph, structure only

#### Non-parallel Runtime

In [None]:
# Time the structure-only projection with the default Cypher runtime at several
# concurrency levels, then plot the mean runtime per level.
data = {}
for concurrency in [1, 2, 4, 8]:
    # The lambdas are invoked inside this iteration, so reading the loop variable
    # `concurrency` from the closure is safe here.
    result = measure(
        lambda: run_remote_projection(
            """
            MATCH (u)
            OPTIONAL MATCH (u)-[r]->(t)
            RETURN gds.graph.project.remote(u, t, {})
            """,
            concurrency=concurrency
        ),
        # Untimed setup: make sure no graph named "graph" is left over before a round.
        lambda: gds.graph.drop("graph", failIfMissing=False),
        iterations=10,
        warmup_iterations=5
    )
    data[concurrency] = result

plot = sns.barplot({k: v["mean"] for k, v in data.items()})
plot.set(xlabel="concurrency", ylabel="average runtime")


#### Parallel Runtime

In [None]:
# Time the structure-only projection with the parallel Cypher runtime at several
# concurrency levels, then plot the mean runtime per level.
data = {}
for concurrency in [1, 2, 4, 8]:
    # The lambdas are invoked inside this iteration, so reading the loop variable
    # `concurrency` from the closure is safe here.
    result = measure(
        lambda: run_remote_projection(
            """
            CYPHER runtime = parallel
            MATCH (u)
            OPTIONAL MATCH (u)-[r]->(t)
            RETURN gds.graph.project.remote(u, t, {})
            """,
            concurrency=concurrency
        ),
        # No per-round setup is performed for this benchmark.
        lambda: (),
        iterations=10,
        warmup_iterations=5
    )
    data[concurrency] = result

plot = sns.barplot({k: v["mean"] for k, v in data.items()})
plot.set(xlabel="concurrency", ylabel="average runtime")

### Project entire graph, with properties and labels

In [None]:
# Projection query that also carries node labels, relationship types and one
# numeric property on source nodes, target nodes and relationships.
query = """
            MATCH (u)
            OPTIONAL MATCH (u)-[r]->(t)
            RETURN gds.graph.project.remote(u, t, {
                sourceNodeLabels: labels(u),
                sourceNodeProperties: {id: id(u)},
                targetNodeLabels: labels(t),
                targetNodeProperties: {id: id(t)},
                relationshipType: type(r),
                relationshipProperties: {id: id(r)}
            })
        """

# Time that projection at several concurrency levels, then plot the mean runtime per level.
data = {}
for concurrency in [1, 2, 4, 8]:
    # The lambdas are invoked inside this iteration, so reading the loop variable
    # `concurrency` from the closure is safe here.
    result = measure(
        lambda: run_remote_projection(
            query,
            concurrency=concurrency
        ),
        # No per-round setup is performed for this benchmark.
        lambda: (),
        iterations=10,
        warmup_iterations=5
    )
    data[concurrency] = result

plot = sns.barplot({k: v["mean"] for k, v in data.items()})
plot.set(xlabel="concurrency", ylabel="average runtime")

## Write back

In [None]:
# Project the graph once (parallel runtime, concurrency 4) and compute the
# properties and relationships that the write-back benchmarks below write out.
write_back_projection_query = """
    CYPHER runtime = parallel
    MATCH (u)
    OPTIONAL MATCH (u)-[r]->(t)
    RETURN gds.graph.project.remote(u, t, {})
    """
G, _ = gds.graph.project.remoteDb("graph", write_back_projection_query, concurrency=4)

# Scalar node property.
gds.degree.mutate(G, mutateProperty="degree")
# Array node property (128-dimensional embedding).
gds.fastRP.mutate(
    G,
    mutateProperty="embedding",
    embeddingDimension=128,
    iterationWeights=[1.0],
)
# Relationships with a "score" property, derived from the embedding.
gds.knn.mutate(
    G,
    topK=4,
    mutateRelationshipType="KNN_RELS",
    mutateProperty="score",
    maxIterations=1,
    nodeProperties=["embedding"],
)

In [None]:
# Benchmark writing the scalar "degree" node property back to the database.
# `timeit.repeat` only times `stmt`; `setup` runs untimed before every round.
# The write is therefore the statement and the property reset is the setup.
result = timeit.repeat(
    stmt=lambda: gds.graph.nodeProperties.write(G, node_properties=["degree"]),
    setup=lambda: gds.run_cypher("MATCH (n) SET n.degree = null"),
    number=1,
    repeat=10,  # same number of measured rounds as the projection benchmarks
)

print(f"scalar property: {result}")

In [None]:
# Benchmark writing the array-valued "embedding" node property back to the database.
# `timeit.repeat` only times `stmt`; `setup` runs untimed before every round.
# The write is therefore the statement and the property reset is the setup.
result = timeit.repeat(
    stmt=lambda: gds.graph.nodeProperties.write(G, node_properties=["embedding"]),
    setup=lambda: gds.run_cypher("MATCH (n) SET n.embedding = null"),
    number=1,
    repeat=10,  # same number of measured rounds as the projection benchmarks
)

print(f"array property: {result}")

In [None]:
# Benchmark writing the "KNN_RELS" relationships (with their "score" property) back
# to the database.
# `timeit.repeat` only times `stmt`; `setup` runs untimed before every round.
# The write is therefore the statement and deleting the relationships is the setup.
result = timeit.repeat(
    stmt=lambda: gds.graph.relationship.write(
        G, relationship_type="KNN_RELS", relationship_property="score"
    ),
    setup=lambda: gds.run_cypher("MATCH (n)-[r:KNN_RELS]->() DELETE r"),
    number=1,
    repeat=10,  # same number of measured rounds as the projection benchmarks
)

print(f"relationships: {result}")

## Cleanup

In [None]:
# Drop the graph `G` that was projected for the write-back benchmarks.
G.drop()

In [None]:
# Delete the GDS session named `session_name` that the benchmarks ran in.
sessions.delete_gds(session_name)

In [None]:
# cleanup
# NOTE(review): this appears to delete the Aura instance identified by DB_ID itself,
# not just benchmark artifacts -- confirm this is intended before running the cell.

from graphdatascience.aura_api import AuraApi

# Uses the same API credentials as the session management above.
aura_api = AuraApi(CLIENT_ID, CLIENT_SECRET)
aura_api.delete_instance(DB_ID)