# Loading and streaming back data with Apache Arrow

## Setup

The `ogb` extra of the `graphdatascience` package is needed in order to load OGBN datasets; it is installed in the Performance section below, where those datasets are used.

In [None]:
# Install the `graphdatascience` client (version 1.7 or newer) from within the notebook
%pip install 'graphdatascience>=1.7'

from graphdatascience import GraphDataScience

# Connection settings -- swap these placeholders for the actual connection URI
# and credentials before running the notebook.
AURA_CONNECTION_URI = "neo4j+s://xxxxxxxx.databases.neo4j.io"
AURA_USERNAME = "neo4j"
AURA_PASSWORD = ""

# On initialization the client tries to use Arrow whenever the server offers it.
# The `arrow` parameter controls this; it defaults to `True`, so it is not passed here.
gds = GraphDataScience(
    AURA_CONNECTION_URI,
    auth=(AURA_USERNAME, AURA_PASSWORD),
    aura_ds=True,
)

# Required when Arrow is enabled (which is the default on Aura)
gds.set_database("neo4j")

In [None]:
# Inspect the Arrow details reported for this connection
# (presumably whether Arrow is available and enabled on the server -- confirm against the client docs)
gds.debug.arrow()

## Loading data

In [None]:
# Install pandas, which is used below to build the node and relationship DataFrames
%pip install pandas

import pandas as pd

In [None]:
# Node table: one row per node, holding its id, its label and the `pages` property.
nodes = pd.DataFrame(
    data={
        "nodeId": [0, 1, 2],
        "labels": ["Article"] * 3,
        "pages": [3, 7, 12],
    },
)

# Relationship table: one row per `CITES` relationship between two node ids,
# each carrying a `times` property.
relationships = pd.DataFrame(
    data={
        "sourceNodeId": [0, 1],
        "targetNodeId": [1, 2],
        "relationshipType": ["CITES"] * 2,
        "times": [2, 1],
    },
)

# Construct a graph named "article-graph" from the node and relationship DataFrames
# and keep the returned graph object for the cells below.
article_graph = gds.graph.construct("article-graph", nodes, relationships)

In [None]:
# List the graphs known to GDS (presumably `article-graph`, constructed above, shows up here -- confirm)
gds.graph.list()

## Streaming node and relationship properties

In [None]:
# Read all the values for the node property `pages`
# (the `pages` column of the `nodes` DataFrame the graph was constructed from)
gds.graph.nodeProperties.stream(article_graph, "pages")

In [None]:
# Read all the values for the relationship property `times`
# (the `times` column of the `relationships` DataFrame the graph was constructed from)
gds.graph.relationshipProperties.stream(article_graph, "times")

## Performance

In [None]:
# Install the client together with its `ogb` extra, which is needed to load OGBN datasets
%pip install 'graphdatascience[ogb]>=1.7'

# Load and immediately drop the dataset to download and cache the data
# (the `ogbn_arxiv` name is kept because the Cleanup cell refers to it)
ogbn_arxiv = gds.graph.ogbn.load("ogbn-arxiv")
ogbn_arxiv.drop()

In [None]:
%%timeit -n 1 -r 1

# This call uses the cached dataset, so only the actual loading is timed
# (`-n 1 -r 1` makes the cell body run exactly once: one loop, one repeat).
# NOTE(review): names assigned inside a `%%timeit` cell are presumably not kept in the
# notebook namespace afterwards, so `ogbn_arxiv` in later cells would still be the
# handle created in the previous cell -- confirm.
ogbn_arxiv = gds.graph.ogbn.load("ogbn-arxiv")

## Cleanup

In [11]:
# Drop the graphs used in this notebook (in the same order as before),
# then close the client.
for _graph in (article_graph, ogbn_arxiv):
    _graph.drop()

gds.close()