## Extract data from a gbq table and load it as nodes in AuraDB
This is a simplified script that demonstrates the basics, assuming the data does not need to be transformed before loading.

In [None]:
# Import packages
import pandas as pd
from pandas.io import gbq

### Extract data
- If you have permission to access the project that contains your gbq table, you can extract its data into a dataframe.
- Reminder: querying a gbq table incurs costs, just like running any other gbq query.

In [None]:
# Read in data: `data` holds the SQL text and `df_data` receives the query result.
# Replace both <...> placeholders with your own project / dataset / table names.
data = '''SELECT * FROM `<your_gbq_project.dataset.table>`'''
df_data = gbq.read_gbq(data, project_id='<your_gbq_project>')

### Load to Graph
- You will need an AuraDB instance and credentials.
- If there is no need to transform, you can load data as nodes into your AuraDB instance.

In [None]:
from neo4j import GraphDatabase, RoutingControl, Result
# Define the Neo4j connection
# NOTE(review): the "+ssc" scheme presumably accepts self-signed certificates
# (i.e. no CA verification); AuraDB connection strings normally use
# "neo4j+s://" -- confirm which one is intended here.
uri = "neo4j+ssc://<your_instance>.databases.neo4j.io"
user = "<username>" # e.g. "neo4j"
password = "<password>"
db_name = "db_name" # placeholder: replace with the target database name, e.g. "neo4j"
# One driver object, reused by the cells below
driver = GraphDatabase.driver(uri, auth=(user, password))

In [None]:
# Check up front that the driver can connect with the settings above
driver.verify_connectivity()

### Split dataframe into chunks
This step splits a large dataframe into smaller chunks so that it can be loaded in batches.

In [None]:
# Split dataframe into chunks
def split_dataframe(dataframe, chunk_size=5000):
    """Split a dataframe into consecutive slices of at most ``chunk_size`` rows.

    Args:
        dataframe: Object to split; it only needs to support ``len()`` and
            positional slicing (``dataframe[start:stop]``), as a pandas
            DataFrame does.
        chunk_size: Maximum number of rows per chunk. Must be a positive
            integer.

    Returns:
        A list of slices in original row order. Every chunk is non-empty;
        the list itself is empty when ``dataframe`` has no rows.

    Raises:
        ValueError: If ``chunk_size`` is not positive.
    """
    if chunk_size <= 0:
        raise ValueError(f"chunk_size must be a positive integer, got {chunk_size!r}")

    # Stepping by chunk_size (instead of computing `len // chunk_size + 1`)
    # avoids a trailing empty chunk when the row count is an exact multiple
    # of chunk_size, and yields no chunks at all for an empty input.
    return [
        dataframe[start:start + chunk_size]
        for start in range(0, len(dataframe), chunk_size)
    ]

### Create nodes

In [None]:
# Function to create nodes in Neo4j from a dataframe, one chunk at a time
def create_node(dataframe, label):
    """Create one node labelled ``label`` per row of ``dataframe``.

    The dataframe is split with ``split_dataframe`` and each chunk is sent
    as a single ``UNWIND`` write query through the module-level ``driver``
    against the ``db_name`` database. The summary counters of every query
    are printed.

    Args:
        dataframe: pandas DataFrame whose columns become node properties.
        label: Node label. It is interpolated directly into the query text,
            so pass only trusted values.

    Returns:
        None.

    Note:
        Rows are always created, never matched against existing nodes, so
        running this twice on the same data produces duplicate nodes.
    """
    # CREATE rather than MERGE: a bare `MERGE (n:Label)` carries no
    # identifying properties, so once one node with that label exists every
    # following row matches it and `SET n += row` merely overwrites that
    # same node instead of adding a new one.
    node_query = (
        f"UNWIND $rows AS row "
        f"CREATE (n:{label}) "
        f"SET n += row "
        f"RETURN count(*) AS nodes_created;"
    )

    for chunk in split_dataframe(dataframe):
        # Nothing to write for an empty chunk; skip the round trip.
        if len(chunk) == 0:
            continue
        # Only the summary is needed; records and keys are discarded.
        _, summary, _ = driver.execute_query(
            node_query,
            database_=db_name,
            routing_=RoutingControl.WRITE,
            rows=chunk.to_dict('records'),
        )
        print(summary.counters)

In [None]:
# Load the extracted dataframe (df_data) into Neo4j.
# Replace 'NodeLabel' with the label you want to use for the nodes.
create_node(df_data, 'NodeLabel')