In [1]:
import os
from dotenv import load_dotenv
from neo4j import GraphDatabase
import yaml

# Read the local .env file via python-dotenv and report the outcome.
# load_dotenv() returns a truthy value when it managed to set variables.
if load_dotenv():
    print(".env variables loaded!")
else:
    print("Unable to load .env variables.")

.env variables loaded!


In [10]:
# Load the YAML file whose entries are looked up by key and passed to
# execute_query as query text in the cells below.
# The context manager closes the file handle as soon as parsing finishes
# instead of leaving it open until garbage collection.
with open('config.yaml') as config_file:
    config = yaml.safe_load(config_file)

## Test Cypher by anchoring on a specific node
* **prod_khc_sales.kroger.kroger_daily_point_of_sale_fact**
  * Original name tested in the call with the Kraft team

* **prod_khc_sales.iri_us_raw.iri_us_banner_total_category_product**
  * Good example to show path explosion (this returns more than 2.7 million records)

In [3]:
# Initial record provided by the Kraft Team
sale_fact = 'prod_khc_sales.kroger.kroger_daily_point_of_sale_fact'

# Large table that produces 2.7 million variable paths when using default query expansion
total_category_product = 'prod_khc_sales.iri_us_raw.iri_us_banner_total_category_product'

# Run the 'spanning_tree_only' query anchored on the sale fact table and
# report how many roots and leaves it found, followed by the PROFILE plan.
with GraphDatabase.driver(
    os.environ.get("NEO4J_URI"),
    auth=(os.environ.get("NEO4J_USERNAME"), os.environ.get("NEO4J_PASSWORD")),
) as driver:

    # Driver-level options of execute_query end with an underscore
    # (database_, routing_, ...). A plain `database=` keyword is forwarded
    # as a Cypher query parameter named $database and the query silently
    # runs against the home database, so `database_` is required here.
    records, summary, keys = driver.execute_query(
        config['spanning_tree_only'],
        name=sale_fact,
        database_=os.environ.get("NEO4J_DATABASE"),
    )

    # Assumes the query returns one row holding the aggregated roots and
    # leaves -- TODO confirm against the query text in config.yaml.
    root_count = len(records[0]['roots']['roots'])
    leaf_count = len(records[0]['leaves']['leaves'])

    # NOTE(review): result_available_after is the time until the server had
    # the result available, not the end-to-end runtime; adding
    # summary.result_consumed_after would likely give a more honest figure.
    print(
        "Finished executing query and returned {root_count} roots & {leaf_count} leaves after {time} ms".format(
            time=summary.result_available_after,
            root_count=root_count,
            leaf_count=leaf_count,
        )
    )

    # Presumably the configured query is prefixed with PROFILE; otherwise
    # summary.profile would not be populated -- verify in config.yaml.
    print(summary.profile['args']['string-representation'])
    

Finished executing query and returned 21 roots & 1296 leaves after 1 ms
Planner COST

Runtime PIPELINED

Runtime version 5.21

Batch size 128

+---------------------+----+----------------------------------------------------------------------------------+----------------+------+---------+----------------+------------------------+-----------+---------------------+
| Operator            | Id | Details                                                                          | Estimated Rows | Rows | DB Hits | Memory (Bytes) | Page Cache Hits/Misses | Time (ms) | Pipeline            |
+---------------------+----+----------------------------------------------------------------------------------+----------------+------+---------+----------------+------------------------+-----------+---------------------+
| +ProduceResults     |  0 | roots, leaves                                                                    |             10 |    1 |  117711 |       48673736 |                27661/0 |   7

In [11]:
# Run the 'spanning_tree_only' query anchored on the large category/product
# table and report how many roots and leaves it found, then the PROFILE plan.
with GraphDatabase.driver(
    os.environ.get("NEO4J_URI"),
    auth=(os.environ.get("NEO4J_USERNAME"), os.environ.get("NEO4J_PASSWORD")),
) as driver:

    # Driver-level options of execute_query end with an underscore
    # (database_, routing_, ...). A plain `database=` keyword is forwarded
    # as a Cypher query parameter named $database and the query silently
    # runs against the home database, so `database_` is required here.
    records, summary, keys = driver.execute_query(
        config['spanning_tree_only'],
        name=total_category_product,
        database_=os.environ.get("NEO4J_DATABASE"),
    )

    # Assumes the query returns one row holding the aggregated roots and
    # leaves -- TODO confirm against the query text in config.yaml.
    root_count = len(records[0]['roots']['roots'])
    leaf_count = len(records[0]['leaves']['leaves'])

    # NOTE(review): result_available_after is the time until the server had
    # the result available, not the end-to-end runtime; adding
    # summary.result_consumed_after would likely give a more honest figure.
    print(
        "Finished executing query and returned {root_count} roots & {leaf_count} leaves after {time} ms".format(
            time=summary.result_available_after,
            root_count=root_count,
            leaf_count=leaf_count,
        )
    )

    # Presumably the configured query is prefixed with PROFILE; otherwise
    # summary.profile would not be populated -- verify in config.yaml.
    print(summary.profile['args']['string-representation'])

Finished executing query and returned 36 roots & 1876 leaves after 2 ms
Planner COST

Runtime PIPELINED

Runtime version 5.21

Batch size 128

+---------------------+----+----------------------------------------------------------------------------------+----------------+------+---------+----------------+------------------------+-----------+---------------------+
| Operator            | Id | Details                                                                          | Estimated Rows | Rows | DB Hits | Memory (Bytes) | Page Cache Hits/Misses | Time (ms) | Pipeline            |
+---------------------+----+----------------------------------------------------------------------------------+----------------+------+---------+----------------+------------------------+-----------+---------------------+
| +ProduceResults     |  0 | roots, leaves                                                                    |             10 |    1 |  123883 |       97561728 |                51828/0 |  18

In [5]:
# Run the 'qpp_query_stream_all' query anchored on the sale fact table and
# report how many path records came back, followed by the PROFILE plan.
with GraphDatabase.driver(
    os.environ.get("NEO4J_URI"),
    auth=(os.environ.get("NEO4J_USERNAME"), os.environ.get("NEO4J_PASSWORD")),
) as driver:

    # Driver-level options of execute_query end with an underscore
    # (database_, routing_, ...). A plain `database=` keyword is forwarded
    # as a Cypher query parameter named $database and the query silently
    # runs against the home database, so `database_` is required here.
    records, summary, keys = driver.execute_query(
        config['qpp_query_stream_all'],
        name=sale_fact,
        database_=os.environ.get("NEO4J_DATABASE"),
    )

    # Each returned record is counted as one path in the message below.
    # NOTE(review): result_available_after is the time until the server had
    # the result available, not the end-to-end runtime; adding
    # summary.result_consumed_after would likely give a more honest figure.
    print(
        "Finished executing query and returned {path_count} paths after {time} ms".format(
            time=summary.result_available_after,
            path_count=len(records),
        )
    )

    # Presumably the configured query is prefixed with PROFILE; otherwise
    # summary.profile would not be populated -- verify in config.yaml.
    print(summary.profile['args']['string-representation'])

Finished executing query and returned 37356 paths after 1 ms
Planner COST

Runtime PIPELINED

Runtime version 5.21

Batch size 128

+-----------------------+----+------------------------------------------------------------------------------------+----------------+-------+---------+----------------+------------------------+-----------+---------------------+
| Operator              | Id | Details                                                                            | Estimated Rows | Rows  | DB Hits | Memory (Bytes) | Page Cache Hits/Misses | Time (ms) | Pipeline            |
+-----------------------+----+------------------------------------------------------------------------------------+----------------+-------+---------+----------------+------------------------+-----------+---------------------+
| +ProduceResults       |  0 | startNode, leaf, paths                                                             |              1 | 37356 | 1270362 |           6888 |                    

In [12]:
# Run the 'qpp_query_stream_id_paths' query anchored on the sale fact table
# and report how many path records came back, followed by the PROFILE plan.
with GraphDatabase.driver(
    os.environ.get("NEO4J_URI"),
    auth=(os.environ.get("NEO4J_USERNAME"), os.environ.get("NEO4J_PASSWORD")),
) as driver:

    # Driver-level options of execute_query end with an underscore
    # (database_, routing_, ...). A plain `database=` keyword is forwarded
    # as a Cypher query parameter named $database and the query silently
    # runs against the home database, so `database_` is required here.
    records, summary, keys = driver.execute_query(
        config['qpp_query_stream_id_paths'],
        name=sale_fact,
        database_=os.environ.get("NEO4J_DATABASE"),
    )

    # Each returned record is counted as one path in the message below.
    # NOTE(review): result_available_after is the time until the server had
    # the result available, not the end-to-end runtime; adding
    # summary.result_consumed_after would likely give a more honest figure.
    print(
        "Finished executing query and returned {path_count} paths after {time} ms".format(
            time=summary.result_available_after,
            path_count=len(records),
        )
    )

    # Presumably the configured query is prefixed with PROFILE; otherwise
    # summary.profile would not be populated -- verify in config.yaml.
    print(summary.profile['args']['string-representation'])

Finished executing query and returned 37356 paths after 1 ms
Planner COST

Runtime PARALLEL

Runtime version 5.21

Batch size 128

+-----------------------+----+----------------------------------------------------------------------+----------------+-------+---------+----------------+------------------------+-----------+---------------------+
| Operator              | Id | Details                                                              | Estimated Rows | Rows  | DB Hits | Memory (Bytes) | Page Cache Hits/Misses | Time (ms) | Pipeline            |
+-----------------------+----+----------------------------------------------------------------------+----------------+-------+---------+----------------+------------------------+-----------+---------------------+
| +ProduceResults       |  0 | startNodeId, leafId, paths                                           |              1 | 37356 |  473419 |        5210976 |               310962/0 |   863.011 | In Pipeline 2       |
| |              

In [13]:
# Run the 'qpp_query_stream_id_paths' query anchored on the large
# category/product table and report how many path records came back,
# followed by the PROFILE plan.
with GraphDatabase.driver(
    os.environ.get("NEO4J_URI"),
    auth=(os.environ.get("NEO4J_USERNAME"), os.environ.get("NEO4J_PASSWORD")),
) as driver:

    # Driver-level options of execute_query end with an underscore
    # (database_, routing_, ...). A plain `database=` keyword is forwarded
    # as a Cypher query parameter named $database and the query silently
    # runs against the home database, so `database_` is required here.
    records, summary, keys = driver.execute_query(
        config['qpp_query_stream_id_paths'],
        name=total_category_product,
        database_=os.environ.get("NEO4J_DATABASE"),
    )

    # Each returned record is counted as one path in the message below.
    # NOTE(review): result_available_after is the time until the server had
    # the result available, not the end-to-end runtime; adding
    # summary.result_consumed_after would likely give a more honest figure.
    print(
        "Finished executing query and returned {path_count} paths after {time} ms".format(
            time=summary.result_available_after,
            path_count=len(records),
        )
    )

    # Presumably the configured query is prefixed with PROFILE; otherwise
    # summary.profile would not be populated -- verify in config.yaml.
    print(summary.profile['args']['string-representation'])

Finished executing query and returned 2746317 paths after 1 ms
Planner COST

Runtime PARALLEL

Runtime version 5.21

Batch size 128

+-----------------------+----+----------------------------------------------------------------------+----------------+---------+----------+----------------+------------------------+-----------+---------------------+
| Operator              | Id | Details                                                              | Estimated Rows | Rows    | DB Hits  | Memory (Bytes) | Page Cache Hits/Misses | Time (ms) | Pipeline            |
+-----------------------+----+----------------------------------------------------------------------+----------------+---------+----------+----------------+------------------------+-----------+---------------------+
| +ProduceResults       |  0 | startNodeId, leafId, paths                                           |              1 | 2746317 | 43466215 |        8157824 |             27768365/0 | 65201.715 | In Pipeline 2       |
| |