# Create driver
Also set the DB_NAME

In [1]:
import os
import pandas as pd
import numpy as np
import plotly.graph_objects as go
from graphdatascience import GraphDataScience # Python GDS client

DB_ULR = "neo4j://localhost:7687"
DB_USER = "neo4j"
DB_PASS = "test1234"
DB_NAME = "traceall"

# And some cermony to create the driver and gds objects
gds = GraphDataScience(DB_ULR, auth=(DB_USER, DB_PASS))
gds.set_database(DB_NAME)
gds.version()

'2.2.7'

# Create databse and schema

In [None]:
# Create (or replace) database
df = gds.run_cypher("CREATE OR REPLACE DATABASE {dbname}".format(dbname = DB_NAME), database="system")

In [None]:
# Create indexes and constraints
gds.run_cypher('CREATE CONSTRAINT IF NOT EXISTS FOR (n:Place) REQUIRE (n.id, n.lot) IS NODE KEY',database=DB_NAME)
gds.run_cypher('CREATE CONSTRAINT IF NOT EXISTS FOR (n:Part) REQUIRE (n.id, n.lot) IS NODE KEY',database=DB_NAME)


#  Load data

In [5]:
# Read csv file
b2s = pd.read_csv('./datasets/large/trace_b2s_large.csv')
b2s.head()

Unnamed: 0,part_id,prod_lot,place_id,ship_lot
0,M01,1,F01,1
1,M01,2,F01,2
2,M01,3,F01,3
3,M01,4,F01,4
4,M01,5,F01,5


In [6]:
# Read csv file
bom = pd.read_csv('./datasets/large/trace_bom_large.csv')
bom.head()

Unnamed: 0,parent_id,parent_lot,child_id,child_lot
0,M01,1,P0001,495079
1,P0001,495079,P0011,753006
2,P0011,753006,P0111,737363
3,P0111,737363,P1111,192562
4,P0111,737363,P1112,112039


In [7]:
# Read csv file
scn = pd.read_csv('./datasets/large/trace_scn_large.csv')
scn.head()

Unnamed: 0,src_id,src_lot,dst_id,dst_lot
0,F01,1,I0001,90850
1,I0001,90850,I0011,836047
2,I0011,836047,I0111,182954
3,I0111,182954,I1111,983536
4,I0111,182954,I1112,127316


In [8]:
# Select all place nodes
places = b2s[['place_id','ship_lot']].drop_duplicates().dropna()
places.head()

Unnamed: 0,place_id,ship_lot
0,F01,1
1,F01,2
2,F01,3
3,F01,4
4,F01,5


In [13]:
# Create Place nodes
for chunk in np.array_split(places, 2):
    df = gds.run_cypher(
        """
        UNWIND $data as row
        MERGE (n:Place{id: row.place_id, lot: row.ship_lot})
        RETURN count(*) as nodesCreated
        """,
        params = {'data' :  chunk.to_dict('records')},
        database=DB_NAME
    
    )
    print(df)

   nodesCreated
0         50000
   nodesCreated
0         50000


In [15]:
# Select all part nodes
parts = b2s[['part_id','prod_lot']].drop_duplicates().dropna()
parts.head()

Unnamed: 0,part_id,prod_lot
0,M01,1
1,M01,2
2,M01,3
3,M01,4
4,M01,5


In [16]:
# Create Part nodes
for chunk in np.array_split(parts, 2):
    df = gds.run_cypher(
        """
        UNWIND $data as row
        MERGE (n:Part{id: row.part_id, lot: row.prod_lot})
        RETURN count(*) as nodesCreated
        """,
        params = {'data' :  chunk.to_dict('records')},
        database=DB_NAME
    
    )
    print(df)

   nodesCreated
0         50000
   nodesCreated
0         50000


In [18]:
# Create Part - produced_at - Place
for chunk in np.array_split(b2s, 2):
    df = gds.run_cypher(
        """
        UNWIND $data as row
        MATCH (part:Part{id: row.part_id, lot: row.prod_lot}), (place:Place{id: row.place_id, lot: row.ship_lot})
        MERGE (part)-[:produced_at]->(place)
        RETURN count(*) as relsCreated
        """,
        params = {'data' :  chunk.to_dict('records')},
        database=DB_NAME
    
    )
3   print(df)

   relsCreated
0        50000
   relsCreated
0        50000


In [34]:
# parent_id	parent_lot	child_id	child_lot
# Select all part nodes from bom data
bom_parts = pd.concat([
            bom[['parent_id','parent_lot']].rename(columns={'parent_id': 'id', 'parent_lot':'lot'}),
            bom[['child_id','child_lot']].rename(columns={'child_id': 'id', 'child_lot':'lot'})
]).drop_duplicates().dropna()
bom_parts.head()

Unnamed: 0,id,lot
0,M01,1
1,P0001,495079
2,P0011,753006
3,P0111,737363
6,P0112,841064


In [35]:
for chunk in np.array_split(bom_parts, 60):
    df = gds.run_cypher(
        """
        UNWIND $data as row
        MERGE (n:Part{id: row.id, lot: row.lot})
        RETURN count(*) as nodesCreated
        """,
        params = {'data' :  chunk.to_dict('records')},
        database=DB_NAME
    
    )
    print(df)

   nodesCreated
0         49250
   nodesCreated
0         49250
   nodesCreated
0         49250
   nodesCreated
0         49250
   nodesCreated
0         49250
   nodesCreated
0         49250
   nodesCreated
0         49250
   nodesCreated
0         49250
   nodesCreated
0         49250
   nodesCreated
0         49250
   nodesCreated
0         49250
   nodesCreated
0         49250
   nodesCreated
0         49250
   nodesCreated
0         49250
   nodesCreated
0         49250
   nodesCreated
0         49250
   nodesCreated
0         49250
   nodesCreated
0         49250
   nodesCreated
0         49250
   nodesCreated
0         49250
   nodesCreated
0         49250
   nodesCreated
0         49249
   nodesCreated
0         49249
   nodesCreated
0         49249
   nodesCreated
0         49249
   nodesCreated
0         49249
   nodesCreated
0         49249
   nodesCreated
0         49249
   nodesCreated
0         49249
   nodesCreated
0         49249
   nodesCreated
0         49249
   nodes

In [36]:
# src_id	src_lot	dst_id	dst_lot
# Select all place nodes from sourcing data
scn_locations = pd.concat([
            scn[['src_id','src_lot']].rename(columns={'src_id': 'id', 'src_lot':'lot'}),
            scn[['dst_id','dst_lot']].rename(columns={'dst_id': 'id', 'dst_lot':'lot'})
]).drop_duplicates().dropna()
bom_parts.head()

Unnamed: 0,id,lot
0,M01,1
1,P0001,495079
2,P0011,753006
3,P0111,737363
6,P0112,841064


In [37]:
for chunk in np.array_split(scn_locations, 60):
    df = gds.run_cypher(
        """
        UNWIND $data as row
        MERGE (n:Place{id: row.id, lot: row.lot})
        RETURN count(*) as nodesCreated
        """,
        params = {'data' :  chunk.to_dict('records')},
        database=DB_NAME
    
    )
    print(df)

   nodesCreated
0         49246
   nodesCreated
0         49246
   nodesCreated
0         49246
   nodesCreated
0         49246
   nodesCreated
0         49246
   nodesCreated
0         49246
   nodesCreated
0         49246
   nodesCreated
0         49246
   nodesCreated
0         49246
   nodesCreated
0         49246
   nodesCreated
0         49246
   nodesCreated
0         49246
   nodesCreated
0         49246
   nodesCreated
0         49246
   nodesCreated
0         49246
   nodesCreated
0         49246
   nodesCreated
0         49246
   nodesCreated
0         49245
   nodesCreated
0         49245
   nodesCreated
0         49245
   nodesCreated
0         49245
   nodesCreated
0         49245
   nodesCreated
0         49245
   nodesCreated
0         49245
   nodesCreated
0         49245
   nodesCreated
0         49245
   nodesCreated
0         49245
   nodesCreated
0         49245
   nodesCreated
0         49245
   nodesCreated
0         49245
   nodesCreated
0         49245
   nodes

In [38]:
# parent_id	parent_lot	child_id	child_lot
# Create Part - part_of - Part
for chunk in np.array_split(bom, 60):
    df = gds.run_cypher(
        """
        UNWIND $data as row
        MATCH (child:Part{id: row.child_id, lot: row.child_lot}), (parent:Part{id: row.parent_id, lot: row.parent_lot})
        MERGE (child)-[:part_of]->(parent)
        RETURN count(*) as relsCreated
        """,
        params = {'data' :  chunk.to_dict('records')},
        database=DB_NAME
    
    )
    print(df)

   relsCreated
0        50000
   relsCreated
0        50000
   relsCreated
0        50000
   relsCreated
0        50000
   relsCreated
0        50000
   relsCreated
0        50000
   relsCreated
0        50000
   relsCreated
0        50000
   relsCreated
0        50000
   relsCreated
0        50000
   relsCreated
0        50000
   relsCreated
0        50000
   relsCreated
0        50000
   relsCreated
0        50000
   relsCreated
0        50000
   relsCreated
0        50000
   relsCreated
0        50000
   relsCreated
0        50000
   relsCreated
0        50000
   relsCreated
0        50000
   relsCreated
0        50000
   relsCreated
0        50000
   relsCreated
0        50000
   relsCreated
0        50000
   relsCreated
0        50000
   relsCreated
0        50000
   relsCreated
0        50000
   relsCreated
0        50000
   relsCreated
0        50000
   relsCreated
0        50000
   relsCreated
0        50000
   relsCreated
0        50000
   relsCreated
0        50000
   relsCre

In [40]:
# src_id	src_lot	dst_id	dst_lot
# Create Place - supplied_to - Place
for chunk in np.array_split(scn, 60):
    df = gds.run_cypher(
        """
        UNWIND $data as row
        MATCH (src:Place{id: row.src_id, lot: row.src_lot}), (dst:Place{id: row.dst_id, lot: row.dst_lot})
        MERGE (src)-[:supplied_to]->(dst)
        RETURN count(*) as relsCreated
        """,
        params = {'data' :  chunk.to_dict('records')},
        database=DB_NAME
    
    )
    print(df)

   relsCreated
0        50000
   relsCreated
0        50000
   relsCreated
0        50000
   relsCreated
0        50000
   relsCreated
0        50000
   relsCreated
0        50000
   relsCreated
0        50000
   relsCreated
0        50000
   relsCreated
0        50000
   relsCreated
0        50000
   relsCreated
0        50000
   relsCreated
0        50000
   relsCreated
0        50000
   relsCreated
0        50000
   relsCreated
0        50000
   relsCreated
0        50000
   relsCreated
0        50000
   relsCreated
0        50000
   relsCreated
0        50000
   relsCreated
0        50000
   relsCreated
0        50000
   relsCreated
0        50000
   relsCreated
0        50000
   relsCreated
0        50000
   relsCreated
0        50000
   relsCreated
0        50000
   relsCreated
0        50000
   relsCreated
0        50000
   relsCreated
0        50000
   relsCreated
0        50000
   relsCreated
0        50000
   relsCreated
0        50000
   relsCreated
0        50000
   relsCre

In [47]:
import time
time_s = time.time()
all_places = gds.run_cypher("""
    match (p1:Part{id:'P1111', lot:190295})-[*1..9]->(p2:Place)
    return distinct p2.id,p2.lot
    """, database=DB_NAME
)
time_e = time.time()
print(time_e-time_s)


0.008104085922241211
