# Create Source CSVs
This notebook is kept for tracking purposes only and is not reproducible. It creates the source CSV files and uploads them to Google Cloud Storage.

In [1]:
from os import times

from dotenv import load_dotenv
import os

from nbclient.exceptions import timeout_err_msg

env_file = '.env'

# Load the .env file when it exists; override=True lets its values replace
# variables that are already set in the process environment.
if os.path.exists(env_file):
    load_dotenv(env_file, override=True)
else:
    print(f"File {env_file} not found.")

# Always bind the connection settings, whether or not a .env file was found.
# Previously they were only assigned inside the `if` branch, so a missing file
# left the names undefined and the driver cell failed with a NameError even
# when the variables were exported in the shell.
HOST = os.getenv('NEO4J_URI')
USERNAME = os.getenv('NEO4J_USERNAME')
PASSWORD = os.getenv('NEO4J_PASSWORD')

# Report which settings are absent (names only, never the values).
missing_settings = [
    setting_name
    for setting_name, setting_value in (
        ('NEO4J_URI', HOST),
        ('NEO4J_USERNAME', USERNAME),
        ('NEO4J_PASSWORD', PASSWORD),
    )
    if not setting_value
]
if missing_settings:
    print(f"Missing Neo4j settings: {', '.join(missing_settings)}")

In [5]:
from neo4j import GraphDatabase

# Open the Neo4j driver with the URI and credentials read from the environment;
# the export cells below all query through this `driver` object.
neo4j_credentials = (USERNAME, PASSWORD)
driver = GraphDatabase.driver(HOST, auth=neo4j_credentials)

## Create CSVs

In [6]:
import os

# Folder that receives every exported CSV; exist_ok=True makes re-running this cell a no-op.
csv_output_dir = "source-csvs"
os.makedirs(csv_output_dir, exist_ok=True)

In [7]:
# Suppliers
# Supplier attributes plus the latitude/longitude of the node reached through
# LOCATED_AT (null when a supplier has no such relationship).
suppliers_cypher = """
MATCH (s:Supplier)
OPTIONAL MATCH (s)-[:LOCATED_AT]->(l)
    RETURN s.code AS code,
        s.ANNUAL_SPEND AS annual_spend,
        s.type AS tier,
        s.sub_type AS sub_type,
        s.category AS category,
        l.latitude AS latitude,
        l.longitude AS longitude
"""

res_df = driver.execute_query(
    suppliers_cypher,
    result_transformer_=lambda result: result.to_df(),
)

# Persist without the pandas index, then show the frame as the cell output.
res_df.to_csv('source-csvs/suppliers.csv', index=False)
res_df

Unnamed: 0,code,annual_spend,tier,sub_type,category,latitude,longitude
0,1BYKA5,0.00,Tier1,Manu,,-71.854680,71.681478
1,0Q6W9M,0.00,Tier1,Direct,,-71.854680,71.681478
2,U5TLHT,8056092.85,Tier1,Direct,,-71.854680,71.681478
3,1QZ9ME,0.00,Tier1,Direct,,-71.854680,71.681478
4,WH3ZVG,4801.61,Tier1,Direct,,-71.629455,70.897790
...,...,...,...,...,...,...,...
13885,07XTEF,0.00,Tier1,Manu,,-68.723473,66.523889
13886,P3OZZV,98107.83,Tier1,Direct,,-68.723473,66.523889
13887,7PM532,643743.20,Tier1,Direct,,-71.930702,71.005756
13888,ENEDNC,5453.94,Tier1,Direct,,-71.536288,71.746616


In [8]:
# SupplierItems
# One (supplier_code, sku_id) row per AT relationship from a Product/Component to a Supplier.
supplier_items_cypher = """
MATCH (i:Product|Component)-[:AT]->(s:Supplier)
    RETURN s.code AS supplier_code,
        i.skuId AS sku_id
"""

print("Reading Data...")
res = driver.execute_query(
    supplier_items_cypher,
    result_transformer_=lambda result: result.to_df(),
)

print("Writing CSV...")
res.to_csv('source-csvs/supplier-items.csv', index=False)


Reading Data...
Writing CSV...


In [12]:
# Items
# Pulls every Product/Component node together with:
#   * a type derived from its labels,
#   * its group code and the sub-category / category codes found along the
#     GROUP_OF -> SUB_CAT -> CAT_OF path,
#   * an estimated tier: the max supplier `type` string, or 'Tier0' when the
#     item has no AT relationship to a Supplier.
# The frame is kept in `res_df` for the naming/export cell that follows.
print("Reading Data...")

items_cypher = """
MATCH (i:Product|Component)
OPTIONAL MATCH (i)-[:AT]->(s:Supplier)
WITH i, coalesce(max(s.type), 'Tier0') AS estimated_tier
OPTIONAL MATCH path=(i)-[:GROUP_OF]->(g:Group)-[:SUB_CAT]->(s)-[:CAT_OF]->(c)
WHERE NOT (s.code STARTS WITH "DL" AND c.code = "AW") //throw out 2 odd cases
WITH i, estimated_tier, g.code AS code, nodes(collect(path)[0]) AS path_nodes //minority of multi-subcategories...just picking first one
with i, labels(i)[0] AS type, estimated_tier, code AS group_code, path_nodes[2].code AS sub_category,  path_nodes[3].code AS category
RETURN i.skuId AS sku_id,
    CASE labels(i)
        WHEN ['Product'] THEN 'PRODUCT'
        WHEN ['Component'] THEN 'COMPONENT'
        ELSE 'PRODUCT_AND_COMPONENT'
    END AS type,
  group_code,
  sub_category,
  category,
  estimated_tier
"""

res_df = driver.execute_query(
    items_cypher,
    result_transformer_=lambda result: result.to_df(),
)
res_df

Reading Data...




Unnamed: 0,sku_id,type,group_code,sub_category,category,estimated_tier
0,12243661,COMPONENT,1000.0,AW-0009,AW,Tier1
1,28499719,COMPONENT,1001.0,AX-0013,AX,Tier1
2,28246025,COMPONENT,1001.0,AX-0013,AX,Tier1
3,28722634,COMPONENT,1001.0,AX-0013,AX,Tier1
4,28263736,COMPONENT,1001.0,AX-0013,AX,Tier1
...,...,...,...,...,...,...
233604,25367709,PRODUCT,,,,Tier0
233605,25372957,PRODUCT,,,,Tier0
233606,28052594,PRODUCT,,,,Tier0
233607,28092207,PRODUCT,,,,Tier0


In [17]:
import random

# Candidate name stems for each estimated tier. The order inside each list
# matters: random.choice picks by position, so reordering changes the names
# produced for a given seed.
tier_archetypes = {
    "Tier6": [
        "RubberSeal", "PrecisionBolt", "Bolt", "Tubing",
        "Fastener", "RawWire", "Casting", "Rod",
    ],
    "Tier5": [
        "HydraulicFitting", "WiringLoom", "Bearing", "GearSet",
        "Gear", "Belt", "Sensor", "Harness",
        "Pulley", "Spring", "Bushing", "Clamp",
    ],
    "Tier4": [
        "PumpAssembly", "SensorUnit", "ControlValve",
        "MotorDrive", "Connector", "Cylinder",
    ],
    "Tier3": [
        "EngineCore", "AxleAssembly", "SteeringColumn",
        "ElectricalControlBox", "FrameSegment", "ComponentModule",
        "SensorModule", "ControlUnit", "Board",
    ],
    "Tier2": [
        "MachineFrame", "MachineArm", "OperatorCab",
        "PowerSystemModule", "ToolMountKit",
    ],
    "Tier1": [
        "DrivePlatform", "HydraulicSystem", "ChassisFrame",
        "CabWiringUnit", "MachineRig", "ControlAssembly",
    ],
    "Tier0": [
        "FarmTractor", "CropHarvester", "HayCollector",
        "FieldSprayer", "SeedPlanter",
    ],
}


def generate_component_name(tier):
    """Build a synthetic item name of the form ``<Archetype>_<5-char suffix>``.

    Args:
        tier: A key of ``tier_archetypes`` (e.g. ``"Tier3"``).

    Returns:
        A string made of one archetype drawn at random for that tier, an
        underscore, and five characters drawn (with replacement) from the
        uppercase letters A-Z and digits 0-9.

    Raises:
        KeyError: If ``tier`` is not a key of ``tier_archetypes``.

    Uses the module-level ``random`` state, so seeding ``random`` beforehand
    makes the output repeatable.
    """
    candidates = tier_archetypes[tier]
    # Draw the stem first, then the suffix: this order fixes how the RNG stream is consumed.
    stem = random.choice(candidates)
    alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
    tag = "".join(random.choices(alphabet, k=5))
    return stem + "_" + tag

# Fixed seed: re-running this cell on the same frame regenerates the same names.
random.seed(7474)

# One synthetic name per row, drawn in row order from that row's estimated tier.
res_df['name'] = res_df.estimated_tier.map(generate_component_name)

print("Writing CSVS...")

# Full item list: id, generated name and label-derived type.
item_columns = ['sku_id', 'name', 'type']
res_df[item_columns].to_csv('source-csvs/items.csv', index=False)

# Separate file containing only the rows whose estimated tier is 'Tier6'.
is_tier6 = res_df.estimated_tier == 'Tier6'
tier6_items = res_df.loc[is_tier6, ['sku_id', 'name']]
tier6_items.to_csv('source-csvs/items-estimated-tier-6.csv', index=False)

Writing CSVS...


In [18]:
# BOMTable
# One (parent_sku_id, child_sku_id) row per BOM relationship leaving a Product/Component.
bom_cypher = """
MATCH (parent:Product|Component)-[:BOM]->(child)
RETURN parent.skuId AS parent_sku_id,
    child.skuId AS child_sku_id
"""

print("Reading Data...")
res = driver.execute_query(
    bom_cypher,
    result_transformer_=lambda result: result.to_df(),
)

print("Writing CSV...")
res.to_csv('source-csvs/bom-table.csv', index=False)

Reading Data...
Writing CSV...


In [19]:
# ComponentGroups
# For each Group code, keeps the first collected SUB_CAT -> CAT_OF path and
# reads the sub-category (path node 1) and category (path node 2) codes from it.
component_groups_cypher = """
MATCH path=(g:Group)-[:SUB_CAT]->(s)-[:CAT_OF]->(c)
WHERE NOT (s.code STARTS WITH "DL" AND c.code = "AW") //throw out 2 odd cases
WITH g.code AS code, nodes(collect(path)[0]) AS path_nodes //minority of multi-subcategories...just picking first one
RETURN code, path_nodes[1].code AS sub_category,  path_nodes[2].code AS category
"""

print("Reading Data...")
res = driver.execute_query(
    component_groups_cypher,
    result_transformer_=lambda result: result.to_df(),
)

print("Writing CSV...")
res.to_csv('source-csvs/component-groups.csv', index=False)

Reading Data...
Writing CSV...


In [20]:
# CustomerFamilies
# Code and ANNUAL_REVENUE property of every CustomerFamily node.
customer_families_cypher = """
MATCH (n:CustomerFamily)
RETURN n.code AS code, n.ANNUAL_REVENUE AS annual_revenue
"""

print("Reading Data...")
res = driver.execute_query(
    customer_families_cypher,
    result_transformer_=lambda result: result.to_df(),
)

print("Writing CSV...")
res.to_csv('source-csvs/customer-families.csv', index=False)

Reading Data...
Writing CSV...


In [21]:
# Customers
# Customer code and revenue, plus the code of the node reached via PART_OF and
# the coordinates of the node reached via LOCATED_AT (both optional, so null
# when the relationship is absent).
customers_cypher = """
MATCH (c:Customer)
OPTIONAL MATCH (c)-[:LOCATED_AT]->(l)
OPTIONAL MATCH (c)-[:PART_OF]->(customerFamily)
    RETURN c.code AS code,
        customerFamily.code AS cust_family_code,
        c.ANNUAL_REVENUE AS annual_revenue,
        l.latitude AS latitude,
        l.longitude AS longitude
"""

print("Reading Data...")
res = driver.execute_query(
    customers_cypher,
    result_transformer_=lambda result: result.to_df(),
)

print("Writing CSV...")
res.to_csv('source-csvs/customers.csv', index=False)

Reading Data...
Writing CSV...


In [22]:
# CustomerItems
# One (customer_code, sku_id) row per AT relationship from a Product/Component to a Customer.
customer_items_cypher = """
MATCH (i:Product|Component)-[:AT]->(c:Customer)
    RETURN c.code AS customer_code,
        i.skuId AS sku_id
"""

print("Reading Data...")
res = driver.execute_query(
    customer_items_cypher,
    result_transformer_=lambda result: result.to_df(),
)

print("Writing CSV...")
res.to_csv('source-csvs/customer-items.csv', index=False)

Reading Data...
Writing CSV...


## Upload to Google Cloud Storage

In [23]:
# Run the gcloud Application Default Credentials login; stdout and stderr are both discarded,
# so any prompt, URL or error message from gcloud will not appear in the cell output.
# NOTE(review): gsutil normally uses `gcloud auth login` credentials rather than ADC — confirm
# this is the step that actually authorises the uploads below.
!gcloud auth application-default login > /dev/null 2>&1

In [27]:
# Upload the BOM table. The trailing slash makes gsutil treat genai-bom as a folder prefix,
# so the object is always written as genai-bom/bom-table.csv; without it, an empty prefix
# would cause the file to be stored as an object literally named "genai-bom".
!gsutil cp source-csvs/bom-table.csv gs://neo4j-workshop-data/genai-bom/

Copying file://source-csvs/bom-table.csv [Content-Type=text/csv]...
/ [0 files][127.4 MiB/129.9 MiB]                                                 [0 files][  6.7 MiB/129.9 MiB]                                                |/-\ [0 files][ 97.2 MiB/129.9 MiB]                                                // [1 files][129.9 MiB/129.9 MiB]                                                
Operation completed over 1 objects/129.9 MiB.                                    


In [28]:
# Upload the customer-items table. The trailing slash makes gsutil treat genai-bom as a folder
# prefix, so the object is always written as genai-bom/customer-items.csv; without it, an empty
# prefix would cause the file to be stored as an object literally named "genai-bom".
!gsutil cp source-csvs/customer-items.csv gs://neo4j-workshop-data/genai-bom/

Copying file://source-csvs/customer-items.csv [Content-Type=text/csv]...
-[0 files][    0.0 B/  9.7 MiB]                                                - [1 files][  9.7 MiB/  9.7 MiB]                                                
Operation completed over 1 objects/9.7 MiB.                                      


In [29]:
# Upload the customers table. The trailing slash makes gsutil treat genai-bom as a folder
# prefix, so the object is always written as genai-bom/customers.csv; without it, an empty
# prefix would cause the file to be stored as an object literally named "genai-bom".
!gsutil cp source-csvs/customers.csv gs://neo4j-workshop-data/genai-bom/

Copying file://source-csvs/customers.csv [Content-Type=text/csv]...
/ [1 files][268.4 KiB/268.4 KiB]                                                
Operation completed over 1 objects/268.4 KiB.                                    


In [30]:
# Upload the items table. The trailing slash makes gsutil treat genai-bom as a folder prefix,
# so the object is always written as genai-bom/items.csv; without it, an empty prefix would
# cause the file to be stored as an object literally named "genai-bom".
!gsutil cp source-csvs/items.csv gs://neo4j-workshop-data/genai-bom/

Copying file://source-csvs/items.csv [Content-Type=text/csv]...
-[0 files][    0.0 B/  8.4 MiB]                                                - [1 files][  8.4 MiB/  8.4 MiB]                                                
Operation completed over 1 objects/8.4 MiB.                                      


In [31]:
# Upload the supplier-items table. The trailing slash makes gsutil treat genai-bom as a folder
# prefix, so the object is always written as genai-bom/supplier-items.csv; without it, an empty
# prefix would cause the file to be stored as an object literally named "genai-bom".
!gsutil cp source-csvs/supplier-items.csv gs://neo4j-workshop-data/genai-bom/

Copying file://source-csvs/supplier-items.csv [Content-Type=text/csv]...
-[0 files][    0.0 B/  7.4 MiB]                                                - [1 files][  7.4 MiB/  7.4 MiB]                                                
Operation completed over 1 objects/7.4 MiB.                                      


In [32]:
# Upload the suppliers table. The trailing slash makes gsutil treat genai-bom as a folder
# prefix, so the object is always written as genai-bom/suppliers.csv; without it, an empty
# prefix would cause the file to be stored as an object literally named "genai-bom".
!gsutil cp source-csvs/suppliers.csv gs://neo4j-workshop-data/genai-bom/

Copying file://source-csvs/suppliers.csv [Content-Type=text/csv]...
/ [1 files][613.8 KiB/613.8 KiB]                                                
Operation completed over 1 objects/613.8 KiB.                                    
