Author: Dave Fauth

Last Updated: 16 May 2025

Neo4j Graph Analytics for Snowflake Version: 0.3.13

The Weakly Connected Components (WCC) algorithm finds sets of connected nodes in directed and undirected graphs. Two nodes are connected if there exists a path between them. The set of all nodes that are connected with each other forms a component.

This example uses data that was sourced from `Relationship 360 - People Business Intelligence Sample (Massachusetts)`. This data set is a free dataset that is available in the Snowflake Marketplace.

## Setting Up
Before we run our algorithms, we need to set the proper permissions. But before we start granting privileges to different roles, we need to ensure that you are using `accountadmin` to grant and create roles. Let's do that now:

In [None]:
-- Creating roles and granting privileges below requires the ACCOUNTADMIN role.
USE ROLE ACCOUNTADMIN;

In [None]:
-- Set up a dedicated database and schema that hold the data prepared for GDS,
-- then make that schema the default for the session.
CREATE DATABASE IF NOT EXISTS wcc_example;
CREATE SCHEMA IF NOT EXISTS wcc_example.public;
USE SCHEMA wcc_example.public;

Next let's set up the necessary roles, permissions, and resource access to enable Graph Analytics to operate on data within the WCC_EXAMPLE.PUBLIC schema. The following cell creates consumer roles for users and administrators (gds_user_role and gds_admin_role), grants the GDS application access to read from and write to tables and views, and ensures that future tables are accessible.

It also provides the application with access to the required compute pool and warehouse resources needed to run graph algorithms at scale.

In [None]:
-- Role, privilege and warehouse setup for running Graph Analytics against
-- WCC_EXAMPLE.PUBLIC. Run as ACCOUNTADMIN.
USE SCHEMA WCC_EXAMPLE.PUBLIC;

-- Create a consumer role for users and admins of the GDS application
CREATE ROLE IF NOT EXISTS gds_user_role;
CREATE ROLE IF NOT EXISTS gds_admin_role;
GRANT APPLICATION ROLE neo4j_graph_analytics.app_user TO ROLE gds_user_role;
GRANT APPLICATION ROLE neo4j_graph_analytics.app_admin TO ROLE gds_admin_role;

-- Creating a role does not make it usable by the creating user. Grant the
-- consumer role to the current user so that the later `USE ROLE gds_user_role`
-- succeeds.
SET gds_current_user = (SELECT CURRENT_USER());
GRANT ROLE gds_user_role TO USER IDENTIFIER($gds_current_user);

-- A database role bundles the data privileges shared by the consumer role
-- and the application.
CREATE DATABASE ROLE IF NOT EXISTS gds_db_role;
GRANT DATABASE ROLE gds_db_role TO ROLE gds_user_role;
GRANT DATABASE ROLE gds_db_role TO APPLICATION neo4j_graph_analytics;

-- Grant access to consumer data
GRANT USAGE ON DATABASE WCC_EXAMPLE TO ROLE gds_user_role;
GRANT USAGE ON SCHEMA WCC_EXAMPLE.PUBLIC TO ROLE gds_user_role;
-- NOTE(review): gds_user_role also has to be able to read the Marketplace
-- sample database used by the CREATE TABLE ... AS SELECT cells that follow
-- (typically IMPORTED PRIVILEGES on that database) -- confirm this was granted
-- when the listing was installed.

-- Required to read tabular data into a graph
GRANT SELECT ON ALL TABLES IN DATABASE WCC_EXAMPLE TO DATABASE ROLE gds_db_role;

-- Ensure the consumer role has access to created tables/views
GRANT ALL PRIVILEGES ON FUTURE TABLES IN SCHEMA WCC_EXAMPLE.PUBLIC TO DATABASE ROLE gds_db_role;
GRANT ALL PRIVILEGES ON ALL TABLES IN SCHEMA WCC_EXAMPLE.PUBLIC TO DATABASE ROLE gds_db_role;
GRANT CREATE TABLE ON SCHEMA WCC_EXAMPLE.PUBLIC TO DATABASE ROLE gds_db_role;
GRANT CREATE VIEW ON SCHEMA WCC_EXAMPLE.PUBLIC TO DATABASE ROLE gds_db_role;
GRANT ALL PRIVILEGES ON FUTURE VIEWS IN SCHEMA WCC_EXAMPLE.PUBLIC TO DATABASE ROLE gds_db_role;
GRANT ALL PRIVILEGES ON ALL VIEWS IN SCHEMA WCC_EXAMPLE.PUBLIC TO DATABASE ROLE gds_db_role;

-- Compute and warehouse access
GRANT USAGE ON WAREHOUSE NEO4J_GRAPH_ANALYTICS_APP_WAREHOUSE TO APPLICATION neo4j_graph_analytics;


In [None]:
-- Continue as the consumer role for data preparation and algorithm calls.
USE ROLE GDS_USER_ROLE;

In [None]:
-- Copy the sample PERSON data into the working schema.
CREATE TABLE WCC_EXAMPLE.PUBLIC.PERSON AS
SELECT *
FROM relationship_360__people_business_intelligence_sample_massachusetts.sample_datafeed_mass.person;

In [None]:
-- Copy the sample ORGANIZATION data into the working schema.
CREATE TABLE WCC_EXAMPLE.PUBLIC.ORGANIZATION AS
SELECT *
FROM relationship_360__people_business_intelligence_sample_massachusetts.sample_datafeed_mass.organization;

In [None]:
-- Copy the sample POSITION data into the working schema.
CREATE TABLE WCC_EXAMPLE.PUBLIC.POSITION AS
SELECT *
FROM relationship_360__people_business_intelligence_sample_massachusetts.sample_datafeed_mass.position;

In [None]:
-- Copy the sample BIO data into the working schema.
CREATE TABLE WCC_EXAMPLE.PUBLIC.BIO AS
SELECT *
FROM relationship_360__people_business_intelligence_sample_massachusetts.sample_datafeed_mass.bio;

In [None]:
-- Copy the sample COMMITTEE data into the working schema.
CREATE TABLE WCC_EXAMPLE.PUBLIC.COMMITTEE AS
SELECT *
FROM relationship_360__people_business_intelligence_sample_massachusetts.sample_datafeed_mass.committee;

In [None]:
-- Person node view: exposes PERSON.PERSON_ID under the single column nodeId.
CREATE OR REPLACE VIEW WCC_EXAMPLE.PUBLIC.PERSON_VW (nodeId)
AS
SELECT person.PERSON_ID
FROM WCC_EXAMPLE.PUBLIC.PERSON AS person;

In [None]:
-- Organization node view: exposes ORGANIZATION.ORGANIZATION_ID under the
-- single column nodeId.
CREATE OR REPLACE VIEW WCC_EXAMPLE.PUBLIC.ORGANIZATION_VW (nodeId)
AS
SELECT org.ORGANIZATION_ID
FROM WCC_EXAMPLE.PUBLIC.ORGANIZATION AS org;

In [None]:
-- Distinct (CITY, STATE, POSTAL_CODE) combinations that appear on organizations.
CREATE OR REPLACE VIEW WCC_EXAMPLE.PUBLIC.LOCATION_VW (CITY, STATE, POSTAL_CODE) AS
SELECT DISTINCT
    org.CITY,
    org.STATE,
    org.POSTAL_CODE
FROM WCC_EXAMPLE.PUBLIC.ORGANIZATION AS org;

In [None]:
-- Attach a node id to every distinct location from LOCATION_VW.
--
-- The id is a name-based UUID computed from the location's own columns, so
-- every query of this view yields the same id for the same location. A bare
-- UUID_STRING() is evaluated again each time the view is queried and returns
-- fresh random values, which means ids read through LOCATION_NODES_VW would
-- not match the ids read through LOCATION_ORGANIZATION_VW.
--
-- NULL parts are folded to '' and the parts are joined with '|' to form the
-- name that is hashed.
CREATE OR REPLACE VIEW WCC_EXAMPLE.PUBLIC.LOCATIONS_VW(nodeID, CITY, STATE, POSTAL_CODE)
AS
SELECT
    UUID_STRING(
        'fe971b24-9572-4005-b22f-351e9c09274d',  -- fixed namespace UUID; any constant UUID works
        COALESCE(TO_VARCHAR(o.CITY), '')
            || '|' || COALESCE(TO_VARCHAR(o.STATE), '')
            || '|' || COALESCE(TO_VARCHAR(o.POSTAL_CODE), '')
    ),
    o.CITY,
    o.STATE,
    o.POSTAL_CODE
FROM WCC_EXAMPLE.PUBLIC.LOCATION_VW o;

In [None]:
-- Preview a few rows of the location view.
SELECT
    NODEID,
    CITY,
    STATE,
    POSTAL_CODE
FROM WCC_EXAMPLE.PUBLIC.LOCATIONS_VW
LIMIT 10;

In [None]:
-- Location node view: only the NODEID column of LOCATIONS_VW.
CREATE OR REPLACE VIEW WCC_EXAMPLE.PUBLIC.LOCATION_NODES_VW (nodeID) AS
SELECT loc.NODEID
FROM WCC_EXAMPLE.PUBLIC.LOCATIONS_VW AS loc;

In [None]:
-- LOCATION -> ORGANIZATION relationships. Each organization is paired with the
-- location row whose CITY, STATE and POSTAL_CODE all equal its own; the
-- location id becomes sourceNodeId and the organization id targetNodeId.
CREATE OR REPLACE VIEW WCC_EXAMPLE.PUBLIC.LOCATION_ORGANIZATION_VW (sourceNodeId, targetNodeId) AS
SELECT
    loc.nodeID,
    org.ORGANIZATION_ID
FROM WCC_EXAMPLE.PUBLIC.LOCATIONS_VW AS loc
INNER JOIN WCC_EXAMPLE.PUBLIC.ORGANIZATION AS org
    ON  loc.CITY = org.CITY
    AND loc.STATE = org.STATE
    AND loc.POSTAL_CODE = org.POSTAL_CODE;

In [None]:
-- Spot-check a handful of LOCATION -> ORGANIZATION relationship rows.
SELECT
    SOURCENODEID,
    TO_CHAR(TARGETNODEID)
FROM WCC_EXAMPLE.PUBLIC.LOCATION_ORGANIZATION_VW
LIMIT 10;

In [None]:
-- PERSON -> ORGANIZATION relationships taken from COMMITTEE rows:
-- person_id becomes sourceNodeId, organization_id becomes targetNodeId.
CREATE OR REPLACE VIEW WCC_EXAMPLE.PUBLIC.COMMITTEE_VW (sourceNodeId, targetNodeId) AS
SELECT
    cm.person_id,
    cm.organization_id
FROM WCC_EXAMPLE.PUBLIC.COMMITTEE AS cm;

In [None]:
-- Run WCC over a graph projected from the views in WCC_EXAMPLE.PUBLIC:
--   nodes         : PERSON_VW, ORGANIZATION_VW, LOCATION_NODES_VW
--   relationships : COMMITTEE_VW             (PERSON_VW -> ORGANIZATION_VW)
--                   LOCATION_ORGANIZATION_VW (LOCATION_NODES_VW -> ORGANIZATION_VW)
-- Results for PERSON_VW nodes are written to WCC_EXAMPLE.PUBLIC.USERS_COMPONENTS.
CALL neo4j_graph_analytics.graph.wcc(
    'CPU_X64_L',
    {
        'project': {
            'defaultTablePrefix': 'WCC_EXAMPLE.PUBLIC',
            'nodeTables': [
                'PERSON_VW',
                'ORGANIZATION_VW',
                'LOCATION_NODES_VW'
            ],
            'relationshipTables': {
                'COMMITTEE_VW': {
                    'sourceTable': 'PERSON_VW',
                    'targetTable': 'ORGANIZATION_VW'
                },
                'LOCATION_ORGANIZATION_VW': {
                    'sourceTable': 'LOCATION_NODES_VW',
                    'targetTable': 'ORGANIZATION_VW'
                }
            }
        },
        'compute': {},
        'write': [
            {
                'nodeLabel': 'PERSON_VW',
                'outputTable': 'WCC_EXAMPLE.PUBLIC.USERS_COMPONENTS'
            }
        ]
    }
);

Once the algorithm has successfully completed, we can query the USERS_COMPONENTS table and see how many persons are in each component.

In [None]:
-- Number of person nodes per component, largest components first.
SELECT
    comp.COMPONENT,
    COUNT(comp.NODEID) AS NumberOfPersons
FROM WCC_EXAMPLE.PUBLIC.USERS_COMPONENTS AS comp
GROUP BY comp.COMPONENT
ORDER BY NumberOfPersons DESC;

When the algorithm writes result tables back to the database, the tables are owned by the application. You will need to alter the tables so that subsequent post-processing or visualization is easier by avoiding joins to the node ID mapping tables necessary for Graph Analytics. To do this, you have to transfer ownership of the tables from the application to the `gds_user_role` role, which must be done using the `accountadmin` role.

In [None]:
-- As ACCOUNTADMIN, hand ownership of the result table to gds_user_role
-- (revoking the grants currently on it), then switch back to that role.
USE ROLE accountadmin;
GRANT OWNERSHIP
    ON TABLE wcc_example.public.users_components
    TO ROLE gds_user_role
    REVOKE CURRENT GRANTS;

USE ROLE gds_user_role;

In [None]:
-- Clean up the result table. IF EXISTS keeps this cell from failing when the
-- table is already gone (for example when the notebook is rerun).
DROP TABLE IF EXISTS WCC_EXAMPLE.PUBLIC.USERS_COMPONENTS;