# Create Graph Database

POD Allocation | Project 3
Goal: Create simple graph database

Nodes: pod_id 
Relationship: asset_id
Labels: weight

Steps

- Setup Cursors and connection to Containers
- Run some queries
- Create Graph Database
- View Graph in Neo4j Browser

In [1]:
import neo4j

import csv

import math
import numpy as np
import pandas as pd

import psycopg2

In [2]:
driver = neo4j.GraphDatabase.driver(uri="neo4j://neo4j:7687", auth=("neo4j","ucb_mids_w205"))

In [3]:
session = driver.session(database="neo4j")

## Setup Cursors and Connection

In [4]:
def my_neo4j_wipe_out_database():
    "wipe out database by deleting all nodes and relationships"
    
    query = "match (node)-[relationship]->() delete node, relationship"
    session.run(query)
    
    query = "match (node) delete node"
    session.run(query)

In [5]:
def my_neo4j_run_query_pandas(query, **kwargs):
    "run a query and return the results in a pandas dataframe"
    
    result = session.run(query, **kwargs)
    
    df = pd.DataFrame([r.values() for r in result], columns=result.keys())
    
    return df

In [6]:
def my_neo4j_number_nodes_relationships():
    "print the number of nodes and relationships"
   
    
    query = """
        match (n) 
        return n.name as node_name, labels(n) as labels
        order by n.name
    """
    
    df = my_neo4j_run_query_pandas(query)
    
    number_nodes = df.shape[0]
    
    
    query = """
        match (n1)-[r]->(n2) 
        return n1.name as node_name_1, labels(n1) as node_1_labels, 
            type(r) as relationship_type, n2.name as node_name_2, labels(n2) as node_2_labels
        order by node_name_1, node_name_2
    """
    
    df = my_neo4j_run_query_pandas(query)
    
    number_relationships = df.shape[0]
    
    print("-------------------------")
    print("  Nodes:", number_nodes)
    print("  Relationships:", number_relationships)
    print("-------------------------")

# Custom Functions for our Database

In [7]:
def my_neo4j_create_pod_node(pod_id):
    "create a node with label pod_id"
    
    query = """
    
    CREATE (:Hedge_Fund {name: $pod_id})
    
    """
    
    session.run(query, pod_id=pod_id)

In [8]:
def my_neo4j_create_asset_node(asset_name):
    "create a node with label asset_name"
    
    query = """
    
    CREATE (:Asset {name: $asset_name})
    
    """

    session.run(query, asset_name=asset_name)

In [9]:
def my_neo4j_create_node_asset_relationship(pod_id, asset_id, weight):
    "create relationships between pod_id and asset_id with weight"
    
    query = """
    
    MATCH (p:Hedge_Fund {name: $pod_id})
    MATCH (a:Asset {name: $asset_id})
    CREATE (p)-[:HOLDS {weight: $weight}]->(a)
    
    """
    session.run(query, pod_id=pod_id, asset_id=asset_id, weight=weight)

In [10]:
connection = psycopg2.connect(
    user = "postgres",
    password = "ucb",
    host = "postgres",
    port = "5432",
    database = "postgres"
)

In [11]:
cursor = connection.cursor()

## Wipe out Neo4j database

In [12]:
my_neo4j_wipe_out_database()

## Check number of nodes and relationships

In [13]:
my_neo4j_number_nodes_relationships()

-------------------------
  Nodes: 0
  Relationships: 0
-------------------------


## Query the list of PODs and create Nodes

In [14]:
connection.rollback()

query = """

select distinct(pod_id)
from temp_pods_current_allocation
order by pod_id

"""

cursor.execute(query)

connection.rollback()

rows = cursor.fetchall()

for row in rows:
    
    pod = row[0]
    
    my_neo4j_create_pod_node(pod)

In [15]:
my_neo4j_number_nodes_relationships()

-------------------------
  Nodes: 22
  Relationships: 0
-------------------------


## Query the list of ASSETS and create Nodes

In [16]:
connection.rollback()

query = """

select distinct(asset_id)
from temp_pods_current_allocation

"""

cursor.execute(query)

connection.rollback()

rows = cursor.fetchall()

for row in rows:
    asset = row[0]
    my_neo4j_create_asset_node(asset)

In [17]:
my_neo4j_number_nodes_relationships()

-------------------------
  Nodes: 3839
  Relationships: 0
-------------------------


## Create RELATIONSHIPS between PODS and ASSETS

In [18]:
connection.rollback()

query = """

select pod_id, asset_id, weight
from temp_pods_current_allocation

"""

cursor.execute(query)

connection.rollback()

rows = cursor.fetchall()

for row in rows:
    pod_id = row[0]
    asset_id = row[1]
    weight = float(row[2])
    my_neo4j_create_node_asset_relationship(pod_id, asset_id, weight)

In [19]:
my_neo4j_number_nodes_relationships()

-------------------------
  Nodes: 3839
  Relationships: 5472
-------------------------


# Analysis
## Run Graph Algorithms

Run this cell to start and wipeout stuff:

In [20]:
query = "CALL gds.graph.drop('ds_graph', false) yield graphName"
session.run(query)

query = """

CALL gds.graph.project(
  'hedgeFundGraph',
  ['Hedge_Fund', 'Asset'],
  {
    HOLDS: {
      orientation: 'UNDIRECTED',
      properties: 'weight'
    }
  }
)

"""

session.run(query)

<neo4j._sync.work.result.Result at 0x7ff32bcb91c0>

## Page Rank

In [21]:
query = """

CALL gds.pageRank.stream('hedgeFundGraph')
YIELD nodeId, score
RETURN gds.util.asNode(nodeId).name AS hedge_fund, score
ORDER BY score DESC
LIMIT 10

"""

my_neo4j_run_query_pandas(query)

ClientError: {code: Neo.ClientError.Procedure.ProcedureCallFailed} {message: Failed to invoke procedure `gds.graph.project`: Caused by: java.lang.IllegalArgumentException: A graph with name 'hedgeFundGraph' already exists.}