In [1]:
import neo4j

import csv

import math
import numpy as np
import pandas as pd

import psycopg2

### Support code that makes things run

In [2]:
driver = neo4j.GraphDatabase.driver(uri="neo4j://neo4j:7687", auth=("neo4j","ucb_mids_w205"))

In [3]:
session = driver.session(database="neo4j")

In [4]:
connection = psycopg2.connect(
    user = "postgres",
    password = "ucb",
    host = "postgres",
    port = "5432",
    database = "postgres"
)

In [5]:
cursor = connection.cursor()

In [6]:
def my_neo4j_wipe_out_database():
    "wipe out database by deleting all nodes and relationships"
    
    query = "match (node)-[relationship]->() delete node, relationship"
    session.run(query)
    
    query = "match (node) delete node"
    session.run(query)

In [7]:
def my_neo4j_run_query_pandas(query, **kwargs):
    "run a query and return the results in a pandas dataframe"
    
    result = session.run(query, **kwargs)
    
    df = pd.DataFrame([r.values() for r in result], columns=result.keys())
    
    return df

In [8]:
def my_neo4j_number_nodes_relationships():
    "print the number of nodes and relationships"
   
    
    query = """
        match (n) 
        return n.name as node_name, labels(n) as labels
        order by n.name
    """
    
    df = my_neo4j_run_query_pandas(query)
    
    number_nodes = df.shape[0]
    
    
    query = """
        match (n1)-[r]->(n2) 
        return n1.name as node_name_1, labels(n1) as node_1_labels, 
            type(r) as relationship_type, n2.name as node_name_2, labels(n2) as node_2_labels
        order by node_name_1, node_name_2
    """
    
    df = my_neo4j_run_query_pandas(query)
    
    number_relationships = df.shape[0]
    
    print("-------------------------")
    print("  Nodes:", number_nodes)
    print("  Relationships:", number_relationships)
    print("-------------------------")


### Building code that builds provider nodes

In [9]:
def my_neo4j_create_provider_node(provider):
    "create a node with label Provider"
    
    query = """
    
    CREATE (:Provider {name: $provider_npi})
    
    """
    
    session.run(query, provider_npi=provider_npi)

### Building code for drug nodes

In [10]:
def my_neo4j_create_drug_node(drug):
    "create a node with label Drug"
    
    query = """
    
    CREATE (:Drug {code: $drug_code})
    
    """
    
    session.run(query, drug_code=drug_code)

### Building 1 way relationship between provides and drug prescriptions with a weight

In [11]:
def my_neo4j_create_relationship_one_way(provider, code ,total_amount_paid):
    "Create a one-way relationship from a provider to a drug they have prescribed with a weight"
    
    query = """
    MATCH (p:Provider {name: $provider}), 
          (d:Drug {code: $code} )
    CREATE (p)-[r:Prescribes {total_amount_paid:$total_amount_paid}]->(d)
    RETURN p, r, d
    """
    
    session.run(query, provider=provider, code=code,total_amount_paid = total_amount_paid)


### Clean DB to see if it works

In [12]:
my_neo4j_wipe_out_database()

### Building Nodes

In [13]:
connection.rollback()

query = """

select provider_npi
from providers
where provider_state = 'AZ'

"""

cursor.execute(query)

connection.rollback()

rows = cursor.fetchall()

for row in rows:
    
    provider_npi = row[0]
    
    my_neo4j_create_provider_node(provider_npi)
   
    

### Building Drug nodes

In [14]:
connection.rollback()

query = """

select  
d.drug_code,
d.drug_description
from drugs d
group by 1,2
    


"""

cursor.execute(query)

connection.rollback()

rows = cursor.fetchall()

for row in rows:
    
    drug_code = row[0]
    
    my_neo4j_create_drug_node(drug_code)
   
    

### Building drug prescription relationship

In [15]:
connection.rollback()

query = """

select 
r.provider_npi as from_provider,
r.code as code,
r.total_amount_paid
from services_rendered r
     inner join (
         select provider_npi
            from providers
            where provider_state = 'AZ') k on k.provider_npi = r.provider_npi
where r.drug_indicator = 'Y'

"""

cursor.execute(query)

connection.rollback()

rows = cursor.fetchall()

for row in rows:
    provider = row[0]
    code = row[1]
    total_amount_paid = row[2]
    
    my_neo4j_create_relationship_one_way(provider, code, total_amount_paid)
  

### Building procedure nodes

In [16]:
def my_neo4j_create_procedure_node(procedure):
    "create a node with label Procedure"
    
    query = """
    
    CREATE (:Procedure {code: $procedure})
    
    """
    
    session.run(query, procedure=procedure)

In [17]:
connection.rollback()

query = """

select 
procedure_code as procedure,
procedure_description as description
from procedures


"""

cursor.execute(query)

connection.rollback()

rows = cursor.fetchall()

for row in rows:
    
    procedure = row[0]
    
    my_neo4j_create_procedure_node(procedure)
   

### Build Procedure Relationship

In [18]:
def my_neo4j_create_relationship_one_way_procedure(provider, procedure, total_amount_paid):
    "Create a one-way relationship from a provider to a drug they have prescribed with a weight"
    
    query = """
    MATCH (p:Provider {name:$provider}), 
          (d:Procedure {code: $procedure})
    CREATE (p)-[r:Conducted {total_amount_paid:$total_amount_paid}]->(d)
    RETURN p, r, d
    """
    
    session.run(query, provider=provider, procedure=procedure, total_amount_paid = total_amount_paid)


In [19]:
connection.rollback()

query = """

select 
r.provider_npi as from_provider,
r.code as procedure,
r.total_amount_paid
from services_rendered r
     inner join (
         select provider_npi
            from providers
            where provider_state = 'AZ') k on k.provider_npi = r.provider_npi
where r.drug_indicator = 'N'

"""

cursor.execute(query)

connection.rollback()

rows = cursor.fetchall()

for row in rows:
    provider = row[0]
    procedure = row[1]
    total_amount_paid = row[2]
    
    my_neo4j_create_relationship_one_way_procedure(provider, procedure, total_amount_paid)

In [20]:
def my_neo4j_create_relationship_one_way_procedure(provider, procedure, total_amount_paid):
    query = """
    MATCH (p:Provider {NPI: $provider}), (pr:Procedure {code: $procedure})
    CREATE (p)-[:RECOMMENDS {amount_paid: $total_amount_paid}]->(pr)
    """
    session.run(query, provider=provider, procedure=procedure, total_amount_paid=total_amount_paid)

In [21]:
query = """
select 
    r.provider_npi as from_provider,
    r.code as procedure,
    r.total_amount_paid
from services_rendered r
    inner join (
        select provider_npi
        from providers
        where provider_state = 'FL'
    ) k on k.provider_npi = r.provider_npi
where r.drug_indicator = 'N'
order by r.total_amount_paid desc
limit 100
"""

cursor.execute(query)

# Fetch rows
rows = cursor.fetchall()

# Iterate through rows and create relationships in Neo4j
for row in rows:
    provider = row[0]
    procedure = row[1]
    total_amount_paid = row[2]
    
    # Print to check the data before calling the function
    print(f"Creating relationship for Provider: {provider}, Procedure: {procedure}, Total Paid: {total_amount_paid}")
    
    # Create the relationship in Neo4j
    my_neo4j_create_relationship_one_way_procedure(provider, procedure, total_amount_paid)


Creating relationship for Provider: 1699115014, Procedure: 14301, Total Paid: 1967.22
Creating relationship for Provider: 1417073503, Procedure: 15260, Total Paid: 1805.39
Creating relationship for Provider: 1699115014, Procedure: 15260, Total Paid: 1767.61
Creating relationship for Provider: 1699115014, Procedure: 14061, Total Paid: 1762.21
Creating relationship for Provider: 1740556364, Procedure: 14060, Total Paid: 1541.34
Creating relationship for Provider: 1164614004, Procedure: 36465, Total Paid: 1337.62
Creating relationship for Provider: 1619081155, Procedure: 14301, Total Paid: 1186.38
Creating relationship for Provider: 1467458984, Procedure: 14301, Total Paid: 1169.76
Creating relationship for Provider: 1902828049, Procedure: 14301, Total Paid: 1161.96
Creating relationship for Provider: 1538272653, Procedure: 14301, Total Paid: 1158.08
Creating relationship for Provider: 1073518692, Procedure: 14301, Total Paid: 1153.62
Creating relationship for Provider: 1871577700, Proced

In [22]:
query = """
MATCH (p:Service {provider_npi: $provider_npi})-[:PERFORMED]->(d:DrugProcedure)
GROUP BY p.provider_npi, d.procedure
WITH p.provider_npi, d.procedure, COUNT(*) AS count
ORDER BY count DESC
RETURN p.provider_npi, d.procedure, count


"""

In [23]:
def my_neo4j_shortest_path(from_provider, to_provider):
    """
    Find the shortest path from one provider to another in Arizona, traversing through drugs.
    """
    # Drop any existing graph to avoid conflicts
    query = "CALL gds.graph.drop('ds_graph', false) YIELD graphName"
    session.run(query)

    # Project the graph with the proper direction and weights
    query = """
    CALL gds.graph.project(
        'ds_graph',
        ['Provider', 'Drug'],
        { Prescribes: { type: 'Prescribes', properties: ['total_amount_paid'] } }
    )
    """
    session.run(query)

    # Adjusted query: Path between providers in Arizona through drugs
    query = """
    MATCH (source:Provider {name: $from_provider})-[:Prescribes]->(drug:Drug)<-[:Prescribes]-(target:Provider {name: $to_provider})
    CALL gds.shortestPath.dijkstra.stream(
        'ds_graph',
        { 
          sourceNode: source, 
          targetNode: target, 
          relationshipWeightProperty: 'total_amount_paid'
        }
    )
    YIELD index, sourceNode, targetNode, totalCost, nodeIds, costs
    RETURN
        gds.util.asNode(nodeIds[0]).name AS start_provider,
        [nodeId IN nodeIds[1..-1] WHERE gds.util.asNode(nodeId):Drug | gds.util.asNode(nodeId).code] AS drugs_in_path,
        gds.util.asNode(nodeIds[-1]).name AS end_provider,
        totalCost,
        costs
    """
    # Execute the query
    result = session.run(query, from_provider=from_provider, to_provider=to_provider)
    
    results_found = False
    for r in result:
        results_found = True
        total_cost = float(r['totalCost'])
        print("\n--------------------------------")
        print(f"   Total Cost (Total Amount Paid): {total_cost}")
        print("--------------------------------")
        
        drugs_in_path = r['drugs_in_path']
        print(f"Drugs in Path: {drugs_in_path}")
    
    if not results_found:
        print("No path found. Please verify the data and relationships.")

    

In [24]:
my_neo4j_shortest_path('1164683793', '1174506265')

No path found. Please verify the data and relationships.


In [25]:
def my_neo4j_shortest_path(from_provider, to_provider):
    """
    Given a from_provider and to_provider, run and print the shortest path via procedures.
    """
    
    # Drop any existing graph to avoid conflicts (ensure it's a fresh graph)
    query = "CALL gds.graph.drop('ds_graph', false) YIELD graphName"
    session.run(query)

    # Project the graph with 'Provider' nodes and 'Prescribes' relationships (via procedure)
    query = """
    CALL gds.graph.project(
        'ds_graph', 
        'Provider', 
        'Prescribes', 
        { relationshipProperties: 'total_amount_paid' }
    )
    """
    session.run(query)

    # Query to find the shortest path between two providers (through procedures)
    query = """
    MATCH (source:Provider {name: $from_provider}), 
          (target:Provider {name: $to_provider})
    CALL gds.shortestPath.dijkstra.stream(
        'ds_graph', 
        { 
          sourceNode: source, 
          targetNode: target, 
          relationshipWeightProperty: 'total_amount_paid'
        }
    )
    YIELD index, sourceNode, targetNode, totalCost, nodeIds, costs, path
    RETURN
        gds.util.asNode(sourceNode).name AS from,
        gds.util.asNode(targetNode).name AS to,
        totalCost,
        [nodeId IN nodeIds | gds.util.asNode(nodeId).name] AS providers_in_path,
        costs,
        path
    ORDER BY index
    """
    
    # Ensure you're passing the correct parameters here
    result = session.run(query, from_provider=from_provider, to_provider=to_provider)

    results_found = False
    for r in result:
        results_found = True
        total_cost = float(r['totalCost'])
        print("\n--------------------------------")
        print(f"   Total Cost (Total Amount Paid): {total_cost}")
        print("--------------------------------")
        
        providers_in_path = r['providers_in_path']
        print(f"Providers in Path: {providers_in_path}")
    
    if not results_found:
        print("No path found. Please verify the data and relationships.")



In [26]:
my_neo4j_shortest_path('1962780841', '1851515381')

No path found. Please verify the data and relationships.


In [27]:
# Step 1: Run Louvain Community Detection
community_detection_query = """
CALL gds.louvain.write('ds_graph', {
    writeProperty: 'community'
})
"""
session.run(community_detection_query)

find_anomalous_providers_query = """
MATCH (p:Provider)
WHERE p.community IS NOT NULL
WITH p.community AS community, count(p) AS community_size
WHERE community_size < 5  
MATCH (p2:Provider)
WHERE p2.community = community
RETURN p2.name AS provider_name, p2.community AS community, community_size
ORDER BY community_size ASC
LIMIT 500
"""

result = session.run(find_anomalous_providers_query)

# Step 3: Display results
for record in result:
    print(f"Provider Name: {record['provider_name']}, Community: {record['community']}, Community Size: {record['community_size']}")

Provider Name: 1003079773, Community: 0, Community Size: 1
Provider Name: 1013160563, Community: 1, Community Size: 1
Provider Name: 1013170695, Community: 2, Community Size: 1
Provider Name: 1013457662, Community: 3, Community Size: 1
Provider Name: 1023338241, Community: 4, Community Size: 1
Provider Name: 1023372950, Community: 5, Community Size: 1
Provider Name: 1033182381, Community: 6, Community Size: 1
Provider Name: 1033386669, Community: 7, Community Size: 1
Provider Name: 1033568761, Community: 8, Community Size: 1
Provider Name: 1043620065, Community: 9, Community Size: 1
Provider Name: 1053631457, Community: 10, Community Size: 1
Provider Name: 1063470623, Community: 11, Community Size: 1
Provider Name: 1063619484, Community: 12, Community Size: 1
Provider Name: 1063623148, Community: 13, Community Size: 1
Provider Name: 1063668531, Community: 14, Community Size: 1
Provider Name: 1073585402, Community: 15, Community Size: 1
Provider Name: 1073860920, Community: 16, Communit