In [1]:
import sqlite3
import pandas as pd

In [10]:
# Build a Neo4j edge list from Hop_Teaming_2018: one row per (from_npi, to_npi)
# pair whose two providers both have a practice-location postal code that
# zip_cbsa maps to CBSA 34980.
#
# Each endpoint is rendered as "<p|h>: <name> (<npi>)":
#   * "p" when entity_type_code = 1, "h" for every other value (including NULL);
#   * <name> is the organization's legal business name when present, otherwise
#     "last, first"; it falls back to 'unnamed' when that expression is NULL
#     (a NULL last or first name makes the whole concatenation NULL).
#
# String literals use single quotes: SQLite treats double-quoted tokens as
# identifiers first and only falls back to strings as a legacy quirk.
query = '''
SELECT CASE WHEN n1.entity_type_code = 1 THEN 'p: '
            ELSE 'h: ' END ||
       IFNULL(CASE WHEN n1.`provider_organization_name_(legal_business_name)` IS NULL
               THEN n1.`provider_last_name_(legal_name)` || ', ' || n1.provider_first_name
            ELSE n1.`provider_organization_name_(legal_business_name)` END, 'unnamed') ||
       ' (' || n1.npi || ')' AS from_provider,
       SUM(hop.patient_count) AS total_referrals,
       CASE WHEN n2.entity_type_code = 1 THEN 'p: '
            ELSE 'h: ' END ||
       IFNULL(CASE WHEN n2.`provider_organization_name_(legal_business_name)` IS NULL
               THEN n2.`provider_last_name_(legal_name)` || ', ' || n2.provider_first_name
            ELSE n2.`provider_organization_name_(legal_business_name)` END, 'unnamed') ||
       ' (' || n2.npi || ')' AS to_provider
FROM Hop_Teaming_2018 AS hop
INNER JOIN nppes n1
ON hop.from_npi = n1.npi
INNER JOIN nppes n2
ON hop.to_npi = n2.npi
INNER JOIN zip_cbsa AS zip1
ON n1.provider_business_practice_location_address_postal_code = zip1.zip
INNER JOIN zip_cbsa AS zip2
ON n2.provider_business_practice_location_address_postal_code = zip2.zip
WHERE zip1.cbsa = 34980
    AND zip2.cbsa = 34980
    AND n1.npi IS NOT NULL
    AND n2.npi IS NOT NULL
GROUP BY hop.from_npi, hop.to_npi;
'''

# sqlite3's connection context manager only commits or rolls back the open
# transaction; it does not close the connection. Close it explicitly so the
# database file handle is released even if the query raises.
db = sqlite3.connect('Hop_Teaming_2018.sqlite')
try:
    hop_neo4j_sqlite = pd.read_sql(query, db)
finally:
    db.close()

hop_neo4j_sqlite

Unnamed: 0,from_provider,total_referrals,to_provider
0,"p: GRABENSTEIN, WILLIAM (1003013160)",58,"h: ADVANCED DIAGNOSTIC IMAGING, PC (1093741464)"
1,"p: GRABENSTEIN, WILLIAM (1003013160)",32,h: VANDERBILT UNIVERSITY MEDICAL CENTER (11042...
2,"p: GRABENSTEIN, WILLIAM (1003013160)",57,h: SAINT THOMAS MEDICAL PARTNERS (1437194669)
3,"p: GRABENSTEIN, WILLIAM (1003013160)",27,h: TENNESSEE RETINA PC (1497828321)
4,"p: GRABENSTEIN, WILLIAM (1003013160)",50,h: TENNESSEE ORTHOPAEDIC ALLIANCE PA (1538153937)
...,...,...,...
158287,"p: BECK, ROBERT (1992985949)",32,h: TENNESSEE ONCOLOGY PLLC (1811955917)
158288,"p: BECK, ROBERT (1992985949)",153,h: RADIOLOGY ALLIANCE PC (1861478489)
158289,"p: HORTON, SUSANNE (1992987085)",171,h: VANDERBILT UNIVERSITY MEDICAL CENTER (11042...
158290,"p: HORTON, SUSANNE (1992987085)",50,"p: CHANDRASHEKAR, MEERA (1336223452)"


In [11]:
# index=False: the RangeIndex carries no information, and writing it would add
# an unnamed leading column to the CSV header.
hop_neo4j_sqlite.to_csv('hop_neo4j.csv', index=False)

In [17]:
# Build a Neo4j edge list from Hop_Teaming_2018 with provider attributes split
# into separate columns: entity type ("p" when entity_type_code = 1, else "h"),
# display name, NPI and taxonomy specialization for both the referring and the
# receiving provider, plus the summed patient_count between them.
#
# Only pairs whose two providers both have a practice-location postal code that
# zip_cbsa maps to CBSA 34980 are kept. Because taxonomy is INNER JOINed, a
# provider whose primary_taxonomy has no matching taxonomy.code row is dropped;
# IFNULL(..., 'None') only covers matched rows whose specialization is NULL.
#
# String literals use single quotes: SQLite treats double-quoted tokens as
# identifiers first and only falls back to strings as a legacy quirk.
query = '''
SELECT IIF (n1.entity_type_code = 1, 'p', 'h') AS from_entity_type,
       IFNULL(CASE WHEN n1.`provider_organization_name_(legal_business_name)` IS NULL
               THEN n1.`provider_last_name_(legal_name)` || ', ' || n1.provider_first_name
            ELSE n1.`provider_organization_name_(legal_business_name)` END, 'unnamed') AS from_provider_name,
       n1.npi AS from_npi,
       IFNULL(t1.specialization, 'None') AS from_specialization,
       SUM(hop.patient_count) AS total_referrals,
       IIF (n2.entity_type_code = 1, 'p', 'h') AS to_entity_type,
       IFNULL(CASE WHEN n2.`provider_organization_name_(legal_business_name)` IS NULL
               THEN n2.`provider_last_name_(legal_name)` || ', ' || n2.provider_first_name
            ELSE n2.`provider_organization_name_(legal_business_name)` END, 'unnamed') AS to_provider_name,
       n2.npi AS to_npi,
       IFNULL(t2.specialization, 'None') AS to_specialization
FROM Hop_Teaming_2018 AS hop
INNER JOIN nppes n1
ON hop.from_npi = n1.npi
INNER JOIN nppes n2
ON hop.to_npi = n2.npi
INNER JOIN taxonomy AS t1
ON n1.primary_taxonomy = t1.code
INNER JOIN taxonomy AS t2
ON n2.primary_taxonomy = t2.code
INNER JOIN zip_cbsa AS zip1
ON n1.provider_business_practice_location_address_postal_code = zip1.zip
INNER JOIN zip_cbsa AS zip2
ON n2.provider_business_practice_location_address_postal_code = zip2.zip
WHERE zip1.cbsa = 34980
    AND zip2.cbsa = 34980
    AND n1.npi IS NOT NULL
    AND n2.npi IS NOT NULL
GROUP BY hop.from_npi, t1.specialization, hop.to_npi, t2.specialization;
'''

# sqlite3's connection context manager only commits or rolls back the open
# transaction; it does not close the connection. Close it explicitly so the
# database file handle is released even if the query raises.
db = sqlite3.connect('Hop_Teaming_2018.sqlite')
try:
    hop_neo4j_specialization_sqlite = pd.read_sql(query, db)
finally:
    db.close()

hop_neo4j_specialization_sqlite

Unnamed: 0,from_entity_type,from_provider_name,from_npi,from_specialization,total_referrals,to_entity_type,to_provider_name,to_npi,to_specialization
0,p,"GRABENSTEIN, WILLIAM",1003013160,,58,h,"ADVANCED DIAGNOSTIC IMAGING, PC",1093741464,
1,p,"GRABENSTEIN, WILLIAM",1003013160,,32,h,VANDERBILT UNIVERSITY MEDICAL CENTER,1104202761,
2,p,"GRABENSTEIN, WILLIAM",1003013160,,57,h,SAINT THOMAS MEDICAL PARTNERS,1437194669,
3,p,"GRABENSTEIN, WILLIAM",1003013160,,27,h,TENNESSEE RETINA PC,1497828321,Retina Specialist
4,p,"GRABENSTEIN, WILLIAM",1003013160,,50,h,TENNESSEE ORTHOPAEDIC ALLIANCE PA,1538153937,
...,...,...,...,...,...,...,...,...,...
158287,p,"BECK, ROBERT",1992985949,,32,h,TENNESSEE ONCOLOGY PLLC,1811955917,Hematology & Oncology
158288,p,"BECK, ROBERT",1992985949,,153,h,RADIOLOGY ALLIANCE PC,1861478489,Diagnostic Radiology
158289,p,"HORTON, SUSANNE",1992987085,,171,h,VANDERBILT UNIVERSITY MEDICAL CENTER,1104202761,
158290,p,"HORTON, SUSANNE",1992987085,,50,p,"CHANDRASHEKAR, MEERA",1336223452,


In [18]:
# index=False: the RangeIndex carries no information, and writing it would add
# an unnamed leading column to the header that LOAD CSV WITH HEADERS reads.
hop_neo4j_specialization_sqlite.to_csv('hop_neo4j_specialization.csv', index=False)

## Neo4j: import the referral graph and detect communities

cp hop_neo4j.csv hop_neo4j_specialization.csv /Users/thidathornvanitsthian/Library/Application\ Support/Neo4j\ Desktop/Application/relate-data/dbmss/dbms-79293fc4-ed07-4c60-bb54-a30a547ade7f/import

In [None]:
// Load the referral edge list from hop_neo4j_specialization.csv (must be in the
// DBMS import directory). Each row yields two Provider nodes and one REFERRED
// relationship carrying the integer referral count.
//
// Providers are merged on npi alone and the remaining attributes are set only
// when the node is first created. Merging on the full property map would
// create a second node for the same NPI whenever any other attribute differed.
//
// The relationship is MERGEd rather than CREATEd so re-running this statement
// does not add parallel REFERRED edges (which a SUM-aggregating projection
// would double-count).
// NOTE(review): assumes the CSV has at most one row per (from_npi, to_npi)
// pair; confirm against the exporting query's GROUP BY.
LOAD CSV WITH HEADERS
FROM 'file:///hop_neo4j_specialization.csv' AS line
MERGE (src:Provider {npi: line.from_npi})
  ON CREATE SET src.name = line.from_provider_name,
                src.specialization = line.from_specialization,
                src.entity_type = line.from_entity_type
MERGE (dst:Provider {npi: line.to_npi})
  ON CREATE SET dst.name = line.to_provider_name,
                dst.specialization = line.to_specialization,
                dst.entity_type = line.to_entity_type
MERGE (src)-[r:REFERRED]->(dst)
  ON CREATE SET r.num_referrals = toInteger(line.total_referrals);

In [None]:
// Project the Provider nodes and their REFERRED relationships into a GDS
// graph named 'hop'. Relationships are treated as undirected, parallel
// relationships between the same pair are collapsed by summing, and
// num_referrals is carried along as a relationship property.
CALL gds.graph.project(
  'hop',
  'Provider',
  {
    REFERRED: {
      orientation: 'UNDIRECTED',
      aggregation: 'SUM'
    }
  },
  {
    relationshipProperties: 'num_referrals'
  }
)

In [None]:
// Run Louvain on the 'hop' projection, weighting relationships by
// num_referrals, and list each provider's name with its community id,
// sorted by name.
CALL gds.louvain.stream('hop', {relationshipWeightProperty: 'num_referrals'})
YIELD nodeId, communityId
WITH gds.util.asNode(nodeId) AS provider, communityId
RETURN provider.name AS name, communityId
ORDER BY name ASC

In [22]:
# Read the community assignments from louvain.csv and count the non-null
# values of every other column within each communityId.
# NOTE(review): louvain.csv is presumably an export of the Louvain stream
# query's result; nothing in this notebook writes it, so confirm its origin.
louvain = pd.read_csv('louvain.csv')
community_sizes = louvain.groupby(by='communityId').count()
community_sizes

Unnamed: 0_level_0,name
communityId,Unnamed: 1_level_1
220,19
331,3
373,2
578,64
606,2
...,...
8184,55
8244,18
8249,2
8254,2
