# Look for "communities" of providers in the Nashville/Davidson County CBSA using the Louvain community detection algorithm from Neo4j

In [2]:
import sqlite3
import pandas as pd

### Approach 1

In [3]:
# Approach 1: build the Neo4j edge list as a subset of Hop_Teaming_2018,
# identifying each provider only by its NPI.
# Each output row is one (from_npi, to_npi) pair with patient_count summed
# over the pair. Both endpoints must have a business practice ZIP that maps
# to CBSA 34980 (the Nashville/Davidson County CBSA named in the notebook title).
query = '''
SELECT n1.npi AS from_npi,
       SUM(hop.patient_count) AS total_referrals,
       n2.npi AS to_npi
FROM Hop_Teaming_2018 AS hop
INNER JOIN nppes n1
ON hop.from_npi = n1.npi
INNER JOIN nppes n2
ON hop.to_npi = n2.npi
INNER JOIN zip_cbsa as zip1
ON n1.provider_business_practice_location_address_postal_code = zip1.zip
INNER JOIN zip_cbsa as zip2
ON n2.provider_business_practice_location_address_postal_code = zip2.zip
WHERE zip1.cbsa = 34980
    AND zip2.cbsa = 34980
    AND n1.npi IS NOT NULL
    AND n2.npi IS NOT NULL
GROUP BY hop.from_npi, hop.to_npi;
'''

# sqlite3's connection context manager only commits/rolls back the
# transaction; it does NOT close the connection. Close it explicitly so the
# database handle is released even if the query raises.
db = sqlite3.connect('Hop_Teaming_2018.sqlite')
try:
    hop_neo4j_sqlite_npi = pd.read_sql(query, db)
finally:
    db.close()

hop_neo4j_sqlite_npi

Unnamed: 0,from_npi,total_referrals,to_npi
0,1003013160,58,1093741464
1,1003013160,32,1104202761
2,1003013160,57,1437194669
3,1003013160,27,1497828321
4,1003013160,50,1538153937
...,...,...,...
158287,1992985949,32,1811955917
158288,1992985949,153,1861478489
158289,1992987085,171,1104202761
158290,1992987085,50,1336223452


In [4]:
# Export the Approach 1 edge list for Neo4j. index=False keeps the
# auto-generated row index out of the file; otherwise it is written as an
# unnamed first column that carries no information.
hop_neo4j_sqlite_npi.to_csv('hop_neo4j_npi.csv', index=False)

### Approach 2

In [10]:
# Approach 2: same subset of Hop_Teaming_2018 as Approach 1, but each provider
# is identified by a single readable label of the form
#   "<p|h>: <name> (<npi>)"
# where the prefix is "p: " when entity_type_code = 1 and "h: " otherwise,
# and <name> is the organization name if present, else "LAST, FIRST",
# else "unnamed" when that concatenation is NULL.
# String literals use single quotes: double-quoted literals are a SQLite
# legacy quirk that is rejected when the library is built with DQS disabled.
query = '''
SELECT CASE WHEN n1.entity_type_code = 1 THEN 'p: '
            ELSE 'h: ' END ||
       IFNULL(CASE WHEN n1.`provider_organization_name_(legal_business_name)` IS NULL
               THEN n1.`provider_last_name_(legal_name)` || ', ' || n1.provider_first_name
            ELSE n1.`provider_organization_name_(legal_business_name)` END, 'unnamed') ||
       ' (' || n1.npi || ')' AS from_provider,
       SUM(hop.patient_count) AS total_referrals,
       CASE WHEN n2.entity_type_code = 1 THEN 'p: '
            ELSE 'h: ' END ||
       IFNULL(CASE WHEN n2.`provider_organization_name_(legal_business_name)` IS NULL
               THEN n2.`provider_last_name_(legal_name)` || ', ' || n2.provider_first_name
            ELSE n2.`provider_organization_name_(legal_business_name)` END, 'unnamed') ||
       ' (' || n2.npi || ')' AS to_provider
FROM Hop_Teaming_2018 AS hop
INNER JOIN nppes n1
ON hop.from_npi = n1.npi
INNER JOIN nppes n2
ON hop.to_npi = n2.npi
INNER JOIN zip_cbsa as zip1
ON n1.provider_business_practice_location_address_postal_code = zip1.zip
INNER JOIN zip_cbsa as zip2
ON n2.provider_business_practice_location_address_postal_code = zip2.zip
WHERE zip1.cbsa = 34980
    AND zip2.cbsa = 34980
    AND n1.npi IS NOT NULL
    AND n2.npi IS NOT NULL
GROUP BY hop.from_npi, hop.to_npi;
'''

# sqlite3's connection context manager only commits/rolls back the
# transaction; it does NOT close the connection. Close it explicitly so the
# database handle is released even if the query raises.
db = sqlite3.connect('Hop_Teaming_2018.sqlite')
try:
    hop_neo4j_sqlite = pd.read_sql(query, db)
finally:
    db.close()

hop_neo4j_sqlite

Unnamed: 0,from_provider,total_referrals,to_provider
0,"p: GRABENSTEIN, WILLIAM (1003013160)",58,"h: ADVANCED DIAGNOSTIC IMAGING, PC (1093741464)"
1,"p: GRABENSTEIN, WILLIAM (1003013160)",32,h: VANDERBILT UNIVERSITY MEDICAL CENTER (11042...
2,"p: GRABENSTEIN, WILLIAM (1003013160)",57,h: SAINT THOMAS MEDICAL PARTNERS (1437194669)
3,"p: GRABENSTEIN, WILLIAM (1003013160)",27,h: TENNESSEE RETINA PC (1497828321)
4,"p: GRABENSTEIN, WILLIAM (1003013160)",50,h: TENNESSEE ORTHOPAEDIC ALLIANCE PA (1538153937)
...,...,...,...
158287,"p: BECK, ROBERT (1992985949)",32,h: TENNESSEE ONCOLOGY PLLC (1811955917)
158288,"p: BECK, ROBERT (1992985949)",153,h: RADIOLOGY ALLIANCE PC (1861478489)
158289,"p: HORTON, SUSANNE (1992987085)",171,h: VANDERBILT UNIVERSITY MEDICAL CENTER (11042...
158290,"p: HORTON, SUSANNE (1992987085)",50,"p: CHANDRASHEKAR, MEERA (1336223452)"


In [11]:
# Export the Approach 2 edge list for Neo4j. index=False keeps the
# auto-generated row index out of the file; otherwise it is written as an
# unnamed first column that carries no information.
hop_neo4j_sqlite.to_csv('hop_neo4j.csv', index=False)

### Approach 3

In [17]:
# Approach 3: same subset of Hop_Teaming_2018, but entity type, provider name,
# NPI and taxonomy specialization are kept as separate columns for both ends
# of each referral, so they can be loaded as distinct node properties in Neo4j.
# - entity type is 'p' when entity_type_code = 1, otherwise 'h'.
# - a NULL specialization is replaced by the string 'None'.
# - taxonomy is INNER JOINed on primary_taxonomy, so a provider whose
#   primary_taxonomy has no matching taxonomy.code is dropped from the result.
# String literals use single quotes: double-quoted literals are a SQLite
# legacy quirk that is rejected when the library is built with DQS disabled.
query = '''
SELECT IIF (n1.entity_type_code = 1, 'p', 'h') AS from_entity_type,
       IFNULL(CASE WHEN n1.`provider_organization_name_(legal_business_name)` IS NULL
               THEN n1.`provider_last_name_(legal_name)` || ', ' || n1.provider_first_name
            ELSE n1.`provider_organization_name_(legal_business_name)` END, 'unnamed') AS from_provider_name,
       n1.npi AS from_npi,
       IFNULL(t1.specialization, 'None') AS from_specialization,
       SUM(hop.patient_count) AS total_referrals,
       IIF (n2.entity_type_code = 1, 'p', 'h') AS to_entity_type,
       IFNULL(CASE WHEN n2.`provider_organization_name_(legal_business_name)` IS NULL
               THEN n2.`provider_last_name_(legal_name)` || ', ' || n2.provider_first_name
            ELSE n2.`provider_organization_name_(legal_business_name)` END, 'unnamed') AS to_provider_name,
       n2.npi AS to_npi,
       IFNULL(t2.specialization, 'None') AS to_specialization
FROM Hop_Teaming_2018 AS hop
INNER JOIN nppes n1
ON hop.from_npi = n1.npi
INNER JOIN nppes n2
ON hop.to_npi = n2.npi
INNER JOIN taxonomy as t1
ON n1.primary_taxonomy = t1.code
INNER JOIN taxonomy as t2
ON n2.primary_taxonomy = t2.code
INNER JOIN zip_cbsa as zip1
ON n1.provider_business_practice_location_address_postal_code = zip1.zip
INNER JOIN zip_cbsa as zip2
ON n2.provider_business_practice_location_address_postal_code = zip2.zip
WHERE zip1.cbsa = 34980
    AND zip2.cbsa = 34980
    AND n1.npi IS NOT NULL
    AND n2.npi IS NOT NULL
GROUP BY hop.from_npi, t1.specialization, hop.to_npi, t2.specialization;
'''

# sqlite3's connection context manager only commits/rolls back the
# transaction; it does NOT close the connection. Close it explicitly so the
# database handle is released even if the query raises.
db = sqlite3.connect('Hop_Teaming_2018.sqlite')
try:
    hop_neo4j_specialization_sqlite = pd.read_sql(query, db)
finally:
    db.close()

hop_neo4j_specialization_sqlite

Unnamed: 0,from_entity_type,from_provider_name,from_npi,from_specialization,total_referrals,to_entity_type,to_provider_name,to_npi,to_specialization
0,p,"GRABENSTEIN, WILLIAM",1003013160,,58,h,"ADVANCED DIAGNOSTIC IMAGING, PC",1093741464,
1,p,"GRABENSTEIN, WILLIAM",1003013160,,32,h,VANDERBILT UNIVERSITY MEDICAL CENTER,1104202761,
2,p,"GRABENSTEIN, WILLIAM",1003013160,,57,h,SAINT THOMAS MEDICAL PARTNERS,1437194669,
3,p,"GRABENSTEIN, WILLIAM",1003013160,,27,h,TENNESSEE RETINA PC,1497828321,Retina Specialist
4,p,"GRABENSTEIN, WILLIAM",1003013160,,50,h,TENNESSEE ORTHOPAEDIC ALLIANCE PA,1538153937,
...,...,...,...,...,...,...,...,...,...
158287,p,"BECK, ROBERT",1992985949,,32,h,TENNESSEE ONCOLOGY PLLC,1811955917,Hematology & Oncology
158288,p,"BECK, ROBERT",1992985949,,153,h,RADIOLOGY ALLIANCE PC,1861478489,Diagnostic Radiology
158289,p,"HORTON, SUSANNE",1992987085,,171,h,VANDERBILT UNIVERSITY MEDICAL CENTER,1104202761,
158290,p,"HORTON, SUSANNE",1992987085,,50,p,"CHANDRASHEKAR, MEERA",1336223452,


In [18]:
# Export the Approach 3 edge list for Neo4j. index=False keeps the
# auto-generated row index out of the file; otherwise it is written as an
# unnamed first column that carries no information.
hop_neo4j_specialization_sqlite.to_csv('hop_neo4j_specialization.csv', index=False)

## Neo4j stuff

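Copy the exported CSV into the Neo4j import directory so that `LOAD CSV` can read it. The destination path is specific to one local Neo4j Desktop installation. Note that the command copies `hop_neo4j.csv`, while the `LOAD CSV` statement below reads `hop_neo4j_specialization.csv`; copy whichever file you intend to load.

```bash
cp hop_neo4j.csv /Users/thidathornvanitsthian/Library/Application\ Support/Neo4j\ Desktop/Application/relate-data/dbmss/dbms-79293fc4-ed07-4c60-bb54-a30a547ade7f/import
```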

In [None]:
// Load the Approach 3 edge list: one Provider node per NPI and one REFERRED
// relationship per CSV row, weighted by the integer referral count.
// Nodes are MERGEd on the npi key alone and the descriptive properties are
// set only when the node is first created. Matching on every property at
// once would create a second node for the same NPI whenever any descriptive
// value differed between rows.
LOAD CSV WITH HEADERS
FROM 'file:///hop_neo4j_specialization.csv' AS line
MERGE (src:Provider {npi: line.from_npi})
  ON CREATE SET src.name = line.from_provider_name,
                src.specialization = line.from_specialization,
                src.entity_type = line.from_entity_type
MERGE (dst:Provider {npi: line.to_npi})
  ON CREATE SET dst.name = line.to_provider_name,
                dst.specialization = line.to_specialization,
                dst.entity_type = line.to_entity_type
CREATE (src)-[:REFERRED {num_referrals: toInteger(line.total_referrals)}]->(dst);

In [None]:
// Project the in-memory GDS graph 'hopnpi' from Provider nodes and REFERRED
// relationships. Relationships are projected as UNDIRECTED with SUM
// aggregation, and the num_referrals property is carried along as the weight.
CALL gds.graph.project(
  'hopnpi',
  'Provider',
  {
    REFERRED: {
      orientation: 'UNDIRECTED',
      aggregation: 'SUM'
    }
  },
  {
    relationshipProperties: 'num_referrals'
  }
)

In [None]:
// Stream Louvain community assignments for the 'hopnpi' projection, weighting
// relationships by num_referrals, and return each node's name with its
// community id, sorted by name.
CALL gds.louvain.stream(
  'hopnpi',
  {relationshipWeightProperty: 'num_referrals'}
)
YIELD nodeId, communityId
RETURN gds.util.asNode(nodeId).name AS name,
       communityId
ORDER BY name ASC

## Louvain Analysis

In [3]:
# Load the (name, communityId) assignments from hop_neo4j_npi_providers.csv,
# then count the rows in each community, largest community first.
louvain1 = pd.read_csv('hop_neo4j_npi_providers.csv')
(
    louvain1
    .groupby('communityId')
    .count()
    .sort_values(by='name', ascending=False)
)

Unnamed: 0_level_0,name
communityId,Unnamed: 1_level_1
4418,1943
2983,1386
4727,1370
6328,726
1642,596
...,...
1629,2
1586,2
5076,2
5078,2


In [4]:
# Show the raw name -> communityId assignments held in louvain1
# (pandas truncates the displayed rows).
louvain1

Unnamed: 0,name,communityId
0,1003013160,2983
1,1003013947,4727
2,1003019902,4418
3,1003028770,1642
4,1003031261,2983
...,...,...
8305,1992972087,4727
8306,1992978845,3249
8307,1992985055,3300
8308,1992985949,4727


In [27]:
# Load the (name, communityId) assignments from louvain.csv, then count the
# rows in each community, largest community first.
louvain = pd.read_csv('louvain.csv')
(
    louvain
    .groupby('communityId')
    .count()
    .sort_values(by='name', ascending=False)
)

Unnamed: 0_level_0,name
communityId,Unnamed: 1_level_1
2190,1950
4620,1354
2499,1283
4100,734
1723,595
...,...
4623,2
4674,2
4687,2
4749,2


In [43]:
# Column names of the Louvain result frame, as a plain Python list.
louvain.columns.tolist()

['name', 'communityId']

In [54]:
# Collect the distinct provider names assigned to community 2190 (the largest
# community in the size table for louvain.csv) into a one-column frame.
in_community_2190 = louvain["communityId"] == 2190
d = louvain.loc[in_community_2190, 'name'].unique().tolist()
df = pd.DataFrame(data=d)
df

Unnamed: 0,0
0,h: ADVANCED NEUROSCIENCES INSTITUTE INC (14373...
1,h: AMBULATORY SURGERY CENTER OF COOL SPRINGS L...
2,h: ASSOCIATED CHIROPRACTIC AND MEDICAL P.C. (1...
3,h: ATHENA CONSULTING AND PSYCHOLOGICAL SERVICE...
4,"h: AVALON HEALTH CARE, LLC (1912905183)"
...,...
1945,"p: ZIC, JOHN (1942210877)"
1946,"p: ZIMMERMAN, CARL (1386736718)"
1947,"p: ZIMMERMAN, ELI (1447577119)"
1948,"p: ZOFFUTO, TERESA (1144587205)"


In [53]:
# Persist the member list of community 2190 to disk.
df.to_csv(path_or_buf='communityId_2190.csv')