In [56]:
import pandas as pd
import sqlite3
from tqdm.notebook import tqdm

In [57]:
pd.set_option("display.max_columns", 500)

In [58]:
#Listing currently existing tables in the database
with sqlite3.connect('../data/hcbb.sqlite') as db :
    query = """
        SELECT name
        FROM sqlite_master 
        WHERE type ='table' 
        AND name NOT LIKE 'sqlite_%';
        """ 

    test_df = pd.read_sql(query, db)

display(test_df)

Unnamed: 0,name
0,taxonomy
1,hop_teaming
2,cbsa
3,npidata
4,npidata_nashville
5,filtered_hop_teaming
6,hospitals
7,referrals


In [134]:
#See number of rows
with sqlite3.connect('../data/hcbb.sqlite') as db :
    query = """
        SELECT COUNT(DISTINCT from_npi)
        From referrals
        """ 

    test_df = pd.read_sql(query, db)

display(test_df)

Unnamed: 0,COUNT(DISTINCT from_npi)
0,3533


In [172]:
#See first row
with sqlite3.connect('../data/hcbb.sqlite') as db :
    query = """
        SELECT *
        From referrals
        LIMIT 1
        """ 

    test_df = pd.read_sql(query, db)

display(test_df)

Unnamed: 0.1,Unnamed: 0,index,from_npi,to_npi,patient_count,transaction_count,average_day_wait,std_day_wait,from_zip,from_npi_specialty,from_entity_type_code,to_zip,to_npi_specialty,to_entity_type_code,to_facility,to_facility_group,to_facility_name_normalised
0,0,615039,1013179860,1417938846,71,82,35.049,42.548,37075,Internal Medicine,1.0,37083,General Acute Care Hospital,2.0,"MACON COUNTY GENERAL HOSPITAL, INC.",Macon County General Hospital,Macon County General Hospital


## Look at Facility Groups, Classifications, and Specializations

In [101]:
# Look at facility groups
with sqlite3.connect('../data/hcbb.sqlite') as db :
    query = """
        SELECT DISTINCT(to_facility_group)
        FROM referrals
        """ 

    test_df = pd.read_sql(query, db)

display(test_df)

Unnamed: 0,to_facility_group
0,Macon County General Hospital
1,Maury Regional Medical Center
2,HCA
3,Ascension Saint Thomas
4,Vanderbilt University Medical Center
5,Williamson Medical Center
6,NorthCrest Medical Center
7,Nashville General Hosptial
8,Sumner Regional Medical Center
9,Riverview Regional Medical Center


In [150]:
# Look at categories
with sqlite3.connect('../data/hcbb.sqlite') as db :
    query = """
        SELECT DISTINCT(from_npi_specialty), SUM(transaction_count)
        FROM referrals
        GROUP BY from_npi_specialty
        ORDER BY SUM(transaction_count) DESC
        """ 

    test_df = pd.read_sql(query, db)

display(test_df)

Unnamed: 0,from_npi_specialty,SUM(transaction_count)
0,Internal Medicine,561652
1,Radiology,374292
2,Nurse Practitioner,97780
3,Family Medicine,94318
4,Pathology,59355
5,Orthopaedic Surgery,49212
6,Anesthesiology,44280
7,Emergency Medicine,41568
8,Surgery,37827
9,"Nurse Anesthetist, Certified Registered",36006


In [155]:
# Look at specializations
with sqlite3.connect('../data/hcbb.sqlite') as db :
    query = """
        SELECT DISTINCT(taxonomy.specialization), SUM(referrals.transaction_count)
        FROM referrals
        JOIN npidata_nashville
            ON referrals.from_npi = npidata_nashville.npi
        JOIN taxonomy
            ON npidata_nashville.taxonomy_code = taxonomy.taxonomy_code
        GROUP BY taxonomy.specialization
        ORDER BY SUM(referrals.transaction_count) DESC
        LIMIT 20
        """ 

    test_df = pd.read_sql(query, db)

display(test_df)

Unnamed: 0,specialization,SUM(referrals.transaction_count)
0,,548482
1,Diagnostic Radiology,354026
2,Cardiovascular Disease,172681
3,Anatomic Pathology & Clinical Pathology,52249
4,Nephrology,46017
5,Family,44548
6,Pulmonary Disease,37962
7,Hematology & Oncology,24684
8,Gastroenterology,23958
9,Interventional Cardiology,23209


In [158]:
# Look at classifications and specializations together
with sqlite3.connect('../data/hcbb.sqlite') as db :
    query = """
        SELECT taxonomy.classification, taxonomy.specialization, SUM(referrals.transaction_count)
        FROM referrals
        JOIN npidata_nashville
            ON referrals.from_npi = npidata_nashville.npi
        JOIN taxonomy
            ON npidata_nashville.taxonomy_code = taxonomy.taxonomy_code
        GROUP BY taxonomy.classification, taxonomy.specialization
        ORDER BY SUM(referrals.transaction_count) DESC
        LIMIT 50
        """ 

    test_df = pd.read_sql(query, db)

display(test_df)

Unnamed: 0,classification,specialization,SUM(referrals.transaction_count)
0,Radiology,Diagnostic Radiology,354026
1,Internal Medicine,Cardiovascular Disease,172681
2,Internal Medicine,,140663
3,Family Medicine,,89936
4,Pathology,Anatomic Pathology & Clinical Pathology,52249
5,Internal Medicine,Nephrology,46017
6,Nurse Practitioner,Family,44548
7,Emergency Medicine,,38991
8,Internal Medicine,Pulmonary Disease,37962
9,"Nurse Anesthetist, Certified Registered",,36006


## This is my starting code for NPI to Facility Groups

In [137]:
# Find count of NPIs sending to multiples of facility groups
with sqlite3.connect('../data/hcbb.sqlite') as db :
    query = """
        WITH CTE AS (
            SELECT from_npi, COUNT(DISTINCT to_facility_group) AS to_groups, SUM(transaction_count) AS npi_transactions
            FROM referrals
            GROUP BY from_npi
            ORDER BY to_groups DESC
        )
        SELECT *
        FROM CTE
        """ 

    test_df = pd.read_sql(query, db)

display(test_df)

Unnamed: 0,from_npi,to_groups,npi_transactions
0,1851677157,8,1176
1,1558355941,7,2574
2,1104933738,7,4570
3,1104837327,7,1522
4,1902823099,6,1336
...,...,...,...
3528,1003071333,1,86
3529,1003063314,1,374
3530,1003031261,1,104
3531,1003019902,1,294


In [138]:
# Find count of NPIs sending to multiples of facility groups
with sqlite3.connect('../data/hcbb.sqlite') as db :
    query = """
        WITH CTE AS (
            SELECT from_npi, COUNT(DISTINCT to_facility_group) AS to_groups, SUM(transaction_count) AS npi_transactions
            FROM referrals
            GROUP BY from_npi
            ORDER BY to_groups DESC
        )
        SELECT SUM(npi_transactions) AS total_transactions
        FROM CTE
        """ 

    test_df = pd.read_sql(query, db)

display(test_df)

Unnamed: 0,total_transactions
0,1616083


In [131]:
with sqlite3.connect('../data/hcbb.sqlite') as db :
    query = """
        SELECT SUM(transaction_count) AS total_transactions
        FROM referrals
        """ 

    test_df = pd.read_sql(query, db)

display(test_df)

Unnamed: 0,total_transactions
0,1616083


In [139]:
# Crosscheck with random npi
with sqlite3.connect('../data/hcbb.sqlite') as db :
    query = """
        SELECT *
        FROM referrals
        WHERE from_npi = '1851677157'
        """ 

    test_df = pd.read_sql(query, db)

display(test_df)

Unnamed: 0.1,Unnamed: 0,index,from_npi,to_npi,patient_count,transaction_count,average_day_wait,std_day_wait,from_zip,from_npi_specialty,from_entity_type_code,to_zip,to_npi_specialty,to_entity_type_code,to_facility,to_facility_group,to_facility_name_normalised
0,145,13101574,1851677157,1861479545,229,316,49.041,49.564,37083,Nurse Practitioner,1.0,38401,General Acute Care Hospital,2.0,MAURY REGIONAL HOSPITAL,Maury Regional Medical Center,Maury Regional Medical Center
1,983,16079213,1851677157,1295780476,93,158,36.753,45.089,37083,Nurse Practitioner,1.0,37207,General Acute Care Hospital,2.0,HTI MEMORIAL HOSPITAL CORPORATION,HCA,TriStar Skyline Medical Center HCA
2,2069,15715727,1851677157,1023055126,72,90,47.956,41.384,37083,Nurse Practitioner,1.0,37203,General Acute Care Hospital,2.0,"HCA HEALTH SERVICES OF TENNESSEE, INC.",HCA,Centennial Medical Center HCA
3,2720,16034734,1851677157,1265445506,37,65,33.985,35.362,37083,Nurse Practitioner,1.0,37067,General Acute Care Hospital,2.0,WILLIAMSON COUNTY HOSPITAL DISTRICT,Williamson Medical Center,Williamson County Hospital
4,3337,16567504,1851677157,1669567897,94,129,42.822,43.584,37083,Nurse Practitioner,1.0,37172,General Acute Care Hospital,2.0,NORTHCREST MEDICAL CENTER,NorthCrest Medical Center,NorthCrest Medical Center
5,4457,16213508,1851677157,1396882205,76,98,44.429,42.415,37083,Nurse Practitioner,1.0,37232,General Acute Care Hospital,2.0,VANDERBILT UNIVERSITY MEDICAL CENTER,Vanderbilt University Medical Center,Vanderbilt University Medical Center
6,5455,16279847,1851677157,1447571658,96,163,35.35,39.862,37083,Nurse Practitioner,1.0,37066,General Acute Care Hospital,2.0,SUMNER REGIONAL MEDICAL CENTER LLC,Sumner Regional Medical Center,Sumner Regional Medical Center
7,5574,13186861,1851677157,1922319037,45,64,31.047,31.309,37083,Nurse Practitioner,1.0,37030,General Acute Care Hospital,2.0,RIVERVIEW MEDICAL CENTER LLC,Riverview Regional Medical Center,Riverview Regional Medical Center
8,5620,16305736,1851677157,1467763458,39,93,18.28,21.475,37083,Nurse Practitioner,1.0,37074,General Acute Care Hospital,2.0,TROUSDALE MEDICAL CENTER LLC,Trousdale Medical Center,Trousdale Medical Center


In [142]:
# Find count of NPIs sending to multiples of facility groups
with sqlite3.connect('../data/hcbb.sqlite') as db :
    query = """
        WITH CTE AS (
            SELECT from_npi, COUNT(DISTINCT to_facility_group) AS to_groups, SUM(transaction_count) AS npi_transactions
            FROM referrals
            GROUP BY from_npi
            ORDER BY to_groups DESC
        )
        SELECT to_groups, COUNT(to_groups) AS npis_per_count, SUM(npi_transactions) AS total_transactions
        FROM CTE
        GROUP BY to_groups
        ORDER BY to_groups DESC
        """ 

    test_df = pd.read_sql(query, db)

display(test_df)

Unnamed: 0,to_groups,npis_per_count,total_transactions
0,8,1,1176
1,7,3,8666
2,6,4,7075
3,5,20,32393
4,4,55,104130
5,3,185,195780
6,2,673,439836
7,1,2592,827027


## Narrow down to non-Vanderbilt Transactions

In [161]:
# Find count of NPIs sending to multiples of facility groups
with sqlite3.connect('../data/hcbb.sqlite') as db :
    query = """
        WITH CTE AS (
            SELECT from_npi, COUNT(DISTINCT to_facility_group) AS to_groups, SUM(transaction_count) AS npi_transactions
            FROM referrals
            WHERE to_facility_group <> 'Vanderbilt University Medical Center'
            GROUP BY from_npi
        )
        SELECT to_groups, COUNT(to_groups) AS npis_per_count, SUM(npi_transactions) AS total_transactions
        FROM CTE
        GROUP BY to_groups
        ORDER BY to_groups DESC
        """ 

    test_df = pd.read_sql(query, db)

display(test_df)

Unnamed: 0,to_groups,npis_per_count,total_transactions
0,7,1,1078
1,6,3,7014
2,5,5,8957
3,4,28,44792
4,3,60,78097
5,2,359,300377
6,1,2025,683361


In [173]:
# Find hospital groups with the largest number of single group, non-Vanderbilt NPIs
# Remove Radiology
with sqlite3.connect('../data/hcbb.sqlite') as db :
    query = """
        WITH CTE AS (
            SELECT from_npi
                , COUNT(DISTINCT to_facility_group) AS to_groups
                , SUM(transaction_count) AS npi_transactions
                , to_facility_group
            FROM referrals
            WHERE to_facility_group <> 'Vanderbilt University Medical Center'
            AND from_npi_specialty <> 'Radiology'
            GROUP BY from_npi
            HAVING to_groups = 1 AND npi_transactions >= 1000
        )
        SELECT CTE.to_facility_group, n.*, CTE.npi_transactions 
        FROM CTE
        INNER JOIN npidata_nashville AS n 
            ON CTE.from_npi = n.npi
        ORDER BY CTE.npi_transactions DESC
        """ 

    test_df = pd.read_sql(query, db)

display(test_df)

Unnamed: 0,to_facility_group,npi,entity_type_code,provider_org_name,provider_last_name,provider_first_name,provider_middle_name,provider_name_prefix,provider_name_suffix,provider_credential,provider_business_address_1,provider_business_address_2,provider_business_city,provider_business_state,provider_business_zip,taxonomy_code,provider_business_zip5,zip,cbsa,grouping,classification,specialization,npi_transactions
0,HCA,1053337717,1,,KAZA,SUNIL,C,DR.,,M.D.,3443 DICKERSON PIKE,SUITE 430,NASHVILLE,TN,372072519,207RC0000X,37207,37207,34980,Allopathic & Osteopathic Physicians,Internal Medicine,Cardiovascular Disease,2320
1,HCA,1427079946,1,,CONLEY,CHRISTOPHER,N,DR.,,M.D.,3443 DICKERSON PIKE,SUITE 430,NASHVILLE,TN,372072519,207RC0001X,37207,37207,34980,Allopathic & Osteopathic Physicians,Internal Medicine,Clinical Cardiac Electrophysiology,2233
2,HCA,1114961513,1,,HUMPHREY,STEVEN,S,DR.,,MD,395 WALLACE RD,SUITE B300,NASHVILLE,TN,372114881,207RC0000X,37211,37211,34980,Allopathic & Osteopathic Physicians,Internal Medicine,Cardiovascular Disease,2166
3,HCA,1730111642,1,,PATEL,TARAL,NAVINCHANDRA,DR.,,M.D.,5651 FRIST BLVD STE 603,,HERMITAGE,TN,37076,207RI0011X,37076,37076,34980,Allopathic & Osteopathic Physicians,Internal Medicine,Interventional Cardiology,2152
4,HCA,1245313741,1,,LONG,BRIAN,R,DR.,,MD,3601 THE VANDERBILT CLINIC,,NASHVILLE,TN,372322519,207RI0011X,37232,37232,34980,Allopathic & Osteopathic Physicians,Internal Medicine,Interventional Cardiology,2018
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
58,Maury Regional Medical Center,1326101924,1,,MAQUILING,KEVIN,M,,,M.D.,3601 THE VANDERBILT CLINIC,,NASHVILLE,TN,372326433,207RC0000X,37232,37232,34980,Allopathic & Osteopathic Physicians,Internal Medicine,Cardiovascular Disease,1055
59,Maury Regional Medical Center,1710955034,1,,MARSHALL,JAMES,H.,,,M.D.,1222 TROTWOOD AVE,SUITE 601,COLUMBIA,TN,384016436,208800000X,38401,38401,34980,Allopathic & Osteopathic Physicians,Urology,,1051
60,Ascension Saint Thomas,1407896749,1,,CANONICO,ANGELO,E.,,,M.D.,4230 HARDING RD,SUITE 400,NASHVILLE,TN,372052013,207RP1001X,37205,37205,34980,Allopathic & Osteopathic Physicians,Internal Medicine,Pulmonary Disease,1041
61,HCA,1114020336,1,,KINGREE,RACHEL,MARIE,DR.,,M.D.,5651 FRIST BLVD,SUITE 713,HERMITAGE,TN,370762054,207RP1001X,37076,37076,34980,Allopathic & Osteopathic Physicians,Internal Medicine,Pulmonary Disease,1019


In [186]:
# From below, summarize by Facility Group
# Find hospital groups with the largest number of single group, non-Vanderbilt NPIs
# Remove Radiology
with sqlite3.connect('../data/hcbb.sqlite') as db :
    query = """
        WITH CTE AS (
            SELECT from_npi
                , COUNT(DISTINCT to_facility_group) AS to_groups
                , SUM(transaction_count) AS npi_transactions
                , SUM(patient_count) AS npi_patients
                , to_facility_group
            FROM referrals
            WHERE to_facility_group <> 'Vanderbilt University Medical Center'
            AND from_npi_specialty NOT IN ('Radiology', 'Surgery', 'Emergency Medicine', 'Orthopaedic Surgery')
            GROUP BY from_npi
            HAVING to_groups = 1 AND npi_transactions >= 500
        ), CTE_2 AS(
            SELECT CTE.to_facility_group, n.*, CTE.npi_transactions, CTE.npi_patients
            FROM CTE
            INNER JOIN npidata_nashville AS n 
                ON CTE.from_npi = n.npi
            ORDER BY CTE.npi_transactions DESC
        )
        SELECT to_facility_group
            , classification
            , COUNT(to_facility_group) AS count_npis
            , SUM(npi_transactions) AS total_transactions
            , SUM(npi_transactions) / COUNT(to_facility_group) AS transactions_per_npi
            , SUM(npi_patients) AS total_patients
        FROM CTE_2
        GROUP BY to_facility_group, classification
        ORDER BY to_facility_group, SUM(npi_transactions) DESC 
        """ 

    test_df = pd.read_sql(query, db)

display(test_df)

Unnamed: 0,to_facility_group,classification,count_npis,total_transactions,transactions_per_npi,total_patients
0,Ascension Saint Thomas,Internal Medicine,45,39365,874,30435
1,Ascension Saint Thomas,Pathology,5,3196,639,3044
2,Ascension Saint Thomas,Anesthesiology,5,2936,587,2734
3,Ascension Saint Thomas,Nurse Practitioner,3,1683,561,1523
4,Ascension Saint Thomas,Specialist,2,1537,768,1433
5,Ascension Saint Thomas,Family Medicine,2,1333,666,832
6,Ascension Saint Thomas,Psychiatry & Neurology,2,1146,573,1049
7,Ascension Saint Thomas,Urology,1,702,702,403
8,Ascension Saint Thomas,Hospitalist,1,509,509,487
9,HCA,Internal Medicine,75,72537,967,47501


In [169]:
# From below, summarize by specialization
# Find hospital groups with the largest number of single group, non-Vanderbilt NPIs
with sqlite3.connect('../data/hcbb.sqlite') as db :
    query = """
        WITH CTE AS (
            SELECT from_npi
                , COUNT(DISTINCT to_facility_group) AS to_groups
                , SUM(transaction_count) AS npi_transactions
                , to_facility_group
            FROM referrals
            WHERE to_facility_group <> 'Vanderbilt University Medical Center'
            GROUP BY from_npi
            HAVING to_groups = 1 AND npi_transactions >= 1000
        ), CTE_2 AS(
            SELECT CTE.to_facility_group, n.*, CTE.npi_transactions 
            FROM CTE
            INNER JOIN npidata_nashville AS n 
                ON CTE.from_npi = n.npi
            ORDER BY CTE.npi_transactions DESC
        )
        SELECT classification, COUNT(classification), SUM(npi_transactions)
        FROM CTE_2
        GROUP BY classification
        ORDER BY SUM(npi_transactions) DESC 
        """ 

    test_df = pd.read_sql(query, db)

display(test_df)

Unnamed: 0,classification,COUNT(classification),SUM(npi_transactions)
0,Radiology,48,118352
1,Internal Medicine,52,75354
2,Family Medicine,2,2519
3,Pathology,2,2437
4,Otolaryngology,2,2203
5,Orthopaedic Surgery,1,1253
6,Surgery,1,1239
7,Nurse Practitioner,1,1113
8,Specialist,1,1090
9,Urology,1,1051


In [146]:
# Find NPIs sending to single facility group
with sqlite3.connect('../data/hcbb.sqlite') as db :
    query = """
        WITH CTE AS (
            SELECT from_npi, COUNT(DISTINCT to_facility_group) AS to_groups, SUM(transaction_count) AS npi_transactions
            FROM referrals
            GROUP BY from_npi
        )
        SELECT n.*, CTE.npi_transactions 
        FROM CTE
        INNER JOIN npidata_nashville AS n 
            ON CTE.from_npi = n.npi
        WHERE CTE.to_groups = 1
        ORDER BY CTE.npi_transactions DESC
        """ 

    test_df = pd.read_sql(query, db)

display(test_df)

Unnamed: 0,npi,entity_type_code,provider_org_name,provider_last_name,provider_first_name,provider_middle_name,provider_name_prefix,provider_name_suffix,provider_credential,provider_business_address_1,provider_business_address_2,provider_business_city,provider_business_state,provider_business_zip,taxonomy_code,provider_business_zip5,zip,cbsa,grouping,classification,specialization,npi_transactions
0,1871548818,1,,KLEIN,WILLIAM,J,DR.,,MD,210 25TH AVE N STE 602,,NASHVILLE,TN,37203,2085R0202X,37203,37203,34980,Allopathic & Osteopathic Physicians,Radiology,Diagnostic Radiology,4900
1,1376756742,1,,GRIFFIN,BENJAMIN,DAVID,,,M.D.,210 25TH AVE N,,NASHVILLE,TN,37203,2085R0202X,37203,37203,34980,Allopathic & Osteopathic Physicians,Radiology,Diagnostic Radiology,4634
2,1316983695,1,,GUTTENTAG,ADAM,R,,,MD,3601 THE VANDERBILT CLINIC,,NASHVILLE,TN,372320001,2085R0202X,37232,37232,34980,Allopathic & Osteopathic Physicians,Radiology,Diagnostic Radiology,4405
3,1740377845,1,,TABER,DAVID,,,,MD,3601 TVC,,NASHVILLE,TN,372320001,2085R0202X,37232,37232,34980,Allopathic & Osteopathic Physicians,Radiology,Diagnostic Radiology,4378
4,1043302466,1,,BLOCK,JOHN,,,,MD,3601 TVC,,NASHVILLE,TN,372320001,2085R0202X,37232,37232,34980,Allopathic & Osteopathic Physicians,Radiology,Diagnostic Radiology,3822
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2587,1619291242,1,,KELLEY,SHERRY,L,,,CRNA,110 29TH AVE N STE 202,,NASHVILLE,TN,372031448,367500000X,37203,37203,34980,Physician Assistants & Advanced Practice Nursi...,"Nurse Anesthetist, Certified Registered",,50
2588,1376809590,1,,ZELLER,EMEM,ASUQUO,,,APN NP-C,28 WHITE BRIDGE RD STE 208,,NASHVILLE,TN,372051467,363LF0000X,37205,37205,34980,Physician Assistants & Advanced Practice Nursi...,Nurse Practitioner,Family,50
2589,1992057509,1,,MARCRUM,TRACI,,,,N.P.,3601 THE VANDERBILT CLINIC,,NASHVILLE,TN,372328802,363LA2100X,37232,37232,34980,Physician Assistants & Advanced Practice Nursi...,Nurse Practitioner,Acute Care,50
2590,1487096459,1,,GARG,RICHA,,DR.,,M.D.,1020 N HIGHLAND AVE,,MURFREESBORO,TN,371302494,207Q00000X,37130,37130,34980,Allopathic & Osteopathic Physicians,Family Medicine,,50


In [99]:
#Filter to Vanderbilt
with sqlite3.connect('../data/hcbb.sqlite') as db :
    query = """
        SELECT from_npi, COUNT(from_npi) AS to_groups
        FROM referrals
        GROUP BY from_npi
        ORDER BY COUNT(from_npi) DESC
        """ 

    test_df = pd.read_sql(query, db)

display(test_df)

Unnamed: 0,from_npi,to_groups
0,1306993282,10
1,1912984758,9
2,1851677157,9
3,1558355941,9
4,1447234141,9
...,...,...
3528,1003071333,1
3529,1003063314,1
3530,1003031261,1
3531,1003019902,1


In [84]:
# Find npi by percentage to Vanderbilt
with sqlite3.connect('../data/hcbb.sqlite') as db :
    query = """
        WITH to_vandy AS (
            SELECT from_npi, patient_count, transaction_count
            FROM referrals
            WHERE to_facility_group = 'Vanderbilt University Medical Center'
        ), not_vandy AS (
            SELECT from_npi
            , SUM(patient_count) AS patient_count
            , SUM(transaction_count) AS transaction_count
            FROM referrals
            WHERE to_facility_group <> 'Vanderbilt University Medical Center'
            GROUP BY from_npi
        )
        SELECT referrals.from_npi
            , ROUND(100.0 * tv.patient_count/(tv.patient_count + nv.patient_count), 2) AS pct_patients_vandy
            , tv.patient_count AS vandy_patients
            , nv.patient_count AS not_vandy_patients
            , ROUND(100.0 * tv.transaction_count/(tv.transaction_count + nv.transaction_count), 2) AS pct_transactions_vandy
            , tv.transaction_count AS vandy_transactions
            , nv.transaction_count AS not_vandy_transactions
            , from_npi_specialty
        FROM referrals
        LEFT JOIN to_vandy AS tv USING(from_npi)
        LEFT JOIN not_vandy AS nv USING(from_npi)
        """ 

    test_df = pd.read_sql(query, db)

display(test_df)

Unnamed: 0,from_npi,pct_patients_vandy,vandy_patients,not_vandy_patients,pct_transactions_vandy,vandy_transactions,not_vandy_transactions,from_npi_specialty
0,1013179860,6.12,76.0,1165.0,5.37,81.0,1426.0,Internal Medicine
1,1336126887,,,396.0,,,560.0,Urology
2,1336230424,8.38,60.0,656.0,6.26,77.0,1153.0,Internal Medicine
3,1346288966,,,998.0,,,1454.0,Specialist
4,1326086653,,,1728.0,,,1969.0,Internal Medicine
...,...,...,...,...,...,...,...,...
5835,1639275589,,,471.0,,,658.0,Radiology
5836,1649386467,,,440.0,,,771.0,Orthopaedic Surgery
5837,1700823713,,,435.0,,,679.0,Family Medicine
5838,1699149856,,,209.0,,,301.0,Nurse Practitioner


In [47]:
# Find interesting npi_specialty
with sqlite3.connect('../data/hcbb.sqlite') as db :
    query = """
        SELECT from_npi_specialty AS classification, COUNT(from_npi_specialty) AS classification_count
        From referrals
        GROUP BY from_npi_specialty
        ORDER BY COUNT(from_npi_specialty) DESC
        LIMIT 10
        """ 

    test_df = pd.read_sql(query, db)

display(test_df)

Unnamed: 0,classification,classification_count
0,Internal Medicine,10774
1,Radiology,5446
2,Nurse Practitioner,3025
3,Family Medicine,2536
4,Emergency Medicine,1399
5,"Nurse Anesthetist, Certified Registered",1231
6,Orthopaedic Surgery,1144
7,Anesthesiology,1017
8,Surgery,869
9,Psychiatry & Neurology,778


In [48]:
# Find interesting Specializations (can be filtered by specialty)
with sqlite3.connect('../data/hcbb.sqlite') as db :
    query = """
        SELECT npidata_nashville.specialization, COUNT(npidata_nashville.specialization)
        From referrals
        JOIN npidata_nashville
        ON referrals.from_npi = npidata_nashville.npi
        WHERE referrals.from_npi_specialty = 'Nurse Practitioner'
        GROUP BY npidata_nashville.specialization
        ORDER BY COUNT(npidata_nashville.specialization) DESC
        LIMIT 10
        """ 

    test_df = pd.read_sql(query, db)

display(test_df)

Unnamed: 0,specialization,COUNT(npidata_nashville.specialization)
0,Family,1463
1,Acute Care,397
2,Adult Health,310
3,Psychiatric/Mental Health,131
4,Women's Health,75
5,Gerontology,52
6,Primary Care,17
7,Obstetrics & Gynecology,3
8,Critical Care Medicine,1
9,,0


In [46]:
# Find interesting Groupings (can be filtered by specialty)
with sqlite3.connect('../data/hcbb.sqlite') as db :
    query = """
        SELECT npidata_nashville.grouping, COUNT(npidata_nashville.grouping)
        From referrals
        JOIN npidata_nashville
        ON referrals.from_npi = npidata_nashville.npi
        WHERE referrals.from_npi_specialty = 'Anesthesiology'
        GROUP BY npidata_nashville.grouping
        ORDER BY COUNT(npidata_nashville.grouping) DESC
        LIMIT 10
        """ 

    test_df = pd.read_sql(query, db)

display(test_df)

Unnamed: 0,grouping,COUNT(npidata_nashville.grouping)
0,Allopathic & Osteopathic Physicians,1017


In [18]:
# Find NPIs with largest number of referrals
with sqlite3.connect('../data/hcbb.sqlite') as db :
    query = """
    SELECT SUM(transaction_count) AS total_referrals, to_npi, n.provider_org_name
    FROM filtered_hop_teaming AS f
    JOIN npidata AS n
    ON f.to_npi = n.npi
    GROUP BY to_npi
    ORDER BY total_referrals DESC
    LIMIT 20;
    """ 
    
    test = pd.read_sql(query, db)

test

Unnamed: 0,total_referrals,to_npi,provider_org_name
0,901945,1104202761,VANDERBILT UNIVERSITY MEDICAL CENTER
1,635811,1437194669,SAINT THOMAS MEDICAL PARTNERS
2,603385,1396882205,VANDERBILT UNIVERSITY MEDICAL CENTER
3,598277,1093741464,"ADVANCED DIAGNOSTIC IMAGING, PC"
4,526917,1861478489,RADIOLOGY ALLIANCE PC
5,445897,1003863580,"ASSOCIATED PATHOLOGISTS, LLC"
6,323305,1245393057,CENTENNIAL HEART LLC
7,245119,1215932413,"ANESTHESIA MEDICAL GROUP, PC"
8,240693,1811955917,TENNESSEE ONCOLOGY PLLC
9,228282,1235186800,"PATHGROUP LABS, LLC"


In [None]:
# Find competitor hosptitals with the largest number of total referrals.
with sqlite3.connect('../data/hcbb.sqlite') as db :
    query = """
    SELECT SUM(transaction_count) AS total_referrals, to_npi, n.provider_org_name, t.classification
    FROM filtered_hop_teaming AS f
    JOIN npidata AS n
    ON f.to_npi = n.npi
    JOIN taxonomy AS t
    USING(taxonomy_code)
    WHERE classification = 'Family Medicine'
    GROUP BY to_npi, provider_org_name
    --HAVING provider_org_name LIKE '%VUMC%'
    ORDER BY total_referrals DESC
    LIMIT 50;
    """ 
    
    test = pd.read_sql(query, db)

test

In [None]:
# Find competitor hosptitals with the largest number of total referrals.
with sqlite3.connect('../data/hcbb.sqlite') as db :
    query = """
    SELECT classification, SUM(transaction_count)
    FROM filtered_hop_teaming AS f
    JOIN npidata AS n
    ON f.to_npi = n.npi
    JOIN taxonomy AS t
    USING(taxonomy_code)
    --WHERE provider_org_name LIKE '%VANDERBILT%'
    GROUP BY classification
    ORDER BY SUM(transaction_count) DESC;
    """ 
    
    test = pd.read_sql(query, db)

test

In [None]:
# Find addresses of all Vanderbilt locations
with sqlite3.connect('../data/hcbb.sqlite') as db :
    query = """
    SELECT *
    FROM npidata
    WHERE provider_org_name LIKE '%VANDERBILT%'
    GROUP BY provider_business_address_1
        , provider_business_city
        , provider_business_state
        , provider_business_zip5
    """ 
    
    test = pd.read_sql(query, db)

test

In [None]:
# Find competitor hosptitals with the largest number of total referrals.
with sqlite3.connect('../data/hcbb.sqlite') as db :
    query = """
    WITH vandy_address AS (
        SELECT *
        FROM npidata
        WHERE provider_org_name LIKE '%VANDERBILT%'
        GROUP BY provider_business_address_1
            , provider_business_city
            , provider_business_state
            , provider_business_zip5
    )
    SELECT *
    FROM npidata
    --JOIN vandy_address AS va USING(npi)
    WHERE provider_org_name NOT LIKE '%VANDERBILT%'
    AND provider_business_address_1 IN (SELECT provider_business_address_1 FROM vandy_address)
    AND provider_business_city IN (SELECT provider_business_city FROM vandy_address)
    AND provider_business_zip5 IN (SELECT provider_business_zip5 FROM vandy_address)
    """ 
    
    test = pd.read_sql(query, db)

test

In [None]:
# TESTING SQUARE
with sqlite3.connect('../data/hcbb.sqlite') as db :
    query = """
    SELECT provider_business_state, COUNT(provider_business_state)
    FROM npidata
    GROUP BY provider_business_state
    """ 
    
    test = pd.read_sql(query, db)

test

In [None]:
# TESTING SQUARE
with sqlite3.connect('../data/hcbb.sqlite') as db :
    query = """
    SELECT *
    FROM npidata
    LIMIT 1;
    """ 
    
    test = pd.read_sql(query, db)

test

In [None]:
# Tingting's code for exporting to Neo4j

nodes = list(set(ht.from_npi.tolist() + ht.to_npi.tolist()))
node_df = pd.DataFrame({'npi:ID': nodes})
node_df[':LABEL'] = "Provider"
node_df.to_csv('import/nodes.csv', index = False)
edges = pd.DataFrame({':START_ID' : ht.from_npi, 'patient_count': ht.patient_count, 
                      'transaction_count': ht.transaction_count, 
                     ':END_ID' : ht.to_npi})
edges[':TYPE'] = 'REFERRED_TO'
edges.to_csv('import/edges.csv', index = False)

In [None]:
# Test to confirm the tables loaded.
with sqlite3.connect('../data/hcbb.sqlite') as db :
    query = """
    SELECT *
    FROM taxonomy
    LIMIT 1;
    """ 
    
    test = pd.read_sql(query, db)

test

In [None]:
# Test to confirm the tables loaded.
with sqlite3.connect('../data/hcbb.sqlite') as db :
    query = """
    SELECT DISTINCT classification
    FROM taxonomy
    """ 
    
    test = pd.read_sql(query, db)

test

In [None]:
# Test to confirm the tables loaded.
with sqlite3.connect('../data/hcbb.sqlite') as db :
    query = """
    SELECT npidata.*
    FROM npidata
    JOIN taxonomy USING (taxonomy_code)
    WHERE classification = 'train'
    """ 
    
    test = pd.read_sql(query, db)

test

In [14]:
referrals = pd.read_csv('../data/nashville_referrals_normalised.csv')

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,


In [17]:
referrals.head(10)

Unnamed: 0.1,Unnamed: 0,index,from_npi,to_npi,patient_count,transaction_count,average_day_wait,std_day_wait,from_zip,from_npi_specialty,from_entity_type_code,to_zip,to_npi_specialty,to_entity_type_code,to_facility,to_facility_group,to_facility_name_normalised
0,0,20662650,1780832899,1245233220,62,63,2.667,20.412,37203,Anesthesiology,1.0,37115,Clinic/Center,2.0,,,
1,1,16742214,1851362628,1790788040,26,65,3.231,18.277,37129,Internal Medicine,1.0,37129,"Hospice Care, Community Based",2.0,,,
2,2,6532675,1396753356,1609879956,27,68,4.529,27.525,37174,Internal Medicine,1.0,37214,"Hospice Care, Community Based",2.0,,,
3,3,18197644,1144264458,1609879956,36,77,3.247,27.359,37067,Family Medicine,1.0,37214,"Hospice Care, Community Based",2.0,,,
4,4,26837443,1942347513,1609879956,36,53,21.509,40.591,37217,Nurse Practitioner,1.0,37214,"Hospice Care, Community Based",2.0,,,
5,5,832492,1003833872,1568464873,49,117,19.983,26.492,37055,Counselor,1.0,37055,Family Medicine,2.0,,,
6,6,832494,1003858267,1568464873,50,63,17.698,34.482,37055,Hospitalist,1.0,37055,Family Medicine,2.0,,,
7,7,832495,1003862566,1568464873,91,94,42.968,47.179,37072,Radiology,1.0,37055,Family Medicine,2.0,,,
8,8,832497,1013226026,1568464873,165,239,42.209,54.333,37055,Nurse Practitioner,1.0,37055,Family Medicine,2.0,,,
9,9,832499,1003991167,1568464873,35,67,21.94,24.913,37203,Ophthalmology,1.0,37055,Family Medicine,2.0,,,
