In [230]:
import pandas as pd
import sqlite3
from tqdm.notebook import tqdm
import plotly.express as px
import numpy as np
import plotly.graph_objects as go

In [231]:
pd.set_option("display.max_columns", 500)

In [232]:
#Listing currently existing tables in the database
with sqlite3.connect('../data/hcbb.sqlite') as db :
    query = """
        SELECT name
        FROM sqlite_master 
        WHERE type ='table' 
        AND name NOT LIKE 'sqlite_%';
        """ 

    test_df = pd.read_sql(query, db)

display(test_df)

Unnamed: 0,name
0,taxonomy
1,hop_teaming
2,cbsa
3,npidata
4,npidata_nashville
5,filtered_hop_teaming
6,hospitals
7,referrals


In [255]:
#See number of rows
with sqlite3.connect('../data/hcbb.sqlite') as db :
    query = """
        SELECT COUNT(DISTINCT from_npi)
        From referrals
        """ 

    test_df = pd.read_sql(query, db)

display(test_df)

Unnamed: 0,COUNT(DISTINCT from_npi)
0,4225


In [234]:
#See first row
with sqlite3.connect('../data/hcbb.sqlite') as db :
    query = """
        SELECT *
        From referrals
        LIMIT 1
        """ 

    test_df = pd.read_sql(query, db)

display(test_df)

Unnamed: 0.1,Unnamed: 0,index,from_npi,to_npi,patient_count,transaction_count,average_day_wait,std_day_wait,from_zip,from_npi_specialty,from_entity_type_code,to_zip,to_npi_specialty,to_entity_type_code,to_facility,to_facility_group,to_facility_name_normalised
0,0,615039,1013179860,1417938846,71,82,35.049,42.548,37075,Internal Medicine,1.0,37083,General Acute Care Hospital,2.0,"MACON COUNTY GENERAL HOSPITAL, INC.",Macon County General Hospital,Macon County General Hospital


## Look at Facility Groups, Classifications, and Specializations

In [235]:
# Look at facility groups
with sqlite3.connect('../data/hcbb.sqlite') as db :
    query = """
        SELECT DISTINCT(to_facility_group)
        FROM referrals
        """ 

    test_df = pd.read_sql(query, db)

display(test_df)

Unnamed: 0,to_facility_group
0,Macon County General Hospital
1,Maury Regional Medical Center
2,HCA
3,Ascension Saint Thomas
4,Vanderbilt University Medical Center
5,Williamson Medical Center
6,NorthCrest Medical Center
7,Nashville General Hosptial
8,Sumner Regional Medical Center
9,Riverview Regional Medical Center


In [236]:
# Look at categories
with sqlite3.connect('../data/hcbb.sqlite') as db :
    query = """
        SELECT DISTINCT(from_npi_specialty), SUM(transaction_count)
        FROM referrals
        GROUP BY from_npi_specialty
        ORDER BY SUM(transaction_count) DESC
        """ 

    test_df = pd.read_sql(query, db)

display(test_df)

Unnamed: 0,from_npi_specialty,SUM(transaction_count)
0,Internal Medicine,678099
1,Radiology,397137
2,Nurse Practitioner,121343
3,Family Medicine,102980
4,Emergency Medicine,81652
5,Pathology,62149
6,Orthopaedic Surgery,56882
7,Anesthesiology,50313
8,"Nurse Anesthetist, Certified Registered",49618
9,Surgery,42601


In [237]:
# Look at specializations
with sqlite3.connect('../data/hcbb.sqlite') as db :
    query = """
        SELECT DISTINCT(taxonomy.specialization), SUM(referrals.transaction_count)
        FROM referrals
        JOIN npidata_nashville
            ON referrals.from_npi = npidata_nashville.npi
        JOIN taxonomy
            ON npidata_nashville.taxonomy_code = taxonomy.taxonomy_code
        GROUP BY taxonomy.specialization
        ORDER BY SUM(referrals.transaction_count) DESC
        LIMIT 20
        """ 

    test_df = pd.read_sql(query, db)

display(test_df)

Unnamed: 0,specialization,SUM(referrals.transaction_count)
0,,723236
1,Diagnostic Radiology,372253
2,Cardiovascular Disease,176898
3,Anatomic Pathology & Clinical Pathology,52868
4,Family,52464
5,Nephrology,50284
6,Pulmonary Disease,42945
7,Hematology & Oncology,39344
8,Gastroenterology,31214
9,Neurology,28596


In [238]:
# Look at classifications and specializations together
with sqlite3.connect('../data/hcbb.sqlite') as db :
    query = """
        SELECT taxonomy.classification, taxonomy.specialization, SUM(referrals.transaction_count)
        FROM referrals
        JOIN npidata_nashville
            ON referrals.from_npi = npidata_nashville.npi
        JOIN taxonomy
            ON npidata_nashville.taxonomy_code = taxonomy.taxonomy_code
        GROUP BY taxonomy.classification, taxonomy.specialization
        ORDER BY SUM(referrals.transaction_count) DESC
        LIMIT 50
        """ 

    test_df = pd.read_sql(query, db)

display(test_df)

Unnamed: 0,classification,specialization,SUM(referrals.transaction_count)
0,Radiology,Diagnostic Radiology,372253
1,Internal Medicine,,190454
2,Internal Medicine,Cardiovascular Disease,176898
3,Family Medicine,,97117
4,Emergency Medicine,,73595
5,Pathology,Anatomic Pathology & Clinical Pathology,52868
6,Nurse Practitioner,Family,52464
7,Internal Medicine,Nephrology,50284
8,"Nurse Anesthetist, Certified Registered",,49618
9,Internal Medicine,Pulmonary Disease,42945


## This is my starting code for NPI to Facility Groups

In [239]:
# Find count of NPIs sending to multiples of facility groups
with sqlite3.connect('../data/hcbb.sqlite') as db :
    query = """
        WITH CTE AS (
            SELECT from_npi, COUNT(DISTINCT to_facility_group) AS to_groups, SUM(transaction_count) AS npi_transactions
            FROM referrals
            GROUP BY from_npi
            ORDER BY to_groups DESC
        )
        SELECT *
        FROM CTE
        """ 

    test_df = pd.read_sql(query, db)

display(test_df)

Unnamed: 0,from_npi,to_groups,npi_transactions
0,1851677157,8,1176
1,1558355941,7,2574
2,1104933738,7,4570
3,1104837327,7,1522
4,1902823099,6,1336
...,...,...,...
4220,1003063314,1,374
4221,1003050972,1,145
4222,1003031261,1,104
4223,1003019902,1,294


In [240]:
# Find count of NPIs sending to multiples of facility groups
with sqlite3.connect('../data/hcbb.sqlite') as db :
    query = """
        WITH CTE AS (
            SELECT from_npi, COUNT(DISTINCT to_facility_group) AS to_groups, SUM(transaction_count) AS npi_transactions
            FROM referrals
            GROUP BY from_npi
            ORDER BY to_groups DESC
        )
        SELECT SUM(npi_transactions) AS total_transactions
        FROM CTE
        """ 

    test_df = pd.read_sql(query, db)

display(test_df)

Unnamed: 0,total_transactions
0,1936936


In [241]:
with sqlite3.connect('../data/hcbb.sqlite') as db :
    query = """
        SELECT SUM(transaction_count) AS total_transactions
        FROM referrals
        """ 

    test_df = pd.read_sql(query, db)

display(test_df)

Unnamed: 0,total_transactions
0,1936936


In [242]:
# Crosscheck with random npi
with sqlite3.connect('../data/hcbb.sqlite') as db :
    query = """
        SELECT *
        FROM referrals
        WHERE from_npi = '1851677157'
        """ 

    test_df = pd.read_sql(query, db)

display(test_df)

Unnamed: 0.1,Unnamed: 0,index,from_npi,to_npi,patient_count,transaction_count,average_day_wait,std_day_wait,from_zip,from_npi_specialty,from_entity_type_code,to_zip,to_npi_specialty,to_entity_type_code,to_facility,to_facility_group,to_facility_name_normalised
0,167,13101574,1851677157,1861479545,229,316,49.041,49.564,37083,Nurse Practitioner,1.0,38401,General Acute Care Hospital,2.0,MAURY REGIONAL HOSPITAL,Maury Regional Medical Center,Maury Regional Medical Center
1,1115,16079213,1851677157,1295780476,93,158,36.753,45.089,37083,Nurse Practitioner,1.0,37207,General Acute Care Hospital,2.0,HTI MEMORIAL HOSPITAL CORPORATION,HCA,TriStar Skyline Medical Center HCA
2,2290,15715727,1851677157,1023055126,72,90,47.956,41.384,37083,Nurse Practitioner,1.0,37203,General Acute Care Hospital,2.0,"HCA HEALTH SERVICES OF TENNESSEE, INC.",HCA,Centennial Medical Center HCA
3,3032,16034734,1851677157,1265445506,37,65,33.985,35.362,37083,Nurse Practitioner,1.0,37067,General Acute Care Hospital,2.0,WILLIAMSON COUNTY HOSPITAL DISTRICT,Williamson Medical Center,Williamson County Hospital
4,3697,16567504,1851677157,1669567897,94,129,42.822,43.584,37083,Nurse Practitioner,1.0,37172,General Acute Care Hospital,2.0,NORTHCREST MEDICAL CENTER,NorthCrest Medical Center,NorthCrest Medical Center
5,5019,16213508,1851677157,1396882205,76,98,44.429,42.415,37083,Nurse Practitioner,1.0,37232,General Acute Care Hospital,2.0,VANDERBILT UNIVERSITY MEDICAL CENTER,Vanderbilt University Medical Center,Vanderbilt University Medical Center
6,6228,16279847,1851677157,1447571658,96,163,35.35,39.862,37083,Nurse Practitioner,1.0,37066,General Acute Care Hospital,2.0,SUMNER REGIONAL MEDICAL CENTER LLC,Sumner Regional Medical Center,Sumner Regional Medical Center
7,6357,13186861,1851677157,1922319037,45,64,31.047,31.309,37083,Nurse Practitioner,1.0,37030,General Acute Care Hospital,2.0,RIVERVIEW MEDICAL CENTER LLC,Riverview Regional Medical Center,Riverview Regional Medical Center
8,6411,16305736,1851677157,1467763458,39,93,18.28,21.475,37083,Nurse Practitioner,1.0,37074,General Acute Care Hospital,2.0,TROUSDALE MEDICAL CENTER LLC,Trousdale Medical Center,Trousdale Medical Center


In [243]:
# Find count of NPIs sending to multiples of facility groups
with sqlite3.connect('../data/hcbb.sqlite') as db :
    query = """
        WITH CTE AS (
            SELECT from_npi, COUNT(DISTINCT to_facility_group) AS to_groups, SUM(transaction_count) AS npi_transactions
            FROM referrals
            GROUP BY from_npi
            ORDER BY to_groups DESC
        )
        SELECT to_groups, COUNT(to_groups) AS npis_per_count, SUM(npi_transactions) AS total_transactions
        FROM CTE
        GROUP BY to_groups
        ORDER BY to_groups DESC
        """ 

    test_df = pd.read_sql(query, db)

display(test_df)

Unnamed: 0,to_groups,npis_per_count,total_transactions
0,8,1,1176
1,7,3,8666
2,6,4,7075
3,5,20,32393
4,4,56,106690
5,3,199,217867
6,2,709,481393
7,1,3233,1081676


## Narrow down to non-Vanderbilt Transactions

In [244]:
# Find count of NPIs sending to multiples of facility groups
with sqlite3.connect('../data/hcbb.sqlite') as db :
    query = """
        WITH CTE AS (
            SELECT from_npi, COUNT(DISTINCT to_facility_group) AS to_groups, SUM(transaction_count) AS npi_transactions
            FROM referrals
            WHERE to_facility_group <> 'Vanderbilt University Medical Center'
            GROUP BY from_npi
        )
        SELECT to_groups, COUNT(to_groups) AS npis_per_count, SUM(npi_transactions) AS total_transactions
        FROM CTE
        GROUP BY to_groups
        ORDER BY to_groups DESC
        """ 

    test_df = pd.read_sql(query, db)

display(test_df)

Unnamed: 0,to_groups,npis_per_count,total_transactions
0,7,1,1078
1,6,3,7014
2,5,5,8957
3,4,28,44792
4,3,61,78565
5,2,385,328801
6,1,2380,813604


In [245]:
# Find hospital groups with the largest number of single group, non-Vanderbilt NPIs
# Remove Radiology
with sqlite3.connect('../data/hcbb.sqlite') as db :
    query = """
        WITH CTE AS (
            SELECT from_npi
                , COUNT(DISTINCT to_facility_group) AS to_groups
                , SUM(transaction_count) AS npi_transactions
                , to_facility_group
            FROM referrals
            WHERE to_facility_group <> 'Vanderbilt University Medical Center'
            AND from_npi_specialty <> 'Radiology'
            GROUP BY from_npi
            HAVING to_groups = 1 AND npi_transactions >= 1000
        )
        SELECT CTE.to_facility_group, n.*, CTE.npi_transactions 
        FROM CTE
        INNER JOIN npidata_nashville AS n 
            ON CTE.from_npi = n.npi
        ORDER BY CTE.npi_transactions DESC
        """ 

    test_df = pd.read_sql(query, db)

display(test_df)

Unnamed: 0,to_facility_group,npi,entity_type_code,provider_org_name,provider_last_name,provider_first_name,provider_middle_name,provider_name_prefix,provider_name_suffix,provider_credential,provider_business_address_1,provider_business_address_2,provider_business_city,provider_business_state,provider_business_zip,taxonomy_code,provider_business_zip5,zip,cbsa,grouping,classification,specialization,npi_transactions
0,HCA,1053337717,1,,KAZA,SUNIL,C,DR.,,M.D.,3443 DICKERSON PIKE,SUITE 430,NASHVILLE,TN,372072519,207RC0000X,37207,37207,34980,Allopathic & Osteopathic Physicians,Internal Medicine,Cardiovascular Disease,2320
1,HCA,1427079946,1,,CONLEY,CHRISTOPHER,N,DR.,,M.D.,3443 DICKERSON PIKE,SUITE 430,NASHVILLE,TN,372072519,207RC0001X,37207,37207,34980,Allopathic & Osteopathic Physicians,Internal Medicine,Clinical Cardiac Electrophysiology,2233
2,HCA,1114961513,1,,HUMPHREY,STEVEN,S,DR.,,MD,395 WALLACE RD,SUITE B300,NASHVILLE,TN,372114881,207RC0000X,37211,37211,34980,Allopathic & Osteopathic Physicians,Internal Medicine,Cardiovascular Disease,2166
3,HCA,1730111642,1,,PATEL,TARAL,NAVINCHANDRA,DR.,,M.D.,5651 FRIST BLVD STE 603,,HERMITAGE,TN,37076,207RI0011X,37076,37076,34980,Allopathic & Osteopathic Physicians,Internal Medicine,Interventional Cardiology,2152
4,HCA,1245313741,1,,LONG,BRIAN,R,DR.,,MD,3601 THE VANDERBILT CLINIC,,NASHVILLE,TN,372322519,207RI0011X,37232,37232,34980,Allopathic & Osteopathic Physicians,Internal Medicine,Interventional Cardiology,2018
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
68,Ascension Saint Thomas,1407896749,1,,CANONICO,ANGELO,E.,,,M.D.,4230 HARDING RD,SUITE 400,NASHVILLE,TN,372052013,207RP1001X,37205,37205,34980,Allopathic & Osteopathic Physicians,Internal Medicine,Pulmonary Disease,1041
69,NorthCrest Medical Center,1679557565,1,,CRUNK,TOMMY,HAMBLEN,,,M.D.,224 NORTHCREST DR,,SPRINGFIELD,TN,371723962,207Q00000X,37172,37172,34980,Allopathic & Osteopathic Physicians,Family Medicine,,1040
70,HCA,1114020336,1,,KINGREE,RACHEL,MARIE,DR.,,M.D.,5651 FRIST BLVD,SUITE 713,HERMITAGE,TN,370762054,207RP1001X,37076,37076,34980,Allopathic & Osteopathic Physicians,Internal Medicine,Pulmonary Disease,1019
71,NorthCrest Medical Center,1033472592,1,,ANANI,LOVE,,,,M.D,100 NORTHCREST DR,,SPRINGFIELD,TN,371723927,207P00000X,37172,37172,34980,Allopathic & Osteopathic Physicians,Emergency Medicine,,1011


In [246]:
# From below, summarize by Facility Group
# Find hospital groups with the largest number of single group, non-Vanderbilt NPIs
# Remove Radiology
with sqlite3.connect('../data/hcbb.sqlite') as db :
    query = """
        WITH CTE AS (
            SELECT from_npi
                , COUNT(DISTINCT to_facility_group) AS to_groups
                , SUM(transaction_count) AS npi_transactions
                , SUM(patient_count) AS npi_patients
                , to_facility_group
            FROM referrals
            WHERE to_facility_group <> 'Vanderbilt University Medical Center'
            AND from_npi_specialty NOT IN ('Radiology', 'Surgery', 'Emergency Medicine', 'Orthopaedic Surgery')
            GROUP BY from_npi
            HAVING to_groups = 1 AND npi_transactions >= 500
        ), CTE_2 AS(
            SELECT CTE.to_facility_group, n.*, CTE.npi_transactions, CTE.npi_patients
            FROM CTE
            INNER JOIN npidata_nashville AS n 
                ON CTE.from_npi = n.npi
            ORDER BY CTE.npi_transactions DESC
        )
        SELECT to_facility_group
            , classification
            , COUNT(to_facility_group) AS count_npis
            , SUM(npi_transactions) AS total_transactions
            , SUM(npi_transactions) / COUNT(to_facility_group) AS transactions_per_npi
            , SUM(npi_patients) AS total_patients
        FROM CTE_2
        GROUP BY to_facility_group, classification
        ORDER BY to_facility_group, SUM(npi_transactions) DESC 
        """ 

    test_df = pd.read_sql(query, db)

display(test_df)

Unnamed: 0,to_facility_group,classification,count_npis,total_transactions,transactions_per_npi,total_patients
0,Ascension Saint Thomas,Internal Medicine,49,42746,872,32572
1,Ascension Saint Thomas,Specialist,5,3941,788,3656
2,Ascension Saint Thomas,Pathology,5,3196,639,3044
3,Ascension Saint Thomas,Anesthesiology,5,2936,587,2734
4,Ascension Saint Thomas,Nurse Practitioner,3,1683,561,1523
5,Ascension Saint Thomas,Family Medicine,2,1333,666,832
6,Ascension Saint Thomas,Psychiatry & Neurology,2,1146,573,1049
7,Ascension Saint Thomas,Urology,1,702,702,403
8,Ascension Saint Thomas,Hospitalist,1,509,509,487
9,HCA,Internal Medicine,86,80654,937,52784


## Create Visual of NPIs to Poach

In [381]:
# From below, summarize by Facility Group
# Find hospital groups with the largest number of single group, non-Vanderbilt NPIs
# Remove Radiology
with sqlite3.connect('../data/hcbb.sqlite') as db :
    query = """
        WITH CTE AS (
            SELECT from_npi
                , COUNT(DISTINCT to_facility_group) AS to_groups
                , SUM(transaction_count) AS npi_transactions
                , SUM(patient_count) AS npi_patients
                , to_facility_group
            FROM referrals
            WHERE to_facility_group <> 'Vanderbilt University Medical Center'
            AND from_npi_specialty NOT IN ('Radiology', 'Emergency Medicine', 'Orthopaedic Surgery',
                                            'Pathology', 'Anesthesiology')
            GROUP BY from_npi
            HAVING to_groups = 1 AND npi_transactions >= 500
        ), CTE_2 AS(
            SELECT CTE.to_facility_group, CTE.npi_transactions, CTE.npi_patients, n.*
            FROM CTE
            INNER JOIN npidata_nashville AS n 
                ON CTE.from_npi = n.npi
            ORDER BY CTE.npi_transactions DESC
        )
        SELECT to_facility_group AS Facility_Group
            , npi
            , provider_first_name || ' ' || provider_last_name AS Name
            , classification AS Classification
            , COALESCE(specialization, 'None') AS Specialization
            , npi_transactions AS NPI_Transactions
            , npi_patients AS NPI_Patients
            , provider_business_address_1
            , provider_business_address_2
            , provider_business_city
            , provider_business_state
            , provider_business_zip5
        FROM CTE_2
        ORDER BY to_facility_group
        """ 

    poach = pd.read_sql(query, db)

display(poach)

Unnamed: 0,Facility_Group,npi,Name,Classification,Specialization,NPI_Transactions,NPI_Patients,provider_business_address_1,provider_business_address_2,provider_business_city,provider_business_state,provider_business_zip5
0,Ascension Saint Thomas,1770519704,DAVID GIBSON,Internal Medicine,Cardiovascular Disease,1648,1454,4230 HARDING RD,SUITE 330,NASHVILLE,TN,37205
1,Ascension Saint Thomas,1720241185,WARREN STRIBLING,Internal Medicine,Advanced Heart Failure and Transplant Cardiology,1648,711,4230 HARDING RD.,STE. 330,NASHVILLE,TN,37205
2,Ascension Saint Thomas,1598719536,DON CHOMSKY,Internal Medicine,Cardiovascular Disease,1621,701,4230 HARDING RD,SUITE 330,NASHVILLE,TN,37205
3,Ascension Saint Thomas,1457317257,GUY MIOTON,Internal Medicine,Cardiovascular Disease,1458,1155,1840 MEDICAL CENTER PKWY,SUITE 201,MURFREESBORO,TN,37129
4,Ascension Saint Thomas,1942429816,ANDREW ZURICK,Internal Medicine,Cardiovascular Disease,1421,1265,4230 HARDING RD.,SUITE 330,NASHVILLE,TN,37205
...,...,...,...,...,...,...,...,...,...,...,...,...
279,Williamson Medical Center,1346397130,PAUL FLESER,Surgery,Vascular Surgery,570,330,100 COVEY DR,SUITE 204,FRANKLIN,TN,37067
280,Williamson Medical Center,1952771941,JAYANTHI SAMUEL,Nurse Practitioner,Family,519,467,4323 CAROTHERS PKWY,SUITE 205,FRANKLIN,TN,37067
281,Williamson Medical Center,1033246640,OUIDA COLLINS,Family Medicine,,510,407,3601 TVC,,NASHVILLE,TN,37232
282,Williamson Medical Center,1063864775,PAULA DUNN,Family Medicine,,504,237,4091 MALLORY LN,,FRANKLIN,TN,37067


In [382]:
fig = px.treemap(poach, path=['Facility_Group', 'Classification', 'Name'], values='NPI_Transactions',
                color='NPI_Patients', 
                hover_data=['Name', 'Classification', 'Specialization', 'provider_business_address_1',
                           'provider_business_address_2', 'provider_business_city',
                           'provider_business_state', 'provider_business_zip5'],
                color_continuous_scale='Emrld')
fig.show()
fig.write_html("/Users/mattparker/Documents/nss_projects/hcbb_hop_team-3m/notebooks/poaching_plotly.html")

# Alvin Request #1

In [383]:
fig_1 = px.treemap(poach, path=['Classification', 'Specialization', 'Name'], values='NPI_Patients',
                color='NPI_Patients', 
                hover_data=['Name', 'Classification', 'Specialization', 'provider_business_address_1',
                           'provider_business_address_2', 'provider_business_city',
                           'provider_business_state', 'provider_business_zip5'],
                color_continuous_scale='Emrld')
fig_1.show()

# Alvin Request #2

In [400]:
# From below, summarize by Facility Group
# Find hospital groups with the largest number of single group, non-Vanderbilt NPIs
# Remove Radiology
with sqlite3.connect('../data/hcbb.sqlite') as db :
    query = """
        WITH CTE AS (
            SELECT from_npi
                , COUNT(DISTINCT to_facility_group) AS to_groups
                , SUM(transaction_count) AS npi_transactions
                , SUM(patient_count) AS npi_patients
                , to_facility_group
            FROM referrals
            WHERE from_npi_specialty NOT IN ('Radiology', 'Emergency Medicine', 'Orthopaedic Surgery',
                                            'Pathology', 'Anesthesiology')
            GROUP BY from_npi
            HAVING to_groups = 1 AND npi_transactions >= 500
        ), CTE_2 AS(
            SELECT CTE.to_facility_group, CTE.npi_transactions, CTE.npi_patients, n.*
            FROM CTE
            INNER JOIN npidata_nashville AS n 
                ON CTE.from_npi = n.npi
            ORDER BY CTE.npi_transactions DESC
        )
        SELECT to_facility_group AS Facility_Group
            , npi
            , provider_first_name || ' ' || provider_last_name AS Name
            , classification AS Classification
            , COALESCE(specialization, 'None') AS Specialization
            , npi_transactions AS NPI_Transactions
            , npi_patients AS NPI_Patients
            , provider_business_address_1
            , provider_business_address_2
            , provider_business_city
            , provider_business_state
            , provider_business_zip5
            , SUM(NPI_Patients) AS Total_Patients
        FROM CTE_2
        GROUP BY to_facility_group, classification, specialization
        """ 

    poach2 = pd.read_sql(query, db)

display(poach2)

Unnamed: 0,Facility_Group,npi,Name,Classification,Specialization,NPI_Transactions,NPI_Patients,provider_business_address_1,provider_business_address_2,provider_business_city,provider_business_state,provider_business_zip5,Total_Patients
0,Ascension Saint Thomas,1700823713,JEFF TODD,Family Medicine,,679,435,205 S MCCRARY ST,,WOODBURY,TN,37190,832
1,Ascension Saint Thomas,1013273044,MATTHEW JOSEPH,Hospitalist,,509,487,4220 HARDING PIKE,SUITE 500,NASHVILLE,TN,37205,487
2,Ascension Saint Thomas,1275828089,ZAKARIA BOTROS,Internal Medicine,,693,623,1700 MEDICAL CENTER PKWY,,MURFREESBORO,TN,37129,3065
3,Ascension Saint Thomas,1720241185,WARREN STRIBLING,Internal Medicine,Advanced Heart Failure and Transplant Cardiology,1648,711,4230 HARDING RD.,STE. 330,NASHVILLE,TN,37205,1879
4,Ascension Saint Thomas,1770519704,DAVID GIBSON,Internal Medicine,Cardiovascular Disease,1648,1454,4230 HARDING RD,SUITE 330,NASHVILLE,TN,37205,14706
...,...,...,...,...,...,...,...,...,...,...,...,...,...
99,Vanderbilt University Medical Center,1922107358,SAM CHANG,Urology,,1441,733,3601 TVC,,NASHVILLE,TN,37232,3442
100,Williamson Medical Center,1053638148,LEVI BENSON,Hospitalist,,716,558,4321 CAROTHERS PKWY,,FRANKLIN,TN,37067,558
101,Williamson Medical Center,1952771941,JAYANTHI SAMUEL,Nurse Practitioner,Family,519,467,4323 CAROTHERS PKWY,SUITE 205,FRANKLIN,TN,37067,467
102,Williamson Medical Center,1750327052,CARY PULLIAM,Surgery,Vascular Surgery,691,385,4601 CAROTHERS PKWY,STE 375,FRANKLIN,TN,37067,715


In [401]:
fig_2 = px.treemap(poach2, path=['Facility_Group', 'Classification', 'Specialization'], values='Total_Patients',
                color='Total_Patients', 
                hover_data=['Name', 'Classification', 'Specialization', 'provider_business_address_1',
                           'provider_business_address_2', 'provider_business_city',
                           'provider_business_state', 'provider_business_zip5'],
                color_continuous_scale='Emrld')
fig_2.show()

# Alvin Request #3

In [404]:
# From below, summarize by Facility Group
# Find hospital groups with the largest number of single group, non-Vanderbilt NPIs
# Remove Radiology
with sqlite3.connect('../data/hcbb.sqlite') as db :
    query = """
        WITH CTE AS (
            SELECT from_npi
                , COUNT(DISTINCT to_facility_group) AS to_groups
                , SUM(transaction_count) AS npi_transactions
                , SUM(patient_count) AS npi_patients
                , to_facility_group
            FROM referrals
            WHERE from_npi_specialty NOT IN ('Radiology', 'Emergency Medicine', 'Orthopaedic Surgery',
                                            'Pathology', 'Anesthesiology')
            GROUP BY from_npi
            HAVING to_groups = 1 AND npi_transactions >= 500
        ), CTE_2 AS(
            SELECT CTE.to_facility_group, CTE.npi_transactions, CTE.npi_patients, n.*
            FROM CTE
            INNER JOIN npidata_nashville AS n 
                ON CTE.from_npi = n.npi
            ORDER BY CTE.npi_transactions DESC
        )
        SELECT to_facility_group AS Facility_Group
            , npi
            , provider_first_name || ' ' || provider_last_name AS Name
            , classification AS Classification
            , COALESCE(specialization, 'None') AS Specialization
            , npi_transactions AS NPI_Transactions
            , npi_patients AS NPI_Patients
            , provider_business_address_1
            , provider_business_address_2
            , provider_business_city
            , provider_business_state
            , provider_business_zip5
            , SUM(NPI_Patients) AS Total_Patients
        FROM CTE_2
        GROUP BY classification, specialization, to_facility_group
        """ 

    poach3 = pd.read_sql(query, db)

display(poach3)

Unnamed: 0,Facility_Group,npi,Name,Classification,Specialization,NPI_Transactions,NPI_Patients,provider_business_address_1,provider_business_address_2,provider_business_city,provider_business_state,provider_business_zip5,Total_Patients
0,Vanderbilt University Medical Center,1265530117,JAMI MILLER,Dermatology,,836,523,719 THOMPSON LN,SUITE 26300,NASHVILLE,TN,37204,1873
1,Ascension Saint Thomas,1700823713,JEFF TODD,Family Medicine,,679,435,205 S MCCRARY ST,,WOODBURY,TN,37190,832
2,HCA,1467447664,MATTHEW BRUST,Family Medicine,,1098,278,397 WALLACE RD,SUITE 100,NASHVILLE,TN,37211,2756
3,Maury Regional Medical Center,1932188554,DAVID TURNER,Family Medicine,,767,500,1114 W 7TH ST,,COLUMBIA,TN,38401,500
4,Riverview Regional Medical Center,1346345154,BOWDOIN SMITH,Family Medicine,,785,201,9 MAGGART CIR,,CARTHAGE,TN,37030,201
...,...,...,...,...,...,...,...,...,...,...,...,...,...
99,Ascension Saint Thomas,1649481938,BENJAMIN DEHNER,Urology,,702,403,4230 HARDING RD,SUITE 521,NASHVILLE,TN,37205,403
100,HCA,1720245301,CHRISTOPHER HAWKINS,Urology,,529,321,5651 FRIST BLVD.,STE 616,HERMITAGE,TN,37076,321
101,Maury Regional Medical Center,1710955034,JAMES MARSHALL,Urology,,1051,600,1222 TROTWOOD AVE,SUITE 601,COLUMBIA,TN,38401,1165
102,Vanderbilt University Medical Center,1922107358,SAM CHANG,Urology,,1441,733,3601 TVC,,NASHVILLE,TN,37232,3442


In [405]:
fig_2 = px.treemap(poach3, path=['Classification', 'Specialization', 'Facility_Group'], values='Total_Patients',
                color='Total_Patients', 
                hover_data=['Name', 'Classification', 'Specialization', 'provider_business_address_1',
                           'provider_business_address_2', 'provider_business_city',
                           'provider_business_state', 'provider_business_zip5'],
                color_continuous_scale='Emrld')
fig_2.show()

# CURRENT PROJECT

In [372]:
with sqlite3.connect('../data/hcbb.sqlite') as db :
    query = """
        WITH vandy_selector AS (
            SELECT from_npi, patient_count, CASE
                WHEN to_facility_group = 'Vanderbilt University Medical Center' THEN 'vandy'
                ELSE 'not_vandy' END AS to_vandy,
                SUM(patient_count) OVER(PARTITION BY from_npi) AS total_patients
            FROM referrals
        ), percents AS (
            SELECT from_npi AS npi
                , patient_count AS non_vandy_patients
                , total_patients
                , ROUND(100.0 * patient_count/total_patients, 2) AS pct_competition
            FROM vandy_selector
            WHERE to_vandy = 'not_vandy'
            GROUP BY from_npi, to_vandy
        ), npi_details AS (
            SELECT npi
                , provider_first_name || ' ' || provider_last_name AS Name
                , classification AS Classification
                , COALESCE(specialization, 'None') AS Specialization
                , provider_business_address_1
                , provider_business_address_2
                , provider_business_city
                , provider_business_state
                , provider_business_zip5
            FROM npidata_nashville
            GROUP BY npi
        )
        SELECT *
        FROM percents
        JOIN npi_details USING(npi)
        WHERE classification NOT IN ('Radiology', 'Emergency Medicine', 
                                    'Orthopaedic Surgery','Pathology', 'Anesthesiology')
        AND total_patients >= 500
        """ 

    percents_treemap = pd.read_sql(query, db)

display(percents_treemap)

Unnamed: 0,npi,non_vandy_patients,total_patients,pct_competition,Name,Classification,Specialization,provider_business_address_1,provider_business_address_2,provider_business_city,provider_business_state,provider_business_zip5
0,1003819277,91,675,13.48,JOHN CAGE,Internal Medicine,Cardiovascular Disease,222 22ND AVE N,STE 400,NASHVILLE,TN,37203
1,1013179860,71,1241,5.72,GARY YAWN,Internal Medicine,Interventional Cardiology,353 NEW SHACKLE ISLAND RD STE 300C,,HENDERSONVILLE,TN,37075
2,1013908730,462,548,84.31,JOSEPH PARKER,Internal Medicine,Gastroenterology,222 22ND AVE N,,NASHVILLE,TN,37203
3,1013958776,487,1044,46.65,AHMAD ABU-HALIMAH,Internal Medicine,Cardiovascular Disease,VANDERBILT UNIVERSITY MEDICAL CTR. CARDIOLOGY ...,"383 PRB, 2220 PIERCE AVE.",NASHVILLE,TN,37232
4,1023000999,227,626,36.26,CLARA WOMACK,Internal Medicine,Nephrology,393 WALLACE RD,STE. 203,NASHVILLE,TN,37211
...,...,...,...,...,...,...,...,...,...,...,...,...
310,1962656322,511,511,100.00,CANDICE OLECHOWSKI,Internal Medicine,,1224 TROTWOOD AVE,,COLUMBIA,TN,38401
311,1972658060,1770,1878,94.25,TERRY KETCH,Internal Medicine,Cardiovascular Disease,3443 DICKERSON PIKE STE 430,,NASHVILLE,TN,37207
312,1982661617,96,680,14.12,CHRISTIE GREEN,Internal Medicine,Nephrology,1617 WILLIAMS DR,STE. 200,MURFREESBORO,TN,37129
313,1982796306,247,752,32.85,MATTHEW ABBATE,Internal Medicine,,3601 TVC,,NASHVILLE,TN,37232


In [379]:
fig = px.treemap(percents_treemap, path=['Classification', 'Specialization', 'Name'], values='non_vandy_patients',
                color='pct_competition', 
                hover_data=['Name', 'Classification', 'Specialization', 'provider_business_address_1',
                           'provider_business_address_2', 'provider_business_city',
                           'provider_business_state', 'provider_business_zip5', 'total_patients'],
                color_continuous_scale='Emrld')
fig.show()

In [None]:
# From below, summarize by Classification
# Find classifications with largest number of transactions to non_vandy
# Find percentage of non_Vandhy
with sqlite3.connect('../data/hcbb.sqlite') as db :
    query = """
        WITH CTE AS (
            SELECT from_npi
                , COUNT(DISTINCT to_facility_group) AS to_groups
                , SUM(transaction_count) AS npi_transactions
                , SUM(patient_count) AS npi_patients
                , to_facility_group
            FROM referrals
            AND from_npi_specialty NOT IN ('Radiology', 'Emergency Medicine', 'Orthopaedic Surgery',
                                            'Pathology', 'Anesthesiology')
            GROUP BY from_npi
            HAVING to_groups = 1 AND npi_transactions >= 500
        ), CTE_2 AS(
            SELECT CTE.to_facility_group, CTE.npi_transactions, CTE.npi_patients, n.*
            FROM CTE
            INNER JOIN npidata_nashville AS n 
                ON CTE.from_npi = n.npi
            ORDER BY CTE.npi_transactions DESC
        )
        SELECT to_facility_group AS Facility_Group
            , npi
            , provider_first_name || ' ' || provider_last_name AS Name
            , classification AS Classification
            , specialization AS Specialization
            , npi_transactions AS NPI_Transactions
            , npi_patients AS NPI_Patients
            , provider_business_address_1
            , provider_business_address_2
            , provider_business_city
            , provider_business_state
            , provider_business_zip5
        FROM CTE_2
        ORDER BY to_facility_group
        """ 

    poach_two = pd.read_sql(query, db)

display(poach_two)

In [260]:
# From below, summarize by Classification
# Find classifications with largest number of transactions to non_vandy
# Find percentage of non_Vandhy
with sqlite3.connect('../data/hcbb.sqlite') as db :
    query = """
        WITH CTE AS (
            SELECT from_npi
                , COUNT(DISTINCT to_facility_group) AS to_groups
                , SUM(transaction_count) AS npi_transactions
                , SUM(patient_count) AS npi_patients
                , to_facility_group
            FROM referrals
            WHERE from_npi_specialty NOT IN ('Radiology', 'Emergency Medicine', 'Orthopaedic Surgery',
                                            'Pathology', 'Anesthesiology')
            GROUP BY from_npi, 
        )
        SELECT CTE.to_facility_group, CTE.npi_transactions, CTE.npi_patients, n.*
        FROM CTE
        INNER JOIN npidata_nashville AS n 
            ON CTE.from_npi = n.npi
        ORDER BY CTE.npi_transactions DESC
        """ 

    poach_two = pd.read_sql(query, db)

display(poach_two)

Unnamed: 0,to_facility_group,npi_transactions,npi_patients,npi,entity_type_code,provider_org_name,provider_last_name,provider_first_name,provider_middle_name,provider_name_prefix,provider_name_suffix,provider_credential,provider_business_address_1,provider_business_address_2,provider_business_city,provider_business_state,provider_business_zip,taxonomy_code,provider_business_zip5,zip,cbsa,grouping,classification,specialization
0,HCA,6823,5195,1417131715,1,,RIDDICK,JOHN,ALSTON,DR.,,M.D.,2400 PATTERSON ST,SUITE 502,NASHVILLE,TN,372031562,207RI0011X,37203,37203,34980,Allopathic & Osteopathic Physicians,Internal Medicine,Interventional Cardiology
1,Maury Regional Medical Center,6027,1822,1174517593,1,,NEUSS,MICHAEL,NORBERT,DR.,,M.D.,"2220 PIERCE AVENUE, VANDERBILT INGRAM CANCER C...",VANDERBILT UNIVERSITY MEDICAL CENTR,NASHVILLE,TN,372326683,207RH0003X,37232,37232,34980,Allopathic & Osteopathic Physicians,Internal Medicine,Hematology & Oncology
2,Macon County General Hospital,4570,3091,1104933738,1,,LEE,JUNG,H.,,,M.D.,353 NEW SHACKLE ISLAND RD,#300C,HENDERSONVILLE,TN,370752379,207RC0000X,37075,37075,34980,Allopathic & Osteopathic Physicians,Internal Medicine,Cardiovascular Disease
3,Macon County General Hospital,3845,2521,1790891315,1,,RUSSO,DONALD,J,,,M.D.,353 NEW SHACKLE ISLAND RD,#300C,HENDERSONVILLE,TN,370752379,207RC0000X,37075,37075,34980,Allopathic & Osteopathic Physicians,Internal Medicine,Cardiovascular Disease
4,Macon County General Hospital,3688,2542,1679689285,1,,CALLISTER,TRACY,Q.,,,M.D.,353 NEW SHACKLE ISLAND RD,#300C,HENDERSONVILLE,TN,370752379,207RC0000X,37075,37075,34980,Allopathic & Osteopathic Physicians,Internal Medicine,Cardiovascular Disease
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3351,HCA,50,50,1063742229,1,,WOOTEN,SARAH,ANN,,,CRNA,110 29TH AVE N,STE 202,NASHVILLE,TN,372031401,367500000X,37203,37203,34980,Physician Assistants & Advanced Practice Nursi...,"Nurse Anesthetist, Certified Registered",
3352,HCA,50,49,1619291242,1,,KELLEY,SHERRY,L,,,CRNA,110 29TH AVE N STE 202,,NASHVILLE,TN,372031448,367500000X,37203,37203,34980,Physician Assistants & Advanced Practice Nursi...,"Nurse Anesthetist, Certified Registered",
3353,Ascension Saint Thomas,50,29,1376809590,1,,ZELLER,EMEM,ASUQUO,,,APN NP-C,28 WHITE BRIDGE RD STE 208,,NASHVILLE,TN,372051467,363LF0000X,37205,37205,34980,Physician Assistants & Advanced Practice Nursi...,Nurse Practitioner,Family
3354,Vanderbilt University Medical Center,50,46,1992057509,1,,MARCRUM,TRACI,,,,N.P.,3601 THE VANDERBILT CLINIC,,NASHVILLE,TN,372328802,363LA2100X,37232,37232,34980,Physician Assistants & Advanced Practice Nursi...,Nurse Practitioner,Acute Care


In [None]:
IsNull(FieldName, 0)


In [169]:
# From below, summarize by specialization
# Find hospital groups with the largest number of single group, non-Vanderbilt NPIs
with sqlite3.connect('../data/hcbb.sqlite') as db :
    query = """
        WITH CTE AS (
            SELECT from_npi
                , COUNT(DISTINCT to_facility_group) AS to_groups
                , SUM(transaction_count) AS npi_transactions
                , to_facility_group
            FROM referrals
            WHERE to_facility_group <> 'Vanderbilt University Medical Center'
            GROUP BY from_npi
            HAVING to_groups = 1 AND npi_transactions >= 1000
        ), CTE_2 AS(
            SELECT CTE.to_facility_group, n.*, CTE.npi_transactions 
            FROM CTE
            INNER JOIN npidata_nashville AS n 
                ON CTE.from_npi = n.npi
            ORDER BY CTE.npi_transactions DESC
        )
        SELECT classification, COUNT(classification), SUM(npi_transactions)
        FROM CTE_2
        GROUP BY classification
        ORDER BY SUM(npi_transactions) DESC 
        """ 

    test_df = pd.read_sql(query, db)

display(test_df)

Unnamed: 0,classification,COUNT(classification),SUM(npi_transactions)
0,Radiology,48,118352
1,Internal Medicine,52,75354
2,Family Medicine,2,2519
3,Pathology,2,2437
4,Otolaryngology,2,2203
5,Orthopaedic Surgery,1,1253
6,Surgery,1,1239
7,Nurse Practitioner,1,1113
8,Specialist,1,1090
9,Urology,1,1051


In [146]:
# Find NPIs sending to single facility group
with sqlite3.connect('../data/hcbb.sqlite') as db :
    query = """
        WITH CTE AS (
            SELECT from_npi, COUNT(DISTINCT to_facility_group) AS to_groups, SUM(transaction_count) AS npi_transactions
            FROM referrals
            GROUP BY from_npi
        )
        SELECT n.*, CTE.npi_transactions 
        FROM CTE
        INNER JOIN npidata_nashville AS n 
            ON CTE.from_npi = n.npi
        WHERE CTE.to_groups = 1
        ORDER BY CTE.npi_transactions DESC
        """ 

    test_df = pd.read_sql(query, db)

display(test_df)

Unnamed: 0,npi,entity_type_code,provider_org_name,provider_last_name,provider_first_name,provider_middle_name,provider_name_prefix,provider_name_suffix,provider_credential,provider_business_address_1,provider_business_address_2,provider_business_city,provider_business_state,provider_business_zip,taxonomy_code,provider_business_zip5,zip,cbsa,grouping,classification,specialization,npi_transactions
0,1871548818,1,,KLEIN,WILLIAM,J,DR.,,MD,210 25TH AVE N STE 602,,NASHVILLE,TN,37203,2085R0202X,37203,37203,34980,Allopathic & Osteopathic Physicians,Radiology,Diagnostic Radiology,4900
1,1376756742,1,,GRIFFIN,BENJAMIN,DAVID,,,M.D.,210 25TH AVE N,,NASHVILLE,TN,37203,2085R0202X,37203,37203,34980,Allopathic & Osteopathic Physicians,Radiology,Diagnostic Radiology,4634
2,1316983695,1,,GUTTENTAG,ADAM,R,,,MD,3601 THE VANDERBILT CLINIC,,NASHVILLE,TN,372320001,2085R0202X,37232,37232,34980,Allopathic & Osteopathic Physicians,Radiology,Diagnostic Radiology,4405
3,1740377845,1,,TABER,DAVID,,,,MD,3601 TVC,,NASHVILLE,TN,372320001,2085R0202X,37232,37232,34980,Allopathic & Osteopathic Physicians,Radiology,Diagnostic Radiology,4378
4,1043302466,1,,BLOCK,JOHN,,,,MD,3601 TVC,,NASHVILLE,TN,372320001,2085R0202X,37232,37232,34980,Allopathic & Osteopathic Physicians,Radiology,Diagnostic Radiology,3822
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2587,1619291242,1,,KELLEY,SHERRY,L,,,CRNA,110 29TH AVE N STE 202,,NASHVILLE,TN,372031448,367500000X,37203,37203,34980,Physician Assistants & Advanced Practice Nursi...,"Nurse Anesthetist, Certified Registered",,50
2588,1376809590,1,,ZELLER,EMEM,ASUQUO,,,APN NP-C,28 WHITE BRIDGE RD STE 208,,NASHVILLE,TN,372051467,363LF0000X,37205,37205,34980,Physician Assistants & Advanced Practice Nursi...,Nurse Practitioner,Family,50
2589,1992057509,1,,MARCRUM,TRACI,,,,N.P.,3601 THE VANDERBILT CLINIC,,NASHVILLE,TN,372328802,363LA2100X,37232,37232,34980,Physician Assistants & Advanced Practice Nursi...,Nurse Practitioner,Acute Care,50
2590,1487096459,1,,GARG,RICHA,,DR.,,M.D.,1020 N HIGHLAND AVE,,MURFREESBORO,TN,371302494,207Q00000X,37130,37130,34980,Allopathic & Osteopathic Physicians,Family Medicine,,50


In [348]:
#Filter to Vanderbilt
with sqlite3.connect('../data/hcbb.sqlite') as db :
    query = """
        SELECT npi, COALESCE(specialization, 'NONE')
        FROM npidata_nashville
        LIMIT 10
        """ 

    test_df = pd.read_sql(query, db)

display(test_df)

Unnamed: 0,npi,"COALESCE(specialization, 'NONE')"
0,1134122187,NONE
1,1003819046,Gynecology
2,1750384780,NONE
3,1922001957,NONE
4,1073516001,NONE
5,1780687830,Cytopathology
6,1760485817,NONE
7,1437152485,NONE
8,1497758445,NONE
9,1952304990,Cardiovascular Disease


In [47]:
# Find interesting npi_specialty
with sqlite3.connect('../data/hcbb.sqlite') as db :
    query = """
        SELECT from_npi_specialty AS classification, COUNT(from_npi_specialty) AS classification_count
        From referrals
        GROUP BY from_npi_specialty
        ORDER BY COUNT(from_npi_specialty) DESC
        LIMIT 10
        """ 

    test_df = pd.read_sql(query, db)

display(test_df)

Unnamed: 0,classification,classification_count
0,Internal Medicine,10774
1,Radiology,5446
2,Nurse Practitioner,3025
3,Family Medicine,2536
4,Emergency Medicine,1399
5,"Nurse Anesthetist, Certified Registered",1231
6,Orthopaedic Surgery,1144
7,Anesthesiology,1017
8,Surgery,869
9,Psychiatry & Neurology,778


In [48]:
# Find interesting Specializations (can be filtered by specialty)
with sqlite3.connect('../data/hcbb.sqlite') as db :
    query = """
        SELECT npidata_nashville.specialization, COUNT(npidata_nashville.specialization)
        From referrals
        JOIN npidata_nashville
        ON referrals.from_npi = npidata_nashville.npi
        WHERE referrals.from_npi_specialty = 'Nurse Practitioner'
        GROUP BY npidata_nashville.specialization
        ORDER BY COUNT(npidata_nashville.specialization) DESC
        LIMIT 10
        """ 

    test_df = pd.read_sql(query, db)

display(test_df)

Unnamed: 0,specialization,COUNT(npidata_nashville.specialization)
0,Family,1463
1,Acute Care,397
2,Adult Health,310
3,Psychiatric/Mental Health,131
4,Women's Health,75
5,Gerontology,52
6,Primary Care,17
7,Obstetrics & Gynecology,3
8,Critical Care Medicine,1
9,,0


In [46]:
# Find interesting Groupings (can be filtered by specialty)
with sqlite3.connect('../data/hcbb.sqlite') as db :
    query = """
        SELECT npidata_nashville.grouping, COUNT(npidata_nashville.grouping)
        From referrals
        JOIN npidata_nashville
        ON referrals.from_npi = npidata_nashville.npi
        WHERE referrals.from_npi_specialty = 'Anesthesiology'
        GROUP BY npidata_nashville.grouping
        ORDER BY COUNT(npidata_nashville.grouping) DESC
        LIMIT 10
        """ 

    test_df = pd.read_sql(query, db)

display(test_df)

Unnamed: 0,grouping,COUNT(npidata_nashville.grouping)
0,Allopathic & Osteopathic Physicians,1017


In [18]:
# Find NPIs with largest number of referrals
with sqlite3.connect('../data/hcbb.sqlite') as db :
    query = """
    SELECT SUM(transaction_count) AS total_referrals, to_npi, n.provider_org_name
    FROM filtered_hop_teaming AS f
    JOIN npidata AS n
    ON f.to_npi = n.npi
    GROUP BY to_npi
    ORDER BY total_referrals DESC
    LIMIT 20;
    """ 
    
    test = pd.read_sql(query, db)

test

Unnamed: 0,total_referrals,to_npi,provider_org_name
0,901945,1104202761,VANDERBILT UNIVERSITY MEDICAL CENTER
1,635811,1437194669,SAINT THOMAS MEDICAL PARTNERS
2,603385,1396882205,VANDERBILT UNIVERSITY MEDICAL CENTER
3,598277,1093741464,"ADVANCED DIAGNOSTIC IMAGING, PC"
4,526917,1861478489,RADIOLOGY ALLIANCE PC
5,445897,1003863580,"ASSOCIATED PATHOLOGISTS, LLC"
6,323305,1245393057,CENTENNIAL HEART LLC
7,245119,1215932413,"ANESTHESIA MEDICAL GROUP, PC"
8,240693,1811955917,TENNESSEE ONCOLOGY PLLC
9,228282,1235186800,"PATHGROUP LABS, LLC"


In [None]:
# Find competitor hosptitals with the largest number of total referrals.
with sqlite3.connect('../data/hcbb.sqlite') as db :
    query = """
    SELECT SUM(transaction_count) AS total_referrals, to_npi, n.provider_org_name, t.classification
    FROM filtered_hop_teaming AS f
    JOIN npidata AS n
    ON f.to_npi = n.npi
    JOIN taxonomy AS t
    USING(taxonomy_code)
    WHERE classification = 'Family Medicine'
    GROUP BY to_npi, provider_org_name
    --HAVING provider_org_name LIKE '%VUMC%'
    ORDER BY total_referrals DESC
    LIMIT 50;
    """ 
    
    test = pd.read_sql(query, db)

test

In [None]:
# Find competitor hosptitals with the largest number of total referrals.
with sqlite3.connect('../data/hcbb.sqlite') as db :
    query = """
    SELECT classification, SUM(transaction_count)
    FROM filtered_hop_teaming AS f
    JOIN npidata AS n
    ON f.to_npi = n.npi
    JOIN taxonomy AS t
    USING(taxonomy_code)
    --WHERE provider_org_name LIKE '%VANDERBILT%'
    GROUP BY classification
    ORDER BY SUM(transaction_count) DESC;
    """ 
    
    test = pd.read_sql(query, db)

test

In [None]:
# Find addresses of all Vanderbilt locations
with sqlite3.connect('../data/hcbb.sqlite') as db :
    query = """
    SELECT *
    FROM npidata
    WHERE provider_org_name LIKE '%VANDERBILT%'
    GROUP BY provider_business_address_1
        , provider_business_city
        , provider_business_state
        , provider_business_zip5
    """ 
    
    test = pd.read_sql(query, db)

test

In [None]:
# Find competitor hosptitals with the largest number of total referrals.
with sqlite3.connect('../data/hcbb.sqlite') as db :
    query = """
    WITH vandy_address AS (
        SELECT *
        FROM npidata
        WHERE provider_org_name LIKE '%VANDERBILT%'
        GROUP BY provider_business_address_1
            , provider_business_city
            , provider_business_state
            , provider_business_zip5
    )
    SELECT *
    FROM npidata
    --JOIN vandy_address AS va USING(npi)
    WHERE provider_org_name NOT LIKE '%VANDERBILT%'
    AND provider_business_address_1 IN (SELECT provider_business_address_1 FROM vandy_address)
    AND provider_business_city IN (SELECT provider_business_city FROM vandy_address)
    AND provider_business_zip5 IN (SELECT provider_business_zip5 FROM vandy_address)
    """ 
    
    test = pd.read_sql(query, db)

test

In [None]:
# TESTING SQUARE
with sqlite3.connect('../data/hcbb.sqlite') as db :
    query = """
    SELECT provider_business_state, COUNT(provider_business_state)
    FROM npidata
    GROUP BY provider_business_state
    """ 
    
    test = pd.read_sql(query, db)

test

In [None]:
# TESTING SQUARE
with sqlite3.connect('../data/hcbb.sqlite') as db :
    query = """
    SELECT *
    FROM npidata
    LIMIT 1;
    """ 
    
    test = pd.read_sql(query, db)

test

In [None]:
# Tingting's code for exporting to Neo4j

nodes = list(set(ht.from_npi.tolist() + ht.to_npi.tolist()))
node_df = pd.DataFrame({'npi:ID': nodes})
node_df[':LABEL'] = "Provider"
node_df.to_csv('import/nodes.csv', index = False)
edges = pd.DataFrame({':START_ID' : ht.from_npi, 'patient_count': ht.patient_count, 
                      'transaction_count': ht.transaction_count, 
                     ':END_ID' : ht.to_npi})
edges[':TYPE'] = 'REFERRED_TO'
edges.to_csv('import/edges.csv', index = False)

In [None]:
# Test to confirm the tables loaded.
with sqlite3.connect('../data/hcbb.sqlite') as db :
    query = """
    SELECT *
    FROM taxonomy
    LIMIT 1;
    """ 
    
    test = pd.read_sql(query, db)

test

In [None]:
# Test to confirm the tables loaded.
with sqlite3.connect('../data/hcbb.sqlite') as db :
    query = """
    SELECT DISTINCT classification
    FROM taxonomy
    """ 
    
    test = pd.read_sql(query, db)

test

In [None]:
# Test to confirm the tables loaded.
with sqlite3.connect('../data/hcbb.sqlite') as db :
    query = """
    SELECT npidata.*
    FROM npidata
    JOIN taxonomy USING (taxonomy_code)
    WHERE classification = 'train'
    """ 
    
    test = pd.read_sql(query, db)

test

In [14]:
referrals = pd.read_csv('../data/nashville_referrals_normalised.csv')

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,


In [17]:
referrals.head(10)

Unnamed: 0.1,Unnamed: 0,index,from_npi,to_npi,patient_count,transaction_count,average_day_wait,std_day_wait,from_zip,from_npi_specialty,from_entity_type_code,to_zip,to_npi_specialty,to_entity_type_code,to_facility,to_facility_group,to_facility_name_normalised
0,0,20662650,1780832899,1245233220,62,63,2.667,20.412,37203,Anesthesiology,1.0,37115,Clinic/Center,2.0,,,
1,1,16742214,1851362628,1790788040,26,65,3.231,18.277,37129,Internal Medicine,1.0,37129,"Hospice Care, Community Based",2.0,,,
2,2,6532675,1396753356,1609879956,27,68,4.529,27.525,37174,Internal Medicine,1.0,37214,"Hospice Care, Community Based",2.0,,,
3,3,18197644,1144264458,1609879956,36,77,3.247,27.359,37067,Family Medicine,1.0,37214,"Hospice Care, Community Based",2.0,,,
4,4,26837443,1942347513,1609879956,36,53,21.509,40.591,37217,Nurse Practitioner,1.0,37214,"Hospice Care, Community Based",2.0,,,
5,5,832492,1003833872,1568464873,49,117,19.983,26.492,37055,Counselor,1.0,37055,Family Medicine,2.0,,,
6,6,832494,1003858267,1568464873,50,63,17.698,34.482,37055,Hospitalist,1.0,37055,Family Medicine,2.0,,,
7,7,832495,1003862566,1568464873,91,94,42.968,47.179,37072,Radiology,1.0,37055,Family Medicine,2.0,,,
8,8,832497,1013226026,1568464873,165,239,42.209,54.333,37055,Nurse Practitioner,1.0,37055,Family Medicine,2.0,,,
9,9,832499,1003991167,1568464873,35,67,21.94,24.913,37203,Ophthalmology,1.0,37055,Family Medicine,2.0,,,
