In [55]:
import pandas as pd
import sqlite3
from tqdm.notebook import tqdm
import plotly.express as px
import numpy as np
import plotly.graph_objects as go
import chart_studio
import chart_studio.plotly as py

In [56]:
pd.set_option("display.max_columns", 500)

In [57]:
file = open("../../plotly_api_key.txt")

api_key = file.read().replace("\n", " ")
file.close()

In [58]:
username = 'matttparker'

In [60]:
chart_studio.tools.set_credentials_file(username=username, api_key=api_key)

## Create Visual of NPIs to Poach - NOT IN PRESENTATION

In [32]:
# From below, summarize by Facility Group
# Find hospital groups with the largest number of single group, non-Vanderbilt NPIs
# Remove Radiology
with sqlite3.connect('../data/hcbb.sqlite') as db :
    query = """
        WITH CTE AS (
            SELECT from_npi
                , COUNT(DISTINCT to_facility_group) AS to_groups
                , SUM(transaction_count) AS npi_transactions
                , SUM(patient_count) AS npi_patients
                , to_facility_group
            FROM referrals
            WHERE to_facility_group <> 'Vanderbilt University Medical Center'
            AND from_npi_specialty NOT IN ('Radiology', 'Emergency Medicine', 'Orthopaedic Surgery',
                                            'Pathology', 'Anesthesiology')
            GROUP BY from_npi
            HAVING to_groups = 1 AND npi_transactions >= 500
        ), CTE_2 AS(
            SELECT CTE.to_facility_group, CTE.npi_transactions, CTE.npi_patients, n.*
            FROM CTE
            INNER JOIN npidata_nashville AS n 
                ON CTE.from_npi = n.npi
            ORDER BY CTE.npi_transactions DESC
        )
        SELECT to_facility_group AS Facility_Group
            , npi
            , provider_first_name || ' ' || provider_last_name AS Name
            , classification AS Classification
            , COALESCE(specialization, 'None') AS Specialization
            , npi_transactions AS NPI_Transactions
            , npi_patients AS NPI_Patients
            , provider_business_address_1
            , provider_business_address_2
            , provider_business_city
            , provider_business_state
            , provider_business_zip5
        FROM CTE_2
        ORDER BY to_facility_group
        """ 

    poach = pd.read_sql(query, db)

display(poach)

Unnamed: 0,Facility_Group,npi,Name,Classification,Specialization,NPI_Transactions,NPI_Patients,provider_business_address_1,provider_business_address_2,provider_business_city,provider_business_state,provider_business_zip5
0,Ascension Saint Thomas,1770519704,DAVID GIBSON,Internal Medicine,Cardiovascular Disease,1648,1454,4230 HARDING RD,SUITE 330,NASHVILLE,TN,37205
1,Ascension Saint Thomas,1720241185,WARREN STRIBLING,Internal Medicine,Advanced Heart Failure and Transplant Cardiology,1648,711,4230 HARDING RD.,STE. 330,NASHVILLE,TN,37205
2,Ascension Saint Thomas,1598719536,DON CHOMSKY,Internal Medicine,Cardiovascular Disease,1621,701,4230 HARDING RD,SUITE 330,NASHVILLE,TN,37205
3,Ascension Saint Thomas,1457317257,GUY MIOTON,Internal Medicine,Cardiovascular Disease,1458,1155,1840 MEDICAL CENTER PKWY,SUITE 201,MURFREESBORO,TN,37129
4,Ascension Saint Thomas,1942429816,ANDREW ZURICK,Internal Medicine,Cardiovascular Disease,1421,1265,4230 HARDING RD.,SUITE 330,NASHVILLE,TN,37205
...,...,...,...,...,...,...,...,...,...,...,...,...
279,Williamson Medical Center,1346397130,PAUL FLESER,Surgery,Vascular Surgery,570,330,100 COVEY DR,SUITE 204,FRANKLIN,TN,37067
280,Williamson Medical Center,1952771941,JAYANTHI SAMUEL,Nurse Practitioner,Family,519,467,4323 CAROTHERS PKWY,SUITE 205,FRANKLIN,TN,37067
281,Williamson Medical Center,1033246640,OUIDA COLLINS,Family Medicine,,510,407,3601 TVC,,NASHVILLE,TN,37232
282,Williamson Medical Center,1063864775,PAULA DUNN,Family Medicine,,504,237,4091 MALLORY LN,,FRANKLIN,TN,37067


In [61]:
fig = px.treemap(poach, path=['Facility_Group', 'Classification', 'Name'], values='NPI_Patients',
                color='NPI_Transactions', 
                hover_data=['Name', 'Classification', 'Specialization', 'provider_business_address_1',
                           'provider_business_address_2', 'provider_business_city',
                           'provider_business_state', 'provider_business_zip5'],
                color_continuous_scale='Emrld')
fig.show()
#fig.write_html("/Users/mattparker/Documents/nss_projects/hcbb_hop_team-3m/notebooks/poaching_plotly.html")
py.plot(fig, filename = 'poaching_plotly_published', auto_open=True)

'https://plotly.com/~matttparker/1/'

# Percent Competition: Question 2/3 Visualization

In [46]:
with sqlite3.connect('../data/hcbb.sqlite') as db :
    query = """
        WITH vandy_selector AS (
            SELECT from_npi, patient_count, CASE
                WHEN to_facility_group = 'Vanderbilt University Medical Center' THEN 'vandy'
                ELSE 'not_vandy' END AS to_vandy,
                SUM(patient_count) OVER(PARTITION BY from_npi) AS total_patients
            FROM referrals
        ), percents AS (
            SELECT from_npi AS npi
                , patient_count AS non_vandy_patients
                , total_patients
                , ROUND(100.0 * patient_count/total_patients, 2) AS pct_competition
            FROM vandy_selector
            WHERE to_vandy = 'not_vandy'
            GROUP BY from_npi, to_vandy
        ), npi_details AS (
            SELECT npi
                , provider_first_name || ' ' || provider_last_name AS Name
                , classification AS Classification
                , COALESCE(specialization, 'None') AS Specialization
                , provider_business_address_1
                , provider_business_address_2
                , provider_business_city
                , provider_business_state
                , provider_business_zip5
            FROM npidata_nashville
            GROUP BY npi
        )
        SELECT *
        FROM percents
        JOIN npi_details USING(npi)
        WHERE classification NOT IN ('Emergency Medicine', 'Orthopaedic Surgery', --'Radiology',
                                            'Pathology', 'Anesthesiology')
        AND total_patients >= 500
        """ 

    percents_treemap = pd.read_sql(query, db)

display(percents_treemap)

Unnamed: 0,npi,non_vandy_patients,total_patients,pct_competition,Name,Classification,Specialization,provider_business_address_1,provider_business_address_2,provider_business_city,provider_business_state,provider_business_zip5
0,1003819277,91,675,13.48,JOHN CAGE,Internal Medicine,Cardiovascular Disease,222 22ND AVE N,STE 400,NASHVILLE,TN,37203
1,1003862566,1110,2738,40.54,MARC SOBLE,Radiology,Diagnostic Radiology,3024 BUSINESS PARK CIR,,GOODLETTSVILLE,TN,37072
2,1013179860,71,1241,5.72,GARY YAWN,Internal Medicine,Interventional Cardiology,353 NEW SHACKLE ISLAND RD STE 300C,,HENDERSONVILLE,TN,37075
3,1013184340,1781,2257,78.91,MELINDA SAVA,Radiology,Diagnostic Radiology,3024 BUSINESS PARK CIR,,GOODLETTSVILLE,TN,37072
4,1013908730,462,548,84.31,JOSEPH PARKER,Internal Medicine,Gastroenterology,222 22ND AVE N,,NASHVILLE,TN,37203
...,...,...,...,...,...,...,...,...,...,...,...,...
441,1972658060,1770,1878,94.25,TERRY KETCH,Internal Medicine,Cardiovascular Disease,3443 DICKERSON PIKE STE 430,,NASHVILLE,TN,37207
442,1982661617,96,680,14.12,CHRISTIE GREEN,Internal Medicine,Nephrology,1617 WILLIAMS DR,STE. 200,MURFREESBORO,TN,37129
443,1982796306,247,752,32.85,MATTHEW ABBATE,Internal Medicine,,3601 TVC,,NASHVILLE,TN,37232
444,1992064935,82,686,11.95,MENGISTU BERIE,Internal Medicine,,1005 DR. D.B.TODD JR. BLVD.,,NASHVILE,TN,37208


In [47]:
percent_competition = px.treemap(percents_treemap, path=['Classification', 'Specialization', 'Name'], values='non_vandy_patients',
                color='pct_competition', 
                hover_data=['Name', 'Classification', 'Specialization', 'provider_business_address_1',
                           'provider_business_address_2', 'provider_business_city',
                           'provider_business_state', 'provider_business_zip5', 'total_patients'],
                color_continuous_scale='Emrld')
percent_competition.show()
#percent_competition.write_html("/Users/mattparker/Documents/nss_projects/hcbb_hop_team-3m/notebooks/percent_competition_radiology.html")
percent_competition.plot(fig, filename = 'percent_competition_radiology', auto_open=True)


# Alvin Request #1 (Alternate to Previous Visual) - NOT IN PRESENTATION

In [36]:
fig_1 = px.treemap(poach, path=['Classification', 'Specialization', 'Name'], values='NPI_Patients',
                color='NPI_Patients', 
                hover_data=['Name', 'Classification', 'Specialization', 'provider_business_address_1',
                           'provider_business_address_2', 'provider_business_city',
                           'provider_business_state', 'provider_business_zip5'],
                color_continuous_scale='Emrld')
fig_1.show()

# Hospital to Specialty

In [41]:
# From below, summarize by Facility Group
# Find hospital groups with the largest number of single group, non-Vanderbilt NPIs
# Remove Radiology
with sqlite3.connect('../data/hcbb.sqlite') as db :
    query = """
        WITH CTE AS (
            SELECT from_npi
                , COUNT(DISTINCT to_facility_group) AS to_groups
                , SUM(transaction_count) AS npi_transactions
                , SUM(patient_count) AS npi_patients
                , to_facility_group
            FROM referrals
            WHERE from_npi_specialty NOT IN ('Emergency Medicine', 'Orthopaedic Surgery', --'Radiology',
                                            'Pathology', 'Anesthesiology')
            GROUP BY from_npi
        ), CTE_2 AS(
            SELECT CTE.to_facility_group, CTE.npi_transactions, CTE.npi_patients, n.*
            FROM CTE
            INNER JOIN npidata_nashville AS n 
                ON CTE.from_npi = n.npi
            ORDER BY CTE.npi_transactions DESC
        )
        SELECT to_facility_group AS Facility_Group
            , npi
            , provider_first_name || ' ' || provider_last_name AS Name
            , classification AS Classification
            , COALESCE(specialization, 'None') AS Specialization
            , npi_transactions AS NPI_Transactions
            , npi_patients AS NPI_Patients
            , provider_business_address_1
            , provider_business_address_2
            , provider_business_city
            , provider_business_state
            , provider_business_zip5
            , SUM(NPI_Patients) AS Total_Patients
        FROM CTE_2
        GROUP BY to_facility_group, classification, specialization
        """ 

    poach2 = pd.read_sql(query, db)

display(poach2)

Unnamed: 0,Facility_Group,npi,Name,Classification,Specialization,NPI_Transactions,NPI_Patients,provider_business_address_1,provider_business_address_2,provider_business_city,provider_business_state,provider_business_zip5,Total_Patients
0,Ascension Saint Thomas,1497709752,ERIKA GILBERT,Anesthesiologist Assistant,,87,84,1800 MEDICAL CENTER PKWY,SUITE 330,MURFREESBORO,TN,37129,84
1,Ascension Saint Thomas,1487780268,NORMA KRANTZ,Clinical Nurse Specialist,Women's Health,146,141,300 20TH AVE N STE 401,,NASHVILLE,TN,37203,141
2,Ascension Saint Thomas,1952561474,SREE SURYADEVARA,Colon & Rectal Surgery,,129,98,2011 CHURCH ST,STE. 703,NASHVILLE,TN,37203,226
3,Ascension Saint Thomas,1952304966,JOHN SALYER,Family Medicine,,904,604,127 CRESTVIEW PARK DR,,DICKSON,TN,37055,6116
4,Ascension Saint Thomas,1386878957,HOLLY BLANKENSHIP,Family Medicine,Adult Medicine,105,75,370 DOOLITTLE RD STE 1,,WOODBURY,TN,37190,75
...,...,...,...,...,...,...,...,...,...,...,...,...,...
452,Williamson Medical Center,1538157219,LAURA BASKIN,Surgery,,266,143,1272 GARRISON DR,,MURFREESBORO,TN,37129,913
453,Williamson Medical Center,1174622500,MARK KELLEY,Surgery,Surgical Oncology,509,300,3601 THE VANDERBILT CLINIC,,NASHVILLE,TN,37232,300
454,Williamson Medical Center,1750327052,CARY PULLIAM,Surgery,Vascular Surgery,691,385,4601 CAROTHERS PKWY,STE 375,FRANKLIN,TN,37067,949
455,Williamson Medical Center,1184657272,ERIC LAMBRIGHT,Thoracic Surgery (Cardiothoracic Vascular Surg...,,492,258,3601 VANDERBILT CLINIC,,NASHVILLE,TN,37232,258


In [42]:
hospital_to_specialty = px.treemap(poach2, path=['Facility_Group', 'Classification', 'Specialization'], values='Total_Patients',
                color='Total_Patients', 
                hover_data=['Name', 'Classification', 'Specialization', 'provider_business_address_1',
                           'provider_business_address_2', 'provider_business_city',
                           'provider_business_state', 'provider_business_zip5'],
                color_continuous_scale='Emrld')
hospital_to_specialty.show()
hospital_to_specialty.write_html("/Users/mattparker/Documents/nss_projects/hcbb_hop_team-3m/notebooks/hospital_to_specialty_radiology.html")


# Alvin Request #3

In [43]:
# From below, summarize by Facility Group
# Find hospital groups with the largest number of single group, non-Vanderbilt NPIs
# Remove Radiology
with sqlite3.connect('../data/hcbb.sqlite') as db :
    query = """
        WITH CTE AS (
            SELECT from_npi
                , COUNT(DISTINCT to_facility_group) AS to_groups
                , SUM(transaction_count) AS npi_transactions
                , SUM(patient_count) AS npi_patients
                , to_facility_group
            FROM referrals
            WHERE from_npi_specialty NOT IN ('Emergency Medicine', 'Orthopaedic Surgery', --'Radiology',
                                            'Pathology', 'Anesthesiology')
            GROUP BY from_npi
        ), CTE_2 AS(
            SELECT CTE.to_facility_group, CTE.npi_transactions, CTE.npi_patients, n.*
            FROM CTE
            INNER JOIN npidata_nashville AS n 
                ON CTE.from_npi = n.npi
            ORDER BY CTE.npi_transactions DESC
        )
        SELECT to_facility_group AS Facility_Group
            , npi
            , provider_first_name || ' ' || provider_last_name AS Name
            , classification AS Classification
            , COALESCE(specialization, 'None') AS Specialization
            , npi_transactions AS NPI_Transactions
            , npi_patients AS NPI_Patients
            , provider_business_address_1
            , provider_business_address_2
            , provider_business_city
            , provider_business_state
            , provider_business_zip5
            , SUM(NPI_Patients) AS Total_Patients
        FROM CTE_2
        GROUP BY classification, specialization, to_facility_group
        """ 

    poach3 = pd.read_sql(query, db)

display(poach3)

Unnamed: 0,Facility_Group,npi,Name,Classification,Specialization,NPI_Transactions,NPI_Patients,provider_business_address_1,provider_business_address_2,provider_business_city,provider_business_state,provider_business_zip5,Total_Patients
0,HCA,1780731661,ROBERT VALET,Allergy & Immunology,,369,345,4230 HARDING PIKE,EAST PLAZA SUITE 703,NASHVILLE,TN,37205,627
1,Maury Regional Medical Center,1558310367,JOHN NORVELL,Allergy & Immunology,,156,88,400 SUGARTREE LN,SUITE 100,FRANKLIN,TN,37064,88
2,Vanderbilt University Medical Center,1518041623,RYSZARD DWORSKI,Allergy & Immunology,,413,289,3601 TVC,,NASHVILLE,TN,37232,335
3,Williamson Medical Center,1649226770,HAROLD MOESSNER,Allergy & Immunology,,169,79,1909 MALLORY LN,SUITE 308,FRANKLIN,TN,37067,151
4,Vanderbilt University Medical Center,1184617441,KENNETH BABE,Allergy & Immunology,Allergy,343,220,3601 THE VANDERBILT CLINIC,,NASHVILLE,TN,37232,343
...,...,...,...,...,...,...,...,...,...,...,...,...,...
452,Maury Regional Medical Center,1710955034,JAMES MARSHALL,Urology,,1051,600,1222 TROTWOOD AVE,SUITE 601,COLUMBIA,TN,38401,1216
453,Nashville General Hosptial,1609052554,KELVIN MOSES,Urology,,374,152,3601 THE VANDERBILT CLINIC,,NASHVILLE,TN,37232,152
454,Vanderbilt University Medical Center,1922107358,SAM CHANG,Urology,,1441,733,3601 TVC,,NASHVILLE,TN,37232,4550
455,Williamson Medical Center,1750382727,JOEL LOCKE,Urology,,747,488,4601 CAROTHERS PKWY,SUITE 475,FRANKLIN,TN,37067,1272


In [45]:
classification_to_hospital = px.treemap(poach3, path=['Classification', 'Specialization', 'Facility_Group'], values='Total_Patients',
                color='Total_Patients', 
                hover_data=['Name', 'Classification', 'Specialization', 'provider_business_address_1',
                           'provider_business_address_2', 'provider_business_city',
                           'provider_business_state', 'provider_business_zip5'],
                color_continuous_scale='Emrld')
classification_to_hospital.show()
classification_to_hospital.write_html("/Users/mattparker/Documents/nss_projects/hcbb_hop_team-3m/notebooks/classification_to_hospital_radiology.html")
