In [46]:
import sqlite3
from contextlib import closing

import pandas as pd
import plotly.express as px
import plotly.io as pio

In [23]:
# Load every organization-type provider (entity_type_code 2) in Tennessee whose
# ZIP maps to CBSA 34980 and whose taxonomy classification ends in "Hospital",
# skipping organization names that contain "PAY".
# NOTE(review): 34980 is presumably the Nashville-area CBSA (the rows returned
# are Nashville / Franklin / Murfreesboro addresses) -- confirm.
query = """
SELECT *
FROM nnpes AS np
INNER JOIN nucc_taxonomy AS nt
    ON np.primary_taxonomy = nt.code
INNER JOIN zip_cbsa AS zc
    ON np.postal_code = zc.zip
WHERE entity_type_code = 2.0
    AND state = 'TN'
    AND cbsa = 34980
    AND classification LIKE '%Hospital'
    AND organization_name NOT LIKE '%PAY%'
"""

# `with sqlite3.connect(...)` on its own only commits/rolls back the
# transaction; it does NOT close the connection. `closing()` guarantees the
# database handle is released when the cell finishes, even on error.
with closing(sqlite3.connect('../data/hop_teaming_database.sqlite')) as db:
    hospitals = pd.read_sql(query, db)

hospitals

Unnamed: 0,npi,entity_type_code,organization_name,last_name,provider_first_name,provider_middle_name,name_prefix,name_suffix,provider_credential_text,provider_first_line_business_practice_location_address,...,display_name,section,zip,cbsa,usps_zip_pref_city,usps_zip_pref_state,res_ratio,bus_ratio,oth_ratio,tot_ratio
0,1700983236,2.0,"CANNON COUNTY HOSPITAL, LLC",,,,,,,520 W MAIN ST,...,General Acute Care Hospital,Non-Individual,37166,34980,SMITHVILLE,TN,0.009600,0.00000,0.0,0.008709
1,1578693545,2.0,FRANK T RUTHERFORD HOSPITAL INC,,,,,,,130 LEBANON HWY,...,General Acute Care Hospital,Non-Individual,37030,34980,CARTHAGE,TN,0.999715,1.00000,1.0,0.999759
2,1609056399,2.0,VANDERBILT CHILDREN'S,,,,,,,1702 THE VANDERBILT CLINIC,...,Children's Hospital,Non-Individual,37232,34980,NASHVILLE,TN,0.000000,1.00000,1.0,1.000000
3,1861675647,2.0,RIVER REGION HOSPITAL,,,,,,,49 MUSIC SQ W,...,Psychiatric Hospital,Non-Individual,37203,34980,NASHVILLE,TN,1.000000,1.00000,1.0,1.000000
4,1265445506,2.0,WILLIAMSON COUNTY HOSPITAL DISTRICT,,,,,,,4321 CAROTHERS PARKWAY,...,General Acute Care Hospital,Non-Individual,37067,34980,FRANKLIN,TN,1.000000,1.00000,1.0,1.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
136,1821350349,2.0,"TRUSTPOINT HOSPITAL, LLC",,,,,,,1009 N THOMPSON LN,...,Rehabilitation Hospital,Non-Individual,37129,34980,MURFREESBORO,TN,1.000000,1.00000,1.0,1.000000
137,1922463900,2.0,"NATIONAL BIRTH CENTERS, INC.",,,,,,,115 CREEKVIEW RD,...,General Acute Care Hospital,Non-Individual,38483,34980,SUMMERTOWN,TN,0.025432,0.02381,0.0,0.025210
138,1023747797,2.0,PRIVATE HEALTHCARE FACILITIES,,,,,,,115 CREEKVIEW RD,...,General Acute Care Hospital,Non-Individual,38483,34980,SUMMERTOWN,TN,0.025432,0.02381,0.0,0.025210
139,1447762505,2.0,"NATIONAL BIRTH CENTERS, INC.",,,,,,,80 OAK VALLEY DR,...,General Acute Care Hospital,Non-Individual,37174,34980,SPRING HILL,TN,0.998723,1.00000,1.0,0.998809


In [24]:
# NPIs of the hospitals selected above; `.tolist()` yields native Python ints,
# which sqlite3 can bind directly as query parameters.
var = tuple(hospitals['npi'].tolist())

# Bind the NPIs through `?` placeholders instead of pasting the tuple's repr
# into the SQL text: a one-element tuple renders as "(123,)", which is not
# valid SQL, and parameter binding avoids string-built SQL altogether.
# NOTE: SQLite caps the number of bound parameters per statement (999 on
# builds older than 3.32); chunk the list if it ever grows past that.
placeholders = ', '.join('?' * len(var))

query = f"""
SELECT DISTINCT organization_name
FROM hop_teaming ht
INNER JOIN nnpes np
    ON ht.to_npi = np.npi
WHERE to_npi IN ({placeholders})
"""

# `closing()` makes sure the connection is actually closed; sqlite3's own
# context manager only manages the transaction.
with closing(sqlite3.connect('../data/hop_teaming_database.sqlite')) as db:
    hop_hospitals = pd.read_sql(query, db, params=var)

hop_hospitals

Unnamed: 0,organization_name
0,SAINT THOMAS WEST HOSPITAL
1,"HCA HEALTH SERVICES OF TENNESSEE, INC."
2,VANDERBILT UNIVERSITY MEDICAL CENTER
3,MAURY REGIONAL HOSPITAL
4,"SAINT THOMAS STONES RIVER HOSPITAL, LLC"
5,"SAINT THOMAS RIVER PARK HOSPITAL, LLC"
6,RIVERVIEW MEDICAL CENTER LLC
7,NASHVILLE GENERAL HOSPITAL
8,SAINT THOMAS RUTHERFORD HOSPITAL
9,SUMNER REGIONAL MEDICAL CENTER LLC


In [27]:
# Total patients and transactions for every (from_npi, to_npi) pair in
# hop_teaming whose receiving NPI is one of the hospitals selected by the CTE
# (same filter as the hospital lookup: TN, CBSA 34980, entity type 2,
# classification ending in "Hospital", organization name not containing "PAY").
query = """
WITH hospitals AS (
    SELECT DISTINCT npi
    FROM nnpes AS np
    INNER JOIN nucc_taxonomy AS nt
        ON np.primary_taxonomy = nt.code
    INNER JOIN zip_cbsa AS zc
        ON np.postal_code = zc.zip
    WHERE entity_type_code = 2.0
        AND state = 'TN'
        AND cbsa = 34980
        AND classification LIKE '%Hospital'
        AND organization_name NOT LIKE '%PAY%'
)
SELECT
    from_npi,
    to_npi,
    SUM(patient_count) AS patient_total,
    SUM(transaction_count) AS transaction_total
FROM hop_teaming
WHERE to_npi IN (SELECT npi FROM hospitals)
GROUP BY from_npi, to_npi
"""

# `closing()` releases the database handle when the block exits; sqlite3's own
# context manager would leave the connection open.
with closing(sqlite3.connect('../data/hop_teaming_database.sqlite')) as db:
    providers_to_hospitals = pd.read_sql(query, db)

providers_to_hospitals

Unnamed: 0,from_npi,to_npi,patient_total,transaction_total
0,1003013160,1629025648,33,51
1,1003013947,1023055126,99,279
2,1003019902,1396882205,215,339
3,1003019902,1558408633,56,62
4,1003028770,1861479545,1905,2649
...,...,...,...,...
14026,1992972087,1720032345,200,346
14027,1992985055,1942442710,122,164
14028,1992985055,1992776405,71,113
14029,1992985949,1023055126,277,767


In [31]:
# For each individual provider (entity type 1) with a taxonomy specialization,
# total the hop_teaming transactions and patients sent to Tennessee hospitals
# in CBSA 34980, rolled up by the hospital's parent organization.
#
# * provider_referrals: individual providers and their specialization; rows
#   whose specialization is the literal string 'None' are dropped.
# * major_hospitals: hospital NPIs labelled with a hand-maintained mapping from
#   organization_name to a parent system; unmapped names become 'other'.
#
# NOTE(review): unlike the other hospital queries in this notebook, this CTE
# does not exclude organization names containing 'PAY' -- confirm that this is
# intentional.
query = '''
WITH provider_referrals AS (
    SELECT
        n1.npi,
        t1.specialization
    FROM nnpes n1
    INNER JOIN nucc_taxonomy AS t1
        ON n1.primary_taxonomy = t1.code
    WHERE n1.entity_type_code = 1
        AND t1.specialization <> 'None'
),
major_hospitals AS (
    SELECT
        n2.npi,
        n2.organization_name,
        CASE
            WHEN n2.organization_name IN (
                'VANDERBILT UNIVERSITY MEDICAL CENTER',
                'SHELBYVILLE HOSPITAL COMPANY LLC'
            ) THEN 'Vanderbilt'
            WHEN n2.organization_name IN (
                'HCA HEALTH SERVICES OF TENNESSEE, INC.',
                'HTI MEMORIAL HOSPITAL CORPORATION',
                'HENDERSONVILLE HOSPITAL CORPORATION',
                'CENTRAL TENNESSEE HOSPITAL CORPORATION',
                'NORTHCREST MEDICAL CENTER'
            ) THEN 'HCA Healthcare'
            WHEN n2.organization_name IN (
                'SAINT THOMAS WEST HOSPITAL',
                'SAINT THOMAS RUTHERFORD HOSPITAL',
                'SAINT THOMAS RIVER PARK HOSPITAL, LLC',
                'SAINT THOMAS DEKALB HOSPITAL, LLC',
                'SAINT THOMAS STONES RIVER HOSPITAL, LLC',
                'RIVER PARK HOSPITAL LLC'
            ) THEN 'Ascension'
            WHEN n2.organization_name IN (
                'MAURY REGIONAL HOSPITAL',
                'MAURY REGIONAL HOSPITAL MARSHALL MEDICAL CENTER'
            ) THEN 'Maury Regional Medical Group'
            WHEN n2.organization_name IN (
                'WILLIAMSON COUNTY HOSPITAL DISTRICT'
            ) THEN 'Williamson Medical Group'
            WHEN n2.organization_name IN (
                'SUMNER REGIONAL MEDICAL CENTER LLC',
                'RIVERVIEW MEDICAL CENTER LLC',
                'SUMNER REGIONAL HEALTH SYSTEMS, INC',
                'TROUSDALE MEDICAL CENTER LLC',
                'SRHS HOLDINGS LLC'
            ) THEN 'High Point Health System'
            ELSE 'other'
        END AS parent_org
    FROM nnpes n2
    INNER JOIN nucc_taxonomy AS t2
        ON n2.primary_taxonomy = t2.code
    INNER JOIN zip_cbsa AS zc
        ON n2.postal_code = zc.zip
    WHERE n2.state = 'TN'
        AND n2.entity_type_code = 2
        AND t2.classification LIKE '%Hospital'
        AND zc.cbsa = 34980
)
SELECT
    pr.npi AS from_npi,
    pr.specialization,
    mh.parent_org,
    SUM(hop.transaction_count) AS total_transactions,
    SUM(hop.patient_count) AS total_patients
FROM hop_teaming AS hop
INNER JOIN provider_referrals pr
    ON hop.from_npi = pr.npi
INNER JOIN major_hospitals mh
    ON hop.to_npi = mh.npi
GROUP BY pr.npi, pr.specialization, mh.parent_org
ORDER BY total_patients DESC;
'''

# `closing()` guarantees the connection is closed; sqlite3's own context
# manager only wraps the transaction and leaves the handle open.
with closing(sqlite3.connect('../data/hop_teaming_database.sqlite')) as db:
    providers_to_major = pd.read_sql(query, db)

providers_to_major

Unnamed: 0,from_npi,specialization,parent_org,total_transactions,total_patients
0,1093753303,Anatomic Pathology & Clinical Pathology,HCA Healthcare,26785,15029
1,1417131715,Interventional Cardiology,HCA Healthcare,9912,6360
2,1912984758,Diagnostic Radiology,HCA Healthcare,5050,4648
3,1871548818,Diagnostic Radiology,HCA Healthcare,5124,4547
4,1134321235,Diagnostic Radiology,Ascension,5713,4526
...,...,...,...,...,...
3547,1801301999,Acute Care,Vanderbilt,58,17
3548,1861571168,Clinical,Vanderbilt,50,17
3549,1104808641,Nephrology,Vanderbilt,57,16
3550,1144287772,Psychiatry,Maury Regional Medical Group,102,14


In [32]:
# Collapse the per-provider rows to one row per (specialization, parent_org),
# summing transactions and patients, ordered by specialization and then by
# descending patient total.
#
# The summed columns are selected explicitly: GroupBy.sum() takes
# `numeric_only` as its first positional argument, so passing a column list to
# it (as `.sum([...])`) only worked because a non-empty list is truthy.
# Selecting the two measures also means from_npi never enters the aggregation,
# so it no longer has to be dropped first.
special_referrals = (
    providers_to_major
        .groupby(['specialization', 'parent_org'])[['total_transactions', 'total_patients']]
        .sum()
        .sort_values(['specialization', 'total_patients'], ascending=[True, False])
        .reset_index()
)

In [33]:
# Share of each specialization's patients that went to each parent
# organization: the row's patient total divided by the total for its
# specialization across all parent organizations.
patients_per_specialization = (
    special_referrals
        .groupby('specialization')['total_patients']
        .transform('sum')
)
special_referrals['patients_ratio'] = (
    special_referrals['total_patients'].div(patients_per_specialization)
)

In [34]:
# For every specialization keep the parent organization(s) holding the largest
# patient share, then list them from the most to the least dominant share.
# Ties are kept, so a specialization can appear more than once.
#
# 'max' is passed as a string alias: handing the builtin `max` to
# GroupBy.transform is deprecated in recent pandas releases.
largest_share = (
    special_referrals
        .groupby('specialization')['patients_ratio']
        .transform('max')
)

(
    special_referrals
        .loc[special_referrals['patients_ratio'] == largest_share]
        .sort_values('patients_ratio', ascending=False)
)

Unnamed: 0,specialization,parent_org,total_transactions,total_patients,patients_ratio
310,"Psychiatric/Mental Health, Adult",Vanderbilt,53,47,1.000000
78,Clinical Genetics (M.D.),Vanderbilt,66,53,1.000000
251,Obstetrics,Maury Regional Medical Group,134,94,1.000000
82,Corneal and Contact Management,Vanderbilt,93,71,1.000000
83,Counseling,Ascension,91,55,1.000000
...,...,...,...,...,...
318,Pulmonary Disease,HCA Healthcare,28458,11540,0.332191
224,Nephrology,Vanderbilt,20436,10324,0.312564
114,Family,Vanderbilt,25172,17454,0.295961
138,Gastroenterology,Vanderbilt,11407,7869,0.293773


Sunburst Charts

In [53]:
# Count, per parent organization, the specializations in which it holds the
# largest patient share (ties count for every tied organization). The result
# has columns `parent` (organization) and `value` (number of specializations),
# sorted by ascending count.
#
# 'max' is passed as a string alias because handing the builtin `max` to
# GroupBy.transform is deprecated in recent pandas releases. No sort is done
# before the groupby: row order does not affect the per-group counts.
leading_share = (
    special_referrals
        .groupby('specialization')['patients_ratio']
        .transform('max')
)

sunburst_data = (
    special_referrals
        .loc[special_referrals['patients_ratio'] == leading_share]
        .groupby('parent_org')['specialization']
        .count()
        .sort_values()
        .reset_index()
        .rename(columns={'parent_org': 'parent', 'specialization': 'value'})
)


In [54]:
# Parent organizations left out of the chart.
exclude_list = [
    'other',
    'Williamson Medical Group',
    'High Point Health System',
]

# Keep only the rows whose `parent` is not in the exclusion list.
is_charted_parent = ~sunburst_data['parent'].isin(exclude_list)
sunburst_data_filtered = sunburst_data.loc[is_charted_parent]

In [55]:
# Sunburst of parent organizations: the inner ring is the organization, the
# outer ring is its count (`value`), and both size and colour follow that
# count.
parent_colour_scale = [
    [0, '#005b96'],
    [0.25, '#6497b1'],
    [0.5, '#c3d7e2'],
    [0.75, '#e8c547'],
    [1, '#d90429'],
]

fig = px.sunburst(
    sunburst_data_filtered,
    path=['parent', 'value'],
    values='value',
    color='value',
    color_continuous_scale=parent_colour_scale,
    custom_data=['parent', 'value'],
)

# Hover text: organization name in bold, then "<label>: <count>".
fig.update_traces(hovertemplate='<b>%{customdata[0]}</b><br>%{label}: %{customdata[1]}')

fig.update_layout(
    font={'family': 'Arial, sans-serif', 'size': 16, 'color': '#7f7f7f'},
    margin={'l': 20, 'r': 20, 't': 50, 'b': 20},
    width=800,
)

fig.show()

# To export the chart as a standalone HTML file, uncomment:
# pio.write_html(fig, 'sunburst_chart.html')



The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.


The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.



Sunburst Chart: Top Specializations Among Providers With No Vanderbilt Referrals

In [67]:
# NPIs that have at least one row attributed to the 'Vanderbilt' parent org.
npis_with_vanderbilt_rows = providers_to_major.loc[
    providers_to_major['parent_org'] == 'Vanderbilt', 'from_npi'
].unique()

# Rows for providers that never appear against Vanderbilt.
rows_without_vanderbilt = providers_to_major.loc[
    ~providers_to_major['from_npi'].isin(npis_with_vanderbilt_rows)
]

# Five specializations with the most distinct providers in that group.
(
    rows_without_vanderbilt
        .groupby('specialization')['from_npi']
        .nunique()
        .sort_values(ascending=False)
        .head(5)
)


specialization
Family                    270
Diagnostic Radiology      129
Cardiovascular Disease     64
Acute Care                 59
Pulmonary Disease          54
Name: from_npi, dtype: int64

In [71]:
# Drop every provider that has at least one row attributed to the 'Vanderbilt'
# parent organization; what remains are providers never linked to Vanderbilt.
vanderbilt_linked_npis = providers_to_major.loc[
    providers_to_major['parent_org'] == 'Vanderbilt', 'from_npi'
].unique()
providers_to_major_filtered = providers_to_major.loc[
    ~providers_to_major['from_npi'].isin(vanderbilt_linked_npis)
]

# Distinct provider count per specialization, keeping the five largest.
top_specialization_counts = (
    providers_to_major_filtered
        .groupby('specialization')['from_npi']
        .nunique()
        .sort_values(ascending=False)
        .reset_index(name='value')
        .head(5)
)

specialization_colour_scale = [
    [0, '#005b96'],
    [0.25, '#6497b1'],
    [0.5, '#c3d7e2'],
    [0.75, '#e8c547'],
    [1, '#d90429'],
]

# Single-ring sunburst: one wedge per specialization, sized and coloured by
# the number of distinct providers.
fig = px.sunburst(
    top_specialization_counts,
    path=['specialization'],
    values='value',
    color='value',
    color_continuous_scale=specialization_colour_scale,
    custom_data=['specialization', 'value'],
)

# Hover text: specialization in bold, then "<label>: <count>".
fig.update_traces(hovertemplate='<b>%{customdata[0]}</b><br>%{label}: %{customdata[1]}')

fig.update_layout(
    font={'family': 'Arial, sans-serif', 'size': 16, 'color': '#7f7f7f'},
    margin={'l': 20, 'r': 20, 't': 50, 'b': 20},
    width=800,
)

fig.show()



The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.

