In [3]:
import sqlite3
import pandas as pd
#pd.set_option('display.max_rows', None)

In [4]:
db = sqlite3.connect('Hop_Teaming_2018.sqlite')

## Create indexes for faster access to data for operations that return a small portion of a table's rows

In [46]:
with sqlite3.connect('Hop_Teaming_2018.sqlite') as db: 
    sqliteCursor = db.cursor()
    db.execute('CREATE INDEX IF NOT EXISTS Hop_Teaming_2018_from_npi ON Hop_Teaming_2018 (from_npi)')
    db.execute('CREATE INDEX IF NOT EXISTS Hop_Teaming_2018_to_npi ON Hop_Teaming_2018 (to_npi)')
    db.execute('CREATE UNIQUE INDEX IF NOT EXISTS nppes_npi ON nppes (npi)')
    db.execute('CREATE INDEX IF NOT EXISTS nppes_postal_code ON nppes (provider_business_practice_location_address_postal_code)')
    db.execute('CREATE UNIQUE INDEX IF NOT EXISTS taxonomy_code ON taxonomy (code)')
    db.execute('CREATE UNIQUE INDEX IF NOT EXISTS zip_cbsa_unique ON zip_cbsa (zip,cbsa)')
    db.execute('CREATE INDEX IF NOT EXISTS zip_cbsa_zip ON zip_cbsa (zip)')
    db.execute('CREATE INDEX IF NOT EXISTS zip_cbsa_cbsa ON zip_cbsa (cbsa)')
    db.execute('CREATE INDEX IF NOT EXISTS nppes_organization_name ON nppes (`provider_organization_name_(legal_business_name)`)')

## First, build a profile of providers referring patients to the major hospitals in Nashville. Are certain specialties more likely to refer to a particular hospital over the others?

To get major hospitals:  
1. join zip_cbsa table (cbsa 94180)
2. find the total number of referrals order by DESC
3. select top 10 / 15 entity that received the highest number of patients/referrals by specialization

In [26]:
# Create a new table that is a subset of Hop_Teaming_2018, our MONEY TABLE
query = '''
CREATE TABLE hop_major_TN_hospitals AS
SELECT hop.*
FROM Hop_Teaming_2018 AS hop
INNER JOIN nppes n1
ON hop.from_npi = n1.npi
INNER JOIN nppes n2
ON hop.to_npi = n2.npi 
INNER JOIN taxonomy as t
ON n1.primary_taxonomy = t.code
INNER JOIN taxonomy as t2
ON n2.primary_taxonomy = t2.code
INNER JOIN zip_cbsa as zip
ON n2.provider_business_practice_location_address_postal_code = zip.zip
WHERE n1.entity_type_code = 1
    AND n2.provider_business_practice_location_address_state_name = 'TN'
    AND n2.entity_type_code = 2
    AND t.specialization <> 'None'
    AND t2.classification LIKE '%Hospital'
    AND zip.cbsa = 34980;
'''

with sqlite3.connect('Hop_Teaming_2018.sqlite') as db: 
    db.execute(query)  
with sqlite3.connect('Hop_Teaming_2018.sqlite') as db: 
    db.execute('CREATE INDEX IF NOT EXISTS hop_major_TN_hospitals_from_npi ON hop_major_TN_hospitals (from_npi)')
    db.execute('CREATE INDEX IF NOT EXISTS hop_major_TN_hospitals_to_npi ON hop_major_TN_hospitals (to_npi)')

In [5]:
# View tables in database
con = sqlite3.connect('Hop_Teaming_2018.sqlite') 
cursor = con.cursor()
cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
print(cursor.fetchall())

[('nppes',), ('Hop_Teaming_2018',), ('taxonomy',), ('zip_cbsa',), ('Hop_major_TN_hospitals_res_filter',), ('hop_major_TN_hospitals',), ('hop_neo4j',), ('facility_affiliations',), ('TN_Hospital_Info',)]


## provider_organization_name_(legal_business_name) = organization name (Hospital)

In [17]:
# Find major hospitals
query = '''
SELECT t.specialization, n2.`provider_organization_name_(legal_business_name)`, 
       SUM(hop.transaction_count) AS total_transactions, SUM(hop.patient_count) AS total_patients
FROM Hop_Teaming_2018 AS hop
INNER JOIN nppes n1
ON hop.from_npi = n1.npi
INNER JOIN nppes n2
ON hop.to_npi = n2.npi 
INNER JOIN taxonomy as t
ON n1.primary_taxonomy = t.code
INNER JOIN taxonomy as t2
ON n2.primary_taxonomy = t2.code
INNER JOIN zip_cbsa as zip
ON n2.provider_business_practice_location_address_postal_code = zip.zip
WHERE n1.entity_type_code = 1
    AND n2.provider_business_practice_location_address_state_name = 'TN'
    AND n2.entity_type_code = 2
    AND t.specialization <> 'None'
    AND t2.classification LIKE '%Hospital'
    AND zip.cbsa = 34980
GROUP BY n2.`provider_organization_name_(legal_business_name)`
ORDER BY total_patients DESC;
'''

with sqlite3.connect('Hop_Teaming_2018.sqlite') as db: 
    Class_Hos_sqlite = pd.read_sql(query, db)
    
Class_Hos_sqlite

Unnamed: 0,specialization,provider_organization_name_(legal_business_name),total_transactions,total_patients
0,Family,VANDERBILT UNIVERSITY MEDICAL CENTER,485582,335156
1,Anatomic Pathology & Clinical Pathology,"HCA HEALTH SERVICES OF TENNESSEE, INC.",317471,219785
2,Diagnostic Radiology,SAINT THOMAS WEST HOSPITAL,231948,162537
3,Cardiovascular Disease,MAURY REGIONAL HOSPITAL,137679,92355
4,Cardiovascular Disease,HTI MEMORIAL HOSPITAL CORPORATION,73096,52344
5,Advanced Heart Failure and Transplant Cardiology,SAINT THOMAS RUTHERFORD HOSPITAL,68434,48344
6,Clinical Cardiac Electrophysiology,WILLIAMSON COUNTY HOSPITAL DISTRICT,66423,47434
7,Emergency Medical Services,SUMNER REGIONAL MEDICAL CENTER LLC,56606,37481
8,Family,HENDERSONVILLE HOSPITAL CORPORATION,46960,34818
9,Neurology,CENTRAL TENNESSEE HOSPITAL CORPORATION,29921,23127


In [46]:
#Get Specializations Count by Major Hospitals
query = '''
SELECT n2.`provider_organization_name_(legal_business_name)`, 
       COUNT(DISTINCT(specialization)) AS count_specialization
FROM hop_major_TN_hospitals AS h
INNER JOIN nppes n1
ON h.from_npi = n1.npi
INNER JOIN nppes n2
ON h.to_npi = n2.npi 
INNER JOIN taxonomy as t
ON n1.primary_taxonomy = t.code
GROUP BY n2.`provider_organization_name_(legal_business_name)`
ORDER BY count_specialization DESC;
'''


with sqlite3.connect('Hop_Teaming_2018.sqlite') as db: 
    Major_Hos_specialization = pd.read_sql(query, db)
    
Major_Hos_specialization

Unnamed: 0,provider_organization_name_(legal_business_name),count_specialization
0,VANDERBILT UNIVERSITY MEDICAL CENTER,96
1,SAINT THOMAS WEST HOSPITAL,56
2,"HCA HEALTH SERVICES OF TENNESSEE, INC.",56
3,MAURY REGIONAL HOSPITAL,50
4,WILLIAMSON COUNTY HOSPITAL DISTRICT,43
5,SAINT THOMAS RUTHERFORD HOSPITAL,42
6,HENDERSONVILLE HOSPITAL CORPORATION,38
7,HTI MEMORIAL HOSPITAL CORPORATION,36
8,SUMNER REGIONAL MEDICAL CENTER LLC,35
9,NORTHCREST MEDICAL CENTER,24


In [6]:
# Find Major Hospitals. Same query as above but using the new 'hop_major_TN_hospitals' table to speed up run time. 
query = '''
SELECT n2.`provider_organization_name_(legal_business_name)`, 
       SUM(h.transaction_count) AS total_transactions, SUM(h.patient_count) AS total_patients
FROM hop_major_TN_hospitals AS h
INNER JOIN nppes n2
ON h.to_npi = n2.npi 
GROUP BY n2.`provider_organization_name_(legal_business_name)`
ORDER BY total_patients DESC;
'''

with sqlite3.connect('Hop_Teaming_2018.sqlite') as db: 
    Major_Hos_sqlite = pd.read_sql(query, db)
    
Major_Hos_sqlite

Unnamed: 0,provider_organization_name_(legal_business_name),total_transactions,total_patients
0,VANDERBILT UNIVERSITY MEDICAL CENTER,485582,335156
1,"HCA HEALTH SERVICES OF TENNESSEE, INC.",317471,219785
2,SAINT THOMAS WEST HOSPITAL,231948,162537
3,MAURY REGIONAL HOSPITAL,137679,92355
4,HTI MEMORIAL HOSPITAL CORPORATION,73096,52344
5,SAINT THOMAS RUTHERFORD HOSPITAL,68434,48344
6,WILLIAMSON COUNTY HOSPITAL DISTRICT,66423,47434
7,SUMNER REGIONAL MEDICAL CENTER LLC,56606,37481
8,HENDERSONVILLE HOSPITAL CORPORATION,46960,34818
9,CENTRAL TENNESSEE HOSPITAL CORPORATION,29921,23127


In [34]:
Major_Hos_sqlite.to_csv('Major_Hospitals.csv')

### First, build a profile of providers referring patients to the major hospitals in Nashville

In [65]:
# Unique Specialization Count for Each Major Hospital
query = '''
SELECT  
        CASE WHEN n2.`provider_organization_name_(legal_business_name)` = 'VANDERBILT UNIVERSITY MEDICAL CENTER'
            OR n2.`provider_organization_name_(legal_business_name)` = 'SHELBYVILLE HOSPITAL COMPANY LLC'
            THEN 'Vanderbilt'
        WHEN n2.`provider_organization_name_(legal_business_name)` = 'HCA HEALTH SERVICES OF TENNESSEE, INC.'
            OR n2.`provider_organization_name_(legal_business_name)` = 'HTI MEMORIAL HOSPITAL CORPORATION'
            OR n2.`provider_organization_name_(legal_business_name)` = 'HENDERSONVILLE HOSPITAL CORPORATION'
            OR n2.`provider_organization_name_(legal_business_name)` = 'CENTRAL TENNESSEE HOSPITAL CORPORATION'
            OR n2.`provider_organization_name_(legal_business_name)` = 'NORTHCREST MEDICAL CENTER'
            THEN 'HCA Healthcare'
        WHEN n2.`provider_organization_name_(legal_business_name)` = 'SAINT THOMAS WEST HOSPITAL'
            OR n2.`provider_organization_name_(legal_business_name)` = 'SAINT THOMAS RUTHERFORD HOSPITAL'
            OR n2.`provider_organization_name_(legal_business_name)` = 'SAINT THOMAS RIVER PARK HOSPITAL, LLC'
            OR n2.`provider_organization_name_(legal_business_name)` = 'SAINT THOMAS DEKALB HOSPITAL, LLC'
            OR n2.`provider_organization_name_(legal_business_name)` = 'SAINT THOMAS STONES RIVER HOSPITAL, LLC'
            OR n2.`provider_organization_name_(legal_business_name)` = 'RIVER PARK HOSPITAL LLC'
            THEN 'Ascension'
        WHEN n2.`provider_organization_name_(legal_business_name)` = 'MAURY REGIONAL HOSPITAL'
            OR n2.`provider_organization_name_(legal_business_name)` = 'MAURY REGIONAL HOSPITAL MARSHALL MEDICAL CENTER'
            THEN 'Maury Regional Medical Group'
        WHEN n2.`provider_organization_name_(legal_business_name)` = 'WILLIAMSON COUNTY HOSPITAL DISTRICT'
            THEN 'Williamson Medical Group'
        WHEN n2.`provider_organization_name_(legal_business_name)` = 'SUMNER REGIONAL MEDICAL CENTER LLC'
            OR n2.`provider_organization_name_(legal_business_name)` = 'RIVERVIEW MEDICAL CENTER LLC'
            OR n2.`provider_organization_name_(legal_business_name)` = 'SUMNER REGIONAL HEALTH SYSTEMS, INC'
            OR n2.`provider_organization_name_(legal_business_name)` = 'TROUSDALE MEDICAL CENTER LLC'
            OR n2.`provider_organization_name_(legal_business_name)` = 'SRHS HOLDINGS LLC'
            THEN 'High Point Health System'
        ELSE 'Other'
    END AS parent_organization, 
      COUNT(DISTINCT(t.specialization)) AS specialization_count
FROM hop_major_TN_hospitals AS h
INNER JOIN nppes n1
ON h.from_npi = n1.npi
INNER JOIN nppes n2
ON h.to_npi = n2.npi 
INNER JOIN taxonomy as t
ON n1.primary_taxonomy = t.code
GROUP BY parent_organization
ORDER BY specialization_count DESC;
'''


with sqlite3.connect('Hop_Teaming_2018.sqlite') as db: 
    Unique_Specialization_Count = pd.read_sql(query, db)
    
Unique_Specialization_Count

Unnamed: 0,parent_organization,specialization_count
0,Vanderbilt,97
1,Ascension,65
2,HCA Healthcare,62
3,Maury Regional Medical Group,50
4,Williamson Medical Group,43
5,High Point Health System,37
6,Other,32


In [102]:
# Get popular specializations for Vanderbilt based on total patients referred 
query = '''
SELECT  
        CASE WHEN n2.`provider_organization_name_(legal_business_name)` = 'VANDERBILT UNIVERSITY MEDICAL CENTER'
            OR n2.`provider_organization_name_(legal_business_name)` = 'SHELBYVILLE HOSPITAL COMPANY LLC'
            THEN 'Vanderbilt'
        WHEN n2.`provider_organization_name_(legal_business_name)` = 'HCA HEALTH SERVICES OF TENNESSEE, INC.'
            OR n2.`provider_organization_name_(legal_business_name)` = 'HTI MEMORIAL HOSPITAL CORPORATION'
            OR n2.`provider_organization_name_(legal_business_name)` = 'HENDERSONVILLE HOSPITAL CORPORATION'
            OR n2.`provider_organization_name_(legal_business_name)` = 'CENTRAL TENNESSEE HOSPITAL CORPORATION'
            OR n2.`provider_organization_name_(legal_business_name)` = 'NORTHCREST MEDICAL CENTER'
            THEN 'HCA Healthcare'
        WHEN n2.`provider_organization_name_(legal_business_name)` = 'SAINT THOMAS WEST HOSPITAL'
            OR n2.`provider_organization_name_(legal_business_name)` = 'SAINT THOMAS RUTHERFORD HOSPITAL'
            OR n2.`provider_organization_name_(legal_business_name)` = 'SAINT THOMAS RIVER PARK HOSPITAL, LLC'
            OR n2.`provider_organization_name_(legal_business_name)` = 'SAINT THOMAS DEKALB HOSPITAL, LLC'
            OR n2.`provider_organization_name_(legal_business_name)` = 'SAINT THOMAS STONES RIVER HOSPITAL, LLC'
            OR n2.`provider_organization_name_(legal_business_name)` = 'RIVER PARK HOSPITAL LLC'
            THEN 'Ascension'
        WHEN n2.`provider_organization_name_(legal_business_name)` = 'MAURY REGIONAL HOSPITAL'
            OR n2.`provider_organization_name_(legal_business_name)` = 'MAURY REGIONAL HOSPITAL MARSHALL MEDICAL CENTER'
            THEN 'Maury Regional Medical Group'
        WHEN n2.`provider_organization_name_(legal_business_name)` = 'WILLIAMSON COUNTY HOSPITAL DISTRICT'
            THEN 'Williamson Medical Group'
        WHEN n2.`provider_organization_name_(legal_business_name)` = 'SUMNER REGIONAL MEDICAL CENTER LLC'
            OR n2.`provider_organization_name_(legal_business_name)` = 'RIVERVIEW MEDICAL CENTER LLC'
            OR n2.`provider_organization_name_(legal_business_name)` = 'SUMNER REGIONAL HEALTH SYSTEMS, INC'
            OR n2.`provider_organization_name_(legal_business_name)` = 'TROUSDALE MEDICAL CENTER LLC'
            OR n2.`provider_organization_name_(legal_business_name)` = 'SRHS HOLDINGS LLC'
            THEN 'High Point Health System'
        ELSE 'Other'
    END AS parent_organization, t.specialization, SUM(h.patient_count) AS total_patients
FROM hop_major_TN_hospitals AS h
INNER JOIN nppes n1
ON h.from_npi = n1.npi
INNER JOIN nppes n2
ON h.to_npi = n2.npi 
INNER JOIN taxonomy as t
ON n1.primary_taxonomy = t.code
WHERE parent_organization LIKE '%Van%'
GROUP BY t.specialization
ORDER BY total_patients DESC;
'''

#COUNT(DISTINCT(t.specialization)) AS specialization_count
#GROUP BY parent_organization
#ORDER BY specialization_count DESC

with sqlite3.connect('Hop_Teaming_2018.sqlite') as db: 
    Vandy_Specialization_Count = pd.read_sql(query, db)
    
Vandy_Specialization_Count.head(50)

Unnamed: 0,parent_organization,specialization,total_patients
0,Vanderbilt,Diagnostic Radiology,86490
1,Vanderbilt,Cardiovascular Disease,59378
2,Vanderbilt,Family,17454
3,Vanderbilt,Acute Care,13753
4,Vanderbilt,Anatomic Pathology & Clinical Pathology,12474
5,Vanderbilt,Nephrology,10324
6,Vanderbilt,Hematology & Oncology,10161
7,Vanderbilt,Neurology,9686
8,Vanderbilt,Gastroenterology,7869
9,Vanderbilt,Adult Health,6587


In [101]:
pd.set_option('display.max_rows', None)

In [66]:
# Specialization to Major Hospitals
query = '''
SELECT t.specialization, n2.`provider_organization_name_(legal_business_name)`, 
       SUM(h.transaction_count) AS total_transactions, SUM(h.patient_count) AS total_patients
FROM hop_major_TN_hospitals AS h
INNER JOIN nppes n1
ON h.from_npi = n1.npi
INNER JOIN nppes n2
ON h.to_npi = n2.npi 
INNER JOIN taxonomy as t
ON n1.primary_taxonomy = t.code
GROUP BY t.specialization, n2.`provider_organization_name_(legal_business_name)`
ORDER BY total_patients DESC;
'''

with sqlite3.connect('Hop_Teaming_2018.sqlite') as db: 
    Specialization_to_Hos_sqlite = pd.read_sql(query, db)
    
Specialization_to_Hos_sqlite.head()

Unnamed: 0,specialization,provider_organization_name_(legal_business_name),total_transactions,total_patients
0,Diagnostic Radiology,"HCA HEALTH SERVICES OF TENNESSEE, INC.",101364,94621
1,Diagnostic Radiology,VANDERBILT UNIVERSITY MEDICAL CENTER,89513,81764
2,Diagnostic Radiology,SAINT THOMAS WEST HOSPITAL,74059,65059
3,Cardiovascular Disease,VANDERBILT UNIVERSITY MEDICAL CENTER,77918,58410
4,Diagnostic Radiology,MAURY REGIONAL HOSPITAL,44284,38907


In [38]:
Specialization_to_Hos_sqlite.to_csv('Specialization_to_Major_Hospitals.csv')

### Are certain specialties more likely to refer to a particular hospital over the others?

In [16]:
# Get popularity ratio for each major hospital within a given specialization
# Group by specialization and organization name
query = '''
WITH specialization AS (
    SELECT t.specialization, SUM(h.transaction_count) AS total_transactions, SUM(h.patient_count) AS total_patients
    FROM hop_major_TN_hospitals AS h
    INNER JOIN nppes n1
    ON h.from_npi = n1.npi
    INNER JOIN taxonomy as t
    ON n1.primary_taxonomy = t.code
    GROUP BY t.specialization
)
SELECT t.specialization, n2.`provider_organization_name_(legal_business_name)`, 
       SUM(h.transaction_count) AS total_transactions, SUM(h.patient_count) AS total_patients,
       s.total_patients AS specialization_total_patients_ref,
       SUM(h.patient_count)*1.0/s.total_patients AS specialization_referral_ratio
FROM hop_major_TN_hospitals AS h
INNER JOIN nppes n1
ON h.from_npi = n1.npi
INNER JOIN nppes n2
ON h.to_npi = n2.npi 
INNER JOIN taxonomy as t
ON n1.primary_taxonomy = t.code
INNER JOIN specialization AS s
ON s.specialization = t.specialization 
GROUP BY t.specialization, n2.`provider_organization_name_(legal_business_name)`
ORDER BY t.specialization, specialization_referral_ratio DESC;
'''


with sqlite3.connect('Hop_Teaming_2018.sqlite') as db: 
    popularity_ratio_sqlite = pd.read_sql(query, db)
    
popularity_ratio_sqlite.head()

Unnamed: 0,specialization,provider_organization_name_(legal_business_name),total_transactions,total_patients,specialization_total_patients_ref,specialization_referral_ratio
0,Acute Care,VANDERBILT UNIVERSITY MEDICAL CENTER,19828,13572,24337,0.557669
1,Acute Care,SAINT THOMAS WEST HOSPITAL,5989,4021,24337,0.165222
2,Acute Care,"HCA HEALTH SERVICES OF TENNESSEE, INC.",4525,3149,24337,0.129391
3,Acute Care,SAINT THOMAS RUTHERFORD HOSPITAL,1230,913,24337,0.037515
4,Acute Care,HTI MEMORIAL HOSPITAL CORPORATION,1192,784,24337,0.032214


## Solution 

Some specialties exclusively refer to one entity only, others such as ... are more likely to refer to a particular hospital over the others more than half the time. 

In [104]:
#retrieve max ratio for each specialization 
popularity_ratio_sqlite.groupby('specialization')['provider_organization_name_(legal_business_name)', 'specialization_referral_ratio', 'specialization_total_patients_ref'].max().sort_values(by = 'specialization_referral_ratio', ascending = False).head()

  popularity_ratio_sqlite.groupby('specialization')['provider_organization_name_(legal_business_name)', 'specialization_referral_ratio', 'specialization_total_patients_ref'].max().sort_values(by = 'specialization_referral_ratio', ascending = False).head()


Unnamed: 0_level_0,provider_organization_name_(legal_business_name),specialization_referral_ratio,specialization_total_patients_ref
specialization,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Neurocritical Care,VANDERBILT UNIVERSITY MEDICAL CENTER,1.0,91
Obstetrics,MAURY REGIONAL HOSPITAL,1.0,94
Pediatric Nephrology,VANDERBILT UNIVERSITY MEDICAL CENTER,1.0,19
Oral and Maxillofacial Surgery,HTI MEMORIAL HOSPITAL CORPORATION,1.0,38
Diagnostic Neuroimaging,"MACON COUNTY GENERAL HOSPITAL, INC.",1.0,497


In [22]:
popularity_ratio_sqlite.to_csv('most_referred_hos_by_specialization.csv')

## Additional

In [18]:
# Get popularity ratio for each major hospital within a given specialization
# Group by specialization and parent company

query = '''
WITH specialization AS (
    SELECT t.specialization, SUM(h.transaction_count) AS total_transactions, SUM(h.patient_count) AS total_patients
    FROM hop_major_TN_hospitals AS h
    INNER JOIN nppes n1
    ON h.from_npi = n1.npi
    INNER JOIN taxonomy as t
    ON n1.primary_taxonomy = t.code
    GROUP BY t.specialization
)
SELECT t.specialization, 
        CASE WHEN n2.`provider_organization_name_(legal_business_name)` = 'VANDERBILT UNIVERSITY MEDICAL CENTER'
            OR n2.`provider_organization_name_(legal_business_name)` = 'SHELBYVILLE HOSPITAL COMPANY LLC'
            THEN 'Vanderbilt'
        WHEN n2.`provider_organization_name_(legal_business_name)` = 'HCA HEALTH SERVICES OF TENNESSEE, INC.'
            OR n2.`provider_organization_name_(legal_business_name)` = 'HTI MEMORIAL HOSPITAL CORPORATION'
            OR n2.`provider_organization_name_(legal_business_name)` = 'HENDERSONVILLE HOSPITAL CORPORATION'
            OR n2.`provider_organization_name_(legal_business_name)` = 'CENTRAL TENNESSEE HOSPITAL CORPORATION'
            OR n2.`provider_organization_name_(legal_business_name)` = 'NORTHCREST MEDICAL CENTER'
            THEN 'HCA Healthcare'
        WHEN n2.`provider_organization_name_(legal_business_name)` = 'SAINT THOMAS WEST HOSPITAL'
            OR n2.`provider_organization_name_(legal_business_name)` = 'SAINT THOMAS RUTHERFORD HOSPITAL'
            OR n2.`provider_organization_name_(legal_business_name)` = 'SAINT THOMAS RIVER PARK HOSPITAL, LLC'
            OR n2.`provider_organization_name_(legal_business_name)` = 'SAINT THOMAS DEKALB HOSPITAL, LLC'
            OR n2.`provider_organization_name_(legal_business_name)` = 'SAINT THOMAS STONES RIVER HOSPITAL, LLC'
            OR n2.`provider_organization_name_(legal_business_name)` = 'RIVER PARK HOSPITAL LLC'
            THEN 'Ascension'
        WHEN n2.`provider_organization_name_(legal_business_name)` = 'MAURY REGIONAL HOSPITAL'
            OR n2.`provider_organization_name_(legal_business_name)` = 'MAURY REGIONAL HOSPITAL MARSHALL MEDICAL CENTER'
            THEN 'Maury Regional Medical Group'
        WHEN n2.`provider_organization_name_(legal_business_name)` = 'WILLIAMSON COUNTY HOSPITAL DISTRICT'
            THEN 'Williamson Medical Group'
        WHEN n2.`provider_organization_name_(legal_business_name)` = 'SUMNER REGIONAL MEDICAL CENTER LLC'
            OR n2.`provider_organization_name_(legal_business_name)` = 'RIVERVIEW MEDICAL CENTER LLC'
            OR n2.`provider_organization_name_(legal_business_name)` = 'SUMNER REGIONAL HEALTH SYSTEMS, INC'
            OR n2.`provider_organization_name_(legal_business_name)` = 'TROUSDALE MEDICAL CENTER LLC'
            OR n2.`provider_organization_name_(legal_business_name)` = 'SRHS HOLDINGS LLC'
            THEN 'High Point Health System'
        ELSE 'Other'
    END AS parent_organization, 
       SUM(h.transaction_count) AS total_transactions, SUM(h.patient_count) AS total_patients,
       s.total_patients AS specialization_total_patients_ref,
       SUM(h.patient_count)*1.0/s.total_patients AS specialization_referral_ratio
FROM hop_major_TN_hospitals AS h
INNER JOIN nppes n1
ON h.from_npi = n1.npi
INNER JOIN nppes n2
ON h.to_npi = n2.npi 
INNER JOIN taxonomy as t
ON n1.primary_taxonomy = t.code
INNER JOIN specialization AS s
ON s.specialization = t.specialization 
GROUP BY t.specialization, parent_organization
ORDER BY t.specialization, specialization_referral_ratio DESC;
'''


with sqlite3.connect('Hop_Teaming_2018.sqlite') as db: 
    parent_popularity_ratio_sqlite = pd.read_sql(query, db)
    
parent_popularity_ratio_sqlite.head()

Unnamed: 0,specialization,parent_organization,total_transactions,total_patients,specialization_total_patients_ref,specialization_referral_ratio
0,Acute Care,Vanderbilt,20031,13753,24337,0.565107
1,Acute Care,Ascension,7219,4934,24337,0.202737
2,Acute Care,HCA Healthcare,6480,4566,24337,0.187616
3,Acute Care,Maury Regional Medical Group,1169,763,24337,0.031351
4,Acute Care,High Point Health System,321,228,24337,0.009368
