In [6]:
import sqlite3
import pandas as pd
pd.set_option('display.max_rows', None)

In [7]:
db = sqlite3.connect('Hop_Teaming_2018.sqlite')

In [9]:
with sqlite3.connect('Hop_Teaming_2018.sqlite') as db: 
    sqliteCursor = db.cursor()
    db.execute('CREATE INDEX IF NOT EXISTS Hop_Teaming_2018_from_npi ON Hop_Teaming_2018 (from_npi)')
    db.execute('CREATE INDEX IF NOT EXISTS Hop_Teaming_2018_to_npi ON Hop_Teaming_2018 (to_npi)')
    db.execute('CREATE UNIQUE INDEX IF NOT EXISTS nppes_npi ON nppes (npi)')
    db.execute('CREATE UNIQUE INDEX IF NOT EXISTS taxonomy_code ON taxonomy (code)')
    db.execute('CREATE UNIQUE INDEX IF NOT EXISTS zip_cbsa_unique ON zip_cbsa (zip,cbsa)')
    db.execute('CREATE INDEX IF NOT EXISTS zip_cbsa_zip ON zip_cbsa (zip)')
    db.execute('CREATE INDEX IF NOT EXISTS nppes_organization_name ON nppes (`provider_organization_name_(legal_business_name)`)')

## First, build a profile of providers referring patients to the major hospitals in Nashville. Are certain specialties more likely to refer to a particular hospital over the others?

To get major hospitals:  
1. join zip_cbsa table (cbsa 94180)
2. find the total number of referrals order by DESC
3. select top 10 / 15 entity with highest number of referrals

EXPLAIN QUERY PLAN - show estimated query run time 

In [5]:
# Results without t2.classification LIKE '%Hospital'
query = '''
SELECT t.specialization, n2.`provider_organization_name_(legal_business_name)`, 
       SUM(hop.transaction_count) AS total_transactions, SUM(hop.patient_count) AS total_patients
FROM Hop_Teaming_2018 AS hop
INNER JOIN nppes n1
ON hop.from_npi = n1.npi
INNER JOIN nppes n2
ON hop.to_npi = n2.npi 
INNER JOIN taxonomy as t
ON n1.primary_taxonomy = t.code
INNER JOIN zip_cbsa as zip
ON n2.provider_business_practice_location_address_postal_code = zip.zip
WHERE n1.entity_type_code = 1
    AND n2.provider_business_practice_location_address_state_name = 'TN'
    AND n2.entity_type_code = 2
    AND t.specialization <> 'None'
    AND zip.cbsa = 34980
    AND zip.res_ratio >= 0.5
GROUP BY t.specialization, n2.`provider_organization_name_(legal_business_name)`
ORDER BY total_patients DESC
LIMIT 100;
'''

with sqlite3.connect('Hop_Teaming_2018.sqlite') as db: 
    cbsa_34980_sqlite = pd.read_sql(query, db)
    
cbsa_34980_sqlite

Unnamed: 0,specialization,provider_organization_name_(legal_business_name),total_transactions,total_patients
0,Anatomic Pathology & Clinical Pathology,"ASSOCIATED PATHOLOGISTS, LLC",358401,241879
1,Anatomic Pathology & Clinical Pathology,ASSOCIATED PATHOLOGISTS LLC,304988,197323
2,Diagnostic Radiology,RADIOLOGY ALLIANCE PC,201363,178421
3,Anatomic Pathology & Clinical Pathology,"PATHGROUP LABS, LLC",212407,120418
4,Diagnostic Radiology,"ADVANCED DIAGNOSTIC IMAGING, PC",136787,119829
5,Diagnostic Radiology,SAINT THOMAS MEDICAL PARTNERS,118707,103727
6,Diagnostic Radiology,"HCA HEALTH SERVICES OF TENNESSEE, INC.",102504,95717
7,Diagnostic Radiology,TENNESSEE ONCOLOGY PLLC,106262,94586
8,Diagnostic Radiology,CENTENNIAL HEART LLC,76340,71601
9,Cardiovascular Disease,SAINT THOMAS MEDICAL PARTNERS,90374,52832


In [11]:
# Final results with t2.classification LIKE '%Hospital'
query = '''
SELECT t.specialization, n2.`provider_organization_name_(legal_business_name)`, 
       SUM(hop.transaction_count) AS total_transactions, SUM(hop.patient_count) AS total_patients
FROM Hop_Teaming_2018 AS hop
INNER JOIN nppes n1
ON hop.from_npi = n1.npi
INNER JOIN nppes n2
ON hop.to_npi = n2.npi 
INNER JOIN taxonomy as t
ON n1.primary_taxonomy = t.code
INNER JOIN taxonomy as t2
ON n2.primary_taxonomy = t2.code
INNER JOIN zip_cbsa as zip
ON n2.provider_business_practice_location_address_postal_code = zip.zip
WHERE n1.entity_type_code = 1
    AND n2.provider_business_practice_location_address_state_name = 'TN'
    AND n2.entity_type_code = 2
    AND t.specialization <> 'None'
    AND t2.classification LIKE '%Hospital'
    AND zip.cbsa = 34980
    AND zip.res_ratio >= 0.5
GROUP BY t.specialization, n2.`provider_organization_name_(legal_business_name)`
ORDER BY total_patients DESC
LIMIT 100;
'''

with sqlite3.connect('Hop_Teaming_2018.sqlite') as db: 
    Class_Hos_sqlite = pd.read_sql(query, db)
    
Class_Hos_sqlite

Unnamed: 0,specialization,provider_organization_name_(legal_business_name),total_transactions,total_patients
0,Diagnostic Radiology,"HCA HEALTH SERVICES OF TENNESSEE, INC.",101364,94621
1,Diagnostic Radiology,MAURY REGIONAL HOSPITAL,44284,38907
2,Diagnostic Radiology,SAINT THOMAS WEST HOSPITAL,43030,36715
3,Diagnostic Radiology,HTI MEMORIAL HOSPITAL CORPORATION,29490,27111
4,Anatomic Pathology & Clinical Pathology,"HCA HEALTH SERVICES OF TENNESSEE, INC.",35078,22779
5,Diagnostic Radiology,SAINT THOMAS RUTHERFORD HOSPITAL,25063,21610
6,Diagnostic Radiology,WILLIAMSON COUNTY HOSPITAL DISTRICT,22734,20038
7,Cardiovascular Disease,"HCA HEALTH SERVICES OF TENNESSEE, INC.",29500,18674
8,Cardiovascular Disease,SAINT THOMAS WEST HOSPITAL,23705,16639
9,Diagnostic Radiology,HENDERSONVILLE HOSPITAL CORPORATION,16722,15831


In [12]:
Class_Hos_sqlite.to_csv('specialization_to_maj_Hospitals.csv')

# Took too long to run
query = '''
SELECT n1.npi AS from_npi, t.specialization, n2.`provider_organization_name_(legal_business_name)`, 
       SUM(hop.transaction_count) AS total_transactions, SUM(hop.patient_count) AS total_patients
FROM Hop_Teaming_2018 AS hop
INNER JOIN nppes n1
ON hop.from_npi = n1.npi
INNER JOIN nppes n2
ON hop.to_npi = n2.npi 
INNER JOIN taxonomy as t
ON n1.primary_taxonomy = t.code
INNER JOIN zip_cbsa as zip
ON n2.provider_business_practice_location_address_postal_code = zip.zip
WHERE n1.entity_type_code = 1
    AND n2.provider_business_practice_location_address_state_name = 'TN'
    AND n2.entity_type_code = 2
    AND t.specialization <> 'None'
    AND t.classification LIKE '%Hospital'
    AND zip.cbsa = 34980
    AND zip.res_ratio >= 0.5
GROUP BY n1.npi, t.specialization, n2.`provider_organization_name_(legal_business_name)`
ORDER BY total_patients DESC;
'''

with sqlite3.connect('Hop_Teaming_2018.sqlite') as db: 
    cbsa_34980_sqlite = pd.read_sql(query, db)
    
cbsa_34980_sqlite

## Determine which professionals Vanderbilt Hospital should reach out to in the Nashville area to expand their own patient volume.
## First, research which professionals are sending significant numbers of patients only to competitor hospitals (such as TriStar Centennial Medical Center).
## Next, consider the specialty of the provider. If Vanderbilt wants to increase volume from Orthopedic Surgeons or from Family Medicine doctors who should they reach out to in those areas?

In [16]:
# Create a new table that is a subset of Hop_Teaming_2018
query = '''
CREATE TABLE hop_major_TN_hospitals AS
SELECT hop.*
FROM Hop_Teaming_2018 AS hop
INNER JOIN nppes n1
ON hop.from_npi = n1.npi
INNER JOIN nppes n2
ON hop.to_npi = n2.npi 
INNER JOIN taxonomy as t
ON n1.primary_taxonomy = t.code
INNER JOIN taxonomy as t2
ON n2.primary_taxonomy = t2.code
INNER JOIN zip_cbsa as zip
ON n2.provider_business_practice_location_address_postal_code = zip.zip
WHERE n1.entity_type_code = 1
    AND n2.provider_business_practice_location_address_state_name = 'TN'
    AND n2.entity_type_code = 2
    AND t.specialization <> 'None'
    AND t2.classification LIKE '%Hospital'
    AND zip.cbsa = 34980
    AND zip.res_ratio >= 0.5;
'''

with sqlite3.connect('Hop_Teaming_2018.sqlite') as db: 
    db.execute(query)  
with sqlite3.connect('Hop_Teaming_2018.sqlite') as db: 
    db.execute('CREATE INDEX IF NOT EXISTS hop_major_TN_hospitals_from_npi ON hop_major_TN_hospitals (from_npi)')
    db.execute('CREATE INDEX IF NOT EXISTS hop_major_TN_hospitals_to_npi ON hop_major_TN_hospitals (to_npi)')

In [6]:
query = '''
SELECT DISTINCT (specialization), COUNT(display_name)
FROM taxonomy
GROUP BY COUNT(display_name);
'''

with sqlite3.connect('Hop_Teaming_2018.sqlite') as db: 
    specialization_sqlite = pd.read_sql(query, db)
    
specialization_sqlite

Unnamed: 0,specialization
0,
1,Allergy
2,Clinical & Laboratory Immunology
3,Addiction Medicine
4,Critical Care Medicine
5,Hospice and Palliative Medicine
6,Pain Medicine
7,Pediatric Anesthesiology
8,Clinical & Laboratory Dermatological Immunology
9,Dermatopathology


# Tomo
query = """
WITH hospitals AS (
    SELECT DISTINCT npi
    FROM nnpes AS np
    INNER JOIN nucc_taxonomy AS nt
    ON np.primary_taxonomy = nt.code
    INNER JOIN zip_cbsa AS zc
    ON np.address_postal_code = zc.zip
    WHERE entity_type_code = 2.0
        AND address_state_name = 'TN'
        AND cbsa = 34980
        AND res_ratio >= 0.5
        AND classification LIKE '%Hospital'
        AND organization_name != 'BUNDLED PAYMENT SERVICES, LLC'
        AND organization_name != 'STATE OF TENNESSEE STATE F&A PAYROLL'
)
SELECT
    from_npi,
    to_npi,
    SUM(patient_count) AS patient_total,
    SUM(transaction_count) AS transaction_total
FROM hop_teaming
WHERE to_npi IN hospitals
GROUP BY from_npi, to_npi
"""

# number of places that refer to major hospitals 14031
# number of unique places that refer to hospitals 7688

with sqlite3.connect('../data/hop_teaming_database.sqlite') as db: 
    providers_to_hospitals = pd.read_sql(query, db)

providers_to_hospitals

# Then, this groups it by to_npi:

total_referrals = (
    providers_to_hospitals
    .groupby('to_npi')[['patient_total','transaction_total']]
    .sum()
    .sort_values(by='patient_total',ascending= False)
    .reset_index()
)

# And than I use that list, to go back to sql to select hospitals. Convoluted, I know, so any suggestions welcome!!

var = tuple(total_referrals['to_npi'].tolist())

query =f"""
SELECT DISTINCT npi, organization_name
FROM nnpes
WHERE npi IN {var}
"""

with sqlite3.connect('../data/hop_teaming_database.sqlite') as db: 
    hospitals = pd.read_sql(query, db)

hospitals

In [16]:
query = '''
SELECT COUNT(*)
FROM taxonomy
WHERE t.classification LIKE '%Hospital%';
'''

with sqlite3.connect('Hop_Teaming_2018.sqlite') as db: 
    taxonomy_sqlite = pd.read_sql(query, db)
    
taxonomy_sqlite

Unnamed: 0,COUNT(*)
0,17


In [18]:
query = '''
SELECT *
FROM Hop_Teaming_2018
LIMIT 5;
'''

with sqlite3.connect('Hop_Teaming_2018.sqlite') as db: 
    test_sqlite = pd.read_sql(query, db)
    
test_sqlite

Unnamed: 0,from_npi,to_npi,patient_count,transaction_count,average_day_wait,std_day_wait
0,1508085911,1730166125,58,67,23.925,43.923
1,1508167040,1730166125,51,51,28.196,52.876
2,1508863549,1730166125,340,391,18.302,42.422
3,1508867870,1730166125,50,79,12.658,26.402
4,1508011040,1730166224,132,145,8.579,28.053
