# Base Referral Dataset

### Filter down referrals from Nashville Providers to Nashville Organizations that are Acute Care Hospitals

In [1]:
# import libraries
import pandas as pd
import sqlite3
from tqdm.notebook import tqdm

# Widen pandas' display limits so large frames are shown without truncation
pd.set_option("display.max_rows", 500)
pd.set_option("display.max_columns", 200)

In [2]:
# Open a connection to the local SQLite database file and keep it in `db`
DB_PATH = '../data/hop_teaming.sqlite'
db = sqlite3.connect(DB_PATH)

In [3]:
# Build the base referral dataset: every row of `referrals` (r.*) where
#   - from_npi is an individual provider (entity_type_code = 1)
#   - to_npi is an organization (entity_type_code = 2) whose specialty
#     classification is 'General Acute Care Hospital'
#   - both NPIs appear in the `nashville_providers` CTE, i.e. an nppes row with
#     state 'TN'/'TENNESSEE' whose 5-digit ZIP matches a ZIP in the `cbsa` table
#     (presumably `cbsa` holds only Nashville CBSA ZIPs -- TODO confirm)
# Each referral row is enriched with the ZIP, specialty and entity type of both
# NPIs, plus the to_facility* columns taken from `hospital_names` for the to_npi.
#
# NOTE: the `average_day_wait >= 2` filter (intended to drop emergency
# referrals) is commented out inside the query, so referrals with any average
# day wait are returned.
# NOTE(review): the LEFT JOINs to `specialty` and `hospital_names` would repeat
# a referral row if an NPI has more than one row in either table -- verify.

nashville_referrals_normalised_query = """
WITH nashville_providers AS (
    SELECT n.*
        , s.Classification AS specialty
        , h.*

    FROM nppes AS n

    JOIN cbsa AS c
        ON c.ZIP = CAST(n.zip_5 AS INTEGER)
    
    LEFT JOIN specialty AS s
        ON s.npi = n.npi
        
    LEFT JOIN hospital_names AS h
        ON n.npi = h.to_npi        
        
    WHERE n.state IN ('TN', 'TENNESSEE')
)

SELECT r.*
, CAST(n2.zip_5 AS INTEGER) AS from_zip
, n2.specialty AS from_npi_specialty
, n2.entity_type_code AS from_entity_type_code
, CAST(n.zip_5 AS INTEGER) AS to_zip
, n.specialty AS to_npi_specialty
, n.entity_type_code AS to_entity_type_code
, n.to_facility
, n.to_facility_group
, n.to_facility_name_normalised

FROM referrals AS r

JOIN nashville_providers AS n
    ON n.npi = r.to_npi

JOIN nashville_providers AS n2
    ON n2.npi = r.from_npi

WHERE n.entity_type_code = 2 -- only get referrals made *to* organizations
AND n2.entity_type_code = 1 -- only get referrrals made *by* individuals
AND n.specialty = 'General Acute Care Hospital' -- only get referrals to acute care hospitals
--AND r.average_day_wait >= 2 -- filter out emergency referrals

"""
# Run the query over the open SQLite connection and load the result into a DataFrame
nashville_referrals_normalised = pd.read_sql(nashville_referrals_normalised_query, db)

In [4]:
# Sanity-check the query result: row count, column names, dtypes and non-null counts
nashville_referrals_normalised.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6436 entries, 0 to 6435
Data columns (total 16 columns):
 #   Column                       Non-Null Count  Dtype  
---  ------                       --------------  -----  
 0   index                        6436 non-null   int64  
 1   from_npi                     6436 non-null   int64  
 2   to_npi                       6436 non-null   int64  
 3   patient_count                6436 non-null   int64  
 4   transaction_count            6436 non-null   int64  
 5   average_day_wait             6436 non-null   float64
 6   std_day_wait                 6436 non-null   float64
 7   from_zip                     6436 non-null   int64  
 8   from_npi_specialty           6425 non-null   object 
 9   from_entity_type_code        6436 non-null   float64
 10  to_zip                       6436 non-null   int64  
 11  to_npi_specialty             6436 non-null   object 
 12  to_entity_type_code          6436 non-null   float64
 13  to_facility       

In [None]:
# Persist the hospital-only referral dataset (no average-day-wait filter) as CSV.
# NOTE(review): to_csv defaults to index=True, so the frame's RangeIndex is
# written as an extra unnamed first column next to the existing `index` column
# returned by the query -- confirm downstream readers expect this layout.
output_csv_path = '../data/nashville_referrals_normalised_only_hospitals_any_avg_day_wait.csv'
nashville_referrals_normalised.to_csv(output_csv_path)