In [1]:
import sqlite3
from contextlib import closing

import pandas as pd
from tqdm.notebook import tqdm # Provides progress bar for long tasks

In [2]:
# Raise pandas' display limits so wide and long frames render without truncation:
# up to 500 columns and 1000 rows.
pd.options.display.max_columns = 500
pd.options.display.max_rows = 1000

### Listing currently existing tables in the database

In [3]:
# List the user-defined tables in the database; names matching `sqlite_%` are filtered out.
# `closing(...)` is used because sqlite3's own connection context manager only commits or
# rolls back the transaction -- it does not close the connection.
with closing(sqlite3.connect('../data/hcbb_group_reviews.sqlite')) as db:
    query = """
    SELECT name
    FROM sqlite_master
    WHERE type ='table'
    AND name NOT LIKE 'sqlite_%';
    """

    db_table_list = pd.read_sql(query, db)

display(db_table_list)

Unnamed: 0,name
0,cbsa
1,npidata
2,taxonomy
3,hop_teaming_raw
4,filtered_hop_teaming


### `providers.csv`

We only want the providers in the Nashville area, so `npidata` is joined with the `cbsa` table here.<br>Exported from the SQLite table `npidata` with the filter `entity_type_code = 1` (providers only), restricted to Tennessee business addresses.

In [4]:
# Load providers (entity_type_code = 1) whose business state is Tennessee, joined to the
# `cbsa` lookup on 5-digit ZIP. The INNER JOIN drops rows whose ZIP has no match in `cbsa`.
# NOTE(review): the query does not filter on a specific CBSA code; presumably `cbsa` only
# contains Nashville-area ZIPs -- confirm.
# `closing(...)` is used because sqlite3's own connection context manager only commits or
# rolls back the transaction -- it does not close the connection.
with closing(sqlite3.connect('../data/hcbb_group_reviews.sqlite')) as db:
    query = """
    SELECT *
    FROM npidata AS n INNER JOIN cbsa AS c
    ON c.zip = CAST(n.provider_business_zip5 AS INTEGER)
    WHERE entity_type_code = 1 -- providers only
    AND UPPER(provider_business_state) IN ('TN', 'TENNESSEE');
    """

    providers = pd.read_sql(query, db)

# Display after the connection has been released.
display(providers.shape)
display(providers.head())

(30787, 17)

Unnamed: 0,npi,entity_type_code,provider_org_name,provider_last_name,provider_first_name,provider_middle_name,provider_name_prefix,provider_name_suffix,provider_credential,provider_business_address_1,provider_business_address_2,provider_business_city,provider_business_state,primary_taxonomy,provider_business_zip5,zip,cbsa
0,1134122187,1,,RUDNICKE,CHERYL,DENISE,MRS.,,CRNP,250 25TH AVE N,STE 412,NASHVILLE,TN,363L00000X,37203,37203,34980
1,1003819046,1,,NYLANDER,BARBARA,H,,,M.D.,345 23RD AVE N,SUITE 209,NASHVILLE,TN,207VG0400X,37203,37203,34980
2,1750384780,1,,PERRIGIN,JULIE,A,DR.,,MD,219 CHURCH ST,,DICKSON,TN,207Q00000X,37055,37055,34980
3,1922001957,1,,PRESLEY,RICHARD,E,,,M.D.,2011 MURPHY AVE,STE 302,NASHVILLE,TN,207V00000X,37203,37203,34980
4,1073516001,1,,ROSS,DAVID,L,DR.,,MD,127 CRESTVIEW PARK DR,,DICKSON,TN,207R00000X,37055,37055,34980


In [5]:
# Normalise the state column to the two-letter code 'TN'.
# The SQL filter that produced `providers` compares UPPER(provider_business_state), so
# values such as 'Tennessee' or 'tn' can be present; compare case-insensitively here as
# well, otherwise only the exact string 'TENNESSEE' would be rewritten.
state_upper = providers['provider_business_state'].str.upper()
providers.loc[state_upper.isin(['TN', 'TENNESSEE']), 'provider_business_state'] = 'TN'

### `facilities.csv`

We only want the facilities in the Nashville area, so `npidata` is joined with the `cbsa` table here.<br>Exported from the SQLite table `npidata` with the filter `entity_type_code = 2` (facilities only), restricted to Tennessee business addresses.

In [6]:
# Load facilities (entity_type_code = 2) whose business state is Tennessee, joined to the
# `cbsa` lookup on 5-digit ZIP. The INNER JOIN drops rows whose ZIP has no match in `cbsa`.
# `closing(...)` is used because sqlite3's own connection context manager only commits or
# rolls back the transaction -- it does not close the connection.
with closing(sqlite3.connect('../data/hcbb_group_reviews.sqlite')) as db:
    query = """
    SELECT *
    FROM npidata AS n INNER JOIN cbsa AS c
    ON c.zip = CAST(n.provider_business_zip5 AS INTEGER)
    WHERE entity_type_code = 2 -- facilities only
    AND UPPER(provider_business_state) IN ('TN', 'TENNESSEE');
    """

    facilities = pd.read_sql(query, db)

# The query accepts 'TN' and 'TENNESSEE' in any letter case; collapse them all to 'TN'
# so the exported state column holds a single spelling.
state_upper = facilities['provider_business_state'].str.upper()
facilities.loc[state_upper.isin(['TN', 'TENNESSEE']), 'provider_business_state'] = 'TN'

display(facilities.shape)
display(facilities.head())

(7551, 17)

Unnamed: 0,npi,entity_type_code,provider_org_name,provider_last_name,provider_first_name,provider_middle_name,provider_name_prefix,provider_name_suffix,provider_credential,provider_business_address_1,provider_business_address_2,provider_business_city,provider_business_state,primary_taxonomy,provider_business_zip5,zip,cbsa
0,1881697092,2,"RHS, INC.",,,,,,,1330 TROTWOOD AVE,,COLUMBIA,TN,332B00000X,38401,38401,34980
1,1326041534,2,MURFREESBORO CONVALESCENT SERVICE,,,,,,,4428 LASCASSAS PIKE,,MURFREESBORO,TN,341600000X,37130,37130,34980
2,1063415834,2,ADVANCED DIABETIC SERVICES,,,,,,,151 HERITAGE PARK DR,STE 403,MURFREESBORO,TN,332B00000X,37129,37129,34980
3,1538162334,2,TENNESSEE IMAGING ALLIANCE LLC,,,,,,,2628 N MOUNT JULIET RD,,MOUNT JULIET,TN,261QR0208X,37122,37122,34980
4,1245233220,2,"NORTHRIDGE SURGERY CENTER, LP",,,,,,,647 MYATT DR,,MADISON,TN,261QA1903X,37115,37115,34980


### `filtered_hop_teaming.csv`

Exported from the SQLite table `filtered_hop_teaming` (all rows, all columns).

In [7]:
# Read the whole `filtered_hop_teaming` table into memory.
# `closing(...)` is used because sqlite3's own connection context manager only commits or
# rolls back the transaction -- it does not close the connection.
with closing(sqlite3.connect('../data/hcbb_group_reviews.sqlite')) as db:
    query = """
    SELECT *
    FROM filtered_hop_teaming;
    """

    filtered_hop_teaming = pd.read_sql(query, db)

display(filtered_hop_teaming.shape)
display(filtered_hop_teaming.head())

(8846709, 6)

Unnamed: 0,from_npi,to_npi,patient_count,transaction_count,average_day_wait,std_day_wait
0,1033194220,1003000308,62,80,26.175,53.78
1,1003818055,1003000563,101,146,0.0,0.0
2,1013967371,1003001538,46,61,21.033,27.679
3,1023073459,1003001553,25,51,34.314,32.55
4,1023003142,1003001553,164,222,33.923,52.133


### `specializations.csv`

Exported from the SQLite table `taxonomy`, keeping only `taxonomy_code` and `specialization`, for rows where `specialization` is not null.

In [8]:
# Pull (taxonomy_code, specialization) pairs, skipping rows with no specialization.
# `closing(...)` is used because sqlite3's own connection context manager only commits or
# rolls back the transaction -- it does not close the connection.
with closing(sqlite3.connect('../data/hcbb_group_reviews.sqlite')) as db:
    query = """
    SELECT
        taxonomy_code,
        specialization
    FROM taxonomy
    WHERE specialization IS NOT NULL;
    """

    specializations = pd.read_sql(query, db)

display(specializations.shape)
display(specializations.head())

(625, 2)

Unnamed: 0,taxonomy_code,specialization
0,207KA0200X,Allergy
1,207KI0005X,Clinical & Laboratory Immunology
2,207LA0401X,Addiction Medicine
3,207LC0200X,Critical Care Medicine
4,207LH0002X,Hospice and Palliative Medicine


### `classifications.csv`

Exported from the SQLite table `taxonomy`, keeping only `taxonomy_code` and `classification`, for rows where `classification` is not null.

In [9]:
# Pull (taxonomy_code, classification) pairs, skipping rows with no classification.
# `closing(...)` is used because sqlite3's own connection context manager only commits or
# rolls back the transaction -- it does not close the connection.
with closing(sqlite3.connect('../data/hcbb_group_reviews.sqlite')) as db:
    query = """
    SELECT
        taxonomy_code,
        classification
    FROM taxonomy
    WHERE classification IS NOT NULL;
    """

    classifications = pd.read_sql(query, db)

display(classifications.shape)
display(classifications.head())

(865, 2)

Unnamed: 0,taxonomy_code,classification
0,193200000X,Multi-Specialty
1,193400000X,Single Specialty
2,207K00000X,Allergy & Immunology
3,207KA0200X,Allergy & Immunology
4,207KI0005X,Allergy & Immunology


### `groupings.csv`

Exported from the SQLite table `taxonomy`, keeping only `taxonomy_code` and `grouping`, for rows where `grouping` is not null.

In [10]:
# Pull (taxonomy_code, grouping) pairs, skipping rows with no grouping.
# `closing(...)` is used because sqlite3's own connection context manager only commits or
# rolls back the transaction -- it does not close the connection.
with closing(sqlite3.connect('../data/hcbb_group_reviews.sqlite')) as db:
    query = """
    SELECT
        taxonomy_code,
        grouping
    FROM taxonomy
    WHERE grouping IS NOT NULL;
    """

    groupings = pd.read_sql(query, db)

display(groupings.shape)
display(groupings.head())

(865, 2)

Unnamed: 0,taxonomy_code,grouping
0,193200000X,Group
1,193400000X,Group
2,207K00000X,Allopathic & Osteopathic Physicians
3,207KA0200X,Allopathic & Osteopathic Physicians
4,207KI0005X,Allopathic & Osteopathic Physicians


### Export all to CSV

In [11]:
# Write the providers frame to the Neo4j import folder, leaving out the DataFrame index.
providers_csv = "../neo4j_imports/from_group_reviews/providers.csv"
providers.to_csv(providers_csv, index=False)

In [12]:
# Write the facilities frame to the Neo4j import folder, leaving out the DataFrame index.
facilities_csv = "../neo4j_imports/from_group_reviews/facilities.csv"
facilities.to_csv(facilities_csv, index=False)

In [13]:
# Write the hop-teaming frame to the Neo4j import folder, leaving out the DataFrame index.
filtered_hop_teaming_csv = "../neo4j_imports/from_group_reviews/filtered_hop_teaming.csv"
filtered_hop_teaming.to_csv(filtered_hop_teaming_csv, index=False)

In [14]:
# Write the specializations frame to the Neo4j import folder, leaving out the DataFrame index.
specializations_csv = "../neo4j_imports/from_group_reviews/specializations.csv"
specializations.to_csv(specializations_csv, index=False)

In [15]:
# Write the classifications frame to the Neo4j import folder, leaving out the DataFrame index.
classifications_csv = "../neo4j_imports/from_group_reviews/classifications.csv"
classifications.to_csv(classifications_csv, index=False)

In [16]:
# Write the groupings frame to the Neo4j import folder, leaving out the DataFrame index.
groupings_csv = "../neo4j_imports/from_group_reviews/groupings.csv"
groupings.to_csv(groupings_csv, index=False)