In [1]:
import pandas as pd
import sqlite3
from tqdm.notebook import tqdm # Provides progress bar for long tasks

In [2]:
pd.set_option("display.max_columns", 500)

### Listing currently existing tables in the database

In [3]:
with sqlite3.connect('../data/hcbb.sqlite') as db :
    query = """
    SELECT name
    FROM sqlite_master 
    WHERE type ='table' 
    AND name NOT LIKE 'sqlite_%';
    """ 
    
    db_table_list = pd.read_sql(query, db)

display(db_table_list)

Unnamed: 0,name
0,taxonomy
1,hop_teaming
2,npidata
3,filtered_hop_teaming


### `providers.csv`

Exported from SQL-Lite `npidata` with applied filters: `entity_type_code == 1`

In [4]:
with sqlite3.connect('../data/hcbb.sqlite') as db :
    query = """
    SELECT *
    FROM npidata
    WHERE entity_type_code = 1;
    """ 
    
    providers = pd.read_sql(query, db)

display(providers.shape)
display(providers)

(32949, 16)

Unnamed: 0,npi,entity_type_code,provider_org_name,provider_last_name,provider_first_name,provider_middle_name,provider_name_prefix,provider_name_suffix,provider_credential,provider_business_address_1,provider_business_address_2,provider_business_city,provider_business_state,provider_business_zip,taxonomy_code,provider_business_zip5
0,1134122187,1,,RUDNICKE,CHERYL,DENISE,MRS.,,CRNP,250 25TH AVE N,STE 412,NASHVILLE,TN,372031632,363L00000X,37203
1,1003819046,1,,NYLANDER,BARBARA,H,,,M.D.,345 23RD AVE N,SUITE 209,NASHVILLE,TN,372031513,207VG0400X,37203
2,1750384780,1,,PERRIGIN,JULIE,A,DR.,,MD,219 CHURCH ST,,DICKSON,TN,370551303,207Q00000X,37055
3,1922001957,1,,PRESLEY,RICHARD,E,,,M.D.,2011 MURPHY AVE,STE 302,NASHVILLE,TN,372032023,207V00000X,37203
4,1073516001,1,,ROSS,DAVID,L,DR.,,MD,127 CRESTVIEW PARK DR,,DICKSON,TN,370552850,207R00000X,37055
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
32944,1477145944,1,,HILL,DOMINIQUE,DANIELLE,MRS.,,"M.ED., LPC",1712 BISCAYNE DR,,FRANKLIN,TN,370671475,101YP2500X,37067
32945,1447842935,1,,CLEVELAND,MEREDITH,B,MRS.,,"MA, LPC-MHSP",5205 MARYLAND WAY STE 310,,BRENTWOOD,TN,370271049,101YM0800X,37027
32946,1639761190,1,,ARNDT,VERONICA,LEE,,,NP,30 HAMILTON STATION XING APT 104,,LEBANON,TN,370879017,163W00000X,37087
32947,1588256051,1,,WRAY,DAWN,MICHELLE,,,"MSN, APRN, FNP-C",3712 OLD CLARKSVILLE PIKE,,JOELTON,TN,370808818,363LF0000X,37080


### `facilities.csv`

Exported from SQL-Lite `npidata` with applied filters: `entity_type_code == 2`

In [5]:
with sqlite3.connect('../data/hcbb.sqlite') as db :
    query = """
    SELECT *
    FROM npidata
    WHERE entity_type_code = 2;
    """ 
    
    facilities = pd.read_sql(query, db)

display(facilities.shape)
display(facilities.head())

(8359, 16)

Unnamed: 0,npi,entity_type_code,provider_org_name,provider_last_name,provider_first_name,provider_middle_name,provider_name_prefix,provider_name_suffix,provider_credential,provider_business_address_1,provider_business_address_2,provider_business_city,provider_business_state,provider_business_zip,taxonomy_code,provider_business_zip5
0,1881697092,2,"RHS, INC.",,,,,,,1330 TROTWOOD AVE,,COLUMBIA,TN,384014701,332B00000X,38401
1,1326041534,2,MURFREESBORO CONVALESCENT SERVICE,,,,,,,4428 LASCASSAS PIKE,,MURFREESBORO,TN,371306871,341600000X,37130
2,1063415834,2,ADVANCED DIABETIC SERVICES,,,,,,,151 HERITAGE PARK DR,STE 403,MURFREESBORO,TN,371290505,332B00000X,37129
3,1538162334,2,TENNESSEE IMAGING ALLIANCE LLC,,,,,,,2628 N MOUNT JULIET RD,,MOUNT JULIET,TN,371228015,261QR0208X,37122
4,1245233220,2,"NORTHRIDGE SURGERY CENTER, LP",,,,,,,647 MYATT DR,,MADISON,TN,371152125,261QA1903X,37115


### `filtered_hop_teaming.csv`

Exported from SQL-Lite `filtered_hop_teaming`

In [6]:
with sqlite3.connect('../data/hcbb.sqlite') as db :
    query = """
    SELECT *
    FROM filtered_hop_teaming;
    """ 
    
    filtered_hop_teaming = pd.read_sql(query, db)

display(filtered_hop_teaming.shape)
display(filtered_hop_teaming.head())

(44299, 6)

Unnamed: 0,from_npi,to_npi,patient_count,transaction_count,average_day_wait,std_day_wait
0,1003963976,1003028770,2535,3945,0.0,0.0
1,1033191986,1003052697,196,300,38.877,35.767
2,1023019429,1003052697,42,61,25.607,40.824
3,1003854258,1003052697,31,55,20.436,26.403
4,1033246640,1003863580,58,58,45.603,56.574


### `specializations.csv`

Exported from SQL-Lite `taxonomy` with only `taxonomy_code` and non-null `specializations`

In [7]:
with sqlite3.connect('../data/hcbb.sqlite') as db :
    query = """
    SELECT 
        taxonomy_code, 
        specialization
    FROM taxonomy
    WHERE specialization IS NOT NULL;
    """ 
    
    specializations = pd.read_sql(query, db)

display(specializations.shape)
display(specializations.head())

(625, 2)

Unnamed: 0,taxonomy_code,specialization
0,207KA0200X,Allergy
1,207KI0005X,Clinical & Laboratory Immunology
2,207LA0401X,Addiction Medicine
3,207LC0200X,Critical Care Medicine
4,207LH0002X,Hospice and Palliative Medicine


### `classifications.csv`

Exported from SQL-Lite `taxonomy` with only `taxonomy_code` and non-null `classifications`

In [8]:
with sqlite3.connect('../data/hcbb.sqlite') as db :
    query = """
    SELECT 
        taxonomy_code, 
        classification
    FROM taxonomy
    WHERE classification IS NOT NULL;
    """ 
    
    classifications = pd.read_sql(query, db)

display(classifications.shape)
display(classifications.head())

(865, 2)

Unnamed: 0,taxonomy_code,classification
0,193200000X,Multi-Specialty
1,193400000X,Single Specialty
2,207K00000X,Allergy & Immunology
3,207KA0200X,Allergy & Immunology
4,207KI0005X,Allergy & Immunology


### `groupings.csv`

Exported from SQL-Lite `taxonomy` with only `taxonomy_code` and non-null `groupings`

In [9]:
with sqlite3.connect('../data/hcbb.sqlite') as db :
    query = """
    SELECT 
        taxonomy_code, 
        grouping
    FROM taxonomy
    WHERE grouping IS NOT NULL;
    """ 
    
    groupings = pd.read_sql(query, db)

display(groupings.shape)
display(groupings.head())

(865, 2)

Unnamed: 0,taxonomy_code,grouping
0,193200000X,Group
1,193400000X,Group
2,207K00000X,Allopathic & Osteopathic Physicians
3,207KA0200X,Allopathic & Osteopathic Physicians
4,207KI0005X,Allopathic & Osteopathic Physicians


### Export all to CSV

In [10]:
providers.to_csv("../neo4j_imports/providers.csv", index=False)

In [11]:
facilities.to_csv("../neo4j_imports/facilities.csv", index=False)

In [12]:
filtered_hop_teaming.to_csv("../neo4j_imports/filtered_hop_teaming.csv", index=False)

In [13]:
specializations.to_csv("../neo4j_imports/specializations.csv", index=False)

In [14]:
classifications.to_csv("../neo4j_imports/classifications.csv", index=False)

In [15]:
groupings.to_csv("../neo4j_imports/groupings.csv", index=False)