In [46]:
import pandas as pd
import sqlite3
import plotly.express as px

In [2]:
# Disable pandas' display truncation: show every column and every row
# whenever a DataFrame or Series is rendered.
pd.set_option("display.max_columns", None)
pd.set_option("display.max_rows", None)

In [3]:
# Load the complete hop_team table from the SQLite database into a DataFrame.
db = sqlite3.connect('../data/nppes_lite.sqlite')  # open the connection

query = """
SELECT * 
FROM hop_team
"""
try:
    hop_team = pd.read_sql(query, db)
finally:
    # Release the connection even when the query raises, so a failed run
    # does not leave an open handle on the database file.
    db.close()

In [4]:
# Preview the first rows of the loaded table to check its columns.
hop_team.head()

Unnamed: 0,from_npi,to_npi,patient_count,transaction_count,average_day_wait,std_day_wait
0,1033142146,1000000004,491,535,10.232,36.558
1,1013977990,1003000126,134,145,27.352,51.137
2,1013996669,1003000126,91,92,35.152,68.009
3,1033102504,1003000126,52,64,15.328,38.3
4,1003029620,1003000126,111,121,33.058,58.981


In [5]:
# (rows, columns) of the full hop_team table.
hop_team.shape

(31704890, 6)

### Filter hop_team to rows whose from_npi is an entity-type-1 NPI in nash_nppes and whose to_npi is an entity-type-2 NPI in nash_nppes:

In [15]:
# Keep only hop_team rows whose sender is a nash_nppes NPI with
# entity_type_code 1 and whose receiver is a nash_nppes NPI with
# entity_type_code 2.
db = sqlite3.connect('../data/nppes_lite.sqlite') #reopen the connection

query = """
    SELECT *
    FROM hop_team
    WHERE from_npi IN(
        SELECT DISTINCT(npi)  
        FROM nash_nppes
        WHERE entity_type_code == 1)
    AND to_npi IN(
        SELECT DISTINCT(npi)
        FROM nash_nppes
        WHERE entity_type_code == 2)
"""

try:
    hops = pd.read_sql(query, db)
finally:
    # Release the connection even when the query raises.
    db.close()

In [16]:
# (rows, columns) left after filtering on from_npi / to_npi.
hops.shape

(41127, 6)

In [17]:
# Preview the first rows of the filtered referral data.
hops.head()

Unnamed: 0,from_npi,to_npi,patient_count,transaction_count,average_day_wait,std_day_wait
0,1003963976,1003028770,2535,3945,0.0,0.0
1,1033246640,1003863580,58,58,45.603,56.574
2,1033215157,1003863580,124,126,22.833,53.329
3,1023223898,1003863580,1739,1872,0.169,5.185
4,1023253549,1003863580,34,53,31.887,50.676


In [18]:
# Save the filtered referrals as a table called nash_hop_team in the database.
db = sqlite3.connect('../data/nppes_lite.sqlite') #open connection

try:
    # 'replace' keeps this cell idempotent: re-running it rewrites the table
    # instead of appending a second copy of every row (which 'append' did).
    hops.to_sql('nash_hop_team', db, if_exists = 'replace', index = False)
finally:
    db.close() #close connection, even if the write fails

In [60]:
# Maintenance snippet, intentionally left commented out.
# Uncomment it to drop the nash_hop_team table from the database.
# Connect to the existing database:
#db = sqlite3.connect('../data/nppes_lite.sqlite')
# A cursor is needed to execute raw SQL against the database:
#cursor = db.cursor()
# Drop the table and print a line confirming that it is gone:
#cursor.execute("DROP TABLE nash_hop_team")
#print("Table dropped...")

### Merge nash_nppes and nash_hop_team data:

In [36]:
# Enrich every nash_hop_team row with descriptive nash_nppes columns for both
# ends of the referral:
#   * `provider` CTE  - name, address and taxonomy info for each from_npi
#   * `hospital` CTE  - organisation name, address and taxonomy info for each to_npi
# NOTE(review): DISTINCT applies to the whole selected row, not to npi alone.
# If nash_nppes ever holds several differing rows for one npi, the final join
# will repeat the matching referral rows - confirm npi is unique there.
db = sqlite3.connect('../data/nppes_lite.sqlite') #reopen the connection

query = """
WITH provider AS(
SELECT DISTINCT(npi), first_name || ' ' || last_name AS from_physician , 
        address_1 AS from_address,
        taxonomy_code AS from_tax,
        grouping AS from_grouping,
        classification AS from_classification
FROM nash_nppes
INNER JOIN nash_hop_team
ON npi = from_npi
),

hospital AS(
SELECT DISTINCT(npi), 
        org_name AS to_facility,
        address_1 AS to_address,
        taxonomy_code AS to_tax,
        grouping AS to_grouping,
        classification AS to_classification
FROM nash_nppes
INNER JOIN nash_hop_team
ON npi = to_npi
) 

SELECT from_npi, from_physician, from_address,from_tax, from_grouping, from_classification, 
       to_npi, to_facility, to_address, to_tax, to_grouping, to_classification,
       patient_count, transaction_count, average_day_wait, std_day_wait
        
FROM nash_hop_team
INNER JOIN provider as p
ON p.npi = from_npi
INNER JOIN hospital as h
ON h.npi = to_npi

"""
try:
    final = pd.read_sql(query, db)
finally:
    # Release the connection even when the query raises.
    db.close()

In [37]:
# Preview the merged frame: sender columns, receiver columns, referral metrics.
final.head()

Unnamed: 0,from_npi,from_physician,from_address,from_tax,from_grouping,from_classification,to_npi,to_facility,to_address,to_tax,to_grouping,to_classification,patient_count,transaction_count,average_day_wait,std_day_wait
0,1003963976,BENJAMIN HAYES,3098 CAMPBELL STATION PKWY STE A201,207NP0225X,Allopathic & Osteopathic Physicians,Dermatology,1003028770,SPRING HILL DERMATOLOGY PLC,1229 RESERVE BLVD,207N00000X,Allopathic & Osteopathic Physicians,Dermatology,2535,3945,0.0,0.0
1,1033246640,OUIDA COLLINS,3601 TVC,207Q00000X,Allopathic & Osteopathic Physicians,Family Medicine,1003863580,"ASSOCIATED PATHOLOGISTS, LLC",5301 VIRGINIA WAY STE 300,207ZP0102X,Allopathic & Osteopathic Physicians,Pathology,58,58,45.603,56.574
2,1033215157,KENDRA RENNELL,1224 TROTWOOD AVE,174400000X,Other Service Providers,Specialist,1003863580,"ASSOCIATED PATHOLOGISTS, LLC",5301 VIRGINIA WAY STE 300,207ZP0102X,Allopathic & Osteopathic Physicians,Pathology,124,126,22.833,53.329
3,1023223898,RANDALL WOODFORD,5301 VIRGINIA WAY,207ZP0102X,Allopathic & Osteopathic Physicians,Pathology,1003863580,"ASSOCIATED PATHOLOGISTS, LLC",5301 VIRGINIA WAY STE 300,207ZP0102X,Allopathic & Osteopathic Physicians,Pathology,1739,1872,0.169,5.185
4,1023253549,RHETT BRUNER,1501 WOODLAND POINTE DR,111N00000X,Chiropractic Providers,Chiropractor,1003863580,"ASSOCIATED PATHOLOGISTS, LLC",5301 VIRGINIA WAY STE 300,207ZP0102X,Allopathic & Osteopathic Physicians,Pathology,34,53,31.887,50.676


In [38]:
# (rows, columns) of the merged frame.
final.shape

(41127, 16)

In [61]:
# Save the merged frame as a table called nash_nppes_hop_team in the database.
db = sqlite3.connect('../data/nppes_lite.sqlite') #open connection

try:
    # 'replace' keeps this cell idempotent: re-running it rewrites the table
    # instead of appending a second copy of every row (which 'append' did).
    final.to_sql('nash_nppes_hop_team', db, if_exists = 'replace', index = False)
finally:
    db.close() #close connection, even if the write fails

### Clean the `to_classification` column (classification of the receiving facility):

In [54]:
# Bind the merged frame (`final`, the data written to the nash_nppes_hop_team
# table) to the name this cell and the filtering cell after it use, so the
# notebook also runs on a fresh kernel.
nash_nppes_hop_team = final

# Count how many rows fall under each receiving-side classification.
nash_nppes_hop_team.to_classification.value_counts()

Internal Medicine                                                   10292
General Acute Care Hospital                                          6425
Radiology                                                            4609
Specialist                                                           2560
Anesthesiology                                                       1955
Ambulance                                                            1743
Emergency Medicine                                                   1564
Pathology                                                            1410
Family Medicine                                                      1371
Clinic/Center                                                        1298
Orthopaedic Surgery                                                   966
Durable Medical Equipment & Medical Supplies                          910
Clinical Medical Laboratory                                           700
Nurse Practitioner                    

In [58]:
# Receiving-side classifications that should be removed from the analysis.
exclude_to_classification = [
    "Radiology",
    "Anesthesiology",
    "Ambulance",
    "Emergency Medicine",
    "Pathology",
    "Durable Medical Equipment & Medical Supplies",
    "Clinical Medical Laboratory",
    "Portable X-ray and/or Other Portable Diagnostic Imaging Supplier",
    "Pharmacy",
    "Audiologist-Hearing Aid Fitter",
    "Prosthetic/Orthotic Supplier",
    "Eyewear Supplier",
    "Preferred Provider Organization",
]
# Negated membership mask: keep every row whose to_classification is NOT on
# the list. Despite its name, `excluded_classification` holds the rows kept.
excluded_classification = nash_nppes_hop_team.loc[
    ~nash_nppes_hop_team["to_classification"].isin(exclude_to_classification)
]
excluded_classification.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 27871 entries, 0 to 41126
Data columns (total 16 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   from_npi             27871 non-null  int64  
 1   from_physician       27866 non-null  object 
 2   from_address         27871 non-null  object 
 3   from_tax             27871 non-null  object 
 4   from_grouping        27871 non-null  object 
 5   from_classification  27871 non-null  object 
 6   to_npi               27871 non-null  int64  
 7   to_facility          27871 non-null  object 
 8   to_address           27871 non-null  object 
 9   to_tax               27871 non-null  object 
 10  to_grouping          27871 non-null  object 
 11  to_classification    27871 non-null  object 
 12  patient_count        27871 non-null  int64  
 13  transaction_count    27871 non-null  int64  
 14  average_day_wait     27871 non-null  float64
 15  std_day_wait         27871 non-null 

In [59]:
# Preview the rows that remain after removing the listed classifications.
excluded_classification.head()

Unnamed: 0,from_npi,from_physician,from_address,from_tax,from_grouping,from_classification,to_npi,to_facility,to_address,to_tax,to_grouping,to_classification,patient_count,transaction_count,average_day_wait,std_day_wait
0,1003963976,BENJAMIN HAYES,3098 CAMPBELL STATION PKWY STE A201,207NP0225X,Allopathic & Osteopathic Physicians,Dermatology,1003028770,SPRING HILL DERMATOLOGY PLC,1229 RESERVE BLVD,207N00000X,Allopathic & Osteopathic Physicians,Dermatology,2535,3945,0.0,0.0
33,1043281710,JOSEPH SCOTT,1272 GARRISON DRIVE,207R00000X,Allopathic & Osteopathic Physicians,Internal Medicine,1003914201,SLEEP CENTERS OF MIDDLE TENNESSEE LLC,1505 WILLIAMS DR STE 200,207RS0012X,Allopathic & Osteopathic Physicians,Internal Medicine,42,51,33.941,33.132
34,1013958776,AHMAD ABU-HALIMAH,VANDERBILT UNIVERSITY MEDICAL CTR. CARDIOLOGY ...,207RC0000X,Allopathic & Osteopathic Physicians,Internal Medicine,1003914201,SLEEP CENTERS OF MIDDLE TENNESSEE LLC,1505 WILLIAMS DR STE 200,207RS0012X,Allopathic & Osteopathic Physicians,Internal Medicine,62,76,49.618,56.469
35,1003075862,AKASHIA ANDERSON,211 HERITAGE PARK DR,207Q00000X,Allopathic & Osteopathic Physicians,Family Medicine,1013109511,MID SOUTH PSYCHIATRIC ASSOCIATES,1830 HERITAGE PARK PLZ,103TP0016X,Behavioral Health & Social Service Providers,Psychologist,108,354,22.073,19.142
36,1043232879,JONATHAN GORDON,3024 BUSINESS PARK CIR,2085R0202X,Allopathic & Osteopathic Physicians,Radiology,1013224617,WILLIAM G BOGER MD PLLC,2000 RESERVE BLVD,207R00000X,Allopathic & Osteopathic Physicians,Internal Medicine,80,83,23.048,36.056


### Look at physician–hospital interactions where the average waiting time = 0, which most likely means the physician works at that specific hospital:

In [62]:
# Rows whose average_day_wait is exactly zero.
no_wait = excluded_classification.loc[
    excluded_classification["average_day_wait"].eq(0)
]

In [63]:
# Preview the zero-wait rows.
no_wait.head()

Unnamed: 0,from_npi,from_physician,from_address,from_tax,from_grouping,from_classification,to_npi,to_facility,to_address,to_tax,to_grouping,to_classification,patient_count,transaction_count,average_day_wait,std_day_wait
0,1003963976,BENJAMIN HAYES,3098 CAMPBELL STATION PKWY STE A201,207NP0225X,Allopathic & Osteopathic Physicians,Dermatology,1003028770,SPRING HILL DERMATOLOGY PLC,1229 RESERVE BLVD,207N00000X,Allopathic & Osteopathic Physicians,Dermatology,2535,3945,0.0,0.0
37,1033482393,FAWN HOLSOMBECK,3601 THE VANDERBILT CLINIC,367500000X,Physician Assistants & Advanced Practice Nursi...,"Nurse Anesthetist, Certified Registered",1023025475,"WESLEY & KLIPPENSTEIN, PC",1800 CHURCH ST,207WX0200X,Allopathic & Osteopathic Physicians,Ophthalmology,85,87,0.0,0.0
69,1013918499,JONATHON MCGEE,521 W MAIN ST,225100000X,"Respiratory, Developmental, Rehabilitative and...",Physical Therapist,1033353727,"URBAN PHYSICAL THERAPY, INC",521 W MAIN ST,261QP2000X,Ambulatory Health Care Facilities,Clinic/Center,78,614,0.0,0.0
78,1033323159,CATHERINE KIRKLAND,395 WALLACE RD.,363L00000X,Physician Assistants & Advanced Practice Nursi...,Nurse Practitioner,1043297542,UROLOGY ASSOCIATES PC,2801 CHARLOTTE AVE,208800000X,Allopathic & Osteopathic Physicians,Urology,205,674,0.0,0.0
118,1033144647,MARK MELSON,4306 HARDING PIKE,207W00000X,Allopathic & Osteopathic Physicians,Ophthalmology,1083012181,OCULOPLASTIC SPECIALISTS OF TENNESSEE LLC,4306 HARDING PIKE,207W00000X,Allopathic & Osteopathic Physicians,Ophthalmology,245,374,0.0,0.0


In [64]:
# (rows, columns) of the zero-wait subset.
no_wait.shape

(1983, 16)

In [65]:
# Scatter of patient_count per referring physician for the zero-wait rows,
# with one colour per receiving facility.
fig = px.scatter(
    no_wait,
    x="from_physician",
    y="patient_count",
    color="to_facility",
    # A descriptive title and axis labels let the figure stand on its own.
    title="Patient count per physician for referrals with zero average wait",
    labels={
        "from_physician": "Referring physician",
        "patient_count": "Patient count",
        "to_facility": "Receiving facility",
    },
)
# Legend is hidden - presumably because colouring by facility yields too many
# entries to be readable.
fig.update_layout(showlegend=False)
fig.show()