# Create a table that only contains referrals to or from Nashville providers

In [1]:
# import libraries
import pandas as pd
import sqlite3

In [2]:
# Location of the SQLite database file, relative to this notebook
DB_PATH = '../data/hop_teaming.sqlite'

# Open a connection to the database; every later cell reuses `db`
db = sqlite3.connect(DB_PATH)

In [3]:
# Build the query that selects every referral touching a Tennessee provider:
#   1. referrals TO a provider whose nppes.state is 'TN' or 'TENNESSEE', and
#   2. referrals FROM such a provider.
# Each half inner-joins the provider's zip_5 to cbsa.ZIP, so a provider whose
# ZIP has no row in cbsa is excluded. UNION (not UNION ALL) de-duplicates, so a
# referral whose two endpoints both qualify appears only once.
# Both halves use a plain JOIN on nppes: the WHERE on the provider's state and
# the inner join on its zip_5 would discard unmatched (NULL) rows anyway, so an
# outer join would add nothing.
# NOTE(review): the only explicit filter is on state. The result is limited to
# Nashville only if the cbsa table itself holds just Nashville-area ZIPs -
# TODO confirm, or add an explicit CBSA filter.
query = """
SELECT r.*

FROM referrals AS r

JOIN nppes AS n1
    ON r.to_npi = n1.npi

JOIN cbsa AS c
    ON c.ZIP = n1.zip_5

WHERE n1.state IN ('TN', 'TENNESSEE')

UNION

SELECT r.*

FROM referrals AS r

JOIN nppes AS n2
    ON r.from_npi = n2.npi

JOIN cbsa AS c
    ON c.ZIP = n2.zip_5

WHERE n2.state IN ('TN', 'TENNESSEE')
"""

In [4]:
# Run the referral query on the open connection and load the result set
# into a DataFrame
nashville_providers = pd.read_sql(sql=query, con=db)

In [5]:
# Preview the first five rows of the query result
nashville_providers.head(n=5)

Unnamed: 0,index,from_npi,to_npi,patient_count,transaction_count,average_day_wait,std_day_wait
0,20,1003863580,1003001066,109,134,9.612,35.369
1,335,1023055126,1003013947,69,142,5.535,27.885
2,632,1003863580,1003025826,106,107,11.869,41.021
3,705,1013154723,1003028770,32,50,41.94,52.32
4,706,1003963976,1003028770,2535,3945,0.0,0.0


In [6]:
# Summarize the frame: entry count, per-column non-null counts and dtypes, memory usage
nashville_providers.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 290010 entries, 0 to 290009
Data columns (total 7 columns):
 #   Column             Non-Null Count   Dtype  
---  ------             --------------   -----  
 0   index              290010 non-null  int64  
 1   from_npi           290010 non-null  int64  
 2   to_npi             290010 non-null  int64  
 3   patient_count      290010 non-null  int64  
 4   transaction_count  290010 non-null  int64  
 5   average_day_wait   290010 non-null  float64
 6   std_day_wait       290010 non-null  float64
dtypes: float64(2), int64(5)
memory usage: 15.5 MB


In [7]:
# Write the DataFrame to the database as the table 'nashville_referrals',
# replacing any existing table of that name; index=False keeps the DataFrame's
# own row index out of the table
nashville_providers.to_sql(
    name='nashville_referrals',
    con=db,
    if_exists='replace',
    index=False,
)

In [8]:
# List every table recorded in sqlite_master to confirm the new table exists
tables = db.execute(
    "SELECT name FROM sqlite_master WHERE type='table';"
)
# Each row is a 1-tuple holding the table name; print one name per line
for table in tables.fetchall():
    print(table[0])

cbsa
referrals
nppes
specialty
hospital_names
nashville_referrals


In [9]:
# Sanity-check query: count the rows in the nashville_referrals table.
# Note: this rebinds `query`, replacing the referral query string assigned
# earlier in the notebook.
query = """
SELECT COUNT(*)

FROM nashville_referrals
"""

In [10]:
# Execute the count query and capture its result as a DataFrame
test = pd.read_sql(sql=query, con=db)

In [11]:
# Display the count result (head shows at most five rows; this result has one)
test.head(n=5)

Unnamed: 0,COUNT(*)
0,290010
