# Database Check

*Code to inspect the database's metadata (table names and their columns) and to run basic row-count and distinct-key checks on each table.*

In [1]:
import sqlite3
import pandas as pd

In [2]:
# Open the sqlite database read-only and assign the connection to the db variable.
# With a plain path, sqlite3.connect() silently creates a new, empty database
# when the file is missing; the read-only URI (mode=ro) raises
# sqlite3.OperationalError instead and also prevents accidental writes.
db = sqlite3.connect('file:../data/hop_teaming.sqlite?mode=ro', uri=True)

In [3]:
# Create a list of all of the tables in the database and print it.
# Each row returned from sqlite_master is a 1-tuple holding the table name;
# take the name whole rather than splitting it on spaces, which would break a
# table whose name contains a space into several bogus entries.
tables = db.execute("SELECT name FROM sqlite_master WHERE type='table';")
all_tables = [row[0] for row in tables]

print(all_tables)

['cbsa', 'referrals', 'nashville_referrals', 'nppes', 'specialty']


In [4]:
# Build and print a list of tuples, one per table:
# ('table name: <table>', [<column name>, ...])
table_cols = []

for table in all_tables:
    # Double-quote the identifier (doubling any embedded quote) so that table
    # names containing spaces, punctuation or reserved words are still valid
    # PRAGMA arguments instead of producing a syntax error.
    quoted = '"' + table.replace('"', '""') + '"'
    columns = db.execute("PRAGMA table_info(" + quoted + ")").fetchall()
    # Field 1 of every table_info row is the column name.
    table_cols.append(('table name: ' + table, [col[1] for col in columns]))

print(table_cols)

[('table name: cbsa', ['ZIP', 'CBSA']), ('table name: referrals', ['index', 'from_npi', 'to_npi', 'patient_count', 'transaction_count', 'average_day_wait', 'std_day_wait']), ('table name: nashville_referrals', ['index', 'from_npi', 'to_npi', 'patient_count', 'transaction_count', 'average_day_wait', 'std_day_wait']), ('table name: nppes', ['npi', 'entity_type_code', 'provider_organization_name', 'provider_last_name', 'provider_first_name', 'provider_middle_name', 'provider_name_prefix', 'provider_name_suffix', 'provider_credential', 'address_01', 'address_02', 'city', 'state', 'zip_9', 'zip_5']), ('table name: specialty', ['npi', 'primary_taxonomy', 'Classification'])]


In [5]:
# Return the column names for the database - this is messier so commenting it out here
# table_cols = db.execute("SELECT sql FROM sqlite_master WHERE type = 'table';")
# for table_col in table_cols:
    # print(table_col)

In [6]:
# Summarise the cbsa table: total rows plus distinct ZIP and CBSA counts,
# loaded into a one-row DataFrame.
cbsa = pd.read_sql(
    sql="""
SELECT COUNT(*) AS tot_rows
    , COUNT(DISTINCT ZIP) AS num_zips
    , COUNT(DISTINCT CBSA) AS num_CBSAs

FROM cbsa    
""",
    con=db,
)

In [7]:
# Display the cbsa summary (tot_rows, num_zips, num_CBSAs)
cbsa

Unnamed: 0,tot_rows,num_zips,num_CBSAs
0,135,135,1


In [8]:
# Summarise the specialty table: total rows and distinct npi count,
# loaded into a one-row DataFrame.
specialty = pd.read_sql(
    sql="""
SELECT COUNT(*) AS tot_rows
    , COUNT(DISTINCT npi) AS num_npis

FROM specialty
""",
    con=db,
)

In [9]:
# Display the specialty summary (tot_rows, num_npis)
specialty

Unnamed: 0,tot_rows,num_npis
0,6714038,6714038


In [10]:
# Summarise the referrals table: total rows and the number of distinct
# referring (from_npi) and receiving (to_npi) values, as a one-row DataFrame.
referrals = pd.read_sql(
    sql="""
SELECT COUNT(*) AS tot_rows
    , COUNT(DISTINCT from_npi) AS tot_from_npis
    , COUNT(DISTINCT to_npi) AS tot_to_npis

FROM referrals
""",
    con=db,
)

In [11]:
# Display the referrals summary (tot_rows, tot_from_npis, tot_to_npis)
referrals

Unnamed: 0,tot_rows,tot_from_npis,tot_to_npis
0,31704890,1258830,1250130


In [13]:
# Summarise the nashville_referrals table: total rows and the number of
# distinct from_npi and to_npi values, as a one-row DataFrame.
nashville_referrals = pd.read_sql(
    sql="""
SELECT COUNT(*) AS tot_rows
    , COUNT(DISTINCT from_npi) AS tot_from_npis
    , COUNT(DISTINCT to_npi) AS tot_to_npis

FROM nashville_referrals
""",
    con=db,
)

In [14]:
# Display the nashville_referrals summary (tot_rows, tot_from_npis, tot_to_npis)
nashville_referrals

Unnamed: 0,tot_rows,tot_from_npis,tot_to_npis
0,290032,37505,38811


In [15]:
# Summarise the nppes table: total rows and distinct npi count,
# loaded into a one-row DataFrame.
nppes = pd.read_sql(
    sql="""
SELECT COUNT(*) AS tot_rows
    , COUNT(DISTINCT npi) AS num_npis

FROM nppes
""",
    con=db,
)

In [16]:
# Display the nppes summary (tot_rows, num_npis)
nppes

Unnamed: 0,tot_rows,num_npis
0,6714038,6714038


In [17]:
# Close the db connection; `db` cannot be used for further queries after this
db.close()