In [62]:
import pandas as pd
import sqlite3
from tqdm.notebook import tqdm

In [22]:
hop_teaming = pd.read_csv('../hop_team-team-zeppelin/data/DocGraph_Hop_Teaming_2018.csv', 
                          nrows = 100)
hop_teaming.head()

Unnamed: 0,from_npi,to_npi,patient_count,transaction_count,average_day_wait,std_day_wait
0,1508062167,1730166109,350,370,53.922,72.612
1,1508065640,1730166109,25,25,49.8,55.006
2,1508052093,1730166109,16,16,109.5,70.593
3,1508172545,1730166109,14,14,103.357,75.483
4,1508285131,1730166109,20,21,89.952,89.88


In [11]:
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

In [7]:
db = sqlite3.connect('hop_teaming.sqlite')

for chunk in tqdm(pd.read_csv('DocGraph_Hop_Teaming_2018.csv', chunksize = 10000)): 
    chunk.to_sql('hop', db, if_exists = 'append', index = False) 

0it [00:00, ?it/s]

In [106]:
# This takes about 4 minutes to complete on my computer. Need to check for mistakes but might be a start.

# Monica's magic loop!
def get_primary_taxonomy(row):
    for n in range(1, 16):
        if row['primary_taxonomy'] == '':
            x = str(n)
            if row[f'Healthcare Provider Primary Taxonomy Switch_{x}'] == 'Y':
                return row[f'Healthcare Provider Taxonomy Code_{x}']
    return row['primary_taxonomy']

# Define list of relevant columns
cols = ['NPI', 'Entity Type Code', 
        'Provider Organization Name (Legal Business Name)',
        'Provider Last Name (Legal Name)', 'Provider First Name', 
        'Provider Middle Name', 'Provider Name Prefix Text',
        'Provider Name Suffix Text', 'Provider Credential Text',
        'Provider First Line Business Practice Location Address',
        'Provider Second Line Business Practice Location Address',
        'Provider Business Practice Location Address City Name',
        'Provider Business Practice Location Address State Name',
        'Provider Business Practice Location Address Postal Code']

# Append taxonomy columns 1 to 15
for i in range(1, 16):
    cols.append(f'Healthcare Provider Primary Taxonomy Switch_{i}')
    cols.append(f'Healthcare Provider Taxonomy Code_{i}')

# Read in CSV and relevant columns in chunks
chunksize = 1000000
npi_chunks = pd.read_csv('../data/npidata_pfile_20050523-20230212.csv', usecols = cols, low_memory = False, chunksize = chunksize)

# Define empty dataframe
npi_test = pd.DataFrame()

# Loop over the chunks and process the data
for chunk in npi_chunks:
    # Process data with apply function
    chunk['primary_taxonomy'] = ''
    chunk['primary_taxonomy'] = chunk.apply(get_primary_taxonomy, axis = 1)

    # Append processed data to the empty dataframe
    npi_test = pd.concat([npi_test, chunk])

# Read in other csv files with type cleaning    
nucc = pd.read_csv('../data/nucc_taxonomy_230.csv', encoding = 'latin1')
zip = pd.read_csv('../data/ZIP_CBSA_122021.csv', dtype = {'zip': str})
zip['cbsa'] = zip['cbsa'].astype(str)

# Merge and edit zip codes
merged_df = pd.merge(npi_test, nucc[['Code', 'Classification']], left_on = 'primary_taxonomy', right_on = 'Code', how = 'left')
merged_df['Provider Business Practice Location Address Postal Code'] = merged_df['Provider Business Practice Location Address Postal Code'].str[:5]
merged_df = pd.merge(merged_df, zip[['zip', 'cbsa']], left_on = 'Provider Business Practice Location Address Postal Code', right_on = 'zip', how = 'left')

merged_df.tail(25)

Unnamed: 0,NPI,Entity Type Code,Provider Organization Name (Legal Business Name),Provider Last Name (Legal Name),Provider First Name,Provider Middle Name,Provider Name Prefix Text,Provider Name Suffix Text,Provider Credential Text,Provider First Line Business Practice Location Address,Provider Second Line Business Practice Location Address,Provider Business Practice Location Address City Name,Provider Business Practice Location Address State Name,Provider Business Practice Location Address Postal Code,Healthcare Provider Taxonomy Code_1,Healthcare Provider Primary Taxonomy Switch_1,Healthcare Provider Taxonomy Code_2,Healthcare Provider Primary Taxonomy Switch_2,Healthcare Provider Taxonomy Code_3,Healthcare Provider Primary Taxonomy Switch_3,Healthcare Provider Taxonomy Code_4,Healthcare Provider Primary Taxonomy Switch_4,Healthcare Provider Taxonomy Code_5,Healthcare Provider Primary Taxonomy Switch_5,Healthcare Provider Taxonomy Code_6,Healthcare Provider Primary Taxonomy Switch_6,Healthcare Provider Taxonomy Code_7,Healthcare Provider Primary Taxonomy Switch_7,Healthcare Provider Taxonomy Code_8,Healthcare Provider Primary Taxonomy Switch_8,Healthcare Provider Taxonomy Code_9,Healthcare Provider Primary Taxonomy Switch_9,Healthcare Provider Taxonomy Code_10,Healthcare Provider Primary Taxonomy Switch_10,Healthcare Provider Taxonomy Code_11,Healthcare Provider Primary Taxonomy Switch_11,Healthcare Provider Taxonomy Code_12,Healthcare Provider Primary Taxonomy Switch_12,Healthcare Provider Taxonomy Code_13,Healthcare Provider Primary Taxonomy Switch_13,Healthcare Provider Taxonomy Code_14,Healthcare Provider Primary Taxonomy Switch_14,Healthcare Provider Taxonomy Code_15,Healthcare Provider Primary Taxonomy Switch_15,primary_taxonomy,Code,Classification,zip,cbsa
8420052,1598302184,1.0,,LINSKER,ER,,,,"LMSW, MFA",31 WASHINGTON SQ W,,NEW YORK,NY,10011.0,104100000X,N,1041C0700X,Y,,,,,,,,,,,,,,,,,,,,,,,,,,,1041C0700X,1041C0700X,Social Worker,10011.0,35620.0
8420053,1871748715,1.0,,PHILLIPS,LESLIE,ANN,DR.,,O.D.,7268 JARNIGAN RD,SUITE 200,CHATTANOOGA,TN,37421.0,152W00000X,N,152W00000X,Y,,,,,,,,,,,,,,,,,,,,,,,,,,,152W00000X,152W00000X,Optometrist,37421.0,16860.0
8420054,1417547753,1.0,,PETITT,JORDAN,NICOLE,,,LMFT,2904 ROWENA AVE,,LOS ANGELES,CA,90039.0,101YM0800X,Y,,,,,,,,,,,,,,,,,,,,,,,,,,,,,101YM0800X,101YM0800X,Counselor,90039.0,31080.0
8420055,1780017483,1.0,,PREVIL,LUCKSON,,DR.,,O.D.,7268 JARNIGAN RD,SUITE 200,CHATTANOOGA,TN,37421.0,152W00000X,Y,,,,,,,,,,,,,,,,,,,,,,,,,,,,,152W00000X,152W00000X,Optometrist,37421.0,16860.0
8420056,1548719503,1.0,,THOMPSON,PAIGE,,DR.,,OD,7268 JARNIGAN RD,SUITE 200,CHATTANOOGA,TN,37421.0,152W00000X,Y,,,,,,,,,,,,,,,,,,,,,,,,,,,,,152W00000X,152W00000X,Optometrist,37421.0,16860.0
8420057,1932492303,1.0,,WHITMIRE,WILLIAM,CHALKLEY,DR.,,M.D.,9453 DAYTON PIKE,,SODDY DAISY,TN,37379.0,207W00000X,Y,,,,,,,,,,,,,,,,,,,,,,,,,,,,,207W00000X,207W00000X,Ophthalmology,37379.0,99999.0
8420058,1932492303,1.0,,WHITMIRE,WILLIAM,CHALKLEY,DR.,,M.D.,9453 DAYTON PIKE,,SODDY DAISY,TN,37379.0,207W00000X,Y,,,,,,,,,,,,,,,,,,,,,,,,,,,,,207W00000X,207W00000X,Ophthalmology,37379.0,16860.0
8420059,1831807494,2.0,LARRUA BEHAVIOR SERVICES LLC,,,,,,,12484 NW SOUTH RIVER DR STE 550A,,MEDLEY,FL,33178.0,103K00000X,Y,,,,,,,,,,,,,,,,,,,,,,,,,,,,,103K00000X,103K00000X,Behavior Analyst,33178.0,33100.0
8420060,1417007691,1.0,,YANNIS,REX,A,DR.,,M.D.,7268 JARNIGAN RD,SUITE 200,CHATTANOOGA,TN,37421.0,207W00000X,N,207W00000X,N,208200000X,N,207WX0200X,Y,,,,,,,,,,,,,,,,,,,,,,,207WX0200X,207WX0200X,Ophthalmology,37421.0,16860.0
8420061,1265102255,1.0,,KAIPOV,MYRNA,JUDITH,,,,3857 MARTIN WAY E,,OLYMPIA,WA,98506.0,390200000X,N,101Y00000X,Y,,,,,,,,,,,,,,,,,,,,,,,,,,,101Y00000X,101Y00000X,Counselor,98506.0,36500.0
