# Chunking and SQLite

In this notebook, we'll see a couple of techniques that can be used when working with large file in Python.

In [2]:
import pandas as pd
import numpy as np
import sqlite3
from tqdm.notebook import tqdm

Before we do anything else, let's see how many rows are contained in the Calls for Service file. 

In [None]:
#using terminal  "wc -l file.txt"

In [None]:
npi_cols=['NPI',
 'Entity Type Code',
 'Replacement NPI',
 'Employer Identification Number (EIN)',
 'Provider Organization Name (Legal Business Name)',
 'Provider Last Name (Legal Name)',
 'Provider First Name',
 'Provider Middle Name',
 'Provider Name Prefix Text',
 'Provider Name Suffix Text',
 'Provider Credential Text',
 'Provider First Line Business Practice Location Address',
 'Provider Second Line Business Practice Location Address',
 'Provider Business Practice Location Address City Name',
 'Provider Business Practice Location Address State Name',
 'Provider Business Practice Location Address Postal Code',
 'Provider Gender Code',
 'Healthcare Provider Taxonomy Code_1',
 'Healthcare Provider Primary Taxonomy Switch_1',
 'Healthcare Provider Taxonomy Code_2',
 'Healthcare Provider Primary Taxonomy Switch_2',
 'Healthcare Provider Taxonomy Code_3',
 'Healthcare Provider Primary Taxonomy Switch_3',
 'Healthcare Provider Taxonomy Code_4',
 'Healthcare Provider Primary Taxonomy Switch_4',
 'Healthcare Provider Taxonomy Code_5',
 'Healthcare Provider Primary Taxonomy Switch_5',
 'Healthcare Provider Taxonomy Code_6',
 'Healthcare Provider Primary Taxonomy Switch_6',
 'Healthcare Provider Taxonomy Code_7',
 'Healthcare Provider Primary Taxonomy Switch_7',
 'Healthcare Provider Taxonomy Code_8',
 'Healthcare Provider Primary Taxonomy Switch_8',
 'Healthcare Provider Taxonomy Code_9',
 'Healthcare Provider Primary Taxonomy Switch_9',
 'Healthcare Provider Taxonomy Code_10',
 'Healthcare Provider Primary Taxonomy Switch_10',
 'Healthcare Provider Taxonomy Code_11',
 'Healthcare Provider Primary Taxonomy Switch_11',
 'Healthcare Provider Taxonomy Code_12',
 'Healthcare Provider Primary Taxonomy Switch_12',
 'Healthcare Provider Taxonomy Code_13',
 'Healthcare Provider Primary Taxonomy Switch_13',
 'Healthcare Provider Taxonomy Code_14',
 'Healthcare Provider Primary Taxonomy Switch_14',
 'Healthcare Provider Taxonomy Code_15',
 'Healthcare Provider Primary Taxonomy Switch_15']

First, we need to connect to our database. The connect function will either create a new database if one does not already exist or connect to an existing one.

In [None]:
db = sqlite3.connect('nppes.sqlite')

Now, we can chunk through the data and for each row, add the rows to a table in our sqlite database. 
To keep track of how much progress has been made, we can use the `tqdm` library.

In [None]:
for chunk in tqdm(pd.read_csv('NPPES_Data_Dissemination_February_2021/npidata_pfile_20050523-20210207.csv', usecols=npi_cols, chunksize = 10000)):
    chunk.columns = [x.lower().replace(' ', '_').replace('(', '').replace(')','').replace('*','') for x in chunk.columns]      # Clean up the column names
    conditions = [
    (chunk['healthcare_provider_primary_taxonomy_switch_1']=='Y'),
    (chunk['healthcare_provider_primary_taxonomy_switch_2']=='Y'),
    (chunk['healthcare_provider_primary_taxonomy_switch_3']=='Y'),
    (chunk['healthcare_provider_primary_taxonomy_switch_4']=='Y'),
    (chunk['healthcare_provider_primary_taxonomy_switch_5']=='Y'),
    (chunk['healthcare_provider_primary_taxonomy_switch_6']=='Y'),
    (chunk['healthcare_provider_primary_taxonomy_switch_7']=='Y'),
    (chunk['healthcare_provider_primary_taxonomy_switch_8']=='Y')]

# create a list of the values we want to assign for each condition
    values = [chunk['healthcare_provider_taxonomy_code_1'],
          chunk['healthcare_provider_taxonomy_code_2'],
          chunk['healthcare_provider_taxonomy_code_3'],
          chunk['healthcare_provider_taxonomy_code_4'],
          chunk['healthcare_provider_taxonomy_code_5'],
          chunk['healthcare_provider_taxonomy_code_6'],
          chunk['healthcare_provider_taxonomy_code_7'],
          chunk['healthcare_provider_taxonomy_code_8']]

# create a new column and use np.select to assign values to it using our lists as arguments
    chunk['taxonomy'] = np.select(conditions, values)
    chunk[chunk.columns.drop(list(chunk.filter(regex='healthcare_provider_')))]
    chunk.to_sql('npi_data', db, if_exists = 'append', index = False)

#figure out this later
#def find_primary_code(row):
    #col = 'Healthcare Provider Primary Taxonomy Switch_'
    #for i in range(1, 16):
        #if row[col + str(i)] == 'Y':
            #return row['Healthcare Provider Taxonomy Code_' + str(i)]

In [None]:
db.close()

In [None]:
db = sqlite3.connect('nppes.sqlite')
query='''
select *
from npi_data
order by npi;
'''
npi_data = pd.read_sql(query, db)
npi_data

In [None]:
db.close()

In [None]:
npi_data.duplicated(subset=['npi']).value_counts()

In [None]:
npi_data=npi_data.drop_duplicates(subset=['npi'])

In [None]:
db = sqlite3.connect('nppes.sqlite')
nucc_tax=pd.read_csv('NPPES_Data_Dissemination_February_2021/nucc_taxonomy_210.csv')
nucc_tax.columns = [x.lower().replace(' ', '_') for x in nucc_tax.columns]
nucc_tax.to_sql('nucc_tax', db,  if_exists='append', index = False)    


In [None]:
query='''
select *
from nucc_tax;
'''
nucc_tax = pd.read_sql(query, db)
nucc_tax

In [None]:
db.close()

In [None]:
nucc_tax.info()

To speed up queries which use a specific column, we can create an **index** on that column. This causes the database to store that column in a way that helps it to retrieve rows quicker.

In [None]:
db = sqlite3.connect('nppes.sqlite')
db.execute('CREATE INDEX taxonomy ON npi_data(taxonomy)')
db.execute('CREATE INDEX code ON nucc_tax(code)')

In [None]:
db = sqlite3.connect('nppes.sqlite')
cbsa=pd.read_csv('NPPES_Data_Dissemination_February_2021/ZIP_CBSA_122020.csv')
cbsa.columns = [x.lower().replace(' ', '_') for x in cbsa.columns]
cbsa.to_sql('cbsa', db,  if_exists='append', index = False)

In [None]:
db = sqlite3.connect('nppes.sqlite')
query= '''
select *       
from cbsa 
'''
cbsa = pd.read_sql(query, db)
cbsa

In [None]:
db = sqlite3.connect('nppes.sqlite')
query= '''
select *      
from npi_data as np
left join nucc_tax as nu
on np.taxonomy=nu.code 
join cbsa as c
on np.provider_business_practice_location_address_postal_code=c.zip 
'''
npp_nucc_tax = pd.read_sql(query, db)
npp_nucc_tax

In [None]:
df=npp_nucc_tax[['npi', 'provider_business_practice_location_address_postal_code', 'specialization', 'zip','cbsa']]
df.sort_values(by=['cbsa'])

In [None]:
cbsa.info()

In [None]:
df.duplicated(subset=['npi'], ).value_counts()

In [None]:
npp_nucc=pd.merge(npi_data, nucc_tax,left_on='taxonomy', right_on='code', how='left')

In [None]:
npp_nucc.duplicated(subset=['npi']).value_counts()

In [None]:
npp_nucc.head(50)


In [None]:
npp_nucc['postal_code']=npp_nucc['provider_business_practice_location_address_postal_code'].astype(str).str[0:5]

cbsa['zip1']=cbsa['zip'].astype(str).str[0:5]


In [None]:
npp_nucc_cbsa=pd.merge(npp_nucc, cbsa,left_on= 'postal_code', right_on='zip1', how='left')

In [None]:
npp_nucc_cbsa.duplicated(subset=['npi']).value_counts()

In [None]:
npp_nucc_cbsa_nash=npp_nucc_cbsa[npp_nucc_cbsa['cbsa']==34980]


In [None]:
npp_nucc_cbsa_nash.to_csv('npp_taxo_cbsa_nash.csv',index=False)

In [None]:
npp_nucc_cbsa_nash.duplicated(subset=['npi']).value_counts()

In [None]:
npp_nucc_cbsa[['npi','specialization','zip']].head(50)

In [None]:
pd.read_csv('DocGraph_Hop_Teaming_2017_Non_Commercial/DocGraph_Hop_Teaming_2017.csv', nrows = 1000).info()

In [2]:
db = sqlite3.connect('nppes.sqlite')
for chunk in tqdm(pd.read_csv('DocGraph_Hop_Teaming_2017_Non_Commercial/DocGraph_Hop_Teaming_2017.csv', chunksize = 100000)):
    chunk=chunk[(chunk['transaction_count']>=50) | (chunk['average_day_wait']<=50)]
    chunk.to_sql('hop', db, if_exists ='append', index = False)


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))




In [17]:
db.close()

In [4]:
db = sqlite3.connect('nppes.sqlite')
query='''
select *
from hop;
'''
hop_team = pd.read_sql(query, db)
hop_team 

Unnamed: 0,from_npi,to_npi,patient_count,transaction_count,average_day_wait,std_day_wait
0,1003863580,1000000004,19,19,108.895,84.598
1,1043250400,1000000004,20,20,87.000,77.173
2,1033239413,1000000004,20,20,58.800,76.982
3,1033142146,1000000004,491,535,10.232,36.558
4,1013957562,1000000004,25,26,78.692,59.305
...,...,...,...,...,...,...
203330902,1235291360,1255367132,12,13,44.769,96.969
203330903,1235180266,1255367132,23,23,13.739,21.020
203330904,1265448617,1255367132,15,16,10.938,25.702
203330905,1265437644,1255367132,78,82,26.561,51.304


In [43]:
npp_nucc_cbsa_nash=pd.read_csv('npp_taxo_cbsa_nash.csv') 
npp_nucc_cbsa_nash['specialization'].unique().counts


AttributeError: 'numpy.ndarray' object has no attribute 'counts'

In [None]:
#npp_nucc_cbsa_nash['npp_npi']=npp_nucc_cbsa_nash['npi'].astype(str)
#hop_team['hop_from_npi']=hop_team['from_npi'].astype(str)
#hop_team['hop_to_npi']=hop_team['to_npi'].astype(str)

In [34]:
npp_nucc_cbsa_nash_hop=pd.merge(npp_nucc_cbsa_nash, hop_team, left_on= 'npi', right_on='from_npi', how='inner')

In [8]:
npp_nucc_cbsa_nash_hop1=pd.merge(npp_nucc_cbsa_nash, hop_team, left_on= 'npi', right_on='to_npi', how='inner')

In [None]:
npp_nucc_cbsa_nash_hop_horiz=pd.merge(npp_nucc_cbsa_nash_hop, hop_team, left_on= 'npi', right_on='to_npi', how='inner')

In [12]:
npp_nucc_cbsa_nash_hop.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1562518 entries, 0 to 1562517
Data columns (total 42 columns):
 #   Column                                                   Non-Null Count    Dtype  
---  ------                                                   --------------    -----  
 0   npi                                                      1562518 non-null  int64  
 1   entity_type_code                                         1562518 non-null  float64
 2   replacement_npi                                          0 non-null        float64
 3   employer_identification_number_ein                       650980 non-null   object 
 4   provider_organization_name_legal_business_name           650980 non-null   object 
 5   provider_last_name_legal_name                            911402 non-null   object 
 6   provider_first_name                                      911538 non-null   object 
 7   provider_middle_name                                     731230 non-null   object 
 8   pr

In [24]:
npp_nucc_cbsa_nash_hop=npp_nucc_cbsa_nash_hop[npp_nucc_cbsa_nash_hop['entity_type_code']==1]

In [25]:
npp_nucc_cbsa_nash_hop.to_csv('npp_nucc_cbsa_nash_hop_from.csv',index=False)

In [26]:
npp_nucc_cbsa_nash_hop1=npp_nucc_cbsa_nash_hop1[npp_nucc_cbsa_nash_hop1['entity_type_code']==2]

In [27]:
npp_nucc_cbsa_nash_hop1.to_csv('npp_nucc_cbsa_nash_hop_to.csv',index=False)

In [28]:
npp_hop_dfs=[npp_nucc_cbsa_nash_hop,npp_nucc_cbsa_nash_hop1]

In [29]:
npp_nucc_cbsa_nash_hop_all=pd.concat(npp_hop_dfs, keys=['from', 'to'])

In [44]:
npp_nucc_cbsa_nash_hop

<class 'pandas.core.frame.DataFrame'>
Int64Index: 650980 entries, 106 to 1562336
Data columns (total 42 columns):
 #   Column                                                   Non-Null Count   Dtype  
---  ------                                                   --------------   -----  
 0   npi                                                      650980 non-null  int64  
 1   entity_type_code                                         650980 non-null  float64
 2   replacement_npi                                          0 non-null       float64
 3   employer_identification_number_ein                       650980 non-null  object 
 4   provider_organization_name_legal_business_name           650980 non-null  object 
 5   provider_last_name_legal_name                            0 non-null       object 
 6   provider_first_name                                      0 non-null       object 
 7   provider_middle_name                                     0 non-null       object 
 8   provider_na

In [30]:
npp_nucc_cbsa_nash_hop_all.to_csv('npp_nucc_cbsa_nash_hopall.csv',index=False)

In [36]:
npp_nucc_cbsa_nash_hop_all.info()

<class 'pandas.core.frame.DataFrame'>
MultiIndex: 1573937 entries, ('from', 0) to ('to', 1562336)
Data columns (total 42 columns):
 #   Column                                                   Non-Null Count    Dtype  
---  ------                                                   --------------    -----  
 0   npi                                                      1573937 non-null  int64  
 1   entity_type_code                                         1573937 non-null  float64
 2   replacement_npi                                          0 non-null        float64
 3   employer_identification_number_ein                       650980 non-null   object 
 4   provider_organization_name_legal_business_name           650980 non-null   object 
 5   provider_last_name_legal_name                            922845 non-null   object 
 6   provider_first_name                                      922957 non-null   object 
 7   provider_middle_name                                     743381 non-null

In [3]:
db = sqlite3.connect('nppes.sqlite')
for chunk in tqdm(pd.read_csv('npp_taxo_cbsa_nash.csv', chunksize = 10000)):
    chunk.to_sql('nash_npp', db, if_exists ='append', index = False)

HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))




In [6]:
db.close()

In [5]:
db = sqlite3.connect('nppes.sqlite')
db.execute('CREATE INDEX npi ON nash_npp(npi)')
db.execute('CREATE INDEX npif ON hop(from_npi)')
db.execute('CREATE INDEX npit ON hop(to_npi)')

<sqlite3.Cursor at 0x7fdd379c6340>

In [7]:
db = sqlite3.connect('nppes.sqlite')
query= '''
select npi, entity_type_code,
employer_identification_number_ein,
provider_organization_name_legal_business_name,
provider_last_name_legal_name,
provider_first_name,provider_business_practice_location_address_city_name,
provider_gender_code, classification,
specialization, from_npi,to_npi, patient_count, transaction_count, average_day_wait, std_day_wait

from nash_npp as nnp
join hop as h
on nnp.npi=h.from_npi 

'''
nash_npp_hop_from = pd.read_sql(query, db)
nash_npp_hop_from

Unnamed: 0,npi,entity_type_code,employer_identification_number_ein,provider_organization_name_legal_business_name,provider_last_name_legal_name,provider_first_name,provider_business_practice_location_address_city_name,provider_gender_code,classification,specialization,from_npi,to_npi,patient_count,transaction_count,average_day_wait,std_day_wait
0,1003863580,2.0,<UNAVAIL>,"ASSOCIATED PATHOLOGISTS, LLC",,,BRENTWOOD,,Pathology,Anatomic Pathology & Clinical Pathology,1003863580,1000000004,19,19,108.895,84.598
1,1023223898,1.0,,,WOODFORD,RANDALL,BRENTWOOD,M,Pathology,Anatomic Pathology & Clinical Pathology,1023223898,1003000126,36,37,56.946,76.122
2,1003863580,2.0,<UNAVAIL>,"ASSOCIATED PATHOLOGISTS, LLC",,,BRENTWOOD,,Pathology,Anatomic Pathology & Clinical Pathology,1003863580,1003000126,96,101,52.208,76.874
3,1003863580,2.0,<UNAVAIL>,"ASSOCIATED PATHOLOGISTS, LLC",,,BRENTWOOD,,Pathology,Anatomic Pathology & Clinical Pathology,1003863580,1003000308,12,12,51.750,45.167
4,1003863580,2.0,<UNAVAIL>,"ASSOCIATED PATHOLOGISTS, LLC",,,BRENTWOOD,,Pathology,Anatomic Pathology & Clinical Pathology,1003863580,1003001066,109,134,9.612,35.369
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1571414,1235186800,2.0,<UNAVAIL>,"PATHGROUP LABS, LLC",,,NASHVILLE,,Clinical Medical Laboratory,,1235186800,1255365771,39,40,86.725,65.749
1571415,1245221092,2.0,<UNAVAIL>,"VERUS HEALTHCARE, LLC",,,BRENTWOOD,,Durable Medical Equipment & Medical Supplies,,1245221092,1255365771,19,20,46.050,41.944
1571416,1265445506,2.0,<UNAVAIL>,WILLIAMSON COUNTY HOSPITAL DISTRICT,,,FRANKLIN,,General Acute Care Hospital,,1265445506,1255365987,17,23,78.957,90.442
1571417,1245221092,2.0,<UNAVAIL>,"VERUS HEALTHCARE, LLC",,,BRENTWOOD,,Durable Medical Equipment & Medical Supplies,,1245221092,1255366902,16,23,42.522,37.763


In [8]:
nash_npp_hop_from.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1571419 entries, 0 to 1571418
Data columns (total 16 columns):
 #   Column                                                 Non-Null Count    Dtype  
---  ------                                                 --------------    -----  
 0   npi                                                    1571419 non-null  int64  
 1   entity_type_code                                       1571419 non-null  float64
 2   employer_identification_number_ein                     648462 non-null   object 
 3   provider_organization_name_legal_business_name         648462 non-null   object 
 4   provider_last_name_legal_name                          922845 non-null   object 
 5   provider_first_name                                    922957 non-null   object 
 6   provider_business_practice_location_address_city_name  1571419 non-null  object 
 7   provider_gender_code                                   922957 non-null   object 
 8   classification        

In [13]:
db = sqlite3.connect('nppes.sqlite')
query= '''
select 
nnp_from.entity_type_code,
nnp_from.employer_identification_number_ein,
nnp_from.provider_organization_name_legal_business_name,
nnp_from.provider_last_name_legal_name,
nnp_from.provider_first_name,
nnp_from.provider_business_practice_location_address_city_name,
nnp_from.provider_gender_code, 
nnp_from.classification,
nnp_from.specialization, 
nnp_to.entity_type_code as entity_type_code_to,
nnp_to.employer_identification_number_ein as employer_identification_number_ein_to,
nnp_to.provider_organization_name_legal_business_name as provider_organization_name_legal_business_name_to,
nnp_to.provider_last_name_legal_name as provider_last_name_legal_name_to,
nnp_to.provider_first_name as provider_first_name_to,
nnp_to.provider_business_practice_location_address_city_name as provider_business_practice_location_address_city_name_to,
nnp_to.provider_gender_code as provider_gender_code_to, 
nnp_to.classification as classification_to,
nnp_to.specialization as specialization_to,
from_npi,to_npi, patient_count, transaction_count, average_day_wait, std_day_wait

from hop as hp
inner join nash_npp as nnp_from
on hp.from_npi=nnp_from.npi
inner join nash_npp as nnp_to
on nnp_to.npi=hp.to_npi
where nnp_from.entity_type_code=1 and nnp_to.entity_type_code=2
'''
nash_npp_hop_fromto = pd.read_sql(query, db)
nash_npp_hop_fromto


Unnamed: 0,entity_type_code,employer_identification_number_ein,provider_organization_name_legal_business_name,provider_last_name_legal_name,provider_first_name,provider_business_practice_location_address_city_name,provider_gender_code,classification,specialization,entity_type_code_to,...,provider_business_practice_location_address_city_name_to,provider_gender_code_to,classification_to,specialization_to,from_npi,to_npi,patient_count,transaction_count,average_day_wait,std_day_wait
0,1.0,,,GORDON,JONATHAN,GOODLETTSVILLE,M,Radiology,Diagnostic Radiology,2.0,...,SPRING HILL,,Dermatology,,1043232879,1003028770,24,24,112.333,80.894
1,1.0,,,BLOCK,JOHN,NASHVILLE,M,Radiology,Diagnostic Radiology,2.0,...,SPRING HILL,,Dermatology,,1043302466,1003028770,24,26,98.192,97.772
2,1.0,,,REZK,HANY,COLUMBIA,M,Internal Medicine,Nephrology,2.0,...,SPRING HILL,,Dermatology,,1033297429,1003028770,56,62,53.145,58.831
3,1.0,,,MAHONEY,ROBERT,NASHVILLE,M,Radiology,Diagnostic Radiology,2.0,...,SPRING HILL,,Dermatology,,1043206329,1003028770,173,177,97.864,81.756
4,1.0,,,GANNON,CAROLINE,FRANKLIN,F,Podiatrist,Foot & Ankle Surgery,2.0,...,SPRING HILL,,Dermatology,,1003855537,1003028770,15,16,84.250,77.117
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
232210,1.0,,,POPE,STAN,MURFREESBORO,M,Radiology,Diagnostic Radiology,2.0,...,SMYRNA,,Specialist,,1568568319,1992985055,18,19,55.316,70.537
232211,1.0,,,MANGIONE,NELSON,SMYRNA,M,Internal Medicine,Cardiovascular Disease,2.0,...,SMYRNA,,Specialist,,1164504544,1992985055,26,38,38.868,48.749
232212,1.0,,,HART,JAMES,FRANKLIN,M,Psychiatry & Neurology,Psychiatry,2.0,...,SMYRNA,,Specialist,,1215915871,1992985055,32,39,10.359,27.306
232213,1.0,,,LAYMAN,MATTHEW,MURFREESBORO,M,Radiology,Diagnostic Radiology,2.0,...,SMYRNA,,Specialist,,1194018531,1992985055,14,14,58.786,83.976


In [19]:
nash_npp_hop_fromto=nash_npp_hop_fromto[nash_npp_hop_fromto['transaction_count']>=50]

In [20]:
nash_npp_hop_fromto=nash_npp_hop_fromto[nash_npp_hop_fromto['average_day_wait']<=50]

In [21]:
nash_npp_hop_fromto

Unnamed: 0,entity_type_code,employer_identification_number_ein,provider_organization_name_legal_business_name,provider_last_name_legal_name,provider_first_name,provider_business_practice_location_address_city_name,provider_gender_code,classification,specialization,entity_type_code_to,...,provider_business_practice_location_address_city_name_to,provider_gender_code_to,classification_to,specialization_to,from_npi,to_npi,patient_count,transaction_count,average_day_wait,std_day_wait
9,1.0,,,HAYES,BENJAMIN,SPRING HILL,M,Dermatology,Pediatric Dermatology,2.0,...,SPRING HILL,,Dermatology,,1003963976,1003028770,2535,3945,0.000,0.000
27,1.0,,,MCBEAN,MARY,SPRING HILL,F,Family Medicine,,2.0,...,SPRING HILL,,Dermatology,,1356304984,1003028770,40,70,39.200,45.274
91,1.0,,,SULLIVAN,RYAN,SPRING HILL,M,Allergy & Immunology,Allergy,2.0,...,SPRING HILL,,Dermatology,,1417085606,1003028770,276,603,0.000,0.000
97,1.0,,,WALLACE,ROY,NASHVILLE,M,Ophthalmology,Retina Specialist,2.0,...,SPRING HILL,,Dermatology,,1417946203,1003028770,49,66,41.864,44.229
105,1.0,,,ROBB,CHRISTOPHER,SPRING HILL,M,Dermatology,Pediatric Dermatology,2.0,...,SPRING HILL,,Dermatology,,1114074093,1003028770,940,1716,0.002,0.097
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
232162,1.0,,,HUMPHREY,STEVEN,NASHVILLE,M,Internal Medicine,Cardiovascular Disease,2.0,...,NASHVILLE,,Internal Medicine,,1114961513,1992972087,63,98,40.510,48.107
232170,1.0,,,AL OMARY,MALEK,NASHVILLE,M,Internal Medicine,,2.0,...,NASHVILLE,,Internal Medicine,,1790833267,1992972087,187,611,0.029,0.728
232182,1.0,,,KAUFMAN,ALAN,NASHVILLE,M,Radiology,Diagnostic Radiology,2.0,...,NASHVILLE,,Internal Medicine,,1538105242,1992972087,69,82,38.256,55.007
232201,1.0,,,KATKURI,JITHANDER,SMYRNA,M,Family Medicine,,2.0,...,SMYRNA,,Specialist,,1114974979,1992985055,323,897,0.002,0.067


In [22]:
nash_npp_hop_fromto.to_csv('nash_npp_hop_fromto.csv', index=False)