### 1. Load and combine all monthly reinstatement csv files under the folder '~/data/LEIE/all_rein_from_2013'

In [1]:
import pandas as pd
import os
import glob
# Locate the monthly reinstatement CSV files.
# The data folder is resolved relative to the parent of the current working
# directory, so the notebook has to be started from a direct subfolder of
# the directory that contains `data/`.
parent = os.path.dirname(os.getcwd())

# Build the path from components instead of hard-coding '\\' separators so
# the cell works on Windows, macOS and Linux alike.
path = os.path.join(parent, 'data', 'LEIE', 'all_rein_from_2013')

# glob returns matches in arbitrary, OS-dependent order; sorting makes the
# row order of the combined frame reproducible between runs.
csv_files = sorted(glob.glob(os.path.join(path, "*.csv")))
if not csv_files:
    # Without this guard pd.concat fails with the much less helpful
    # "No objects to concatenate".
    raise FileNotFoundError("No reinstatement CSV files found in " + path)

# Read every monthly file, then concatenate once. Using a separate name for
# the list keeps `rein_data` a DataFrame for its whole lifetime.
rein_frames = [pd.read_csv(f, encoding="utf-8") for f in csv_files]
rein_data = pd.concat(rein_frames)
print(rein_data.shape)

(4322, 18)


### 2. Load and combine all monthly reinstatement dbf files in subfolders starting with 'rein' under the same folder

In [2]:
from simpledbf import Dbf5
# Locate the monthly reinstatement DBF files: every '*.DBF' inside any
# subfolder of all_rein_from_2013 whose name starts with 'rein'.
# Path components are joined with os.path.join so the cell is not tied to
# Windows path separators.
path2 = os.path.join(parent, 'data', 'LEIE', 'all_rein_from_2013', 'rein*')

# Sort for a reproducible row order (glob order is OS-dependent).
dbf_files = sorted(glob.glob(os.path.join(path2, "*.DBF")))
if not dbf_files:
    # Fail early with a clear message; otherwise the column accesses below
    # raise a KeyError on an empty frame.
    raise FileNotFoundError("No reinstatement DBF files found under " + path2)

# Loop over the list of dbf files, collecting one frame per file, and
# concatenate once at the end instead of re-copying a growing frame on
# every iteration. Rows follow the sorted file order.
dbf_frames = []
for f in dbf_files:
    temp_df = Dbf5(f, codec='utf-8').to_dataframe()
    # Column names are stripped of surrounding whitespace before combining.
    temp_df.columns = temp_df.columns.str.strip()
    dbf_frames.append(temp_df)
rein_data2 = pd.concat(dbf_frames)

# NPI and WAIVERDATE: a missing value, or any value whose first character is
# '0', is collapsed to the single placeholder '0' so that the int64 cast
# below succeeds and such rows end up as 0.
for col in ('NPI', 'WAIVERDATE'):
    is_placeholder = rein_data2[col].isna() | (rein_data2[col].str[0] == '0')
    rein_data2.loc[is_placeholder, col] = '0'

# Cast the identifier/date columns to int64 -- presumably to match the dtypes
# pandas infers for the CSV-derived frame; confirm against rein_data.dtypes.
# NOTE(review): casting ZIP to an integer drops leading zeros (e.g. '02139'
# becomes 2139); kept as-is so the column stays consistent with existing output.
rein_data2 = rein_data2.astype({
    'NPI': 'int64',
    'ZIP': 'int64',
    'WAIVERDATE': 'int64',
    'EXCLDATE': 'int64',
    'REINDATE': 'int64',
})

print(rein_data2.shape)

(1710, 18)


### 3. Clean and concatenate reinstatement monthly data, then output to a csv file

In [3]:
# Stack the CSV-derived and DBF-derived frames and give the result a fresh
# 0..n-1 index (the inputs carry their per-file indexes).
rein_all = pd.concat([rein_data, rein_data2], axis=0).reset_index(drop=True)

# Name columns: replace missing values with an empty string, then trim
# surrounding whitespace. Only these four columns are touched.
fillvals = {'LASTNAME': '', 'FIRSTNAME': '', 'MIDNAME': '', 'BUSNAME': ''}
rein_all = rein_all.fillna(value=fillvals)
for col in fillvals:
    rein_all[col] = rein_all[col].str.strip()

# Build the output path from components so the file lands in data/LEIE on
# every OS (a hard-coded '\\' would become part of the file name on POSIX).
out_path = os.path.join(
    parent, 'data', 'LEIE', 'all_reinstate_providers_from_2013_to_2023.csv'
)
# NOTE(review): the index is written as an unnamed first column (to_csv
# default). Left unchanged in case downstream readers rely on that layout.
rein_all.to_csv(out_path)
rein_all.head()

Unnamed: 0,LASTNAME,FIRSTNAME,MIDNAME,BUSNAME,GENERAL,SPECIALTY,UPIN,NPI,DOB,ADDRESS,CITY,STATE,ZIP,EXCLTYPE,EXCLDATE,REINDATE,WAIVERDATE,WVRSTATE
0,ACORD,KIMBERLY,DIANE,,NURSING PROFESSION,NURSE/NURSES AIDE,,0,10/29/83,"225 NW COUCH ST, #450",PORTLAND,OR,97209,1128a3,20101118,20160112,0,
1,ALMARIO,JOEY,RODRIGUEZ,,IND- LIC HC SERV PRO,NURSE/NURSES AIDE,,0,01/01/71,403 JEWEL FLOWER STREET,BAKERSFIELD,CA,93314,1128b4,20140120,20160112,0,
2,BETTS-WALKER,CAROLYN,RENEE,,NURSING PROFESSION,NURSE/NURSES AIDE,,0,02/23/70,109 HUNTERSCOVE TERRACE,HOT SPRINGS NATIONAL,AR,71913,1128a4,20010920,20160107,0,
3,BRADLEY,TAMI,JO,,NURSING PROFESSION,NURSE/NURSES AIDE,,0,01/17/70,"1300 EAST IRON, STE 109",SALINA,KS,67401,1128a3,20100819,20160113,0,
4,CALDWELL,LARRY,VON,,"MEDICAL PRACTICE, MD",GENERAL PRACTICE/FP,,0,01/08/63,858 LULLWATER PARK COURT,ATLANTA,GA,30306,1128b14,20010118,20160106,0,
