### 1. Load and combine all monthly exclusion csv files under the folder '~/data/LEIE/all_exclu_from_2013'

In [1]:
import pandas as pd
import os
import glob
# Locate the folder holding the monthly exclusion CSV files, relative to the
# parent of the current working directory. os.path.join is used instead of
# hard-coded '\\' separators so the path also resolves on macOS/Linux.
parent = os.path.dirname(os.getcwd())

path = os.path.join(parent, 'CMS_datasets', 'data', 'LEIE', 'all_exclu_from_2013')
# glob does not guarantee any ordering; sort so the combined row order is
# reproducible from run to run.
csv_files = sorted(glob.glob(os.path.join(path, "*.csv")))
if not csv_files:
    # Fail with a message that names the folder instead of letting pd.concat
    # raise its generic "No objects to concatenate" error.
    raise FileNotFoundError('No CSV files found in {!r}'.format(path))

# Read every monthly CSV file and stack them into a single DataFrame.
# Each file keeps its own row index here.
exclu_data = pd.concat([pd.read_csv(f, encoding="utf-8") for f in csv_files])
print(exclu_data.shape)

(19759, 18)


### 2. Load and combine all monthly exclusion dbf files in the subfolders starting with 'sanc' under the same folder

In [2]:
from simpledbf import Dbf5
# Sub-folders whose names start with 'sanc' hold the monthly DBF files.
# os.path.join keeps the pattern portable (no hard-coded '\\' separators).
path2 = os.path.join(parent, 'CMS_datasets', 'data', 'LEIE', 'all_exclu_from_2013', 'sanc*')
# glob does not guarantee any ordering; sort so the result is reproducible.
dbf_files = sorted(glob.glob(os.path.join(path2, "*.DBF")))
if not dbf_files:
    # Without this guard an empty frame would fail later with a KeyError on 'NPI'.
    raise FileNotFoundError(
        'No DBF files found matching {!r}'.format(os.path.join(path2, "*.DBF"))
    )

# loop over the list of dbf files
dbf_frames = []
for f in dbf_files:
    temp_df = Dbf5(f, codec='utf-8').to_dataframe()
    # Column names may carry surrounding whitespace; normalise them.
    temp_df.columns = temp_df.columns.str.strip()
    dbf_frames.append(temp_df)

# Concatenate once rather than re-concatenating inside the loop, which copies
# every accumulated row on each iteration. Frames are stacked last-file-first,
# i.e. each file's rows sit ahead of the rows of the files read before it.
exclu_data2 = pd.concat(dbf_frames[::-1])

# In these columns a missing value, or a value whose first character is '0',
# is replaced by the sentinel '0' so the column can be cast to an integer.
for col in ('NPI', 'WAIVERDATE'):
    needs_sentinel = exclu_data2[col].isna() | (exclu_data2[col].str[0] == '0')
    exclu_data2.loc[needs_sentinel, col] = '0'

# Cast the identifier / ZIP / date columns to 64-bit integers.
exclu_data2 = exclu_data2.astype({
    'NPI': 'int64',
    'ZIP': 'int64',
    'WAIVERDATE': 'int64',
    'EXCLDATE': 'int64',
    'REINDATE': 'int64',
})

print(exclu_data2.shape)

(11183, 18)


### 3. Clean and concatenate exclusion monthly data, then output to a csv file

In [3]:
# Stack the CSV-sourced and DBF-sourced records and give the result a fresh
# 0..n-1 index (the per-file indexes are discarded).
exclu_all = pd.concat([exclu_data, exclu_data2], axis=0).reset_index(drop=True)

# Name columns: replace missing values with '' and trim surrounding whitespace.
fillvals = {'LASTNAME': '', 'FIRSTNAME': '', 'MIDNAME': '', 'BUSNAME': ''}
exclu_all = exclu_all.fillna(value=fillvals)
for name_col in fillvals:
    exclu_all[name_col] = exclu_all[name_col].str.strip()

# Build the output path with os.path.join so it is valid on any OS; with
# hard-coded '\\' separators a POSIX system would create a file whose name
# literally contains backslashes.
out_csv = os.path.join(
    parent, 'CMS_datasets', 'data', 'LEIE',
    'all_exclusion_providers_from_2013_to_2023.csv',
)
# The row index is still written as the first column, so the file layout is unchanged.
exclu_all.to_csv(out_csv)
print(exclu_all.shape)
exclu_all.head()

(30942, 18)


Unnamed: 0,LASTNAME,FIRSTNAME,MIDNAME,BUSNAME,GENERAL,SPECIALTY,UPIN,NPI,DOB,ADDRESS,CITY,STATE,ZIP,EXCLTYPE,EXCLDATE,REINDATE,WAIVERDATE,WVRSTATE
0,,,,AMERICAN SLEEP INSTITUTE,OTHER BUSINESS,CLINIC,,1629125307,,"7150 W 20TH AVENUE, STE 510",HIALEAH,FL,33016,1128b7,20160115,0,0,
1,,,,AMERICAN THERAPEUTIC CORP,OTHER BUSINESS,CLINIC,,1215968847,,1801 NE 2ND AVENUE,MIAMI,FL,33132,1128b7,20160115,0,0,
2,,,,"DENNIS B JAFFE, D M D, PC",OTHER BUSINESS,DENTAL PRACTICE,,0,,"98 BROAD STREET, SW",ATLANTA,GA,30303,1128b8,20160218,0,0,
3,,,,MEDLINK PROFESSIONAL MANAGEMEN,OTHER BUSINESS,CLINIC,,0,,1809 NE 2ND AVENUE,MIAMI,FL,33132,1128b7,20160115,0,0,
4,,,,QUALITY HOME HEALTH CARE INC,OTHER BUSINESS,HOME HEALTH AGENCY,,1427275718,,810 CEDAR PARKWAY,SCHEREVILLE,IN,46375,1128b5,20160218,0,0,
