In [None]:
# LOAD AND CLEAN PROVIDER INFO

In [1]:
import pandas as pd
import glob
import os
import copy

# Read files with different encodings
def read_csv_with_encodings(file_path):
    # List of common encodings to try
    encodings = ['utf-8', 'latin-1', 'iso-8859-1', 'cp1252', 'utf-16', 'utf-32']
    
    for encoding in encodings:
        try:
            df = pd.read_csv(file_path, encoding=encoding, low_memory = False)
            print(f"Successfully read the file with {encoding} encoding")
            return df
        except UnicodeDecodeError:
            print(f"Failed to read with {encoding} encoding")
    
    print("Could not read the file with any of the common encodings")
    return None

# Define the directory path
directory = '/Users/apple/Documents/APD/provider_info/original/'

# Get all CSV files matching the pattern
files = glob.glob(os.path.join(directory, 'ProviderInfo_*.csv'))

# Dictionaries to store original DataFrames
raw_provider_info = {}

# Process each file
for file_path in files:
    # Extract the year from the filename
    filename = os.path.basename(file_path)
    year = filename.replace('ProviderInfo_', '').replace('.csv', '')
    
    # Read the CSV file
    df = read_csv_with_encodings(file_path)
    
    if df is not None:
        # Generate year column
        df['year'] = year
        
        # Store the original DataFrame
        raw_provider_info[f'raw_pi_{year}'] = df

# Print out the keys of the dictionary to verify
print("Raw dataframes:", raw_provider_info.keys())

Successfully read the file with utf-8 encoding
Failed to read with utf-8 encoding
Successfully read the file with latin-1 encoding
Successfully read the file with utf-8 encoding
Failed to read with utf-8 encoding
Successfully read the file with latin-1 encoding
Failed to read with utf-8 encoding
Successfully read the file with latin-1 encoding
Successfully read the file with utf-8 encoding
Failed to read with utf-8 encoding
Successfully read the file with latin-1 encoding
Raw dataframes: dict_keys(['raw_pi_2016', 'raw_pi_2017', 'raw_pi_2015', 'raw_pi_2020', 'raw_pi_2021', 'raw_pi_2019', 'raw_pi_2018'])


In [3]:
# lowercase for all column names
for key in raw_provider_info:
    raw_provider_info[key].columns = raw_provider_info[key].columns.str.lower()

In [55]:
# Dictionary to store custom tables
clean_provider_info = {}

# Define the columns for each custom table
def create_provider_info_tables(raw_provider_info):
    tables = {}
    
    for key, df in raw_provider_info.items():
        year = key.split('_')[-1]
        
        # List the columns to keep
        # Adjust these column names to match actual data
        columns_to_keep = [
            'provnum', 'federal provider number',
            'provname', 'provider name',
            'address', 'provider address',
            'city', 'provider city',
            'state', 'provider state',
            'zip', 'provider zip code',
            'phone', 'provider phone number',
            'county_ssa', 'provider ssa county code',
            'county_name', 'provider county name',
            'ownership', 'ownership type',
            'bedcert', 'number of certified beds',
            'restot', 'average number of residents per day',
            'overall_rating', 'overall rating',
            'tot_penlty_cnt', 'total number of penalties',
            'rnhrd', 'reported rn staffing hours per resident per day',
            'totlichrd', 'reported licensed staffing hours per resident per day',
            'tothrd', 'reported total nurse staffing hours per resident per day',
            'pthrd', 'reported physical therapist staffing hours per resident per day',
            'year'
        ]
        
        # Only keep columns that exist in the dataframe
        valid_columns = [col for col in columns_to_keep if col in df.columns]
        
        # Create new table with only the columns you need
        if valid_columns:
            tables[f'provider_basic_{year}'] = df[valid_columns].copy()
    
    return tables

provider_info_tables = create_provider_info_tables(raw_provider_info)
clean_provider_info.update(provider_info_tables)

# Print out the keys of the dictionary to verify
print("Custom Tables:", clean_provider_info.keys())

Custom Tables: dict_keys(['provider_basic_2016', 'provider_basic_2017', 'provider_basic_2015', 'provider_basic_2020', 'provider_basic_2021', 'provider_basic_2019', 'provider_basic_2018'])


In [59]:
# Rename 2020 and 2021 file to standard name
for key, df in clean_provider_info.items():
    if key in ['provider_basic_2020', 'provider_basic_2021']:
        clean_provider_info[key] = df.rename(columns={
            'federal provider number': 'provnum',
            'provider name': 'provname',
            'provider address': 'address',
            'provider city': 'city',
            'provider state': 'state',
            'provider zip code': 'zip',
            'provider phone number': 'phone',
            'provider ssa county code': 'county_ssa',
            'provider county name': 'county_name',
            'ownership type': 'ownership',
            'number of certified beds': 'bedcert',
            'average number of residents per day': 'restot',
            'overall rating': 'overall_rating',
            'total number of penalties': 'tot_penlty_cnt',
            'reported rn staffing hours per resident per day': 'rnhrd',
            'reported licensed staffing hours per resident per day': 'totlichrd',
            'reported total nurse staffing hours per resident per day': 'tothrd',
            'reported physical therapist staffing hours per resident per day': 'pthrd'
        })

In [61]:
# Union all file
union_provider_info = pd.concat(clean_provider_info.values(), ignore_index=True)

union_provider_info.info()
union_provider_info.sample(3)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 108637 entries, 0 to 108636
Data columns (total 19 columns):
 #   Column          Non-Null Count   Dtype  
---  ------          --------------   -----  
 0   provnum         108637 non-null  object 
 1   provname        108637 non-null  object 
 2   address         108637 non-null  object 
 3   city            108637 non-null  object 
 4   state           108637 non-null  object 
 5   zip             108637 non-null  int64  
 6   phone           108637 non-null  int64  
 7   county_ssa      108637 non-null  int64  
 8   county_name     108637 non-null  object 
 9   ownership       108636 non-null  object 
 10  bedcert         108637 non-null  int64  
 11  restot          107914 non-null  float64
 12  overall_rating  106945 non-null  float64
 13  tot_penlty_cnt  108637 non-null  int64  
 14  rnhrd           104754 non-null  float64
 15  totlichrd       104754 non-null  float64
 16  tothrd          104754 non-null  float64
 17  pthrd     

Unnamed: 0,provnum,provname,address,city,state,zip,phone,county_ssa,county_name,ownership,bedcert,restot,overall_rating,tot_penlty_cnt,rnhrd,totlichrd,tothrd,pthrd,year
58880,425110,MOUNT PLEASANT MANOR,921 BOWMAN ROAD,MT PLEASANT,SC,29464,8438848903,90,Charleston,For profit - Corporation,132,123.0,3.0,2,0.50594,1.27878,3.07569,0.05622,2020
76074,535057,GOSHEN HEALTHCARE COMMUNITY,2009 LARAMIE STREET,TORRINGTON,WY,82240,3075324038,70,Goshen,For profit - Corporation,103,66.1,2.0,4,0.66508,1.27843,3.64442,0.00436,2021
28865,495218,BRIAN CENTER HEALTH AND REHABILITATION,188 OLD FINCASTLE ROAD,FINCASTLE,VA,24090,5404732288,110,Botetourt,Non profit - Corporation,60,45.0,2.0,0,1.11,5.78333,5.78333,0.15556,2017


In [63]:
# Change data type

# Columns to convert to float
cols_to_convert = ['provnum', 'year']

# Convert specified columns to float
union_provider_info[cols_to_convert] = union_provider_info[cols_to_convert].apply(pd.to_numeric, errors='coerce').astype(float)

union_provider_info.info()
union_provider_info.sample(3)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 108637 entries, 0 to 108636
Data columns (total 19 columns):
 #   Column          Non-Null Count   Dtype  
---  ------          --------------   -----  
 0   provnum         106923 non-null  float64
 1   provname        108637 non-null  object 
 2   address         108637 non-null  object 
 3   city            108637 non-null  object 
 4   state           108637 non-null  object 
 5   zip             108637 non-null  int64  
 6   phone           108637 non-null  int64  
 7   county_ssa      108637 non-null  int64  
 8   county_name     108637 non-null  object 
 9   ownership       108636 non-null  object 
 10  bedcert         108637 non-null  int64  
 11  restot          107914 non-null  float64
 12  overall_rating  106945 non-null  float64
 13  tot_penlty_cnt  108637 non-null  int64  
 14  rnhrd           104754 non-null  float64
 15  totlichrd       104754 non-null  float64
 16  tothrd          104754 non-null  float64
 17  pthrd     

Unnamed: 0,provnum,provname,address,city,state,zip,phone,county_ssa,county_name,ownership,bedcert,restot,overall_rating,tot_penlty_cnt,rnhrd,totlichrd,tothrd,pthrd,year
21566,215325.0,HERITAGE HARBOUR HEALTH AND REHABILITATION CENTER,2700 SOUTH HAVEN ROAD,ANNAPOLIS,MD,21401,4108971300,10,Anne Arundel,For profit - Corporation,154,139.0,3.0,0,0.87158,1.86331,3.2723,0.1259,2017.0
25360,345457.0,BELAIRE HEALTH CARE CENTER,2065 LYON STREET,GASTONIA,NC,28052,7048677300,350,Gaston,For profit - Corporation,80,75.0,2.0,0,0.66067,1.64867,3.542,0.11733,2017.0
24721,335482.0,HERITAGE COMMONS RESIDENTIAL HEALTH CARE,1019 WICKER STREET,TICONDEROGA,NY,12883,5185856771,260,Essex,Non profit - Corporation,84,78.0,3.0,0,1.01603,1.72885,4.10577,0.025,2017.0


In [65]:
# CLEAN COST REPORT

In [67]:
# Redefine the directory path
directory = '/Users/apple/Documents/APD/cost_report'

# Get all CSV files matching the pattern
files = glob.glob(os.path.join(directory, '*_CostReport.csv'))

# Dictionaries to store original DataFrames
raw_cost_report = {}

# Process each file
for file_path in files:
    # Extract the year from the filename
    filename = os.path.basename(file_path)
    year = filename.split('_')[0]
    
    # Read the CSV file
    df = read_csv_with_encodings(file_path)

    
    if df is not None:
        # Generate year column
        df['year'] = year
        
        # Store the original DataFrame
        raw_cost_report[f'raw_cost_{year}'] = df

# Print out the keys of the dictionary to verify
print("Raw dataframes:", raw_cost_report.keys())

Successfully read the file with utf-8 encoding
Successfully read the file with utf-8 encoding
Successfully read the file with utf-8 encoding
Successfully read the file with utf-8 encoding
Successfully read the file with utf-8 encoding
Successfully read the file with utf-8 encoding
Successfully read the file with utf-8 encoding
Raw dataframes: dict_keys(['raw_cost_2021', 'raw_cost_2020', 'raw_cost_2017', 'raw_cost_2018', 'raw_cost_2019', 'raw_cost_2016', 'raw_cost_2015'])


In [68]:
# Lowercase for all column names
for key in raw_cost_report:
    raw_cost_report[key].columns = raw_cost_report[key].columns.str.lower()

In [71]:
# Dictionary to store your custom tables
clean_cost_report = {}

# Define the columns for each custom table
def create_cost_report_tables(raw_cost_report):
    tables = {}
    
    for key, df in raw_cost_report.items():
        year = key.split('_')[-1]
        
        # List the columns to keep
        # Adjust these column names to match actual data
        columns_to_keep = [
            'rpt_rec_num',
            'provider_ccn', 'provider ccn',
            'rural_versus_urban', 'rural versus urban',
            'gross_revenue', 'gross revenue',
            'inpatient_revenue', 'inpatient revenue',
            'net_income', 'net income',
            'net_patient_revenue', 'net patient revenue',
            'number_of_beds', 'number of beds',
            'total_costs', 'total costs',
            'total_income', 'total income',
            'total_salaries_adjusted', 'total salaries (adjusted)',
            'year'
        ]

        
        # Only keep columns that exist in the dataframe
        valid_columns = [col for col in columns_to_keep if col in df.columns]
        
        # Create new table
        if valid_columns:
            tables[f'cost_report_clean_{year}'] = df[valid_columns].copy()
    
    return tables

cost_report_tables = create_cost_report_tables(raw_cost_report)
clean_cost_report.update(cost_report_tables)

# Print out the keys of the dictionary to verify
print("Custom Tables:", clean_cost_report.keys())

Custom Tables: dict_keys(['cost_report_clean_2021', 'cost_report_clean_2020', 'cost_report_clean_2017', 'cost_report_clean_2018', 'cost_report_clean_2019', 'cost_report_clean_2016', 'cost_report_clean_2015'])


In [73]:
# Rename 2020 and 2021 file to standard name
for key, df in clean_cost_report.items():
    if key in ['cost_report_clean_2020', 'cost_report_clean_2021']:
        clean_cost_report[key] = df.rename(columns={
            'provider ccn': 'provider_ccn',
            'rural versus urban': 'rural_versus_urban',
            'gross revenue': 'gross_revenue',
            'inpatient revenue': 'inpatient_revenue',
            'net income': 'net_income',
            'net patient revenue': 'net_patient_revenue',
            'number of beds': 'number_of_beds',
            'total costs': 'total_costs',
            'total income': 'total_income',
            'total salaries (adjusted)': 'total_salaries_adjusted'
        })

In [75]:
# Union all file
union_cost_report = pd.concat(clean_cost_report.values(), ignore_index=True)

union_cost_report.info()
union_cost_report.sample(3)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 106269 entries, 0 to 106268
Data columns (total 12 columns):
 #   Column                   Non-Null Count   Dtype  
---  ------                   --------------   -----  
 0   rpt_rec_num              106269 non-null  int64  
 1   provider_ccn             106269 non-null  int64  
 2   rural_versus_urban       104082 non-null  object 
 3   gross_revenue            103988 non-null  float64
 4   inpatient_revenue        103985 non-null  float64
 5   net_income               89205 non-null   float64
 6   net_patient_revenue      103997 non-null  float64
 7   number_of_beds           103867 non-null  float64
 8   total_costs              104131 non-null  float64
 9   total_income             103981 non-null  float64
 10  total_salaries_adjusted  104196 non-null  float64
 11  year                     106269 non-null  object 
dtypes: float64(8), int64(2), object(2)
memory usage: 9.7+ MB


Unnamed: 0,rpt_rec_num,provider_ccn,rural_versus_urban,gross_revenue,inpatient_revenue,net_income,net_patient_revenue,number_of_beds,total_costs,total_income,total_salaries_adjusted,year
33105,1200836,235606,R,8155408.0,8155408.0,507578.0,6345623.0,53.0,1357850.0,574210.0,2505276.0,2017
46013,1221338,425387,U,12042788.0,12042788.0,-1857789.0,9363882.0,80.0,1800548.0,-1857789.0,4166183.0,2018
32206,1198901,165480,R,6521293.0,6521293.0,-623992.0,5392910.0,78.0,660727.0,-599422.0,3541531.0,2017


In [77]:
# Change data type

# Columns to convert to float
cols_to_convert = ['provider_ccn', 'year']

# Convert specified columns to float
union_cost_report[cols_to_convert] = union_cost_report[cols_to_convert].apply(pd.to_numeric, errors='coerce').astype(float)
union_cost_report.info()
union_cost_report.sample(3)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 106269 entries, 0 to 106268
Data columns (total 12 columns):
 #   Column                   Non-Null Count   Dtype  
---  ------                   --------------   -----  
 0   rpt_rec_num              106269 non-null  int64  
 1   provider_ccn             106269 non-null  float64
 2   rural_versus_urban       104082 non-null  object 
 3   gross_revenue            103988 non-null  float64
 4   inpatient_revenue        103985 non-null  float64
 5   net_income               89205 non-null   float64
 6   net_patient_revenue      103997 non-null  float64
 7   number_of_beds           103867 non-null  float64
 8   total_costs              104131 non-null  float64
 9   total_income             103981 non-null  float64
 10  total_salaries_adjusted  104196 non-null  float64
 11  year                     106269 non-null  float64
dtypes: float64(10), int64(1), object(1)
memory usage: 9.7+ MB


Unnamed: 0,rpt_rec_num,provider_ccn,rural_versus_urban,gross_revenue,inpatient_revenue,net_income,net_patient_revenue,number_of_beds,total_costs,total_income,total_salaries_adjusted,year
103016,1157776,555162.0,U,6139454.0,6139454.0,230407.0,6012909.0,59.0,1420219.0,230407.0,2778769.0,2015.0
25658,1324967,425287.0,U,9131463.0,11940265.0,,1326324.0,88.0,1270666.0,1745468.0,3417068.0,2020.0
61928,1255428,165326.0,R,5324831.0,5324831.0,434432.0,5324831.0,91.0,442005.0,434432.0,2568690.0,2019.0


In [79]:
# LOAD AND CLEAN HD

In [81]:
# Redefine the directory path
directory = '/Users/apple/Documents/APD/HD'

# Get all CSV files matching the pattern
files = glob.glob(os.path.join(directory, 'HD*.csv'))

# Dictionaries to store original DataFrames
raw_HD = {}

# Process each file
for file_path in files:
    # Extract the year from the filename
    filename = os.path.basename(file_path)
    year = filename[2:6]
    
    # Read the CSV file
    df = read_csv_with_encodings(file_path)

    
    if df is not None:
        # Generate year column
        df['year'] = year
        
        # Store the original DataFrame
        raw_HD[f'raw_HD{year}'] = df

# Print out the keys of the dictionary to verify
print("Raw dataframes:", raw_HD.keys())

Successfully read the file with utf-8 encoding
Successfully read the file with utf-8 encoding
Successfully read the file with utf-8 encoding
Successfully read the file with utf-8 encoding
Successfully read the file with utf-8 encoding
Successfully read the file with utf-8 encoding
Successfully read the file with utf-8 encoding
Raw dataframes: dict_keys(['raw_HD2019', 'raw_HD2018', 'raw_HD2020', 'raw_HD2021', 'raw_HD2016', 'raw_HD2017', 'raw_HD2015'])


In [83]:
# Standardize Column Name
for key in raw_HD:
    raw_HD[key].columns = raw_HD[key].columns.str.lower()

for key in raw_HD:
    raw_HD[key] = raw_HD[key].rename(columns = {
        'sum of hd code': 'sum_of_hd_code',
        'count of hd code': 'count_of_hd_code'
    })

In [85]:
# Union all file
union_HD = pd.concat(raw_HD.values(), ignore_index=True)

union_HD.info()
union_HD.sample(3)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 107516 entries, 0 to 107515
Data columns (total 4 columns):
 #   Column            Non-Null Count   Dtype 
---  ------            --------------   ----- 
 0   provnum           107516 non-null  object
 1   sum_of_hd_code    107516 non-null  int64 
 2   count_of_hd_code  107516 non-null  int64 
 3   year              107516 non-null  object
dtypes: int64(2), object(2)
memory usage: 3.3+ MB


Unnamed: 0,provnum,sum_of_hd_code,count_of_hd_code,year
48220,106045,92,23,2021
19815,165562,53,11,2018
52351,245148,342,65,2021


In [87]:
# Change data type

# Columns to convert to float
cols_to_convert = ['provnum', 'year']

# Convert specified columns to float
union_HD[cols_to_convert] = union_HD[cols_to_convert].apply(pd.to_numeric, errors='coerce').astype(float)

union_HD.info()
union_HD.sample(3)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 107516 entries, 0 to 107515
Data columns (total 4 columns):
 #   Column            Non-Null Count   Dtype  
---  ------            --------------   -----  
 0   provnum           105836 non-null  float64
 1   sum_of_hd_code    107516 non-null  int64  
 2   count_of_hd_code  107516 non-null  int64  
 3   year              107516 non-null  float64
dtypes: float64(2), int64(2)
memory usage: 3.3 MB


Unnamed: 0,provnum,sum_of_hd_code,count_of_hd_code,year
48458,115483.0,44,9,2021.0
65977,185230.0,91,21,2016.0
33436,115688.0,26,6,2020.0


In [89]:
# MERGE ALL FILE 

In [91]:
nursing_merge = union_provider_info.merge(union_cost_report, 
                                          left_on=['provnum', 'year'], 
                                          right_on=['provider_ccn', 'year'], 
                                          how='inner') \
               .merge(union_HD, 
                      left_on=['provnum', 'year'], 
                      right_on=['provnum', 'year'], 
                      how='left')
nursing_merge.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 105832 entries, 0 to 105831
Data columns (total 32 columns):
 #   Column                   Non-Null Count   Dtype  
---  ------                   --------------   -----  
 0   provnum                  105832 non-null  float64
 1   provname                 105832 non-null  object 
 2   address                  105832 non-null  object 
 3   city                     105832 non-null  object 
 4   state                    105832 non-null  object 
 5   zip                      105832 non-null  int64  
 6   phone                    105832 non-null  int64  
 7   county_ssa               105832 non-null  int64  
 8   county_name              105832 non-null  object 
 9   ownership                105831 non-null  object 
 10  bedcert                  105832 non-null  int64  
 11  restot                   105260 non-null  float64
 12  overall_rating           104168 non-null  float64
 13  tot_penlty_cnt           105832 non-null  int64  
 14  rnhr

In [105]:
# Remove duplicate rows and columns
nursing_merge = nursing_merge.drop_duplicates()
nursing_merge.drop(['provider_ccn', 'rpt_rec_num'], axis=1, inplace=True)

nursing_merge.info()
pd.set_option('display.max_columns', None)
nursing_merge.sample(3)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 105832 entries, 0 to 105831
Data columns (total 30 columns):
 #   Column                   Non-Null Count   Dtype  
---  ------                   --------------   -----  
 0   provnum                  105832 non-null  int64  
 1   provname                 105832 non-null  object 
 2   address                  105832 non-null  object 
 3   city                     105832 non-null  object 
 4   state                    105832 non-null  object 
 5   zip                      105832 non-null  int64  
 6   phone                    105832 non-null  int64  
 7   county_ssa               105832 non-null  int64  
 8   county_name              105832 non-null  object 
 9   ownership                105831 non-null  object 
 10  bedcert                  105832 non-null  int64  
 11  restot                   105260 non-null  float64
 12  overall_rating           104168 non-null  float64
 13  tot_penlty_cnt           105832 non-null  int64  
 14  rnhr

Unnamed: 0,provnum,provname,address,city,state,zip,phone,county_ssa,county_name,ownership,bedcert,restot,overall_rating,tot_penlty_cnt,rnhrd,totlichrd,tothrd,pthrd,year,rural_versus_urban,gross_revenue,inpatient_revenue,net_income,net_patient_revenue,number_of_beds,total_costs,total_income,total_salaries_adjusted,sum_of_hd_code,count_of_hd_code
58833,525281,BAY AT WATERS EDGE HEALTH AND REHABILITATION (...,3415 N SHERIDAN RD,KENOSHA,WI,53140,2626576175,290,Kenosha,For profit - Corporation,128,68.9,1.0,0,0.5153,1.40834,3.35143,0.06562,2020,U,6372964.0,9613738.0,,-348876.0,128.0,1053630.0,943548.0,2908586.0,209.0,50.0
30858,45191,GOOD SAMARITAN SOCIETY - HOT SPRINGS VILLAGE,121 CORTEZ RD,HOT SPRINGS VILLAGE,AR,71909,5019222000,250,Garland,Non profit - Corporation,50,39.0,3.0,3,0.80385,1.76026,4.43718,0.28974,2015,U,9587571.0,8716194.0,216454.0,7283550.0,68.0,1379367.0,275378.0,2924642.0,99.0,16.0
56298,375427,"FAIRVIEW FELLOWSHIP HOME FOR SENIOR CITIZENS, INC",605 EAST STATE ROAD,FAIRVIEW,OK,73737,5802273783,460,Major,Non profit - Corporation,100,64.1,2.0,0,0.18035,1.30899,4.40002,0.02562,2020,R,,,,-3974785.0,126.0,207049.0,565625.0,1610497.0,146.0,30.0


In [109]:
# Clean data type in merging file

# Convert specified columns to string
cols_to_convert = ['provnum', 'year']
nursing_merge[cols_to_convert] = nursing_merge[cols_to_convert].apply(pd.to_numeric, errors='coerce').astype(int)

nursing_merge.info()
nursing_merge.sample(3)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 105832 entries, 0 to 105831
Data columns (total 30 columns):
 #   Column                   Non-Null Count   Dtype  
---  ------                   --------------   -----  
 0   provnum                  105832 non-null  int64  
 1   provname                 105832 non-null  object 
 2   address                  105832 non-null  object 
 3   city                     105832 non-null  object 
 4   state                    105832 non-null  object 
 5   zip                      105832 non-null  int64  
 6   phone                    105832 non-null  int64  
 7   county_ssa               105832 non-null  int64  
 8   county_name              105832 non-null  object 
 9   ownership                105831 non-null  object 
 10  bedcert                  105832 non-null  int64  
 11  restot                   105260 non-null  float64
 12  overall_rating           104168 non-null  float64
 13  tot_penlty_cnt           105832 non-null  int64  
 14  rnhr

Unnamed: 0,provnum,provname,address,city,state,zip,phone,county_ssa,county_name,ownership,bedcert,restot,overall_rating,tot_penlty_cnt,rnhrd,totlichrd,tothrd,pthrd,year,rural_versus_urban,gross_revenue,inpatient_revenue,net_income,net_patient_revenue,number_of_beds,total_costs,total_income,total_salaries_adjusted,sum_of_hd_code,count_of_hd_code
32652,105754,CONWAY LAKES HEALTH & REHABILITATION CENTER,5201 CURRY FORD ROAD,ORLANDO,FL,32812,4073848838,470,Orange,For profit - Corporation,120,106.0,5.0,0,1.41651,1.96368,4.95142,0.27689,2015,U,17993178.0,17993178.0,1511988.0,15945024.0,120.0,4556599.0,1511988.0,6926944.0,56.0,14.0
4609,175205,"WILSON CARE & REHABILITATION CENTER, LLC","611 31ST STREET, PO BOX 160",WILSON,KS,67490,7856582505,260,Ellsworth,For profit - Corporation,46,39.0,1.0,2,0.90769,1.36795,2.8218,0.02051,2016,R,2676399.0,2676399.0,-606456.0,2063362.0,46.0,527801.0,-606456.0,901834.0,202.0,44.0
19328,165305,CHARITON SPECIALTY CARE,1214 NORTH SEVENTH STREET,CHARITON,IA,50049,6417745921,580,Lucas,Non profit - Corporation,119,57.0,3.0,0,0.27193,1.0614,2.67105,0.09825,2017,R,4037001.0,4037001.0,-247512.0,3470259.0,75.0,464860.0,-247512.0,1627796.0,38.0,9.0
