# Data Cleansing for Health Investment

## Set up data_exploration function 

In [82]:
import pandas as pd
import numpy as np

pd.set_option('display.max_column', None)
pd.set_option('display.float_format', '{:.0f}'.format)

# Data Exploration function
def data_explore(filepath):
    df=pd.read_csv(filepath,encoding='ISO-8859-1')

    print(f"\n\nData dimensions:")
    print(df.shape)

    print(f"\n\nNull values:")
    null_values = df.isnull().sum()
    for i in range(0, len(null_values), 10):  # Adjust 10 to the number of columns per chunk
        print(null_values[i:i+10])  # Display chunks

    print(f"\n\nDuplicated values:")
    print(df.duplicated().sum())

    print(f"\n\nHead:")
    print(df.head()) 

    print(f"\n\nSummary:")
    print(df.info()) 

    print(f"\n\n Descriptive stats:")
    print(df.describe()) 

    return df

## provider_info tables

In [83]:
provider_info_2015=data_explore("C:/Users/nhien/Downloads/HI/ProviderInfo_2015.csv")
provider_info_2016=data_explore("C:/Users/nhien/Downloads/HI/ProviderInfo_2016.csv")
provider_info_2017=data_explore("C:/Users/nhien/Downloads/HI/ProviderInfo_2017.csv")
provider_info_2018=data_explore("C:/Users/nhien/Downloads/HI/ProviderInfo_2018.csv")
provider_info_2019=data_explore("C:/Users/nhien/Downloads/HI/ProviderInfo_2019.csv")
provider_info_2020=data_explore("C:/Users/nhien/Downloads/HI/ProviderInfo_2020.csv")
provider_info_2021=data_explore("C:/Users/nhien/Downloads/HI/ProviderInfo_2021.csv")



Data dimensions:
(15661, 79)


Null values:
provnum        0
PROVNAME       0
ADDRESS        0
CITY           0
STATE          0
ZIP            0
PHONE          0
COUNTY_SSA     0
COUNTY_NAME    0
OWNERSHIP      0
dtype: int64
BEDCERT                 0
RESTOT                  0
CERTIFICATION           0
INHOSP                  0
LBN                     0
PARTICIPATION_DATE      0
CCRC_FACIL              0
SFF                     0
CHOW_LAST_12MOS         0
resfamcouncil         608
dtype: int64
sprinkler_status          0
overall_rating          135
overall_rating_fn     15526
survey_rating           135
survey_rating_fn      15526
quality_rating          174
quality_rating_fn     15487
staffing_rating         459
staffing_rating_fn    15202
RN_staffing_rating      459
dtype: int64
rn_staffing_rating_fn    15202
STAFFING_FLAG            15252
PT_STAFFING_FLAG         15248
AIDHRD                     409
VOCHRD                     409
RNHRD                      409
TOTLICHRD          

### transformation function

In [84]:
def process_csv_file(df):
    # Define the required columns
    required_cols = ["provnum", "provname", "state", "address", "city", "zip", 
                     "ownership", "restot", "overall_rating", 
                     "survey_rating", "quality_rating", "staffing_rating", "adj_total"]
    
    # Convert column names to lowercase
    df.columns = [col.lower() for col in df.columns]
    
    # Keep only the required columns
    df = df[required_cols]

    return df


cleaned_data_2015= process_csv_file(provider_info_2015)
cleaned_data_2016= process_csv_file(provider_info_2016)
cleaned_data_2017= process_csv_file(provider_info_2017)
cleaned_data_2018= process_csv_file(provider_info_2018)
cleaned_data_2019= process_csv_file(provider_info_2019)


In [85]:
def process_csv_file(filename):
    # Define the required columns
    required_cols = ["provnum", "provname", "state", "address", "city", "zip", 
                     "ownership", "restot", "overall_rating", 
                     "survey_rating", "quality_rating", "staffing_rating", "adj_total"]
    
    # Define column mapping
    column_mapping = {
        "Federal Provider Number": "provnum", 
        "Provider Name": "provname", 
        "Provider State": "state", 
        "Provider Address": "address", 
        "Provider City": "city", 
        "Provider Zip Code": "zip", 
        "Ownership Type": "ownership", 
        "Average Number of Residents per Day": "restot", 
        "Overall Rating": "overall_rating", 
        "Health Inspection Rating": "survey_rating", 
        "QM Rating": "quality_rating", 
        "Staffing Rating": "staffing_rating", 
        "Adjusted Total Nurse Staffing Hours per Resident per Day": "adj_total"
    }

    # Rename columns based on the mapping
    filename = filename.rename(columns=column_mapping)
    
    # Convert column names to lowercase
    filename.columns = [col.lower() for col in filename.columns]
    
    # Keep only the required columns
    filename = filename[required_cols]
    
    return filename

# Process the 2020 and 2021 files
cleaned_data_2020 = process_csv_file(provider_info_2020)
cleaned_data_2021 = process_csv_file(provider_info_2021)

# Add a year column to each dataset
cleaned_data_2015['year'] = 2015
cleaned_data_2016['year'] = 2016
cleaned_data_2017['year'] = 2017
cleaned_data_2018['year'] = 2018
cleaned_data_2019['year'] = 2019
cleaned_data_2020['year'] = 2020
cleaned_data_2021['year'] = 2021

# Concatenate all datasets into a single dataframe
provider_info = pd.concat([cleaned_data_2015, cleaned_data_2016, cleaned_data_2017, cleaned_data_2018,cleaned_data_2019, cleaned_data_2020, cleaned_data_2021], ignore_index=True)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  cleaned_data_2015['year'] = 2015
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  cleaned_data_2016['year'] = 2016
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  cleaned_data_2017['year'] = 2017
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer]

In [147]:
# check 
print(provider_info.head())
print(provider_info.info())
print(provider_info.isnull().sum())

# print(provider_info.shape)
# print(provider_info['provnum'].unique())
# pd.set_option('display.max_rows', None)  # Show all rows
# print(provider_info['provnum'].value_counts())

  provnum                                     provname state  \
0  015009                     BURNS NURSING HOME, INC.    AL   
1  015010                COOSA VALLEY NURSING FACILITY    AL   
2  015012                   HIGHLANDS HEALTH AND REHAB    AL   
3  015014  EASTVIEW REHABILITATION & HEALTHCARE CENTER    AL   
4  015015                PLANTATION MANOR NURSING HOME    AL   

                                    address          city    zip  \
0                      701 MONROE STREET NW  RUSSELLVILLE  35653   
1                   315 WEST HICKORY STREET     SYLACAUGA  35150   
2                       380 WOODS COVE ROAD    SCOTTSBORO  35768   
3                  7755 FOURTH AVENUE SOUTH    BIRMINGHAM  35206   
4  6450 OLD TUSCALOOSA HIGHWAY   P O BOX 97      MC CALLA  35111   

                  ownership  restot  overall_rating  survey_rating  \
0  For profit - Corporation      52               4              4   
1        Non profit - Other      79               2              1

In [None]:
# save
#  provider_info.to_csv('provider_info.csv', index=False, encoding='utf-8')

## health_deficiencies table

In [87]:
# best to go 1 by 1 
health_deficiencies_2015=data_explore("C:/Users/nhien/Downloads/HI/HealthDeficiencies_2015.csv")
health_deficiencies_2016=data_explore("C:/Users/nhien/Downloads/HI/HealthDeficiencies_2016.csv")
health_deficiencies_2017=data_explore("C:/Users/nhien/Downloads/HI/HealthDeficiencies_2017.csv")
health_deficiencies_2018=data_explore("C:/Users/nhien/Downloads/HI/HealthDeficiencies_2018.csv")
health_deficiencies_2019=data_explore("C:/Users/nhien/Downloads/HI/HealthDeficiencies_2019.csv")
health_deficiencies_2020=data_explore("C:/Users/nhien/Downloads/HI/HealthDeficiencies_2020.csv")
health_deficiencies_2021=data_explore("C:/Users/nhien/Downloads/HI/HealthDeficiencies_2021.csv") #the scope is numeric... 


  df=pd.read_csv(filepath,encoding='ISO-8859-1')




Data dimensions:
(479167, 18)


Null values:
provnum               0
Provname              0
address               0
city                  0
state                 0
zip                   0
survey_date_output    0
SurveyType            0
defpref               0
tag                   0
dtype: int64
tag_desc        0
scope           0
defstat         0
statdate     7620
cycle           0
standard        0
complaint       0
filedate        0
dtype: int64


Duplicated values:
0


Head:
  provnum                  Provname               address          city state  \
0  015009  BURNS NURSING HOME, INC.  701 MONROE STREET NW  RUSSELLVILLE    AL   
1  015009  BURNS NURSING HOME, INC.  701 MONROE STREET NW  RUSSELLVILLE    AL   
2  015009  BURNS NURSING HOME, INC.  701 MONROE STREET NW  RUSSELLVILLE    AL   
3  015009  BURNS NURSING HOME, INC.  701 MONROE STREET NW  RUSSELLVILLE    AL   
4  015009  BURNS NURSING HOME, INC.  701 MONROE STREET NW  RUSSELLVILLE    AL   

     zip survey_date_outp

  df=pd.read_csv(filepath,encoding='ISO-8859-1')




Data dimensions:
(329324, 18)


Null values:
provnum               0
Provname              0
address               0
city                  0
state                 0
zip                   0
survey_date_output    0
SurveyType            0
defpref               0
tag                   0
dtype: int64
tag_desc        0
scope           0
defstat         0
statdate     3566
cycle           0
standard        0
complaint       0
filedate        0
dtype: int64


Duplicated values:
0


Head:
  provnum                  Provname               address          city state  \
0  015009  BURNS NURSING HOME, INC.  701 MONROE STREET NW  RUSSELLVILLE    AL   
1  015009  BURNS NURSING HOME, INC.  701 MONROE STREET NW  RUSSELLVILLE    AL   
2  015009  BURNS NURSING HOME, INC.  701 MONROE STREET NW  RUSSELLVILLE    AL   
3  015009  BURNS NURSING HOME, INC.  701 MONROE STREET NW  RUSSELLVILLE    AL   
4  015009  BURNS NURSING HOME, INC.  701 MONROE STREET NW  RUSSELLVILLE    AL   

     zip survey_date_outp

  df=pd.read_csv(filepath,encoding='ISO-8859-1')




Data dimensions:
(338451, 18)


Null values:
provnum               0
Provname              0
address               0
city                  0
state                 0
zip                   0
survey_date_output    0
SurveyType            0
defpref               0
tag                   0
dtype: int64
tag_desc        0
scope           0
defstat         0
statdate     2116
cycle           0
standard        0
complaint       0
filedate        0
dtype: int64


Duplicated values:
0


Head:
  provnum                       Provname                  address  \
0  015009       BURNS NURSING HOME, INC.     701 MONROE STREET NW   
1  015009       BURNS NURSING HOME, INC.     701 MONROE STREET NW   
2  015009       BURNS NURSING HOME, INC.     701 MONROE STREET NW   
3  015010  COOSA VALLEY NURSING FACILITY  315 WEST HICKORY STREET   
4  015010  COOSA VALLEY NURSING FACILITY  315 WEST HICKORY STREET   

           city state    zip survey_date_output SurveyType defpref  tag  \
0  RUSSELLVILLE    AL 

  df=pd.read_csv(filepath,encoding='ISO-8859-1')




Data dimensions:
(350887, 19)


Null values:
PROVNUM                  0
PROVNAME                 0
ADDRESS                  0
CITY                     0
STATE                    0
ZIP                      0
SURVEY_DATE_OUTPUT       0
HLTHSRVY_POST20171128    0
SURVEYTYPE               0
DEFPREF                  0
dtype: int64
TAG             0
TAG_DESC        0
SCOPE           0
DEFSTAT         0
STATDATE     1524
cycle           0
Standard        0
Complaint       0
FILEDATE        0
dtype: int64


Duplicated values:
0


Head:
  PROVNUM                       PROVNAME                  ADDRESS  \
0  015009       BURNS NURSING HOME, INC.     701 MONROE STREET NW   
1  015009       BURNS NURSING HOME, INC.     701 MONROE STREET NW   
2  015009       BURNS NURSING HOME, INC.     701 MONROE STREET NW   
3  015010  COOSA VALLEY NURSING FACILITY  315 WEST HICKORY STREET   
4  015010  COOSA VALLEY NURSING FACILITY  315 WEST HICKORY STREET   

           CITY STATE    ZIP SURVEY_DATE_OUTPUT H

  df=pd.read_csv(filepath,encoding='ISO-8859-1')




Data dimensions:
(362931, 19)


Null values:
PROVNUM               0
PROVNAME              0
ADDRESS               0
CITY                  0
STATE                 0
ZIP                   0
SURVEY_DATE_OUTPUT    0
SURVEYTYPE            0
DEFPREF               0
CATEGORY              0
dtype: int64
TAG             0
TAG_DESC        0
SCOPE           0
DEFSTAT         0
STATDATE     1701
cycle           0
Standard        0
Complaint       0
FILEDATE        0
dtype: int64


Duplicated values:
0


Head:
  PROVNUM                        PROVNAME                 ADDRESS  \
0  015009        BURNS NURSING HOME, INC.    701 MONROE STREET NW   
1  015009        BURNS NURSING HOME, INC.    701 MONROE STREET NW   
2  015009        BURNS NURSING HOME, INC.    701 MONROE STREET NW   
3  015010  COOSA VALLEY HEALTHCARE CENTER  260 WEST WALNUT STREET   
4  015010  COOSA VALLEY HEALTHCARE CENTER  260 WEST WALNUT STREET   

           CITY STATE    ZIP SURVEY_DATE_OUTPUT SURVEYTYPE DEFPREF  \
0  RUSSEL

  df=pd.read_csv(filepath,encoding='ISO-8859-1')




Data dimensions:
(360443, 20)


Null values:
Federal Provider Number    0
Provider Name              0
Provider Address           0
Provider City              0
Provider State             0
Provider Zip Code          0
Survey Date                0
Survey Type                0
Deficiency Prefix          0
Deficiency Category        0
dtype: int64
Deficiency Tag Number       0
Deficiency Description      0
Scope Severity Code         0
Deficiency Corrected        0
Correction Date           804
Inspection Cycle            0
Standard Deficiency         0
Complaint Deficiency        0
Location                    0
Processing Date             0
dtype: int64


Duplicated values:
0


Head:
  Federal Provider Number                   Provider Name  \
0                  015009        BURNS NURSING HOME, INC.   
1                  015009        BURNS NURSING HOME, INC.   
2                  015009        BURNS NURSING HOME, INC.   
3                  015010  COOSA VALLEY HEALTHCARE CENTER   
4

  df=pd.read_csv(filepath,encoding='ISO-8859-1')




Data dimensions:
(370746, 21)


Null values:
Federal Provider Number    0
Provider Name              0
Provider Address           0
Provider City              0
Provider State             0
Provider Zip Code          0
Survey Date                0
Survey Type                0
Deficiency Prefix          0
Deficiency Category        0
dtype: int64
Deficiency Tag Number                         0
Deficiency Description                        0
Scope Severity Code                           0
Deficiency Corrected                          0
Correction Date                            2001
Inspection Cycle                              0
Standard Deficiency                           0
Complaint Deficiency                          0
Infection Control Inspection Deficiency       0
Location                                      0
dtype: int64
Processing Date    0
dtype: int64


Duplicated values:
0


Head:
  Federal Provider Number                   Provider Name  \
0                   15009      

In [88]:
def process_hd(df):
  
    required_cols = ["provnum", "scope"]
    
    # Convert column names to lowercase
    df.columns = [col.lower() for col in df.columns]
    
    # Keep only the required columns
    df = df[required_cols]
   
    return df


# Process the 2020 and 2021 files
hd_2015 = process_hd(health_deficiencies_2015)
hd_2016 = process_hd(health_deficiencies_2016)
hd_2017 = process_hd(health_deficiencies_2017)
hd_2018 = process_hd(health_deficiencies_2018)
hd_2019 = process_hd(health_deficiencies_2019)

def process_hd(df):    
    # Convert column names to lowercase and strip spaces
    df.columns = [col.lower().strip() for col in df.columns]

    # Define column mapping
    column_mapping = {
        "federal provider number": "provnum", 
        "scope severity code": "scope"
    }
    
    # Define the required columns
    required_cols = ["provnum", "scope"]
    
    # Define column mapping
    # column_mapping = {
    #     "Federal Provider Number": "provnum", 
    #     "Scope Severity Code": "scope"
    # }

   # Rename columns based on mapping
    df.rename(columns=column_mapping, inplace=True)
    
    
    # Keep only the required columns
    df = df[required_cols]
    
    return df

hd_2020 = process_hd(health_deficiencies_2020)
hd_2021 = process_hd(health_deficiencies_2021)

# Add a year column to each dataset
hd_2015['year'] = 2015
hd_2016['year'] = 2016
hd_2017['year'] = 2017
hd_2018['year'] = 2018
hd_2019['year'] = 2019
hd_2020['year'] = 2020
hd_2021['year'] = 2021

# Concatenate all datasets into a single dataframe
health_deficiency = pd.concat([
    hd_2015, 
    hd_2016, 
    hd_2017, 
    hd_2018,
    hd_2019, 
    hd_2020, 
    hd_2021], ignore_index=True)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  hd_2015['year'] = 2015
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  hd_2016['year'] = 2016
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  hd_2017['year'] = 2017
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the cave

In [None]:
# check 

health_deficiency['PK']=  range(1, len(health_deficiency) + 1)

# Mapping dictionary
scope_mapping = {chr(65 + i): i + 1 for i in range(12)}

# Apply mapping
health_deficiency['scope_num'] = health_deficiency['scope'].map(scope_mapping)


print(health_deficiency.shape)
print(health_deficiency.head())
print(health_deficiency.info())
print(health_deficiency.isnull().sum())

# pd.set_option('display.max_rows', None)  # Show all rows
# print(health_deficiency['provnum'].value_counts())

(2591949, 5)
  provnum scope  year  PK scope_num
0  015009     D  2015   1         4
1  015009     D  2015   2         4
2  015009     F  2015   3         6
3  015009     D  2015   4         4
4  015009     E  2015   5         5
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2591949 entries, 0 to 2591948
Data columns (total 5 columns):
 #   Column     Dtype 
---  ------     ----- 
 0   provnum    object
 1   scope      object
 2   year       int64 
 3   PK         int64 
 4   scope_num  object
dtypes: int64(2), object(3)
memory usage: 98.9+ MB
None
provnum      0
scope        0
year         0
PK           0
scope_num    0
dtype: int64


In [None]:
# make sure the scope column is char(1) length - found out about this error when importing to SQL
health_deficiency['scope'] = health_deficiency['scope'].astype(str).str.strip().str[:1]

print(health_deficiency.shape)
print(health_deficiency.head())
print(health_deficiency.info())
print(health_deficiency.isnull().sum())

(2591949, 5)
  provnum scope  year  PK scope_num
0  015009     D  2015   1         4
1  015009     D  2015   2         4
2  015009     F  2015   3         6
3  015009     D  2015   4         4
4  015009     E  2015   5         5
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2591949 entries, 0 to 2591948
Data columns (total 5 columns):
 #   Column     Dtype 
---  ------     ----- 
 0   provnum    object
 1   scope      object
 2   year       int64 
 3   PK         int64 
 4   scope_num  object
dtypes: int64(2), object(3)
memory usage: 98.9+ MB
None
provnum      0
scope        0
year         0
PK           0
scope_num    0
dtype: int64


In [104]:
health_deficiency.loc[health_deficiency['year'] == 2021, 'scope_num'] = health_deficiency['scope']
print(health_deficiency[health_deficiency['scope_num'].isna()])


Empty DataFrame
Columns: [provnum, scope, year, PK, scope_num]
Index: []


  health_deficiency.loc[health_deficiency['year'] == 2021, 'scope_num'] = health_deficiency['scope']


In [106]:
# save
health_deficiency.to_csv('health_deficiencies.csv', index=False, encoding='utf-8')

## quality table

In [92]:
 
quality_2015=data_explore("C:/Users/nhien/Downloads/HI/QualityMsrMDS_2015.csv")
quality_2016=data_explore("C:/Users/nhien/Downloads/HI/QualityMsrMDS_2016.csv")
quality_2017=data_explore("C:/Users/nhien/Downloads/HI/QualityMsrMDS_2017.csv")
quality_2018=data_explore("C:/Users/nhien/Downloads/HI/QualityMsrMDS_2018.csv")
quality_2019=data_explore("C:/Users/nhien/Downloads/HI/QualityMsrMDS_2019.csv")
quality_2020=data_explore("C:/Users/nhien/Downloads/HI/QualityMsrMDS_2020.csv")
quality_2021=data_explore("C:/Users/nhien/Downloads/HI/QualityMsrMDS_2021.csv")


  df=pd.read_csv(filepath,encoding='ISO-8859-1')




Data dimensions:
(281898, 22)


Null values:
provnum                 0
provname                0
address                 0
city                    0
state                   0
zip                     0
msr_cd                  0
msr_descr               0
stay_type               0
q1_measure_score    56134
dtype: int64
q1_measure_fn             225764
q2_measure_score           54979
q2_measure_fn             226919
q3_measure_score           54526
q3_measure_fn             227372
measure_score_3qtr_avg     15637
score3qtr_fn              266261
five_star_msr                  0
q1_quarter                     0
q2_quarter                     0
dtype: int64
q3_quarter    0
filedate      0
dtype: int64


Duplicated values:
0


Head:
  provnum                  provname               address          city state  \
0   15009  BURNS NURSING HOME, INC.  701 MONROE STREET NW  RUSSELLVILLE    AL   
1   15009  BURNS NURSING HOME, INC.  701 MONROE STREET NW  RUSSELLVILLE    AL   
2   15009  BURNS N

  df=pd.read_csv(filepath,encoding='ISO-8859-1')




Data dimensions:
(328671, 25)


Null values:
PROVNUM                 0
PROVNAME                0
ADDRESS                 0
CITY                    0
STATE                   0
ZIP                     0
MSR_CD                  0
MSR_DESCR               0
STAY_TYPE               0
Q1_MEASURE_SCORE    48371
dtype: int64
Q1_MEASURE_FN             280300
Q2_MEASURE_SCORE           48069
Q2_MEASURE_FN             280602
Q3_MEASURE_SCORE           47283
Q3_MEASURE_FN             281388
Q4_MEASURE_SCORE           47021
Q4_MEASURE_FN             281650
MEASURE_SCORE_4QTR_AVG     15471
SCORE4QTR_FN              313200
FIVE_STAR_MSR                  0
dtype: int64
Q1_QUARTER    0
Q2_QUARTER    0
Q3_QUARTER    0
Q4_QUARTER    0
FILEDATE      0
dtype: int64


Duplicated values:
0


Head:
  PROVNUM                  PROVNAME               ADDRESS          CITY STATE  \
0  015009  BURNS NURSING HOME, INC.  701 MONROE STREET NW  RUSSELLVILLE    AL   
1  015009  BURNS NURSING HOME, INC.  701 MONROE STR

  df=pd.read_csv(filepath,encoding='ISO-8859-1')




Data dimensions:
(328566, 25)


Null values:
PROVNUM                 0
PROVNAME                0
ADDRESS                 0
CITY                    0
STATE                   0
ZIP                     0
MSR_CD                  0
MSR_DESCR               0
STAY_TYPE               0
Q1_MEASURE_SCORE    47725
dtype: int64
Q1_MEASURE_FN             280841
Q2_MEASURE_SCORE           47773
Q2_MEASURE_FN             280793
Q3_MEASURE_SCORE           46730
Q3_MEASURE_FN             281836
Q4_MEASURE_SCORE           46478
Q4_MEASURE_FN             282088
MEASURE_SCORE_4QTR_AVG     15090
SCORE4QTR_FN              313476
FIVE_STAR_MSR                  0
dtype: int64
Q1_QUARTER    0
Q2_QUARTER    0
Q3_QUARTER    0
Q4_QUARTER    0
FILEDATE      0
dtype: int64


Duplicated values:
0


Head:
  PROVNUM                  PROVNAME               ADDRESS          CITY STATE  \
0  015009  BURNS NURSING HOME, INC.  701 MONROE STREET NW  RUSSELLVILLE    AL   
1  015009  BURNS NURSING HOME, INC.  701 MONROE STR

  df=pd.read_csv(filepath,encoding='ISO-8859-1')




Data dimensions:
(276264, 23)


Null values:
Federal Provider Number        0
Provider Name                  0
Provider Address               0
Provider City                  0
Provider State                 0
Provider Zip Code              0
Measure Code                   0
Measure Description            0
Resident type                  0
Q1 Measure Score           37468
dtype: int64
Footnote for Q1 Measure Score               238796
Q2 Measure Score                             37208
Footnote for Q2 Measure Score               239056
Q3 Measure Score                             37112
Footnote for Q3 Measure Score               239152
Q4 Measure Score                             37501
Footnote for Q4 Measure Score               238763
Four Quarter Average Score                   12131
Footnote for Four Quarter Average Score     264133
Used in Quality Measure Five Star Rating         0
dtype: int64
Measure Period     0
Location           0
Processing Date    0
dtype: int64


Duplicate

  df=pd.read_csv(filepath,encoding='ISO-8859-1')




Data dimensions:
(274752, 23)


Null values:
Federal Provider Number        0
Provider Name                  0
Provider Address               0
Provider City                  0
Provider State                 0
Provider Zip Code              0
Measure Code                   0
Measure Description            0
Resident type                  0
Q1 Measure Score           45997
dtype: int64
Footnote for Q1 Measure Score               228755
Q2 Measure Score                             48872
Footnote for Q2 Measure Score               225880
Q3 Measure Score                             48652
Footnote for Q3 Measure Score               226100
Q4 Measure Score                             46448
Footnote for Q4 Measure Score               228304
Four Quarter Average Score                   13084
Footnote for Four Quarter Average Score     261668
Used in Quality Measure Five Star Rating         0
dtype: int64
Measure Period     0
Location           0
Processing Date    0
dtype: int64


Duplicate

In [93]:
def process_qa(df):
  
    required_cols = [ "provnum", "msr_cd", "msr_descr", "avg_score"]
    
    # Convert column names to lowercase
    df.columns = [col.lower() for col in df.columns]
    df.rename(columns={"measure_score_3qtr_avg":"avg_score"},inplace=True)
    # Keep only the required columns
    df = df[required_cols]
   
    return df


# Process the 2020 and 2021 files
qa_2015 = process_qa(quality_2015)

def process_qa(df):
  
    required_cols = [ "provnum", "msr_cd", "msr_descr", "avg_score"]
    
    # Convert column names to lowercase
    df.columns = [col.lower() for col in df.columns]
    df.rename(columns={"measure_score_4qtr_avg":"avg_score"},inplace=True)
    # Keep only the required columns
    df = df[required_cols]
   
    return df


# Process the 2020 and 2021 files
qa_2016 = process_qa(quality_2016)
qa_2017 = process_qa(quality_2017)
qa_2018 = process_qa(quality_2018)
qa_2019 = process_qa(quality_2019)

def process_qa(df):    
    # Convert column names to lowercase and strip spaces
    df.columns = [col.lower().strip() for col in df.columns]

    # Define column mapping
    column_mapping = {
        "federal provider number": "provnum", 
        "measure code": "msr_cd",
        "measure description": "msr_descr",
        "four quarter average score": "avg_score"
    }
    
    # Define the required columns
    required_cols = [ "provnum", "msr_cd", "msr_descr", "avg_score"]
    
   # Rename columns based on mapping
    df.rename(columns=column_mapping, inplace=True)
    
    # Keep only the required columns
    df = df[required_cols]
    
    return df

qa_2020 = process_qa(quality_2020)
qa_2021 = process_qa(quality_2021)

# Add a year column to each dataset
qa_2015['year'] = 2015
qa_2016['year'] = 2016
qa_2017['year'] = 2017
qa_2018['year'] = 2018
qa_2019['year'] = 2019
qa_2020['year'] = 2020
qa_2021['year'] = 2021

# Concatenate all datasets into a single dataframe
quality = pd.concat([
    qa_2015, 
    qa_2016, 
    qa_2017, 
    qa_2018,
    qa_2019, 
    qa_2020, 
    qa_2021], ignore_index=True)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  qa_2015['year'] = 2015
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  qa_2016['year'] = 2016
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  qa_2017['year'] = 2017
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the cave

In [95]:
# check 
quality['PK']=  range(1, len(quality) + 1)
print((quality).shape)
print((quality).head())
print((quality).info())
print(quality.isnull().sum())

# pd.set_option('display.max_rows', None)  # Show all rows
# print(quality['provnum'].value_counts())

(2111616, 6)
  provnum  msr_cd                                          msr_descr  \
0   15009     401  Percent of Long Stay Residents Whose Need for ...   
1   15009     402  Percent of Long Stay Residents Who Self Report...   
2   15009     403  Percent of High Risk Long Stay Residents With ...   
3   15009     404  Percent of Long Stay Residents Who Lose Too Mu...   
4   15009     405  Percent of Low Risk Long Stay Residents Who Lo...   

   avg_score  year  PK  
0         16  2015   1  
1         12  2015   2  
2          3  2015   3  
3         14  2015   4  
4         30  2015   5  
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2111616 entries, 0 to 2111615
Data columns (total 6 columns):
 #   Column     Dtype  
---  ------     -----  
 0   provnum    object 
 1   msr_cd     int64  
 2   msr_descr  object 
 3   avg_score  float64
 4   year       int64  
 5   PK         int64  
dtypes: float64(1), int64(3), object(2)
memory usage: 96.7+ MB
None
provnum          0
msr_cd       

In [144]:
# save
quality.to_csv('quality.csv', index=False, encoding='utf-8')

## penalties table

In [62]:
# best to go 1 by 1 
penalties_2015=data_explore("C:/Users/nhien/Downloads/HI/Penalties_2015.csv")
penalties_2016=data_explore("C:/Users/nhien/Downloads/HI/Penalties_2016.csv")
penalties_2017=data_explore("C:/Users/nhien/Downloads/HI/Penalties_2017.csv")
penalties_2018=data_explore("C:/Users/nhien/Downloads/HI/Penalties_2018.csv")
penalties_2019=data_explore("C:/Users/nhien/Downloads/HI/Penalties_2019.csv")
penalties_2020=data_explore("C:/Users/nhien/Downloads/HI/Penalties_2020.csv")
penalties_2021=data_explore("C:/Users/nhien/Downloads/HI/Penalties_2021.csv")




Data dimensions:
(6478, 12)


Null values:
provnum              0
provname             0
address              0
city                 0
state                0
zip                  0
pnlty_date           0
pnlty_type           0
fine_amt          1398
payden_strt_dt    5080
dtype: int64
payden_days    5080
filedate          0
dtype: int64


Duplicated values:
266


Head:
   provnum                                         provname  \
0    15019  MERRY WOOD LODGE CARE AND REHABILITATION CENTER   
1    15037            WOODLEY MANOR HEALTH & REHABILITATION   
2    15053                     CLEBURNE COUNTY NURSING HOME   
3    15053                     CLEBURNE COUNTY NURSING HOME   
4    15060        TERRACE OAKS CARE & REHABILITATION CENTER   

                       address        city state    zip  pnlty_date  \
0                  P O BOX 130      ELMORE    AL  36025  2014-10-02   
1            3312 WOODLEY ROAD  MONTGOMERY    AL  36116  2015-05-21   
2           122 BROCKFORD ROAD    

In [97]:
def process_pe(df):
  
    required_cols = [ "provnum", "pnlty_type", "fine_amt", "payden_days"]
    
    # Convert column names to lowercase
    df.columns = [col.lower() for col in df.columns]
    
    # Keep only the required columns
    df = df[required_cols]
   
    return df


# Process the 2020 and 2021 files
pe_2015 = process_pe(penalties_2015)
pe_2016 = process_pe(penalties_2016)
pe_2017 = process_pe(penalties_2017)
pe_2018 = process_pe(penalties_2018)
pe_2019 = process_pe(penalties_2019)

def process_pe(df):    
    # Convert column names to lowercase and strip spaces
    df.columns = [col.lower().strip() for col in df.columns]

    # Define column mapping
    column_mapping = {
        "federal provider number": "provnum", 
        "penalty type": "pnlty_type",
        "fine amount": "fine_amt",
        "payment denial length in days": "payden_days"
    }
    
    # Define the required columns
    required_cols = [ "provnum", "pnlty_type", "fine_amt", "payden_days"]
    
   # Rename columns based on mapping
    df.rename(columns=column_mapping, inplace=True)
    
    # Keep only the required columns
    df = df[required_cols]
    
    return df

pe_2020 = process_pe(penalties_2020)
pe_2021 = process_pe(penalties_2021)

# Add a year column to each dataset
pe_2015['year'] = 2015
pe_2016['year'] = 2016
pe_2017['year'] = 2017
pe_2018['year'] = 2018
pe_2019['year'] = 2019
pe_2020['year'] = 2020
pe_2021['year'] = 2021

# Concatenate all datasets into a single dataframe
penalties = pd.concat([
    pe_2015, 
    pe_2016, 
    pe_2017, 
    pe_2018,
    pe_2019, 
    pe_2020, 
    pe_2021], ignore_index=True)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pe_2015['year'] = 2015
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pe_2016['year'] = 2016
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pe_2017['year'] = 2017
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the cave

In [99]:
# check 
penalties['PK']=  range(1, len(penalties) + 1)
print((penalties).shape)
print((penalties).tail())
print((penalties).info())
print(penalties.isnull().sum())

# pd.set_option('display.max_rows', None)  # Show all rows
# print(penalties['provnum'].value_counts())

(71488, 6)
      provnum pnlty_type  fine_amt  payden_days  year     PK
71483  676499       Fine      1300          NaN  2021  71484
71484  676499       Fine      1625          NaN  2021  71485
71485  676499       Fine      1950          NaN  2021  71486
71486  676499       Fine      2275          NaN  2021  71487
71487  686124       Fine     13065          NaN  2021  71488
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 71488 entries, 0 to 71487
Data columns (total 6 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   provnum      71488 non-null  object 
 1   pnlty_type   71488 non-null  object 
 2   fine_amt     60632 non-null  float64
 3   payden_days  10856 non-null  float64
 4   year         71488 non-null  int64  
 5   PK           71488 non-null  int64  
dtypes: float64(2), int64(2), object(2)
memory usage: 3.3+ MB
None
provnum            0
pnlty_type         0
fine_amt       10856
payden_days    60632
year               0
PK     

In [145]:
# save
penalties.to_csv('penalties.csv', index=False, encoding='utf-8')

## cost_report table

In [132]:
# CostReport_2015= data_explore("C:/Users/nhien/Downloads/HI/2015_CostReport.csv")
# CostReport_2016= data_explore("C:/Users/nhien/Downloads/HI/2016_CostReport.csv")
# CostReport_2017= data_explore("C:/Users/nhien/Downloads/HI/2017_CostReport.csv")
# CostReport_2018= data_explore("C:/Users/nhien/Downloads/HI/2018_CostReport.csv")
# CostReport_2019= data_explore("C:/Users/nhien/Downloads/HI/2019_CostReport.csv")
CostReport_2020= data_explore("C:/Users/nhien/Downloads/HI/2020_CostReport.csv")
CostReport_2021= data_explore("C:/Users/nhien/Downloads/HI/2021_CostReport.csv")



Data dimensions:
(14949, 122)


Null values:
rpt_rec_num               0
Provider CCN              0
Facility Name             0
Street Address            1
City                      0
State Code                0
Zip Code                  0
County                    1
Medicare CBSA Number    201
Rural versus Urban      212
dtype: int64
Fiscal Year Begin Date        200
Fiscal Year End Date          200
Type of Control                35
Total Days Title V          14928
Total Days Title XVIII        222
Total Days Title XIX         1432
Total Days Other              242
Total Days Total              204
Number of Beds                237
Total Bed Days Available      233
dtype: int64
Total Discharges Title V                  14924
Total Discharges Title XVIII                293
Total Discharges Title XIX                 1672
Total Discharges Title Other                458
Total Discharges Total                      229
SNF Average Length of Stay Title V        14930
SNF Average Length 

In [None]:
# # Many companies have multiple values reporterd per year 
# # pd.set_option('display.max_rows', None)  # Show all rows
print(CostReport_2021['provnum'].value_counts())


provnum
115360    3
115272    3
375551    3
375540    3
195543    3
375389    3
676441    3
315370    2
555025    2
115714    2
265803    2
235132    2
425138    2
315349    2
265846    2
465150    2
375331    2
365730    2
425298    2
425314    2
375229    2
675326    2
465172    2
155505    2
165557    2
676244    2
315228    2
105706    2
105702    2
675254    2
495213    2
325132    2
675017    2
315362    2
335560    2
395041    2
265536    2
365424    2
675977    2
165260    2
345460    2
055968    2
375404    2
365562    2
305005    2
366389    2
115624    2
155247    2
055799    2
165453    2
495277    2
265460    2
235021    2
425132    2
365713    2
555745    2
676235    2
045259    2
495107    2
495216    2
265516    2
365196    2
365773    2
145881    2
145004    2
365178    2
215258    2
676367    2
056304    2
495193    2
495202    2
365329    2
365425    2
505262    2
315454    2
045431    2
445530    2
056435    2
555118    2
265349    2
105428    2
676094    2
395471  

In [None]:
def process_cr(df):
  
    # Ensure provnum is char(6)
    df['Provider_CCN']=df['Provider_CCN'].astype(str).str.zfill(6) 

    # Fiscal year is char so adding a"Date_Difference" columnn to account for later
    df['Fiscal_Year_Begin_Date'] = pd.to_datetime(df['Fiscal_Year_Begin_Date'])
    df['Fiscal_Year_End_Date'] = pd.to_datetime(df['Fiscal_Year_End_Date'])
    # Calculate date difference for each row
    df['fiscal_period'] = (df['Fiscal_Year_End_Date'] - df['Fiscal_Year_Begin_Date']).dt.days

    required_cols = ['provnum',
                     'rural_versus_urban',
                     'accounts_receivable',
                     'cash_on_hand_and_in_banks',
                     'gross_revenue',
                     'inpatient_revenue',
                     'less_total_operating_expense',
                     'net_income',
                     'net_income_from_patients',
                     'net_patient_revenue',
                     'overhead_non_salary_costs',
                     'snf_days_total',
                     'salaries_wages_and_fees_payable',
                     'total_liab_and_fund_balances',
                     'total_salaries_adjusted',
                     'total_fund_balances',
                     'total_liabilities',
                     'fiscal_period']
    
    # Convert column names to lowercase
    df.columns = [col.lower() for col in df.columns]
    
        # Define column mapping
    column_mapping = {
        "provider_ccn": "provnum" 
    }
    
   # Rename columns based on mapping
    df.rename(columns=column_mapping, inplace=True)

    # Keep only the required columns
    df = df[required_cols]
   
    return df


# Process the 2020 and 2021 files
cr_2015 = process_cr(CostReport_2015)
cr_2016 = process_cr(CostReport_2016)
cr_2017 = process_cr(CostReport_2017)
cr_2018 = process_cr(CostReport_2018)
cr_2019 = process_cr(CostReport_2019)

def process_cr(df):
  


    # Fiscal year is char so adding a"Date_Difference" columnn to account for later
    df['Fiscal Year Begin Date'] = pd.to_datetime(df['Fiscal Year Begin Date'])
    df['Fiscal Year End Date'] = pd.to_datetime(df['Fiscal Year End Date'])
    # Calculate date difference for each row
    df['fiscal_period'] = (df['Fiscal Year End Date'] - df['Fiscal Year Begin Date']).dt.days

    required_cols = ['provnum',
                     'rural_versus_urban',
                     'accounts_receivable',
                     'cash_on_hand_and_in_banks',
                     'gross_revenue',
                     'inpatient_revenue',
                     'less_total_operating_expense',
                     'net_income',
                     'net_income_from_patients',
                     'net_patient_revenue',
                     'overhead_non_salary_costs',
                     'snf_days_total',
                     'salaries_wages_and_fees_payable',
                     'total_liab_and_fund_balances',
                     'total_salaries_adjusted',
                     'total_fund_balances',
                     'total_liabilities',
                     'fiscal_period']
    
    # Define column mapping
    column_mapping = {'Provider CCN':'provnum','Rural versus Urban':'rural_versus_urban','SNF Days Total':'accounts_receivable','Overhead Non-Salary Costs':'cash_on_hand_and_in_banks','Total Salaries (adjusted)':'gross_revenue','Cash on hand and in banks':'inpatient_revenue','Accounts Receivable':'less_total_operating_expense','Salaries, wages, and fees payable':'net_income','Total liabilities':'net_income_from_patients','Total fund balances':'net_patient_revenue','Total Liabilities and fund balances':'overhead_non_salary_costs','Inpatient Revenue':'snf_days_total','Gross Revenue':'salaries_wages_and_fees_payable','Net Patient Revenue':'total_liab_and_fund_balances','Less Total Operating Expense':'total_salaries_adjusted','Net Income from service to patients':'total_fund_balances','Net Income':'total_liabilities'
  
    }

   # Rename columns based on mapping
    df.rename(columns=column_mapping, inplace=True)
    
    # Ensure provnum is char(6)
    df['provnum']=df['provnum'].astype(str).str.zfill(6) 

    # Keep only the required columns
    df = df[required_cols]
   
    return df

cr_2020 = process_cr(CostReport_2020)
cr_2021 = process_cr(CostReport_2021)

# Add a year column to each dataset
cr_2015['year'] = 2015
cr_2016['year'] = 2016
cr_2017['year'] = 2017
cr_2018['year'] = 2018
cr_2019['year'] = 2019
cr_2020['year'] = 2020
cr_2021['year'] = 2021

# Concatenate all datasets into a single dataframe
cost_report = pd.concat([
    cr_2015, 
    cr_2016, 
    cr_2017, 
    cr_2018,
    cr_2019, 
    cr_2020, 
    cr_2021], ignore_index=True)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  cr_2015['year'] = 2015
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  cr_2016['year'] = 2016
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  cr_2017['year'] = 2017
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the cave

In [142]:
# # check 
cost_report['PK']=  range(1, len(cost_report) + 1)
print((cost_report).shape)
print((cost_report).tail())
print((cost_report).info())
print(cost_report.isnull().sum())

# pd.set_option('display.max_rows', None)  # Show all rows
# print(cost_report['provnum'].value_counts())

#here it can be seen that some companies have data shown more than once each year :) 

(106269, 20)
       provnum rural_versus_urban  accounts_receivable  \
106264  505453                  U                26216   
106265  675765                  U                37312   
106266  675932                  U                33049   
106267  675956                  U                65634   
106268  676481                  R                40669   

        cash_on_hand_and_in_banks  gross_revenue  inpatient_revenue  \
106264                    4088554        5450566            1700677   
106265                    8387886        6524045             468688   
106266                    2799973        3286574             206613   
106267                    9249788        6038339              25794   
106268                    4401409        5135327             951341   

        less_total_operating_expense  net_income  net_income_from_patients  \
106264                        943077      669994                   2574757   
106265                       2122183      384826       

In [146]:
# save
cost_report.to_csv('cost_report.csv', index=False, encoding='utf-8')

In [None]:
## Export some data to understand the financial statement
# # understand = pd.DataFrame(CostReport_2021.head(2))
# understand


Unnamed: 0,rpt_rec_num,Provider CCN,Facility Name,Street Address,City,State Code,Zip Code,County,Medicare CBSA Number,Rural versus Urban,Fiscal Year Begin Date,Fiscal Year End Date,Type of Control,Total Days Title V,Total Days Title XVIII,Total Days Title XIX,Total Days Other,Total Days Total,Number of Beds,Total Bed Days Available,Total Discharges Title V,Total Discharges Title XVIII,Total Discharges Title XIX,Total Discharges Title Other,Total Discharges Total,SNF Average Length of Stay Title V,SNF Average Length of Stay Title XVIII,SNF Average Length of Stay Title XIX,SNF Average Length of Stay Total,SNF Admissions Title V,SNF Admissions Title XVIII,SNF Admissions Title XIX,SNF Admissions Other,SNF Admissions Total,SNF Days Title V,SNF Days Title XVIII,SNF Days Title XIX,SNF Days Other,SNF Days Total,SNF Number of Beds,SNF Bed Days Available,SNF Discharges Title V,SNF Discharges Title XVIII,SNF Discharges Title XIX,SNF Discharges Title Other,SNF Discharges Total,NF Number of Beds,NF Bed Days Available,NF Days Title V,NF Days Title XIX,NF Days Other,NF Days Total,NF Discharges Title V,NF Discharges Title XIX,NF Discharges Title Other,NF Discharges Total,NF Average Length of Stay Title V,NF Average Length of Stay Title XIX,NF Average Length of Stay Total,NF Admissions Title V,NF Admissions Title XIX,NF Admissions Other,NF Admissions Total,Total RUG Days,Total Salaries From Worksheet A,Overhead Non-Salary Costs,Total Charges,Total Costs,Wage-related Costs (core),Total Salaries (adjusted),Contract Labor,Cash on hand and in banks,Temporary Investments,Notes Receivable,Accounts Receivable,Less: Allowances for uncollectible notes and accounts receivable,Inventory,Prepaid expenses,Other current assets,Total Current Assets,Land,Land improvements,Buildings,Leasehold improvements,Fixed equipment,Major movable equipment,Minor equipment depreciable,Total fixed Assets,Investments,Other Assets,Total other Assets,Total Assets,Accounts payable,"Salaries, wages, and fees payable",Payroll taxes payable,Notes and Loans Payable (short term),Deferred income,Other current liabilities,Total current liabilities,Mortgage payable,Notes Payable,Unsecured Loans,Other long term liabilities,Total long term liabilities,Total liabilities,General fund balance,Total fund balances,Total Liabilities and fund balances,Total General Inpatient Care Services Revenue,Inpatient Revenue,Outpatient Revenue,Gross Revenue,Less Contractual Allowance and discounts on patients' accounts,Net Patient Revenue,Less Total Operating Expense,Net Income from service to patients,Total Other Income,Total Income,Net Income,Inpatient PPS Amount,Nursing and Allied Health Education Activities,Allowable Bad Debts
0,1289595,305005,GREENBRIAR HEALTHCARE,55 HARRIS ROAD,NASHUA,NH,3062,HILLSBOROUGH,31700,U,11/01/2020,12/31/2020,4,,1050,8023.0,831,9904,290,17690,,12,27.0,14,53,,88,297.0,187,,43,6.0,19,68,,1050,8023.0,831,9904,290,17690,,12,27.0,14,53,,,,,,,,,,,,,,,,,,,1061192,1376924,353058,359371,228470,1061192,261178.0,428009,,,2541125,,,477910,,3447044,,,,105561,,415025,,453506,,196059.0,196059.0,4096609,1255196,122150,47651,2012871.0,,,3437868,,,,115177.0,115177.0,3553045,543564,543564,4096609,2278000,2631058,,2631058,-105598,2736656,2438116,298540,,298540,298540,583183,,
1,1290123,396142,PARAMOUNT NURSING & REHAB SOUTH HILL,100 KNOEDLER RD,PITTSBURGH,PA,15236,ALLEGHENY,38300,U,01/01/2021,03/01/2021,4,,244,,3,247,17,1530,,13,,1,14,,19,,18,,13,,1,14,,244,,3,247,17,1530,,13,,1,14,,,,,,,,,,,,,,,,,,,274267,205747,54506,99841,38855,274267,,78344,,,152240,,4924.0,417,,235925,,,,82714,,9926,,62549,,,,298474,1515,51556,1775,,170160.0,1627961.0,1852967,,,,,,1852967,-1554493,-1554493,298474,317627,385115,,385115,120004,265111,473381,-208270,,-208270,-208270,117121,,
