# Data Collection

In [1]:
import pandas as pd
import numpy as np
from datetime import date

In [2]:
df = pd.read_csv('Dialysis_Facility_Compare_-_Listing_by_Facility.csv')

# Rename columns

In [3]:
df.columns = [col.title() for col in df.columns]

In [4]:
df = df.rename(columns= {'# Of Dialysis Stations' : 'Number Of Dialysis Stations', 
                        'Crownweb Date' : 'CROWNWeb Date',
                        'Strr Date' : 'STrR Date', 
                        'Percentage Of Medicare Patients With Hgb<10 G/Dl' : 'Percentage Of Medicare Patients With Hgb < 10 g/dL', 
                        'Hgb<10 Data Availability Code' : 'Hgb < 10 Data Availability Code', 
                        'Percentage Of Medicare Patients With Hgb>12 G/Dl' : 'Percentage Of Medicare Patients With Hgb > 12 g/dL', 
                        'Percent Of Adult Hd Patients With Kt/V >= 1.2' : 'Percent Of Adult HD Patients With Kt/V >= 1.2', 
                        'Adult Hd Kt/V Data Availability Code' : 'Adult HD Kt/V Data Availability Code', 
                        'Percentage Of Adult Pd Pts With Kt/V >= 1.7' : 'Percentage Of Adult PD PTS With Kt/V >= 1.7',
                        'Adult Pd Kt/V Data Availability Code' : 'Adult PD Kt/V Data Availability Code', 
                        'Percentage Of Pediatric Hd Patients With Kt/V >= 1.2' : 'Percentage Of Pediatric HD Patients With Kt/V >= 1.2', 
                        'Pediatric Hd Kt/V Data Availability Code' : 'Pediatric HD Kt/V Data Availability Code', 
                        'Number Of Adult Hd Patients With Kt/V Data' : 'Number Of Adult HD Patients With Kt/V Data', 
                        'Number Of Adult Hd Patient-Months With Kt/V Data' : 'Number Of Adult HD Patient-Months With Kt/V Data', 
                        'Number Of Adult Pd Patients With Kt/V Data' : 'Number Of Adult PD Patients With Kt/V Data', 
                        'Number Of Adult Pd Patient-Months With Kt/V Data' : 'Number Of Adult PD Patient-Months With Kt/V Data', 
                        'Number Of Pediatric Hd Patients With Kt/V Data' : 'Number Of Pediatric HD Patients With Kt/V Data', 
                        'Number Of Pediatric Hd Patient-Months With Kt/V Data' : 'Number Of Pediatric HD Patient-Months With Kt/V Data', 
                        'Percentage Of Adult Patients With Hypercalcemia (Serum Calcium Greater Than 10.2 Mg/Dl)' : 'Percentage Of Adult Patients With Hypercalcemia (Serum Calcium Greater Than 10.2 mg/dL)', 
                        'Number Of Patient-Months In Serum Phosphorus Summary ' : 'Number Of Patient-Months In Serum Phosphorus Summary', 
                        'Serum Phosphorus Data Availability Code ' : 'Serum Phosphorus Data Availability Code',
                        'Percentage Of Adult Patients With Serum Phosphorus Less Than 3.5 Mg/Dl' : 'Percentage Of Adult Patients With Serum Phosphorus Less Than 3.5 mg/dL',
                        'Percentage Of Adult Patients With Serum Phosphorus Between 3.5-4.5 Mg/Dl' : 'Percentage Of Adult Patients With Serum Phosphorus Between 3.5 - 4.5 mg/dL', 
                        'Percentage Of Adult Patients With Serum Phosphorus Between 4.6-5.5 Mg/Dl' : 'Percentage Of Adult Patients With Serum Phosphorus Between 4.6 - 5.5 mg/dL', 
                        'Percentage Of Adult Patients With Serum Phosphorus Between 5.6-7.0 Mg/Dl' : 'Percentage Of Adult Patients With Serum Phosphorus Between 5.6 - 7.0 mg/dL', 
                        'Percentage Of Adult Patients With Serum Phosphorus Greater Than 7.0 Mg/Dl' : 'Percentage Of Adult Patients With Serum Phosphorus Greater Than 7.0 mg/dL',                         
                        'Shr Date' : 'SHR Date', 
                        'Srr Date' : 'SRR Date', 
                        'Smr Date' : 'SMR Date', 
                        'Patient Hospital Readmission Category' : 'Patient Hospital Readmission Category Text', 
                        'Number Of Pediatric Pd Patients With Kt/V Data' : 'Number Of Pediatric PD Patients With Kt/V Data', 
                        'Pediatric Pd Kt/V Data Availability Code' : 'Pediatric PD Kt/V Data Availability Code', 
                        'Number Of Pediatric Pd Patient-Months With Kt/V Data' : 'Number Of Pediatric PD Patient-Months With Kt/V Data', 
                        'Percentage Of Pediatric Pd Patients With Kt/V>=1.8' : 'Percentage Of Pediatric PD Patients With Kt/V >= 1.8', 
                        'Sir Date' : 'SIR Date', 
                        'Sir: Upper Confidence Limit (97.5%)' : 'SIR: Upper Confidence Limit (97.5%)', 
                        'Sir: Lower Confidence Limit (2.5%)' : 'SIR: Lower Confidence Limit (2.5%)', 
                        'Number Of Patient Months In Long Term Catheter Summary' : 'Number Of Patient-Months In Long Term Catheter Summary', 
                        'Number Of Patients In Npcr Summary' : 'Number Of Patients In nPCR Summary', 
                        'Number Of Patient-Months In Npcr Summary' : 'Number Of Patient-Months In nPCR Summary', 
                        'Npcr Data Availability Code' : 'nPCR Data Availability Code', 
                        'Percentage Of Pediatric Hd Patients With Npcr' : 'Percentage Of Pediatric HD Patients With nPCR', 
                        'Date_Swr' : 'SWR Date', 
                        'Swr Category Text' : 'SWR Category Text', 
                        '95% C.I. (Upper Limit) For Swr' : '95% C.I. (Upper Limit) For SWR', 
                        '95% C.I. (Lower Limit) For Swr' : '95% C.I. (Lower Limit) For SWR', 
                        'Number Of Patients In This Facility For Swr' : 'Number Of Patients In This Facility For SWR',  
                        'Pppw Category Text' : 'PPPW Category Text', 
                        '95% C.I. (Upper Limit) For Pppw' : '95% C.I. (Upper Limit) For PPPW', 
                        '95% C.I. (Lower Limit) For Pppw' : '95% C.I. (Lower Limit) For PPPW', 
                        'Number Of Patients For Pppw' : 'Number Of Patients For PPPW'
                       })

# Correct dtypes

In [5]:
isnumeric = ['Five Star', 
             'Percentage Of Medicare Patients With Hgb < 10 g/dL', 
             'Percentage Of Medicare Patients With Hgb > 12 g/dL', 
             'Number Of Dialysis Patients With Hgb Data', 
             'Number Of Patients Included In The Transfusion Summary', 
             'Percent Of Adult HD Patients With Kt/V >= 1.2', 
             'Percentage Of Adult PD PTS With Kt/V >= 1.7', 
             'Percentage Of Pediatric HD Patients With Kt/V >= 1.2', 
             'Number Of Adult HD Patients With Kt/V Data', 
             'Number Of Adult HD Patient-Months With Kt/V Data', 
             'Number Of Adult PD Patients With Kt/V Data', 
             'Number Of Adult PD Patient-Months With Kt/V Data', 
             'Number Of Pediatric HD Patients With Kt/V Data', 
             'Number Of Pediatric HD Patient-Months With Kt/V Data', 
             'Number Of Patients In Hypercalcemia Summary', 
             'Number Of Patient-Months In Hypercalcemia Summary', 
             'Percentage Of Adult Patients With Hypercalcemia (Serum Calcium Greater Than 10.2 mg/dL)', 
             'Number Of Patients In Serum Phosphorus Summary', 
             'Number Of Patient-Months In Serum Phosphorus Summary', 
             'Percentage Of Adult Patients With Serum Phosphorus Less Than 3.5 mg/dL', 
             'Percentage Of Adult Patients With Serum Phosphorus Between 3.5 - 4.5 mg/dL', 
             'Percentage Of Adult Patients With Serum Phosphorus Between 4.6 - 5.5 mg/dL', 
             'Percentage Of Adult Patients With Serum Phosphorus Between 5.6 - 7.0 mg/dL', 
             'Percentage Of Adult Patients With Serum Phosphorus Greater Than 7.0 mg/dL', 
             'Number Of Patients Included In Hospitalization Summary', 
             'Number Of Hospitalizations Included In Hospital Readmission Summary', 
             'Number Of Patients Included In Survival Summary', 
             'Mortality Rate (Facility)', 
             'Mortality Rate: Upper Confidence Limit (97.5%)', 
             'Mortality Rate: Lower Confidence Limit (2.5%)', 
             'Readmission Rate (Facility)', 
             'Readmission Rate: Upper Confidence Limit (97.5%)', 
             'Readmission Rate: Lower Confidence Limit (2.5%)', 
             'Hospitalization Rate (Facility)', 
             'Hospitalization Rate: Upper Confidence Limit (97.5%)', 
             'Hospitalization Rate: Lower Confidence Limit (2.5%)', 
             'Number Of Pediatric PD Patients With Kt/V Data', 
             'Number Of Pediatric PD Patient-Months With Kt/V Data', 
             'Percentage Of Pediatric PD Patients With Kt/V >= 1.8', 
             'Standard Infection Ratio', 
             'SIR: Upper Confidence Limit (97.5%)', 
             'SIR: Lower Confidence Limit (2.5%)', 
             'Transfusion Rate (Facility)', 
             'Transfusion Rate: Upper Confidence Limit (97.5%)', 
             'Transfusion Rate: Lower Confidence Limit (2.5%)', 
             'Number Of Patients Included In Fistula Summary', 
             'Fistula Rate (Facility)', 
             'Fistula Rate: Upper Confidence Limit (97.5%)', 
             'Fistula Rate: Lower Confidence Limit (2.5%)', 
             'Number Of Patients In Long Term Catheter Summary', 
             'Number Of Patient-Months In Long Term Catheter Summary', 
             'Percentage Of Adult Patients With Long Term Catheter In Use', 
             'Number Of Patients In nPCR Summary', 
             'Number Of Patient-Months In nPCR Summary', 
             'Percentage Of Pediatric HD Patients With nPCR', 
             '95% C.I. (Upper Limit) For SWR', 
             '95% C.I. (Lower Limit) For SWR', 
             'Number Of Patients In This Facility For SWR', 
             'Standardized First Kidney Transplant Waitlist Ratio', 
             '95% C.I. (Upper Limit) For PPPW', 
             '95% C.I. (Lower Limit) For PPPW', 
             'Number Of Patients For PPPW', 
             'Percentage Of Prevalent Patients Waitlisted']

In [6]:
for col in  df[isnumeric]:
    df[col] = pd.to_numeric(df[col], errors='coerce')

In [7]:
isdateframe = ['Five Star Date', 'Claims Date', 'CROWNWeb Date', 'STrR Date', 'SHR Date', 'SRR Date', 'SMR Date', 'SIR Date', 'SWR Date']

In [8]:
isdate = ['Certification Or Recertification Date']

# Missing Values

In [9]:
df.replace(to_replace=['Not Available'], value=np.nan, inplace=True)

In [10]:
availabilitycode = ['Five Star Data Availability Code', 
                    'Hgb < 10 Data Availability Code', 
                    'Hgb > 12 Data Availability Code', 
                    'Patient Transfusion Data Availability Code', 
                    'Adult HD Kt/V Data Availability Code', 
                    'Adult PD Kt/V Data Availability Code', 
                    'Pediatric HD Kt/V Data Availability Code', 
                    'Hypercalcemia Data Availability Code', 
                    'Serum Phosphorus Data Availability Code', 
                    'Patient Hospitalization Data Availability Code', 
                    'Patient Hospital Readmission Data Availability Code', 
                    'Patient Survival Data Availability Code', 
                    'Pediatric PD Kt/V Data Availability Code', 
                    'Patient Infection Data Availability Code', 
                    'Fistula Data Availability Code', 
                    'Long Term Catheter Data Availability Code', 
                    'nPCR Data Availability Code', 
                    'Patient Transplant Waitlist Data Availability Code', 
                    'Patient Prevalent Transplant Waitlist Data Availability Code']

In [11]:
df[availabilitycode] = df[availabilitycode].replace(to_replace=[1], value=np.nan)

# Data Definition

### Head & Tail

In [12]:
df.iloc[:, :60].head().T

Unnamed: 0,0,1,2,3,4
Provider Number,42592,52761,72549,82524,82527
Network,13,18,1,4,4
Facility Name,DCI - LITTLE ROCK RENAL SERVICES LLC,DAVITA-PREMIER DIALYSIS CENTER,WALLINGFORD DIALYSIS CARE LLC,"DSI LAUREL DIALYSIS, LLC","FRESENIUS MEDICAL CARE SOUTHERN DELAWARE, LLC"
Five Star Date,01/01/2015 - 12/31/2018,01/01/2015 - 12/31/2018,01/01/2015 - 12/31/2018,01/01/2015 - 12/31/2018,01/01/2015 - 12/31/2018
Five Star,2,3,5,4,5
Five Star Data Availability Code,,,,,
Address Line 1,1910 JOHN BARROW RD,7612 ATLANTIC AVENUE,720 N MAIN STREET EXT,LAUREL SQUARE SHOPPING CENTER,9115 ANTIQUE ALLEY
Address Line 2,,,SUITE 3,"30214 SUSSEX HIGHWAY, UNIT #4",
City,LITTLE ROCK,CUDAHY,WALLINGFORD,LAUREL,BRIDGEVILLE
State,AR,CA,CT,DE,DE


In [13]:
df.iloc[:, 60:].tail().T

Unnamed: 0,7621,7622,7623,7624,7625
Patient Hospitalization Category Text,Worse than Expected,As Expected,,As Expected,As Expected
Patient Hospitalization Data Availability Code,,,258,,
Patient Hospital Readmission Category Text,As Expected,As Expected,,As Expected,As Expected
Patient Hospital Readmission Data Availability Code,,,258,,
Patient Survival Category Text,As Expected,As Expected,,As Expected,As Expected
Patient Survival Data Availability Code,,,258,,
Number Of Patients Included In Hospitalization Summary,64,62,,55,101
Number Of Hospitalizations Included In Hospital Readmission Summary,121,110,,59,123
Number Of Patients Included In Survival Summary,243,223,,222,448
Mortality Rate (Facility),29.4,16.1,,18.4,23.5


### Shape

In [14]:
df.shape

(7626, 119)

### Column Names

In [15]:
for col in df.columns:
    print(col)

Provider Number
Network
Facility Name
Five Star Date
Five Star
Five Star Data Availability Code
Address Line 1
Address Line 2
City
State
Zip
County
Phone Number
Profit Or Non-Profit
Chain Owned
Chain Organization
Late Shift
Number Of Dialysis Stations
Offers In-Center Hemodialysis
Offers Peritoneal Dialysis
Offers Home Hemodialysis Training
Certification Or Recertification Date
Claims Date
CROWNWeb Date
STrR Date
Percentage Of Medicare Patients With Hgb < 10 g/dL
Hgb < 10 Data Availability Code
Percentage Of Medicare Patients With Hgb > 12 g/dL
Hgb > 12 Data Availability Code
Number Of Dialysis Patients With Hgb Data
Patient Transfusion Data Availability Code
Patient Transfusion Category Text
Number Of Patients Included In The Transfusion Summary
Percent Of Adult HD Patients With Kt/V >= 1.2
Adult HD Kt/V Data Availability Code
Percentage Of Adult PD PTS With Kt/V >= 1.7
Adult PD Kt/V Data Availability Code
Percentage Of Pediatric HD Patients With Kt/V >= 1.2
Pediatric HD Kt/V Data Ava

### Data Types

In [16]:
df.iloc[:, :60].info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7626 entries, 0 to 7625
Data columns (total 60 columns):
 #   Column                                                                                   Non-Null Count  Dtype  
---  ------                                                                                   --------------  -----  
 0   Provider Number                                                                          7626 non-null   int64  
 1   Network                                                                                  7626 non-null   int64  
 2   Facility Name                                                                            7626 non-null   object 
 3   Five Star Date                                                                           7626 non-null   object 
 4   Five Star                                                                                6725 non-null   float64
 5   Five Star Data Availability Code                              

In [17]:
df.iloc[:, 60:].info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7626 entries, 0 to 7625
Data columns (total 59 columns):
 #   Column                                                               Non-Null Count  Dtype  
---  ------                                                               --------------  -----  
 0   Patient Hospitalization Category Text                                6857 non-null   object 
 1   Patient Hospitalization Data Availability Code                       769 non-null    float64
 2   Patient Hospital Readmission Category Text                           6806 non-null   object 
 3   Patient Hospital Readmission Data Availability Code                  820 non-null    float64
 4   Patient Survival Category Text                                       6701 non-null   object 
 5   Patient Survival Data Availability Code                              925 non-null    float64
 6   Number Of Patients Included In Hospitalization Summary               7552 non-null   float64
 7   Number

### Count of unique values or codes

In [18]:
df.iloc[:, :60].nunique()

Provider Number                                                                            7626
Network                                                                                      18
Facility Name                                                                              7448
Five Star Date                                                                                1
Five Star                                                                                     5
Five Star Data Availability Code                                                              3
Address Line 1                                                                             7574
Address Line 2                                                                              599
City                                                                                       3001
State                                                                                        56
Zip                                     

In [19]:
df.iloc[:, 60:].nunique()

Patient Hospitalization Category Text                                     3
Patient Hospitalization Data Availability Code                            4
Patient Hospital Readmission Category Text                                3
Patient Hospital Readmission Data Availability Code                       3
Patient Survival Category Text                                            3
Patient Survival Data Availability Code                                   4
Number Of Patients Included In Hospitalization Summary                  249
Number Of Hospitalizations Included In Hospital Readmission Summary     295
Number Of Patients Included In Survival Summary                         807
Mortality Rate (Facility)                                               390
Mortality Rate: Upper Confidence Limit (97.5%)                          545
Mortality Rate: Lower Confidence Limit (2.5%)                           323
Readmission Rate (Facility)                                             463
Readmission 

### Percent unique values

In [20]:
df.iloc[:, :60].nunique()/df.iloc[:, :60].size

Provider Number                                                                            0.016667
Network                                                                                    0.000039
Facility Name                                                                              0.016278
Five Star Date                                                                             0.000002
Five Star                                                                                  0.000011
Five Star Data Availability Code                                                           0.000007
Address Line 1                                                                             0.016553
Address Line 2                                                                             0.001309
City                                                                                       0.006559
State                                                                                      0.000122


In [21]:
df.iloc[:, 60:].nunique()/df.iloc[:, 60:].size

Patient Hospitalization Category Text                                  0.000007
Patient Hospitalization Data Availability Code                         0.000009
Patient Hospital Readmission Category Text                             0.000007
Patient Hospital Readmission Data Availability Code                    0.000007
Patient Survival Category Text                                         0.000007
Patient Survival Data Availability Code                                0.000009
Number Of Patients Included In Hospitalization Summary                 0.000553
Number Of Hospitalizations Included In Hospital Readmission Summary    0.000656
Number Of Patients Included In Survival Summary                        0.001794
Mortality Rate (Facility)                                              0.000867
Mortality Rate: Upper Confidence Limit (97.5%)                         0.001211
Mortality Rate: Lower Confidence Limit (2.5%)                          0.000718
Readmission Rate (Facility)             

### Summary statistics

In [22]:
df.iloc[:, :30].describe()

Unnamed: 0,Provider Number,Network,Five Star,Five Star Data Availability Code,Zip,Late Shift,Number Of Dialysis Stations,Offers In-Center Hemodialysis,Offers Peritoneal Dialysis,Offers Home Hemodialysis Training,Percentage Of Medicare Patients With Hgb < 10 g/dL,Hgb < 10 Data Availability Code,Percentage Of Medicare Patients With Hgb > 12 g/dL,Hgb > 12 Data Availability Code,Number Of Dialysis Patients With Hgb Data
count,7626.0,7626.0,6725.0,901.0,7626.0,7626.0,7626.0,7626.0,7626.0,7626.0,6517.0,1109.0,6517.0,1109.0,7555.0
mean,315876.077105,9.660372,3.708699,259.368479,50297.359691,0.16706,17.463284,0.940336,0.525439,0.29006,19.822157,213.069432,0.223262,213.069432,36.481138
std,193257.685562,4.708672,1.032081,0.935512,27581.675445,0.373054,8.49554,0.236879,0.499385,0.45382,12.851783,24.576662,1.352427,24.576662,26.062896
min,12306.0,1.0,1.0,258.0,603.0,0.0,0.0,0.0,0.0,0.0,0.0,199.0,0.0,199.0,0.0
25%,142822.25,6.0,3.0,258.0,29533.0,0.0,12.0,1.0,0.0,0.0,11.0,199.0,0.0,199.0,18.0
50%,322545.5,9.0,4.0,260.0,46032.0,0.0,17.0,1.0,1.0,0.0,17.0,199.0,0.0,199.0,32.0
75%,452511.75,14.0,5.0,260.0,76060.75,0.0,22.0,1.0,1.0,1.0,25.0,201.0,0.0,201.0,50.0
max,852568.0,18.0,5.0,261.0,99801.0,1.0,80.0,1.0,1.0,1.0,100.0,258.0,47.0,258.0,267.0


In [23]:
df.iloc[:, 30:50].describe()

Unnamed: 0,Patient Transfusion Data Availability Code,Number Of Patients Included In The Transfusion Summary,Percent Of Adult HD Patients With Kt/V >= 1.2,Adult HD Kt/V Data Availability Code,Percentage Of Adult PD PTS With Kt/V >= 1.7,Adult PD Kt/V Data Availability Code,Percentage Of Pediatric HD Patients With Kt/V >= 1.2,Pediatric HD Kt/V Data Availability Code,Number Of Adult HD Patients With Kt/V Data,Number Of Adult HD Patient-Months With Kt/V Data,Number Of Adult PD Patients With Kt/V Data,Number Of Adult PD Patient-Months With Kt/V Data,Number Of Pediatric HD Patients With Kt/V Data,Number Of Pediatric HD Patient-Months With Kt/V Data,Hypercalcemia Data Availability Code,Number Of Patients In Hypercalcemia Summary,Number Of Patient-Months In Hypercalcemia Summary,Percentage Of Adult Patients With Hypercalcemia (Serum Calcium Greater Than 10.2 mg/dL),Number Of Patients In Serum Phosphorus Summary
count,1341.0,7552.0,6755.0,871.0,2146.0,5480.0,16.0,7610.0,7555.0,7175.0,7555.0,3358.0,7555.0,202.0,422.0,7555.0,7463.0,7204.0,7555.0
mean,228.988069,54.979211,96.615692,222.183697,91.467381,236.685949,92.0625,255.649409,70.159894,638.908153,9.577234,170.791543,0.097022,24.277228,227.831754,82.751952,732.713118,2.101333,87.066314
std,29.468853,38.443508,4.855055,27.955601,10.554205,27.393859,6.697947,13.620211,49.355366,436.725291,18.697931,192.258892,0.903753,31.545472,29.088572,55.699139,509.599704,4.939647,58.294605
min,199.0,0.0,0.0,199.0,0.0,199.0,79.0,199.0,0.0,1.0,0.0,1.0,0.0,1.0,199.0,0.0,0.0,0.0,0.0
25%,199.0,28.0,96.0,199.0,90.0,201.0,88.0,259.0,35.0,324.5,0.0,53.0,0.0,5.0,199.0,43.0,365.5,0.0,46.0
50%,258.0,49.0,98.0,201.0,95.0,257.0,92.0,259.0,65.0,578.0,0.0,114.0,0.0,12.0,201.0,73.0,643.0,1.0,78.0
75%,258.0,75.0,99.0,256.0,97.0,257.0,99.25,259.0,98.0,871.0,13.0,216.0,0.0,29.5,258.0,112.0,992.0,2.0,117.0
max,258.0,455.0,100.0,258.0,100.0,258.0,100.0,259.0,813.0,3834.0,255.0,2080.0,22.0,186.0,258.0,858.0,5063.0,99.0,964.0


In [24]:
df.iloc[:, 50:70].describe()

Unnamed: 0,Number Of Patient-Months In Serum Phosphorus Summary,Serum Phosphorus Data Availability Code,Percentage Of Adult Patients With Serum Phosphorus Less Than 3.5 mg/dL,Percentage Of Adult Patients With Serum Phosphorus Between 3.5 - 4.5 mg/dL,Percentage Of Adult Patients With Serum Phosphorus Between 4.6 - 5.5 mg/dL,Percentage Of Adult Patients With Serum Phosphorus Between 5.6 - 7.0 mg/dL,Percentage Of Adult Patients With Serum Phosphorus Greater Than 7.0 mg/dL,Patient Hospitalization Data Availability Code,Patient Hospital Readmission Data Availability Code,Patient Survival Data Availability Code,Number Of Patients Included In Hospitalization Summary,Number Of Hospitalizations Included In Hospital Readmission Summary,Number Of Patients Included In Survival Summary,Mortality Rate (Facility)
count,7463.0,411.0,7215.0,7215.0,7215.0,7215.0,7215.0,769.0,820.0,925.0,7552.0,7553.0,7552.0,6701.0
mean,753.179016,225.939173,8.09508,24.658073,31.062786,21.986556,14.193486,245.812744,240.004878,242.057297,64.622484,72.216867,251.310249,22.156902
std,522.367689,28.909678,3.33236,4.735087,5.184275,5.28059,5.21907,23.859371,27.173121,26.189824,44.263213,54.563397,185.960863,6.101124
min,0.0,199.0,0.0,4.0,8.0,2.0,0.0,199.0,199.0,199.0,0.0,0.0,0.0,0.0
25%,377.0,199.0,6.0,22.0,27.0,18.0,11.0,258.0,199.0,199.0,34.0,33.0,113.0,18.3
50%,661.0,201.0,8.0,24.0,31.0,22.0,14.0,258.0,258.0,258.0,59.0,63.0,228.0,21.6
75%,1018.0,258.0,10.0,28.0,34.0,26.0,17.0,258.0,258.0,258.0,88.0,101.0,353.0,25.3
max,5217.0,258.0,46.0,52.0,61.0,50.0,43.0,258.0,258.0,258.0,654.0,800.0,2115.0,72.8


In [25]:
df.iloc[:, 70:90].describe()

Unnamed: 0,Mortality Rate: Upper Confidence Limit (97.5%),Mortality Rate: Lower Confidence Limit (2.5%),Readmission Rate (Facility),Readmission Rate: Upper Confidence Limit (97.5%),Readmission Rate: Lower Confidence Limit (2.5%),Hospitalization Rate (Facility),Hospitalization Rate: Upper Confidence Limit (97.5%),Hospitalization Rate: Lower Confidence Limit (2.5%),Number Of Pediatric PD Patients With Kt/V Data,Pediatric PD Kt/V Data Availability Code,Number Of Pediatric PD Patient-Months With Kt/V Data,Percentage Of Pediatric PD Patients With Kt/V >= 1.8,Patient Infection Data Availability Code,Standard Infection Ratio,SIR: Upper Confidence Limit (97.5%),SIR: Lower Confidence Limit (2.5%),Transfusion Rate (Facility),Transfusion Rate: Upper Confidence Limit (97.5%)
count,6701.0,6701.0,6806.0,6806.0,6806.0,6857.0,6856.0,6857.0,7555.0,7597.0,150.0,29.0,1375.0,6251.0,6251.0,5368.0,6285.0,6285.0
mean,32.513953,14.887465,26.789994,43.185616,14.72561,186.714511,317.215271,117.489733,0.115288,256.813611,42.746667,76.413793,228.077091,0.74788,2.064434,0.30052,20.85825,98.287224
std,9.932624,5.161044,7.757977,9.597002,6.675535,50.618308,83.540713,39.968571,1.147835,11.108636,46.106411,20.536583,28.638715,0.667919,1.248678,0.344914,16.960678,50.327271
min,7.9,0.0,0.0,14.1,0.0,0.0,143.4,0.0,0.0,199.0,1.0,23.0,199.0,0.0,0.16,0.0,0.0,15.5
25%,26.1,11.8,22.1,36.6,10.2,153.3,263.9,90.8,0.0,259.0,12.0,76.0,201.0,0.29,1.21,0.06,8.9,61.0
50%,30.6,14.8,26.8,42.2,15.1,183.4,302.85,116.4,0.0,259.0,25.0,82.0,201.0,0.61,1.78,0.19,17.7,86.0
75%,36.3,18.0,31.7,48.2,19.2,216.1,349.9,142.4,0.0,259.0,65.75,88.0,258.0,1.07,2.6,0.43,28.1,124.4
max,108.7,48.2,98.8,143.4,49.8,725.2,1114.2,489.1,29.0,259.0,224.0,99.0,258.0,11.03,15.11,7.88,213.3,211.5


In [26]:
df.iloc[:, 90:].describe()

Unnamed: 0,Transfusion Rate: Lower Confidence Limit (2.5%),Fistula Data Availability Code,Number Of Patients Included In Fistula Summary,Fistula Rate (Facility),Fistula Rate: Upper Confidence Limit (97.5%),Fistula Rate: Lower Confidence Limit (2.5%),Number Of Patients In Long Term Catheter Summary,Number Of Patient-Months In Long Term Catheter Summary,Long Term Catheter Data Availability Code,Percentage Of Adult Patients With Long Term Catheter In Use,...,Patient Transplant Waitlist Data Availability Code,95% C.I. (Upper Limit) For SWR,95% C.I. (Lower Limit) For SWR,Number Of Patients In This Facility For SWR,Standardized First Kidney Transplant Waitlist Ratio,Patient Prevalent Transplant Waitlist Data Availability Code,95% C.I. (Upper Limit) For PPPW,95% C.I. (Lower Limit) For PPPW,Number Of Patients For PPPW,Percentage Of Prevalent Patients Waitlisted
count,6285.0,711.0,7555.0,6915.0,6915.0,6915.0,7555.0,7298.0,711.0,6915.0,...,3515.0,4111.0,4111.0,7554.0,4111.0,448.0,7178.0,7178.0,7555.0,7178.0
mean,7.51424,231.540084,78.452548,62.978279,83.844469,40.129573,78.452548,697.241573,225.556962,12.543167,...,210.778947,2.605145,0.337152,29.975377,0.998375,235.805804,46.349624,7.008721,66.653077,18.930189
std,8.711241,28.908954,54.087599,10.915107,10.85875,14.726152,54.087599,479.899379,28.662471,7.314833,...,23.509388,1.311779,0.432127,24.149303,0.819295,28.550521,15.886783,6.73176,45.911346,11.171761
min,0.0,199.0,0.0,0.0,0.0,0.0,0.0,1.0,199.0,0.0,...,199.0,0.23,0.0,0.0,0.0,199.0,0.0,0.0,0.0,0.0
25%,1.4,199.0,40.0,56.5,77.4,31.3,40.0,351.25,199.0,8.0,...,199.0,1.65,0.04,12.0,0.42,199.0,35.325,2.2,34.0,11.0
50%,4.8,256.0,72.0,63.6,84.4,41.4,72.0,628.0,201.0,11.0,...,199.0,2.35,0.19,26.0,0.82,258.0,44.8,5.1,58.0,17.0
75%,10.7,258.0,108.5,70.3,91.4,50.3,108.5,954.0,258.0,16.0,...,199.0,3.25,0.475,43.0,1.36,258.0,56.4,9.7,89.0,24.8
max,119.6,258.0,928.0,98.7,100.0,93.5,928.0,4201.0,258.0,88.0,...,258.0,13.16,5.08,232.0,8.43,258.0,99.2,57.3,468.0,92.4


### Range of values per column

In [27]:
df.iloc[:, :60].agg([min, max]).T

Unnamed: 0,min,max
Provider Number,12306,852568
Network,1,18
Facility Name,A UNIQUE KIDNEY CENTER LLC,ZILLMAR DIALYSIS LLC
Five Star Date,01/01/2015 - 12/31/2018,01/01/2015 - 12/31/2018
Five Star,1,5
Five Star Data Availability Code,258,261
Address Line 1,# 7 PROFESSIONAL DRIVE,WHEATON PARK SHOPPING CTR
City,ABBEVILLE,ZUNI
State,AK,WY
Zip,603,99801


In [28]:
df.iloc[:, 60:].agg([min, max]).T

Unnamed: 0,min,max
Patient Hospitalization Data Availability Code,199,258
Patient Hospital Readmission Data Availability Code,199,258
Patient Survival Data Availability Code,199,258
Number Of Patients Included In Hospitalization Summary,0,654
Number Of Hospitalizations Included In Hospital Readmission Summary,0,800
Number Of Patients Included In Survival Summary,0,2115
Mortality Rate (Facility),0,72.8
Mortality Rate: Upper Confidence Limit (97.5%),7.9,108.7
Mortality Rate: Lower Confidence Limit (2.5%),0,48.2
Readmission Rate (Facility),0,98.8


### Duplicate rows

In [29]:
df[df.duplicated()]

Unnamed: 0,Provider Number,Network,Facility Name,Five Star Date,Five Star,Five Star Data Availability Code,Address Line 1,Address Line 2,City,State,...,95% C.I. (Lower Limit) For SWR,Number Of Patients In This Facility For SWR,Standardized First Kidney Transplant Waitlist Ratio,PPPW Category Text,Patient Prevalent Transplant Waitlist Data Availability Code,95% C.I. (Upper Limit) For PPPW,95% C.I. (Lower Limit) For PPPW,Number Of Patients For PPPW,Percentage Of Prevalent Patients Waitlisted,Location


# Data Definitions

### Provider Number
Data type: int64

Value count: 7626

Number of unique values: 7626

Percent unique values: 0.016667

Ranges of values: 12306 - 852568

Length of values: 5, 6

Description: Lists the numeric code used to identify the provider listed.

In [30]:
set([(len(str(num))) for num in df['Provider Number']])

{5, 6}

### Network
Data type: int64

Value count: 7626 

Number of unique values: 18

Percent unique values: 0.000039

Ranges of values: 1 - 18

Length of values: 1, 2

Description: Lists the numeric code representing geographic area of all Medicare-approved ESRD (End Stage Renal Disease) facilities. 
Map of the network can be found here:
https://esrdncc.org/en/ESRD-network-map/

Value description:
- 1   : Network 1 (CT, ME, MA, NH,RI,VT)
- 2   : Network 2 (NY)
- 3   : Network 3 (NJ, PR, VI) 
- 4   : Network 4 (DE, PA)
- 5   : Network 5 (DC, MD, VA,WV)
- 6   : Network 6 (GA, NC, SC)
- 7   : Network 7 (FL)
- 8   : Network 8 (AL, MS, TN)
- 9   : Network 9 (IN, KY, OH)
- 10  : Network 10 (IL)
- 11  : Network 11 (MI, MN, ND, SD,WI)
- 12  : Network 12 (IA, KS, MO, NE)
- 13  : Network 13 (AR, LA, OK)
- 14  : Network 14 (TX)
- 15  : Network 15 (AZ, CO, NV, NM, UT,WY)
- 16  : Network 16 (AK, ID, MT, OR, WA)
- 17  : Network 17 (AS, GU, HI, MP, N. CA)
- 18  : Network 18 (S. CA)

In [31]:
df['Network'].value_counts()

6     775
14    749
9     651
11    510
7     503
8     482
5     449
18    428
15    374
4     359
13    349
10    345
12    339
2     324
17    319
3     246
16    227
1     197
Name: Network, dtype: int64

### Facility Name
Data type: object

Value count: 7626 

Number of unique values: 7448

Percent unique values: 0.016278

Range of values: A UNIQUE KIDNEY CENTER LLC - ZILLMAR DIALYSIS LLC

Length of values: 7 - 68

Description: Lists the name of the facility listed.

In [32]:
min(set([(len(str(num))) for num in df['Facility Name']]))

7

In [33]:
max(set([(len(str(num))) for num in df['Facility Name']]))

68

### Five Star Date
Data type: object

Value count: 7626

Number of unique values: 1

Percent unique values: 0.000002

Ranges of values: "01/01/2015 - 12/31/2018"

Length of values: 23

Description: Lists the data collection period for the five star rating.

In [34]:
set([(len(str(num))) for num in df['Five Star Date']])

{23}

### Five Star
Data type: float64

Value count: 6725

Number of unique values: 5

Percent unique values: 0.000011

Ranges of values: 1 - 5

Length of values: 1, 13

Description: Lists the 5-star rating for the facility. The “Dialysis Facility Compare (DFC) Star Program” is a rating system developed by Medicare that assigns 1 to 5 stars to dialysis facilities by comparing the health of the patients in their clinics to the patients in other dialysis facilities across the country.

In [35]:
df['Five Star'].value_counts()

3.0    2269
4.0    1937
5.0    1844
2.0     491
1.0     184
Name: Five Star, dtype: int64

### Five Star Data Availability Code
Data type: float64

Value count: 901

Number of unique values: 3

Percent unique values: 0.000007

Ranges of values: 258, 260, 261

Length of values: 3

Description: Lists whether the facility had sufficient five star data available or the reason for why the data is not available.

Value description:
- 258 : The dialysis center was not open long enough to supply sufficient measure data. 
- 260 : Not enough quality measure data to calculate a star rating.
- 261 : Medicare determined that at least one measure included in the star rating calculation was not accurate for this dialysis center.

In [36]:
df['Five Star Data Availability Code'].dtype

dtype('float64')

In [37]:
df['Five Star Data Availability Code'].value_counts()

260.0    612
258.0    286
261.0      3
Name: Five Star Data Availability Code, dtype: int64

### Address Line 1
Data type: object

Value count: 7626

Number of unique values: 7574

Percent unique values: 0.016553

Range of values: # 7 PROFESSIONAL DRIVE - WHEATON PARK SHOPPING CTR

Length of values: 4 - 59

Description: Lists the first line of the address that corresponds to the facility listed.

In [38]:
df['Address Line 1'].head()

0              1910 JOHN BARROW RD
1             7612 ATLANTIC AVENUE
2            720 N MAIN STREET EXT
3    LAUREL SQUARE SHOPPING CENTER
4               9115 ANTIQUE ALLEY
Name: Address Line 1, dtype: object

In [39]:
min(set([(len(str(num))) for num in df['Address Line 1']]))

4

In [40]:
max(set([(len(str(num))) for num in df['Address Line 1']]))

59

### Address Line 2
Data type: object

Value count: 1087

Number of unique values: 599

Percent unique values: 0.001309

Length of values: 2 - 47

Description: Lists the second line of the address that corresponds to the facility listed.

In [41]:
min(set([(len(str(num))) for num in df['Address Line 2']]))

2

In [42]:
max(set([(len(str(num))) for num in df['Address Line 2']]))

47

### City
Data type: object

Value count: 7626

Number of unique values: 3001

Percent unique values: 0.006559

Range of values: ABBEVILLE - ZUNI

Length of values: 

Description: Lists the city that corresponds to the facility listed.

In [43]:
df['City'].value_counts()

HOUSTON           108
CHICAGO            76
PHILADELPHIA       55
SAN ANTONIO        53
COLUMBUS           41
                 ... 
MURRELLS INLET      1
ITHACA              1
CAROL STREAM        1
KAYENTA             1
PRESTONSBURG        1
Name: City, Length: 3001, dtype: int64

In [44]:
min(set([(len(str(num))) for num in df['City']]))

3

In [45]:
max(set([(len(str(num))) for num in df['City']]))

20

### State
Data type: object

Value count: 7626

Number of unique values: 56

Percent unique values: 0.000122

Range of values: AK - WY

Length of values: 2

Description: Lists the alphabetic postal code used to identify the state that corresponds to the facility listed.

In [46]:
df['State'].value_counts()

TX    749
CA    706
FL    503
GA    376
OH    347
IL    345
PA    326
NY    324
NC    240
MI    220
VA    208
TN    205
NJ    192
LA    189
AL    186
IN    178
MD    173
MO    165
SC    159
AZ    128
WI    127
KY    126
MN    120
WA    100
MS     91
OK     89
MA     83
CO     80
OR     74
AR     71
IA     69
KS     65
NM     57
NV     52
CT     51
PR     50
UT     47
WV     46
NE     40
DE     33
HI     32
SD     28
ID     27
DC     22
NH     20
ME     19
MT     17
RI     16
ND     15
WY     10
AK      9
VT      8
GU      5
VI      4
MP      2
AS      2
Name: State, dtype: int64

### Zip
Data type: int64

Value count: 7626

Number of unique values: 5276

Percent unique values: 0.011531

Ranges of values: 603 - 99801

Length of values: 3 - 5

Description: Lists the full postal ZIP code that corresponds to the facility listed.

In [47]:
set([(len(str(num))) for num in df['Zip']])

{3, 4, 5}

In [48]:
df['Zip'].sort_values()

5450      603
113       605
349       612
572       612
1530      646
        ...  
4944    99515
2013    99654
1648    99669
3097    99701
888     99801
Name: Zip, Length: 7626, dtype: int64

### County
Data type: object

Value count: 7626

Number of unique values: 1258

Percent unique values: 0.002749

Range of values: ABBEVILLE - ZAVALA

Length of values: 3 - 21

Description: Lists the name of the county that corresponds to the facility listed.

In [49]:
min(set([(len(str(num))) for num in df['County']]))

3

In [50]:
max(set([(len(str(num))) for num in df['County']]))

21

### Phone Number
Data type: object

Value count: 7626

Number of unique values: 7581

Percent unique values: 0.016568

Range of values: (201) 262-0429	- (989) 921-2170

Length of values: 14

Description: Lists the telephone number that corresponds to the facility listed.

In [51]:
set([(len(str(num))) for num in df['Phone Number']])

{14}

### Profit Or Non-Profit
Data type: object

Value count: 7627

Number of unique values: 2

Percent unique values: 0.000004

Ranges of values: Non-Profit, Profit 

Length of values: 6, 10

Description: Indicates if the dialysis facility’s operates as a for profit or non-profit business.

In [52]:
df['Profit Or Non-Profit'].value_counts()

Profit        6759
Non-Profit     867
Name: Profit Or Non-Profit, dtype: int64

In [53]:
set([(len(str(num))) for num in df['Profit Or Non-Profit']])

{6, 10}

### Chain Owned
Data type: object

Value count: 7626

Number of unique values: 2

Percent unique values: 0.000004

Ranges of values: No, Yes

Length of values: 2 - 3

Description: Indicates whether or not the facility is owned or managed by a chain organization.

In [54]:
df['Chain Owned'].value_counts()

Yes    6793
No      833
Name: Chain Owned, dtype: int64

### Chain Organization
Data type: object

Value count: 7625

Number of unique values: 111

Percent unique values: 0.000243

Length of values: 3 - 39

Description: Lists the name of the chain organization if applicable.

In [55]:
min(set([(len(str(num))) for num in df['Chain Organization']]))

3

In [56]:
max(set([(len(str(num))) for num in df['Chain Organization']]))

39

### Late Shift
Data type: int64

Value count: 7626

Number of unique values: 2

Percent unique values: 0.000004

Mean: 0.167060

Ranges of values: 0, 1

Length of values: 1

Description: Lists whether or not the facility has a shift starting at 5:00 p.m. or later.

In [57]:
df['Late Shift'].value_counts()

0    6352
1    1274
Name: Late Shift, dtype: int64

###  Number Of Dialysis Stations
Data type: int64

Value count: 7626

Number of unique values: 68

Percent unique values: 0.000149

Ranges of values: 0 - 80

Mean: 17.463284

Length of values: 1 - 2

Description: Indicates the total number of dialysis stations at the dialysis facility. 

### Offers In-Center Hemodialysis
Data type: int64

Value count: 7626

Number of unique values: 2

Percent unique values: 0.000004

Mean: 0.940336	

Ranges of values: 0, 1

Length of values: 1

Description: Indicates whether the facility offers in-center hemodialysis. 

In [58]:
df['Offers In-Center Hemodialysis'].value_counts()

1    7171
0     455
Name: Offers In-Center Hemodialysis, dtype: int64

### Offers Peritoneal Dialysis
Data type: int64

Value count: 7626

Number of unique values: 2

Percent unique values: 0.000004

Mean: 0.525439

Ranges of values: 0, 1

Length of values: 1

Description: Indicates whether the facility offers peritoneal dialysis.

In [59]:
df['Offers Peritoneal Dialysis'].value_counts()

1    4007
0    3619
Name: Offers Peritoneal Dialysis, dtype: int64

### Offers Home Hemodialysis Training
Data type: int64

Value count: 7626

Number of unique values: 2

Percent unique values: 0.000004

Mean: 0.29006

Ranges of values: 0, 1

Length of values: 1

Description: Indicates whether the facility offers home hemodialysis training.

In [60]:
df['Offers Home Hemodialysis Training'].value_counts()

0    5414
1    2212
Name: Offers Home Hemodialysis Training, dtype: int64

### Certification Or Recertification Date
Data type: object

Value count: 7626

Number of unique values: 4800

Percent unique values: 0.010490

Range of values: 01/01/1968 - 12/31/2017

Length of values: 10

Description: Lists the initial or recertification date for the facility listed. These facilities are certified if they pass inspection. Medicare or Medicaid only covers care provided by certified providers. Being certified is not the same as being accredited. 

In [61]:
set([(len(str(num))) for num in df['Certification Or Recertification Date']])

{10}

### Claims Date
Data type: object

Value count: 7626

Number of unique values: 1

Percent unique values: 0.000002

Ranges of values: "07/01/2018 - 06/30/2019"

Length of values: 23

Description: Lists the data collection period for claims-based summaries.

In [62]:
set([(len(str(num))) for num in df['Claims Date']])

{23}

### CROWNWeb Date
Data type: object

Value count: 7626

Number of unique values: 1

Percent unique values: 0.000002

Ranges of values: "07/01/2018 - 06/30/2019"

Length of values: 23

Description: Lists the data collection period for CROWNWeb based measures.

In [63]:
set([(len(str(num))) for num in df['CROWNWeb Date']])

{23}

### STrR Date
Data type: object

Value count: 7626

Number of unique values: 1

Percent unique values: 0.000002

Ranges of values: "01/01/2018 - 12/31/2018"

Length of values: 23

Description: Lists the time period for patient transfusion summary (STrR).

In [64]:
set([(len(str(num))) for num in df['STrR Date']])

{23}

### Percentage Of Medicare Patients With Hgb < 10 g/dL
Data type: float64

Value count: 6517

Number of unique values: 90

Percent unique values: 0.000197

Ranges of values: 0 - 

Length of values: 1-3, 13

Description: Lists the percentage of patients who had average hemoglobin (hgb) less than 10.0 g/dL. Hemoglobin is a protein in red blood cells that carries oxygen from the lungs to the rest of the body. A normal hemoglobin level is 11 to 18 grams per deciliter (g/dL), depending on one's age and gender. But 7 to 8 g/dL is a safe level. 

In [65]:
set([(len(str(num))) for num in df['Percentage Of Medicare Patients With Hgb < 10 g/dL']])

{3, 4, 5}

### Hgb < 10 Data Availability Code
Data type: float64

Value count: 1109

Number of unique values: 3

Percent unique values: 0.000007

Ranges of values: 199, 201, 258 

Length of values: 3

Description: Lists whether the facility had sufficient hemoglobin (hgb) data available or the reason for why the data is not available. Hemoglobin is a protein in red blood cells that carries oxygen from the lungs to the rest of the body. A normal hemoglobin level is 11 to 18 grams per deciliter (g/dL), depending on one's age and gender. But 7 to 8 g/dL is a safe level. 

Value description:
- 199 : Not enough patients to report on this measure. Call the dialysis center to discuss this measure.
- 201 : Data not reported. Call the dialysis center to discuss this quality measure. 
- 258 : The dialysis center was not open long enough to supply sufficient measure data. 

In [66]:
df['Hgb < 10 Data Availability Code'].value_counts()

199.0    575
201.0    279
258.0    255
Name: Hgb < 10 Data Availability Code, dtype: int64

### Percentage Of Medicare Patients With Hgb > 12 g/dL
Data type: float64

Value count: 6517

Number of unique values: 20

Percent unique values: 0.000044

Ranges of values: 0 - 

Length of values: 1 - 3

Description: Lists the percentage of patients who had average hemoglobin (hgb) greater than 12.0 g/dL. Hemoglobin is a protein in red blood cells that carries oxygen from the lungs to the rest of the body. A normal hemoglobin level is 11 to 18 grams per deciliter (g/dL), depending on one's age and gender. But 7 to 8 g/dL is a safe level. 

In [67]:
set([(len(str(num))) for num in df['Percentage Of Medicare Patients With Hgb > 12 g/dL']])

{3, 4}

### Hgb > 12 Data Availability Code
Data type: float64

Value count: 1109

Number of unique values: 3

Percent unique values: 0.000007

Ranges of values: 199, 201, 258

Length of values: 3

Description: Lists whether the facility had sufficient hemoglobin (hgb) data available or the reason for why the data is not available. Hemoglobin is a protein in red blood cells that carries oxygen from the lungs to the rest of the body. A normal hemoglobin level is 11 to 18 grams per deciliter (g/dL), depending on one's age and gender. But 7 to 8 g/dL is a safe level. 

Value description:
- 199 : Not enough patients to report on this measure. Call the dialysis center to discuss this measure.
- 201 : Data not reported. Call the dialysis center to discuss this quality measure. 
- 258 : The dialysis center was not open long enough to supply sufficient measure data. 

In [68]:
df['Hgb > 12 Data Availability Code'].value_counts()

199.0    575
201.0    279
258.0    255
Name: Hgb > 12 Data Availability Code, dtype: int64

### Number Of Dialysis Patients With Hgb Data
Data type: float64

Value count: 7555

Number of unique values: 159

Percent unique values: 0.000347

Ranges of values: 0 - 

Length of values: 1-3

Description: Lists the number of patients included in the hemoglobin (hgb) greater than 12.0 g/dL summary. Hemoglobin is a protein in red blood cells that carries oxygen from the lungs to the rest of the body. A normal hemoglobin level is 11 to 18 grams per deciliter (g/dL), depending on one's age and gender. But 7 to 8 g/dL is a safe level. 

In [69]:
set([(len(str(num))) for num in df['Number Of Dialysis Patients With Hgb Data']])

{3, 4, 5}

### Patient Transfusion Data Availability Code
Data type: float64

Value count: 1341

Number of unique values: 4

Percent unique values: 0.000009

Ranges of values: 199, 201, 255, 258

Length of values: 3

Description: Lists whether the facility had sufficient transfusion data available or the reason for why the data is not available.

Value description:
- 199 : Not enough patients to report on this measure. Call the dialysis center to discuss this measure.
- 201 : Data not reported. Call the dialysis center to discuss this quality measure. 
- 255 : Medicare determined that the percentage reported was not accurate.
- 258 : The dialysis center was not open long enough to supply sufficient measure data. 

In [70]:
df['Patient Transfusion Data Availability Code'].value_counts()

258.0    678
199.0    638
201.0     22
255.0      3
Name: Patient Transfusion Data Availability Code, dtype: int64

### Patient Transfusion Category Text
Data type: object

Value count: 6285

Number of unique values: 3

Percent unique values: 0.000007

Ranges of values: As Expected, Not Available, Worse than Expected, Better than Expected 

Length of values: 3, 11, 19, 20

Description: Patient transfusion category.

Value description:
- As Expected          : Patient transfusions categorized as “As Expected”       
- Worse than Expected  :  Patient transfusions categorized as “Worse than Expected”
- Better than Expected : Patient transfusions categorized as “Better than Expected"
- Not Available 

In [71]:
df['Patient Transfusion Category Text'].value_counts()

As Expected             5851
Worse than Expected      430
Better than Expected       4
Name: Patient Transfusion Category Text, dtype: int64

In [72]:
set([(len(str(num))) for num in df['Patient Transfusion Category Text']])

{3, 11, 19, 20}

### Number Of Patients Included In The Transfusion Summary
Data type: float64

Value count: 7552

Number of unique values: 225

Percent unique values: 0.000492

Ranges of values: 0 - 

Length of values: 1 - 3

Description: Lists the number of patients included in the facility’s transfusion summary.

In [73]:
set([(len(str(num))) for num in df['Number Of Patients Included In The Transfusion Summary']])

{3, 4, 5}

### Percent Of Adult HD Patients With Kt/V >= 1.2
Data type: float64

Value count: 6755

Number of unique values: 53

Percent unique values: 0.000116 

Ranges of values: 0 - 

Length of values: 1 - 3, 13

Description: Lists the percentage of adult HD patients with Kt/V greater than or equal to 1.2. The parameter Kt/V is a measurement of the efficacy of a hemodialysis session. It identifies the effective removal of a specific solute (clearance K) resulting from a given treatment (characterized by time t) in a given patient (with a specific volume of distribution V for the solute considered).

In [74]:
set([(len(str(num))) for num in df['Percent Of Adult HD Patients With Kt/V >= 1.2']])

{3, 4, 5}

### Adult HD Kt/V Data Availability Code
Data type: float64

Value count: 871

Number of unique values: 4

Percent unique values: 0.000009

Ranges of values: 1, 199, 201, 256, 258

Length of values: 1, 3

Description: Lists whether the facility had sufficient adult HD Kt/V data available or the reason for why the data is not available. The parameter Kt/V is a measurement of the efficacy of a hemodialysis session. It identifies the effective removal of a specific solute (clearance K) resulting from a given treatment (characterized by time t) in a given patient (with a specific volume of distribution V for the solute considered).

Value description: 
- 199 : Not enough patients to report on this measure. Call the dialysis center to discuss this measure.
- 201 : Data not reported. Call the dialysis center to discuss this quality measure.
- 256 : The dialysis center does not provide hemodialysis during the reporting period. 
- 258 : The dialysis center was not open long enough to supply sufficient measure data. 

In [75]:
df['Adult HD Kt/V Data Availability Code'].value_counts()

199.0    294
201.0    238
258.0    197
256.0    142
Name: Adult HD Kt/V Data Availability Code, dtype: int64

### Percentage Of Adult PD PTS With Kt/V >= 1.7
Data type: float64

Value count: 2146

Number of unique values: 70

Percent unique values: 0.000153

Ranges of values: 0 - 

Length of values: 1 - 3

Description: Lists the percentage of adult PD PTS patients with Kt/V greater than or equal to 1.7. The parameter Kt/V is a measurement of the efficacy of a hemodialysis session. It identifies the effective removal of a specific solute (clearance K) resulting from a given treatment (characterized by time t) in a given patient (with a specific volume of distribution V for the solute considered).

In [76]:
set([(len(str(num))) for num in df['Percentage Of Adult PD PTS With Kt/V >= 1.7']])

{3, 4, 5}

### Adult PD Kt/V Data Availability Code
Data type: float64 

Value count: 5480

Number of unique values: 4

Percent unique values: 0.000009

Ranges of values: 1, 199, 201, 257, 258

Length of values: 1, 3

Description: Lists whether the facility had sufficient adult PD Kt/V data available or the reason for why the data is not available. The parameter Kt/V is a measurement of the efficacy of a hemodialysis session. It identifies the effective removal of a specific solute (clearance K) resulting from a given treatment (characterized by time t) in a given patient (with a specific volume of distribution V for the solute considered).

Value description:
- 199 : Not enough patients to report on this measure. Call the dialysis center to discuss this measure.
- 201 : Data not reported. Call the dialysis center to discuss this quality measure.
- 257 : The dialysis center does not provide peritoneal dialysis during the reporting period. 
- 258 : The dialysis center was not open long enough to supply sufficient measure data.

In [77]:
df['Adult PD Kt/V Data Availability Code'].value_counts()

257.0    3362
199.0    1116
201.0     835
258.0     167
Name: Adult PD Kt/V Data Availability Code, dtype: int64

### Percentage Of Pediatric HD Patients With Kt/V >= 1.2
Data type: float64

Value count: 16

Number of unique values: 11

Percent unique values: 0.000024

Ranges of values: 100 - 

Length of values: 2, 3

Description: Lists the percentage of pediatric HD patients with Kt/V greater than or equal to 1.2. The parameter Kt/V is a measurement of the efficacy of a hemodialysis session. It identifies the effective removal of a specific solute (clearance K) resulting from a given treatment (characterized by time t) in a given patient (with a specific volume of distribution V for the solute considered).

In [78]:
set([(len(str(num))) for num in df['Percentage Of Pediatric HD Patients With Kt/V >= 1.2']])

{3, 4, 5}

### Pediatric HD Kt/V Data Availability Code
Data type: float64

Value count: 7610

Number of unique values: 4

Percent unique values: 0.000009

Ranges of values: 199, 201, 258, 259

Length of values: 3

Description: Lists whether the facility had sufficient Pediatric HD Kt/V data available or the reason for why the data is not available. The parameter Kt/V is a measurement of the efficacy of a hemodialysis session. It identifies the effective removal of a specific solute (clearance K) resulting from a given treatment(characterized by time t) in a given patient (with a specific volume of distribution V for the solute considered).

Value description:
- 199 : Not enough patients to report on this measure. Call the dialysis center to discuss this measure.
- 201 : Data not reported. Call the dialysis center to discuss this quality measure.
- 258 : The dialysis center was not open long enough to supply sufficient measure data. 
- 259 : The dialysis center does not provide hemodialysis and/or peritoneal dialysis to pediatric patients during the reporting period.

In [79]:
df['Pediatric HD Kt/V Data Availability Code'].value_counts()

259.0    7106
201.0     247
199.0     185
258.0      72
Name: Pediatric HD Kt/V Data Availability Code, dtype: int64

### Number Of Adult HD Patients With Kt/V Data
Data type: float64

Value count: 7555

Number of unique values: 270

Percent unique values: 0.000590

Ranges of values: 0 - 

Length of values: 1 - 3

Description: Lists the number of adult HD patients included in Kt/V greater than or equal to 1.2 summary. The parameter Kt/V is a measurement of the efficacy of a hemodialysis session. It identifies the effective removal of a specific solute (clearance K) resulting from a given treatment (characterized by time t) in a given patient (with a specific volume of distribution V for the solute considered).

In [80]:
set([(len(str(num))) for num in df['Number Of Adult HD Patients With Kt/V Data']])

{3, 4, 5}

### Number Of Adult HD Patient-Months With Kt/V Data
Data type: float64

Value count: 7175

Number of unique values: 1629

Percent unique values: 0.003560

Ranges of values: 0 - 

Length of values: 1 - 4

Description: Lists the number of adult HD patient-months included in Kt/V greater than or equal to 1.2 summary. The parameter Kt/V is a measurement of the efficacy of a hemodialysis session. It identifies the effective removal of a specific solute (clearance K) resulting from a given treatment (characterized by time t) in a given patient (with a specific volume of distribution V for the solute considered).

In [81]:
set([(len(str(num))) for num in df['Number Of Adult HD Patient-Months With Kt/V Data']])

{3, 4, 5, 6}

### Number Of Adult PD Patients With Kt/V Data
Data type: float64

Value count: 7555

Number of unique values: 136

Percent unique values: 0.000297

Ranges of values: 0 -

Length of values: 1 - 3, 13

Description: Lists the number of adult PD patients included in Kt/V greater than or equal to 1.7 summary. The parameter Kt/V is a measurement of the efficacy of a hemodialysis session. It identifies the effective removal of a specific solute (clearance K) resulting from a given treatment (characterized by time t) in a given patient (with a specific volume of distribution V for the solute considered).

In [82]:
set([(len(str(num))) for num in df['Number Of Adult PD Patients With Kt/V Data']])

{3, 4, 5}

### Number Of Adult PD Patient-Months With Kt/V Data
Data type: float64

Value count: 3358

Number of unique values: 610

Percent unique values: 0.001333

Ranges of values: 0 - 

Length of values: 1 - 4

Description: Lists the number of adult PD patient-months included in Kt/V greater than or equal to 1.7 summary. The parameter Kt/V is a measurement of the efficacy of a hemodialysis session. It identifies the effective removal of a specific solute (clearance K) resulting from a given treatment (characterized by time t) in a given patient (with a specific volume of distribution V for the solute considered). 

In [83]:
set([(len(str(num))) for num in df['Number Of Adult PD Patient-Months With Kt/V Data']])

{3, 4, 5, 6}

### Number Of Pediatric HD Patients With Kt/V Data
Data type: float64

Value count: 7555

Number of unique values: 19

Percent unique values: 0.000042

Ranges of values: 0 - 

Length of values: 1 - 3

Description: Lists the number of pediatric HD patients included in Kt/V greater than or equal to 1.2 summary. The parameter Kt/V is a measurement of the efficacy of a hemodialysis session. It identifies the effective removal of a specific solute (clearance K) resulting from a given treatment (characterized by time t) in a given patient (with a specific volume of distribution V for the solute considered).

In [84]:
set([(len(str(num))) for num in df['Number Of Pediatric HD Patients With Kt/V Data']])

{3, 4}

### Number Of Pediatric HD Patient-Months With Kt/V Data
Data type: float64 

Value count: 202

Number of unique values: 67

Percent unique values: 0.000146

Ranges of values: 1 - 

Length of values: 1 - 3

Description: Lists the number of pediatric HD patientmonths included in Kt/V greater than or equal to 1.2 summary. The parameter Kt/V is a measurement of the efficacy of a hemodialysis session. It identifies the effective removal of a specific solute (clearance K) resulting from a given treatment (characterized by time t) in a given patient (with a specific volume of distribution V for the solute considered).

In [85]:
set([(len(str(num))) for num in df['Number Of Pediatric HD Patient-Months With Kt/V Data']])

{3, 4, 5}

### Hypercalcemia Data Availability Code
Data type: float64

Value count: 422

Number of unique values: 3

Percent unique values: 0.000007

Ranges of values: 199, 201, 258

Length of values: 3

Description: Lists whether the facility had sufficient hypercalcemia data available or the reason for why the data is not available. Hypercalcemia is a condition in which the calcium level in one's blood is above normal. Too much calcium in the blood can weaken bones, create kidney stones, and interfere with how the heart and brain work. Hypercalcemia is usually a result of overactive parathyroid glands.
A normal serum calcium level is 8-10 mg/dL (2-2.5 mmol/L) with some interlaboratory variation in the reference range, and hypercalcemia is defined as a serum calcium level greater than 10.5 mg/dL (>2.5 mmol/L).

Value description:
- 199 : Not enough patients to report on this measure. Call the dialysis center to discuss this measure.
- 201 : Data not reported. Call the dialysis center to discuss this quality measure.
- 258 : The dialysis center was not open long enough to supply sufficient measure data. 

In [86]:
df['Hypercalcemia Data Availability Code'].value_counts()

258.0    203
199.0    124
201.0     95
Name: Hypercalcemia Data Availability Code, dtype: int64

### Number Of Patients In Hypercalcemia Summary
Data type: float64

Value count: 7555

Number of unique values: 310

Percent unique values: 0.000678

Ranges of values: 0 - 

Length of values: 1 - 3

Description: Lists the number of patients included in the facility’s hypercalcemia summary. Hypercalcemia is a condition in which the calcium level in one's blood is above normal. Too much calcium in the blood can weaken bones, create kidney stones, and interfere with how the heart and brain work. Hypercalcemia is usually a result of overactive parathyroid glands.
A normal serum calcium level is 8-10 mg/dL (2-2.5 mmol/L) with some interlaboratory variation in the reference range, and hypercalcemia is defined as a serum calcium level greater than 10.5 mg/dL (>2.5 mmol/L).

In [87]:
set([(len(str(num))) for num in df['Number Of Patients In Hypercalcemia Summary']])

{3, 4, 5}

### Number Of Patient-Months In Hypercalcemia Summary
Data type: float64

Value count: 7463

Number of unique values: 1865

Percent unique values: 0.004076

Ranges of values: 0 - 

Length of values: 1 - 4

Description: Lists the number of patient-months included in the facility’s hypercalcemia summary. Hypercalcemia is a condition in which the calcium level in one's blood is above normal. Too much calcium in the blood can weaken bones, create kidney stones, and interfere with how the heart and brain work. Hypercalcemia is usually a result of overactive parathyroid glands.
A normal serum calcium level is 8-10 mg/dL (2-2.5 mmol/L) with some interlaboratory variation in the reference range, and hypercalcemia is defined as a serum calcium level greater than 10.5 mg/dL (>2.5 mmol/L).

In [88]:
set([(len(str(num))) for num in df['Number Of Patient-Months In Hypercalcemia Summary']])

{3, 4, 5, 6}

### Percentage Of Adult Patients With Hypercalcemia (Serum Calcium Greater Than 10.2 mg/dL)
Data type: float64

Value count: 7204

Number of unique values: 48

Percent unique values: 0.000105

Ranges of values: 0 - 

Length of values: 1 - 3

Description: Lists the percentage of Adult patients with Hypercalcemia (serum calcium greater than 10.2 mg/dL). Serum phosphorus is measured in milligrams of phosphorus per deciliter of blood (mg/dL). According to Mayo Medical Laboratories, a normal range for adults is generally 2.5 to 4.5 mg/dL. The normal range varies slightly depending on your age.
Most commonly, a high level of phosphorus is related to a kidney disorder. It shows that your kidneys are having difficulty clearing phosphorus from your blood. A high level of phosphorus can also mean uncontrolled diabetes and other endocrine disorders. 

In [89]:
set([(len(str(num))) for num in df['Percentage Of Adult Patients With Hypercalcemia (Serum Calcium Greater Than 10.2 mg/dL)']])

{3, 4}

### Number Of Patients In Serum Phosphorus Summary
Data type: float64

Value count: 7555

Number of unique values: 323

Percent unique values: 0.000706

Ranges of values: 0 - 

Length of values: 1 - 3

Description: Lists the number of patients included in the facility’s serum phosphorus summary.
Serum phosphorus is measured in milligrams of phosphorus per deciliter of blood (mg/dL). According to Mayo Medical Laboratories, a normal range for adults is generally 2.5 to 4.5 mg/dL. The normal range varies slightly depending on your age.
Most commonly, a high level of phosphorus is related to a kidney disorder. It shows that your kidneys are having difficulty clearing phosphorus from your blood. A high level of phosphorus can also mean uncontrolled diabetes and other endocrine disorders.

In [90]:
set([(len(str(num))) for num in df['Number Of Patients In Serum Phosphorus Summary']])

{3, 4, 5}

### Number Of Patient-Months In Serum Phosphorus Summary
Data type: float64

Value count: 7463

Number of unique values: 1903

Percent unique values: 0.004159

Ranges of values: 0 - 

Length of values: 1 - 4

Description: Lists the number of patient-months included in the facility’s serum phosphorus summary.
Serum phosphorus is measured in milligrams of phosphorus per deciliter of blood (mg/dL). According to Mayo Medical Laboratories, a normal range for adults is generally 2.5 to 4.5 mg/dL. The normal range varies slightly depending on your age.
Most commonly, a high level of phosphorus is related to a kidney disorder. It shows that your kidneys are having difficulty clearing phosphorus from your blood. A high level of phosphorus can also mean uncontrolled diabetes and other endocrine disorders.

In [91]:
set([(len(str(num))) for num in df['Number Of Patient-Months In Serum Phosphorus Summary']])

{3, 4, 5, 6}

### Serum Phosphorus Data Availability Code 
Data type: float64

Value count: 411

Number of unique values: 3

Percent unique values: 0.000007

Ranges of values: 199, 201, 258

Length of values: 3

Description: Lists whether the facility had sufficient serum phosphorus data available or the reason for why the data is not available.
Serum phosphorus is measured in milligrams of phosphorus per deciliter of blood (mg/dL). According to Mayo Medical Laboratories, a normal range for adults is generally 2.5 to 4.5 mg/dL. The normal range varies slightly depending on your age.
Most commonly, a high level of phosphorus is related to a kidney disorder. It shows that your kidneys are having difficulty clearing phosphorus from your blood. A high level of phosphorus can also mean uncontrolled diabetes and other endocrine disorders.

Value description:
- 199 : Not enough patients to report on this measure. Call the dialysis center to discuss this measure.
- 201 : Data not reported. Call the dialysis center to discuss this quality measure.
- 258 : The dialysis center was not open long enough to supply sufficient measure data. 

In [92]:
df['Serum Phosphorus Data Availability Code'].value_counts()

258.0    184
199.0    119
201.0    108
Name: Serum Phosphorus Data Availability Code, dtype: int64

### Percentage Of Adult Patients With Serum Phosphorus Less Than 3.5 mg/dL
Data type: float64

Value count: 7215

Number of unique values: 35

Percent unique values:  0.000076

Ranges of values: 0 - 

Length of values: 1 - 3

Description: Lists the percentage of Adult patients with serum phosphorus less than 3.5 mg/dL.
Serum phosphorus is measured in milligrams of phosphorus per deciliter of blood (mg/dL). According to Mayo Medical Laboratories, a normal range for adults is generally 2.5 to 4.5 mg/dL. The normal range varies slightly depending on your age.
Most commonly, a high level of phosphorus is related to a kidney disorder. It shows that your kidneys are having difficulty clearing phosphorus from your blood. A high level of phosphorus can also mean uncontrolled diabetes and other endocrine disorders.

In [93]:
set([(len(str(num))) for num in df['Percentage Of Adult Patients With Serum Phosphorus Less Than 3.5 mg/dL']])

{3, 4}

### Percentage Of Adult Patients With Serum Phosphorus Between 3.5 - 4.5 mg/dL
Data type: float64

Value count: 7215

Number of unique values: 46

Percent unique values: 0.000101

Ranges of values: 10 - 

Length of values: 1 - 3

Description: Lists the percentage of Adult patients with serum phosphorus between 3.5-4.5 mg/dL.
Serum phosphorus is measured in milligrams of phosphorus per deciliter of blood (mg/dL). According to Mayo Medical Laboratories, a normal range for adults is generally 2.5 to 4.5 mg/dL. The normal range varies slightly depending on your age.
Most commonly, a high level of phosphorus is related to a kidney disorder. It shows that your kidneys are having difficulty clearing phosphorus from your blood. A high level of phosphorus can also mean uncontrolled diabetes and other endocrine disorders.

In [94]:
set([(len(str(num))) for num in df['Percentage Of Adult Patients With Serum Phosphorus Between 3.5 - 4.5 mg/dL']])

{3, 4}

### Percentage Of Adult Patients With Serum Phosphorus Between 4.6 - 5.5 mg/dL
Data type: float64

Value count: 7215

Number of unique values: 44

Percent unique values: 0.000096

Ranges of values: 12 - 

Length of values: 1 - 3

Description: Lists the percentage of Adult patients with serum phosphorus between 4.6-5.5 mg/dL.
Serum phosphorus is measured in milligrams of phosphorus per deciliter of blood (mg/dL). According to Mayo Medical Laboratories, a normal range for adults is generally 2.5 to 4.5 mg/dL. The normal range varies slightly depending on your age.
Most commonly, a high level of phosphorus is related to a kidney disorder. It shows that your kidneys are having difficulty clearing phosphorus from your blood. A high level of phosphorus can also mean uncontrolled diabetes and other endocrine disorders.

In [95]:
set([(len(str(num))) for num in df['Percentage Of Adult Patients With Serum Phosphorus Between 4.6 - 5.5 mg/dL']])

{3, 4}

### Percentage Of Adult Patients With Serum Phosphorus Between 5.6 - 7.0 mg/dL
Data type: float64

Value count: 7215

Number of unique values: 44

Percent unique values: 0.000096

Ranges of values: 10 - 

Length of values: 1 - 3

Description: Lists the percentage of Adult patients with serum phosphorus between 5.6-7.0 mg/dL.
Serum phosphorus is measured in milligrams of phosphorus per deciliter of blood (mg/dL). According to Mayo Medical Laboratories, a normal range for adults is generally 2.5 to 4.5 mg/dL. The normal range varies slightly depending on your age.
Most commonly, a high level of phosphorus is related to a kidney disorder. It shows that your kidneys are having difficulty clearing phosphorus from your blood. A high level of phosphorus can also mean uncontrolled diabetes and other endocrine disorders.

In [96]:
set([(len(str(num))) for num in df['Percentage Of Adult Patients With Serum Phosphorus Between 5.6 - 7.0 mg/dL']])

{3, 4}

### Percentage Of Adult Patients With Serum Phosphorus Greater Than 7.0 mg/dL
Data type: float64

Value count: 7215

Number of unique values: 40

Percent unique values: 0.000087

Ranges of values: 0 - 

Length of values: 1 - 3

Description: Lists the percentage of Adult patients with serum phosphorus greater than 7.0 mg/dL.
Serum phosphorus is measured in milligrams of phosphorus per deciliter of blood (mg/dL). According to Mayo Medical Laboratories, a normal range for adults is generally 2.5 to 4.5 mg/dL. The normal range varies slightly depending on your age.
Most commonly, a high level of phosphorus is related to a kidney disorder. It shows that your kidneys are having difficulty clearing phosphorus from your blood. A high level of phosphorus can also mean uncontrolled diabetes and other endocrine disorders.

In [97]:
set([(len(str(num))) for num in df['Percentage Of Adult Patients With Serum Phosphorus Greater Than 7.0 mg/dL']])

{3, 4}

### SHR Date
Data type: object

Value count: 7626

Number of unique values: 1

Percent unique values: 0.000002

Ranges of values: "01/01/2018 - 12/31/2018"

Length of values: 23

Description: Lists the time period for patient hospitalization summary.

In [98]:
df['SHR Date'].value_counts()

01/01/2018 - 12/31/2018    7626
Name: SHR Date, dtype: int64

In [99]:
set([(len(str(num))) for num in df['SHR Date']])

{23}

### SRR Date
Data type: object

Value count: 7626

Number of unique values: 1

Percent unique values: 0.000002

Ranges of values: "01/01/2018 - 12/31/2018"

Length of values: 23

Description: Lists the time period for patient readmission summary.

In [100]:
df['SRR Date'].value_counts()

01/01/2018 - 12/31/2018    7626
Name: SRR Date, dtype: int64

In [101]:
set([(len(str(num))) for num in df['SRR Date']])

{23}

### SMR Date
Data type: object

Value count: 7626

Number of unique values: 1

Percent unique values: 0.000002

Ranges of values: "01/01/2015 - 12/31/2018"

Length of values: 23

Description: Lists the data collection period for patient survival summary.

In [102]:
df['SMR Date'].value_counts()

01/01/2015 - 12/31/2018    7626
Name: SMR Date, dtype: int64

In [103]:
set([(len(str(num))) for num in df['SRR Date']])

{23}

### Patient Hospitalization Category Text
Data type: object

Value count: 6857

Number of unique values: 3

Percent unique values: 0.000007

Ranges of values: As Expected, Not Available, Worse than Expected, Better than Expected

Length of values: 3, 11, 19, 20

Description: Patient hospitalization category.

In [104]:
df['Patient Hospitalization Category Text'].value_counts()

As Expected             6520
Worse than Expected      257
Better than Expected      80
Name: Patient Hospitalization Category Text, dtype: int64

In [105]:
set([(len(str(num))) for num in df['Patient Hospitalization Category Text']])

{3, 11, 19, 20}

### Patient Hospitalization Data Availability Code
Data type: float64

Value count: 769

Number of unique values: 4

Percent unique values: 0.000009 

Ranges of values: 199, 201, 255, 258

Length of values: 3

Description: Lists whether the facility had sufficient hospitalization data available or the reason for why the data is not available.

Value description:
- 199 : Not enough patients to report on this measure. Call the dialysis center to discuss this measure.
- 201 : Data not reported. Call the dialysis center to discuss this quality measure.
- 255 : Medicare determined that the percentage reported was not accurate.
- 258 : The dialysis center was not open long enough to supply sufficient measure data. 

In [106]:
df['Patient Hospitalization Data Availability Code'].value_counts()

258.0    607
199.0    150
201.0      9
255.0      3
Name: Patient Hospitalization Data Availability Code, dtype: int64

### Patient Hospital Readmission Category
Data type: object

Value count: 6806

Number of unique values: 3

Percent unique values: 0.000007

Ranges of values: As Expected, Not Available, Worse than Expected, Better than Expected

Length of values: 3, 11, 19, 20

Description: Patient readmission category.

Value description:
- As Expected          : Patient hospital readmission categorized as “As Expected”       
- Worse than Expected  :  Patient hospital readmission categorized as “Worse than Expected”
- Better than Expected : Patient hospital readmission categorized as “Better than Expected"

In [107]:
df['Patient Hospital Readmission Category Text'].value_counts()

As Expected             6485
Worse than Expected      209
Better than Expected     112
Name: Patient Hospital Readmission Category Text, dtype: int64

In [108]:
set([(len(str(num))) for num in df['Patient Hospital Readmission Category Text']])

{3, 11, 19, 20}

### Patient Hospital Readmission Data Availability Code
Data type: float64

Value count: 820

Number of unique values: 3

Percent unique values: 0.000007

Ranges of values: 199, 255, 258

Length of values: 3

Description: Lists whether the facility had sufficient readmission data available or the reason for why the data is not available.

Value description:
- 199 : Not enough patients to report on this measure. Call the dialysis center to discuss this measure.
- 255 : Medicare determined that the percentage reported was not accurate.
- 258 : The dialysis center was not open long enough to supply sufficient measure data. 

In [109]:
df['Patient Hospital Readmission Data Availability Code'].value_counts()

258.0    568
199.0    250
255.0      2
Name: Patient Hospital Readmission Data Availability Code, dtype: int64

### Patient Survival Category Text
Data type: object

Value count: 6701

Number of unique values: 3

Percent unique values: 0.000007

Ranges of values: As Expected, Worse than Expected, Better than Expected

Length of values: 3, 11, 19, 20

Description: Patient survival category (Better, Worse or As Expected).

Value description:
- As Expected          : Patient transfusions categorized as “As Expected”       
- Worse than Expected  :  Patient transfusions categorized as “Worse than Expected”
- Better than Expected : Patient transfusions categorized as “Better than Expected"

In [110]:
df['Patient Survival Category Text'].value_counts()

As Expected             5673
Better than Expected     525
Worse than Expected      503
Name: Patient Survival Category Text, dtype: int64

In [111]:
set([(len(str(num))) for num in df['Patient Hospital Readmission Category Text']])

{3, 11, 19, 20}

### Patient Survival Data Availability Code
Data type: float64

Value count: 925

Number of unique values: 4

Percent unique values: 0.000009

Ranges of values: 199, 201, 255, 258

Length of values: 3 

Description: Lists whether the facility had sufficient patient survival data available or the reason for why the data is not available.

Value description:
- 199 : Not enough patients to report on this measure. Call the dialysis center to discuss this measure.
- 201 : Data not reported. Call the dialysis center to discuss this quality measure.
- 255 : Medicare determined that the percentage reported was not accurate.
- 258 : The dialysis center was not open long enough to supply sufficient measure data. 

In [112]:
df['Patient Survival Data Availability Code'].value_counts()

258.0    672
199.0    244
201.0      6
255.0      3
Name: Patient Survival Data Availability Code, dtype: int64

### Number Of Patients Included In Hospitalization Summary
Data type: float64

Value count: 7552

Number of unique values: 249

Percent unique values: 0.000553

Ranges of values: 0 - 

Length of values: 1 - 3

Description: Lists the number of patients included in the facility’s hospitalization summary.

In [113]:
set([(len(str(num))) for num in df['Number Of Patients Included In Hospitalization Summary']])

{3, 4, 5}

### Number Of Hospitalizations Included In Hospital Readmission Summary
Data type: float64

Value count: 7553

Number of unique values: 295

Percent unique values: 0.000656

Ranges of values: 0 - 

Length of values: 1 - 3

Description: Lists the number of index discharges included in the facility’s readmission summary.

In [114]:
set([(len(str(num))) for num in df['Number Of Hospitalizations Included In Hospital Readmission Summary']])

{3, 4, 5}

### Number Of Patients Included In Survival Summary
Data type: float64

Value count: 7552

Number of unique values: 807

Percent unique values: 0.001794

Ranges of values: 0 - 

Length of values: 1 - 4

Description: Lists the number of patients included in the facility’s survival summary.

In [115]:
set([(len(str(num))) for num in df['Number Of Patients Included In Survival Summary']])

{3, 4, 5, 6}

### Mortality Rate (Facility)
Data type: float64

Value count: 6701

Number of unique values: 390

Percent unique values: 0.000867

Ranges of values: 0 - 

Length of values: 1 - 4

Description: Lists the facility’s mortality rate per 100 patient-years.

In [116]:
set([(len(str(num))) for num in df['Mortality Rate (Facility)']])

{3, 4}

### Mortality Rate: Upper Confidence Limit (97.5%)
Data type: float64

Value count: 6701

Number of unique values: 545

Percent unique values: 0.001211

Ranges of values: 10.3 -

Length of values: 2 - 5

Description: Lists the upper confident limit (97.5%) for mortality rate per 100 patient-years.

In [117]:
set([(len(str(num))) for num in df['Mortality Rate: Upper Confidence Limit (97.5%)']])

{3, 4, 5}

### Mortality Rate: Lower Confidence Limit (2.5%)
Data type: float64

Value count: 6701

Number of unique values: 323

Percent unique values: 0.000718

Ranges of values: 0.1 - 

Length of values: 1 - 4

Description: Lists the lower confident limit (2.5%) for mortality rate per 100 patient-years.

In [118]:
set([(len(str(num))) for num in df['Mortality Rate: Lower Confidence Limit (2.5%)']])

{3, 4}

### Readmission Rate (Facility)
Data type: float64

Value count: 6806

Number of unique values: 463

Percent unique values: 0.001029

Ranges of values: 0 - 

Length of values: 1 - 4

Description: Lists the facility’s readmission rate as a percentage of hospital discharges.

In [119]:
set([(len(str(num))) for num in df['Readmission Rate (Facility)']])

{3, 4}

### Readmission Rate: Upper Confidence Limit (97.5%)
Data type: float64

Value count: 6806

Number of unique values: 543

Percent unique values: 0.001207

Ranges of values: 104.2

Length of values: 2 - 5

Description: Lists the upper confident limit (97.5%) for readmission rate as a percentage of hospital discharges.

In [120]:
set([(len(str(num))) for num in df['Readmission Rate: Upper Confidence Limit (97.5%)']])

{3, 4, 5}

### Readmission Rate: Lower Confidence Limit (2.5%)
Data type: float64

Value count: 6806

Number of unique values: 338

Percent unique values: 0.000751

Ranges of values: 0.1 - 

Length of values: 1 - 4

Description: Lists the lower confident limit (2.5%) for readmission rate as a percentage of hospital discharges.

In [121]:
set([(len(str(num))) for num in df['Readmission Rate: Lower Confidence Limit (2.5%)']])

{3, 4}

### Hospitalization Rate (Facility)
Data type: float64

Value count: 6857

Number of unique values: 2004

Percent unique values: 0.004454

Ranges of values: 0 - 

Length of values: 1 - 5

Description: Lists the facility’s hospitalization rate per 100 patient-years.

In [122]:
set([(len(str(num))) for num in df['Hospitalization Rate (Facility)']])

{3, 4, 5}

### Hospitalization Rate: Upper Confidence Limit (97.5%)
Data type: float64

Value count: 6856

Number of unique values: 2583

Percent unique values: 0.005741

Ranges of values: 1079.9 - 

Length of values: 3, 5, 6

Description: Lists the upper confident limit (97.5%) for hospitalization rate per 100 patient-years.

In [123]:
set([(len(str(num))) for num in df['Hospitalization Rate: Upper Confidence Limit (97.5%)']])

{3, 5, 6}

### Hospitalization Rate: Lower Confidence Limit (2.5%)
Data type: float64

Value count: 6857

Number of unique values: 1710

Percent unique values: 0.003801

Ranges of values: 0 -

Length of values: 1 - 5

Description: Lists the lower confident limit (2.5%) for hospitalization rate per 100 patient-years.

In [124]:
set([(len(str(num))) for num in df['Hospitalization Rate: Lower Confidence Limit (2.5%)']])

{3, 4, 5}

### Number Of Pediatric PD Patients With Kt/V Data
Data type: float64

Value count: 7555

Number of unique values: 25

Percent unique values: 0.000056

Ranges of values: 0 - 

Length of values: 1 - 3

Description: Lists the number of pediatric PD patients included in Kt/V greater than or equal to 1.8 summary. The parameter Kt/V is a measurement of the efficacy of a hemodialysis session. It identifies the effective removal of a specific solute (clearance K) resulting from a given treatment (characterized by time t) in a given patient (with a specific volume of distribution V for the solute considered).

In [125]:
set([(len(str(num))) for num in df['Number Of Pediatric PD Patients With Kt/V Data']])

{3, 4}

### Pediatric PD Kt/V Data Availability Code
Data type: float64 

Value count: 7597

Number of unique values: 4

Percent unique values: 0.000009

Ranges of values: 199, 201, 258, 259

Length of values: 3

Description: Lists whether the facility had sufficient pediatric PD Kt/V data available or the reason for why the data is not available. The parameter Kt/V is a measurement of the efficacy of a hemodialysis session. It identifies the effective removal of a specific solute (clearance K) resulting from a given treatment (characterized by time t) in a given patient (with a specific volume of distribution V for the solute considered).

Value description:
- 199 : Not enough patients to report on this measure. Call the dialysis center to discuss this measure.
- 201 : Data not reported. Call the dialysis center to discuss this quality measure.
- 258 : The dialysis center was not open long enough to supply sufficient measure data. 
- 259 : The dialysis center does not provide hemodialysis and/or peritoneal dialysis to pediatric patients during the reporting period.

In [126]:
df['Pediatric PD Kt/V Data Availability Code'].value_counts()

259.0    7244
201.0     161
199.0     120
258.0      72
Name: Pediatric PD Kt/V Data Availability Code, dtype: int64

### Number Of Pediatric PD Patient-Months With Kt/V Data
Data type: float64

Value count: 150

Number of unique values: 81

Percent unique values: 0.000180

Ranges of values: 1 - 

Length of values: 1 - 3

Description: Lists the number of pediatric PD patient months included in Kt/V greater than or equal to 1.8 summary. The parameter Kt/V is a measurement of the efficacy of a hemodialysis session. It identifies the effective removal of a specific solute (clearance K) resulting from a given treatment (characterized by time t) in a given patient (with a specific volume of distribution V for the solute considered).

In [127]:
set([(len(str(num))) for num in df['Number Of Pediatric PD Patient-Months With Kt/V Data']])

{3, 4, 5}

### Percentage Of Pediatric PD Patients With Kt/V >= 1.8
Data type: float64

Value count: 29

Number of unique values: 21

Percent unique values: 0.000047

Ranges of values: 23 - 

Length of values: 2, 3

Description: Lists the percentage of pediatric PD patients with Kt/V greater than or equal to 1.8. The parameter Kt/V is a measurement of the efficacy of a hemodialysis session. It identifies the effective removal of a specific solute (clearance K) resulting from a given treatment (characterized by time t) in a given patient (with a specific volume of distribution V for the solute considered).

In [128]:
set([(len(str(num))) for num in df['Percentage Of Pediatric PD Patients With Kt/V >= 1.8']])

{3, 4}

### SIR Date
Data type: object

Value count: 7626

Number of unique values: 1

Percent unique values: 0.000002

Ranges of values: "01/01/2018 - 12/31/2018"

Length of values: 23

Description: Lists the time period for patient infection summary (SIR).

In [129]:
df['SIR Date'].value_counts()

01/01/2018 - 12/31/2018    7626
Name: SIR Date, dtype: int64

In [130]:
set([(len(str(num))) for num in df['SIR Date']])

{23}

### Patient Infection Data Availability Code
Data type: float64

Value count: 1375

Number of unique values: 3

Percent unique values: 0.000007

Ranges of values: 199, 201, 258

Length of values: 3

Description: Lists whether the facility had sufficient infection data available or the reason for why the data is not available.

Value description:
- 199 : Not enough patients to report on this measure. Call the dialysis center to discuss this measure.
- 201 : Data not reported. Call the dialysis center to discuss this quality measure.
- 258 : The dialysis center was not open long enough to supply sufficient measure data. 

In [131]:
df['Patient Infection Data Availability Code'].value_counts()

258.0    657
201.0    609
199.0    109
Name: Patient Infection Data Availability Code, dtype: int64

### Patient Infection Category Text
Data type: object

Value count: 6251

Number of unique values: 3

Percent unique values: 0.000007

Ranges of values: As Expected, Worse than Expected, Better than Expected

Length of values: 3, 11, 19, 20

Description: Patient infection category.

Value description:
- As Expected          : Patient infection categorized as “As Expected”       
- Worse than Expected  :  Patient infection categorized as “Worse than Expected”
- Better than Expected : Patient infection categorized as “Better than Expected"

In [132]:
df['Patient Infection Category Text'].value_counts()

As Expected             5055
Better than Expected     988
Worse than Expected      208
Name: Patient Infection Category Text, dtype: int64

In [133]:
set([(len(str(num))) for num in df['Patient Infection Category Text']])

{3, 11, 19, 20}

### Standard Infection Ratio
Data type: float64

Value count: 6251

Number of unique values: 336

Percent unique values: 0.000747

Ranges of values: 0.5 - 

Length of values: 1 - 5

Description: Lists the facility’s Standardized Infection Ratio.

In [134]:
set([(len(str(num))) for num in df['Standard Infection Ratio']])

{3, 4, 5}

### SIR: Upper Confidence Limit (97.5%)
Data type: float64

Value count: 6251

Number of unique values: 602

Percent unique values: 0.001338

Ranges of values: 0.16 - 

Length of values: 1 - 5

Description: Lists the upper confident limit (97.5%) for Standardized Infection Ratio (SIR).

In [135]:
set([(len(str(num))) for num in df['SIR: Upper Confidence Limit (97.5%)']])

{3, 4, 5}

### SIR: Lower Confidence Limit (2.5%)
Data type: float64

Value count: 5368

Number of unique values: 190

Percent unique values: 0.000422

Ranges of values: 0.01 - 

Length of values: 1 - 4

Description: Lists the lower confident limit (2.5%) for Standardized Infection Ratio (SIR).

In [136]:
set([(len(str(num))) for num in df['SIR: Lower Confidence Limit (2.5%)']])

{3, 4}

### Transfusion Rate (Facility)
Data type: float64

Value count: 6285

Number of unique values: 717

Percent unique values: 0.001594

Ranges of values: 0 - 

Length of values: 1 - 5

Description: Lists the facility’s transfusion rate per 100 patient-years.

In [137]:
set([(len(str(num))) for num in df['Transfusion Rate (Facility)']])

{3, 4, 5}

### Transfusion Rate: Upper Confidence Limit (97.5%)
Data type: float64

Value count: 6285

Number of unique values: 1604

Percent unique values: 0.003565 

Ranges of values: 100 - 

Length of values: 2 - 5

Description: Lists the upper confident limit (97.5%) for transfusion rate per 100 patient-years.

In [138]:
set([(len(str(num))) for num in df['Transfusion Rate: Upper Confidence Limit (97.5%)']])

{3, 4, 5}

### Transfusion Rate: Lower Confidence Limit (2.5%)
Data type: float64

Value count: 6285

Number of unique values: 413

Percent unique values: 0.000918

Ranges of values: 0.1 - 

Length of values: 1 - 5

Description: Lists the lower confident limit (2.5%) for transfusion rate per 100 patient-years.

In [139]:
set([(len(str(num))) for num in df['Transfusion Rate: Lower Confidence Limit (2.5%)']])

{3, 4, 5}

### Fistula Data Availability Code
Data type: float64

Value count: 711

Number of unique values: 4

Percent unique values: 0.000009

Ranges of values: 199, 201, 256, 258

Length of values: 3

Description: Lists whether the facility had sufficient fistula data available or the reason for why the data is not available. The best type of long-term access is an AV fistula. A surgeon connects an artery to a vein, usually in your arm, to create an AV fistula. An artery is a blood vessel that carries blood away from your heart. A vein is a blood vessel that carries blood back toward your heart. When the surgeon connects an artery to a vein, the vein grows wider and thicker, making it easier to place the needles for dialysis. The AV fistula also has a large diameter that allows your blood to flow out and back into your body quickly. The goal is to allow high blood flow so that the largest amount of blood can pass through the dialyzer.

Value description: 
- 199 : Not enough patients to report on this measure. Call the dialysis center to discuss this measure.
- 201 : Data not reported. Call the dialysis center to discuss this quality measure.
- 256 : The dialysis center does not provide hemodialysis during the reporting period. 
- 258 : The dialysis center was not open long enough to supply sufficient measure data. 

In [140]:
df['Fistula Data Availability Code'].value_counts()

258.0    284
199.0    262
256.0    110
201.0     55
Name: Fistula Data Availability Code, dtype: int64

### Fistula Category Text
Data type: object

Value count: 6915

Number of unique values: 3

Percent unique values: 0.000007

Ranges of values: As Expected, Worse than Expected, Better than Expected

Length of values: 3, 11, 19, 20

Description: Fistula category. The best type of long-term access is an AV fistula. A surgeon connects an artery to a vein, usually in your arm, to create an AV fistula. An artery is a blood vessel that carries blood away from your heart. A vein is a blood vessel that carries blood back toward your heart. When the surgeon connects an artery to a vein, the vein grows wider and thicker, making it easier to place the needles for dialysis. The AV fistula also has a large diameter that allows your blood to flow out and back into your body quickly. The goal is to allow high blood flow so that the largest amount of blood can pass through the dialyzer.

Value description:
- As Expected          : Patient fistulas categorized as “As Expected”       
- Worse than Expected  :  Patient fistulas categorized as “Worse than Expected”
- Better than Expected : Patient fistulas categorized as “Better than Expected"

In [141]:
df['Fistula Category Text'].value_counts()

As Expected             6423
Better than Expected     278
Worse than Expected      214
Name: Fistula Category Text, dtype: int64

In [142]:
set([(len(str(num))) for num in df['Fistula Category Text']])

{3, 11, 19, 20}

### Number Of Patients Included In Fistula Summary
Data type: float64

Value count: 7555

Number of unique values: 292

Percent unique values: 0.000649

Ranges of values: 0 - 

Length of values: 1 - 3

Description: Lists the number of patients included in the facility’s fistula summary. The best type of long-term access is an AV fistula. A surgeon connects an artery to a vein, usually in your arm, to create an AV fistula. An artery is a blood vessel that carries blood away from your heart. A vein is a blood vessel that carries blood back toward your heart. When the surgeon connects an artery to a vein, the vein grows wider and thicker, making it easier to place the needles for dialysis. The AV fistula also has a large diameter that allows your blood to flow out and back into your body quickly. The goal is to allow high blood flow so that the largest amount of blood can pass through the dialyzer.

In [143]:
set([(len(str(num))) for num in df['Number Of Patients Included In Fistula Summary']])

{3, 4, 5}

### Fistula Rate (Facility)
Data type: float64

Value count: 6915

Number of unique values: 587

Percent unique values: 0.001305

Ranges of values: 0 - 

Length of values: 1 - 4

Description: Lists the facility’s fistula rate per 100 patient-years. The best type of long-term access is an AV fistula. A surgeon connects an artery to a vein, usually in your arm, to create an AV fistula. An artery is a blood vessel that carries blood away from your heart. A vein is a blood vessel that carries blood back toward your heart. When the surgeon connects an artery to a vein, the vein grows wider and thicker, making it easier to place the needles for dialysis. The AV fistula also has a large diameter that allows your blood to flow out and back into your body quickly. The goal is to allow high blood flow so that the largest amount of blood can pass through the dialyzer.

In [144]:
set([(len(str(num))) for num in df['Fistula Rate (Facility)']])

{3, 4}

### Fistula Rate: Upper Confidence Limit (97.5%)
Data type: float64

Value count: 6915

Number of unique values: 487

Percent unique values: 0.001082

Ranges of values: 0 - 

Length of values: 1 - 4

Description: Lists the upper confident limit (97.5%) for fistula rate per 100 patient-years. The best type of long-term access is an AV fistula. A surgeon connects an artery to a vein, usually in your arm, to create an AV fistula. An artery is a blood vessel that carries blood away from your heart. A vein is a blood vessel that carries blood back toward your heart. When the surgeon connects an artery to a vein, the vein grows wider and thicker, making it easier to place the needles for dialysis. The AV fistula also has a large diameter that allows your blood to flow out and back into your body quickly. The goal is to allow high blood flow so that the largest amount of blood can pass through the dialyzer.

In [145]:
set([(len(str(num))) for num in df['Fistula Rate: Upper Confidence Limit (97.5%)']])

{3, 4, 5}

### Fistula Rate: Lower Confidence Limit (2.5%)
Data type: float64

Value count: 6915

Number of unique values: 727

Percent unique values: 0.001616

Ranges of values: 0.1 - 

Length of values: 1 - 4

Description: Lists the lower confident limit (2.5%) for fistula rate per 100 patient-years. The best type of long-term access is an AV fistula. A surgeon connects an artery to a vein, usually in your arm, to create an AV fistula. An artery is a blood vessel that carries blood away from your heart. A vein is a blood vessel that carries blood back toward your heart. When the surgeon connects an artery to a vein, the vein grows wider and thicker, making it easier to place the needles for dialysis. The AV fistula also has a large diameter that allows your blood to flow out and back into your body quickly. The goal is to allow high blood flow so that the largest amount of blood can pass through the dialyzer.

In [146]:
set([(len(str(num))) for num in df['Fistula Rate: Lower Confidence Limit (2.5%)']])

{3, 4}

### Number Of Patients In Long Term Catheter Summary
Data type: float64

Value count: 7555

Number of unique values: 292

Percent unique values: 0.000649

Ranges of values: 0 - 

Length of values: 1 - 3

Description: Lists the number of patients included in the facility’s long term catheter summary. 

In [147]:
set([(len(str(num))) for num in df['Number Of Patients In Long Term Catheter Summary']])

{3, 4, 5}

### Number Of Patient-Months In Long Term Catheter Summary
Data type: float64

Value count: 7298

Number of unique values: 1780

Percent unique values: 0.003956

Ranges of values: 1 - 

Length of values: 1 - 4

Description: Lists the number of patient-months included in the facility’s long term catheter summary. 

In [148]:
set([(len(str(num))) for num in df['Number Of Patient-Months In Long Term Catheter Summary']])

{3, 4, 5, 6}

### Long Term Catheter Data Availability Code
Data type: float64

Value count: 711

Number of unique values: 4

Percent unique values: 0.000009

Ranges of values: 199, 201, 256, 258

Length of values: 3

Description: Lists whether the facility had sufficient long term catheter data available or the reason for why the data is not available.

Value description: 
- 199 : Not enough patients to report on this measure. Call the dialysis center to discuss this measure.
- 201 : Data not reported. Call the dialysis center to discuss this quality measure.
- 256 : The dialysis center does not provide hemodialysis during the reporting period. 
- 258 : The dialysis center was not open long enough to supply sufficient measure data. 

In [149]:
df['Long Term Catheter Data Availability Code'].value_counts()

199.0    262
258.0    192
201.0    129
256.0    128
Name: Long Term Catheter Data Availability Code, dtype: int64

### Percentage Of Adult Patients With Long Term Catheter In Use
Data type: float64

Value count: 6915

Number of unique values: 67

Percent unique values: 0.000149

Ranges of values: 0 - 

Length of values: 1 - 3

Description: Lists the percentage of adult patients with long term catheter in use.

In [150]:
set([(len(str(num))) for num in df['Percentage Of Adult Patients With Long Term Catheter In Use']])

{3, 4}

### Number Of Patients In nPCR Summary
Data type: float64

Value count: 7555 

Number of unique values: 25

Percent unique values: 0.000056

Ranges of values: 0 - 

Length of values: 1 - 3

Description: 
Lists the number of patients included in the facility's normalized protein catabolic rate (nPCR) summary. The normalized protein catabolic rate (nPCR) is a formula commonly used to assess dietary protein intake in dialysis patients, as a means towards determining nutritional adequacy, a major problem in many ESRD patients. 
The nPCR is reported in grams of urea nitrogen per kilogram per day.
Most guidelines specify maintaining the protein intake above 1.0 – 1.2 g/kg/day in dialysis patients, with values less than 0.8 g/kg/day being equated with malnutrition.

In [151]:
set([(len(str(num))) for num in df['Number Of Patients In nPCR Summary']])

{3, 4}

### Number Of Patient-Months In nPCR Summary
Data type: float64

Value count: 224

Number of unique values: 73

Percent unique values: 0.000162

Ranges of values: 1 - 

Length of values: 1 - 3

Description: Lists the number of patient-months included in the facility's patients included in the facility's normalized protein catabolic rate (nPCR) summary. The normalized protein catabolic rate (nPCR) is a formula commonly used to assess dietary protein intake in dialysis patients, as a means towards determining nutritional adequacy, a major problem in many ESRD patients. 
The nPCR is reported in grams of urea nitrogen per kilogram per day.
Most guidelines specify maintaining the protein intake above 1.0 – 1.2 g/kg/day in dialysis patients, with values less than 0.8 g/kg/day being equated with malnutrition.

In [152]:
set([(len(str(num))) for num in df['Number Of Patient-Months In nPCR Summary']])

{3, 4, 5}

### nPCR Data Availability Code
Data type: float64

Value count: 7600

Number of unique values: 4

Percent unique values: 0.000009

Ranges of values: 199, 201, 258, 259

Length of values: 3

Description: Lists whether the facility had sufficient normalized protein catabolic rate (nPCR) data available or the reason for why the data is not available. The normalized protein catabolic rate (nPCR) is a formula commonly used to assess dietary protein intake in dialysis patients, as a means towards determining nutritional adequacy, a major problem in many ESRD patients. 
The nPCR is reported in grams of urea nitrogen per kilogram per day.
Most guidelines specify maintaining the protein intake above 1.0 – 1.2 g/kg/day in dialysis patients, with values less than 0.8 g/kg/day being equated with malnutrition.

Value description:
- 199 : Not enough patients to report on this measure. Call the dialysis center to discuss this measure.
- 201 : Data not reported. Call the dialysis center to discuss this quality measure.
- 258 : The dialysis center was not open long enough to supply sufficient measure data. 
- 259 : The dialysis center does not provide hemodialysis and/or peritoneal dialysis to pediatric patients during the reporting period.

In [153]:
df['nPCR Data Availability Code'].value_counts()

259.0    7082
201.0     249
199.0     196
258.0      73
Name: nPCR Data Availability Code, dtype: int64

### Percentage Of Pediatric HD Patients With nPCR
Data type: float64

Value count: 26

Number of unique values: 14

Percent unique values: 0.000031

Ranges of values: 100 -

Length of values: 2 - 3

Description: Lists the percentage of pediatric HD patients with normalized protein catabolic rate (nPCR). The normalized protein catabolic rate (nPCR) is a formula commonly used to assess dietary protein intake in dialysis patients, as a means towards determining nutritional adequacy, a major problem in many ESRD patients. 
The nPCR is reported in grams of urea nitrogen per kilogram per day.
Most guidelines specify maintaining the protein intake above 1.0 – 1.2 g/kg/day in dialysis patients, with values less than 0.8 g/kg/day being equated with malnutrition.

In [154]:
set([(len(str(num))) for num in df['Percentage Of Pediatric HD Patients With nPCR']])

{3, 4, 5}

### SWR Date
Data type: object

Value count: 7626

Number of unique values: 1

Percent unique values: 0.000002

Ranges of values: "01/01/2015 - 12/31/2017"

Length of values: 23

Description: Years Patient Transplant Waitlist Is Based Upon. Standardized Waitlist Ratio (SWR) measure tracks the number of incident patients at the dialysis facility under the age of 75 listed on the kidney or kidney-pancreas transplant waitlist or who received a living donor transplant within the first year of initiating dialysis. SWR is calculated to compare the observed waitlisting rate in the facility to the waitlisting rate that was expected.

In [155]:
df['SWR Date'].value_counts()

01/01/2015 - 12/31/2017    7626
Name: SWR Date, dtype: int64

In [156]:
set([(len(str(num))) for num in df['SWR Date']])

{23}

### SWR Category Text
Data type: object

Value count: 4111

Number of unique values: 3

Percent unique values: 0.000007

Ranges of values: As Expected, Worse than Expected, Better than Expected

Length of values: 3, 11, 19, 20

Description: Standardized waitlist ratio (SWR) category text. The standardized waitlist ratio (SWR) measure tracks the number of incident patients at the dialysis facility under the age of 75 listed on the kidney or kidney-pancreas transplant waitlist or who received a living donor transplant within the first year of initiating dialysis. SWR is calculated to compare the observed waitlisting rate in the facility to the waitlisting rate that was expected. 

Value description:
- As Expected          : Patient standardized waitlist ratio (SWR) categorized as “As Expected”       
- Worse than Expected  :  Patient standardized waitlist ratio (SWR) categorized as “Worse than Expected”
- Better than Expected : Patient standardized waitlist ratio (SWR) categorized as “Better than Expected"

In [157]:
df['SWR Category Text'].value_counts()

As Expected             3593
Better than Expected     316
Worse than Expected      202
Name: SWR Category Text, dtype: int64

In [158]:
set([(len(str(num))) for num in df['SWR Category Text']])

{3, 11, 19, 20}

### Patient Transplant Waitlist Data Availability Code
Data type: float64

Value count: 3515

Number of unique values: 4

Percent unique values: 0.000009

Ranges of values: 199, 201, 255, 258

Length of values: 3

Description: Lists whether the facility had sufficient patient transplant waitlist data available or
the reason for why the data is not available.

Value description:
- 199 : Not enough patients to report on this measure. Call the dialysis center to discuss this measure.
- 201 : Data not reported. Call the dialysis center to discuss this quality measure. 
- 255 : Medicare determined that the percentage reported was not accurate.
- 258 : The dialysis center was not open long enough to supply sufficient measure data.

In [159]:
df['Patient Transplant Waitlist Data Availability Code'].value_counts()

199.0    2705
258.0     697
201.0     112
255.0       1
Name: Patient Transplant Waitlist Data Availability Code, dtype: int64

### 95% C.I. (Upper Limit) For SWR
Data type: float64

Value count: 4111

Number of unique values: 593

Percent unique values: 0.001318

Ranges of values: 0.23 - 

Length of values: 1 - 5

Description: Lists the upper confident interval limit (95%) for Standardized Waitlist Ratio (SWR) rate per 100 patient-years. The Standardized Waitlist Ratio (SWR) measure tracks the number of incident patients at the dialysis facility under the age of 75 listed on the kidney or kidney-pancreas transplant waitlist or who received a living donor transplant within the first year of initiating dialysis. SWR is calculated to compare the observed waitlisting rate in the facility to the waitlisting rate that was expected. 

In [160]:
set([(len(str(num))) for num in df['95% C.I. (Upper Limit) For SWR']])

{3, 4, 5}

### 95% C.I. (Lower Limit) For SWR
Data type: float64

Value count: 4111

Number of unique values: 214

Percent unique values: 0.511%

Ranges of values: 0.000476 

Length of values: 1 - 4

Description: Lists the lower confident interval limit (95%) for Standardized Waitlist Ratio (SWR) rate per 100 patient-years. The Standardized Waitlist Ratio (SWR) measure tracks the number of incident patients at the dialysis facility under the age of 75 listed on the kidney or kidney-pancreas transplant waitlist or who received a living donor transplant within the first year of initiating dialysis. SWR is calculated to compare the observed waitlisting rate in the facility to the waitlisting rate that was expected. 

In [161]:
set([(len(str(num))) for num in df['95% C.I. (Lower Limit) For SWR']])

{3, 4}

### Number Of Patients In This Facility For SWR
Data type: float64

Value count: 7554

Number of unique values: 145

Percent unique values: 0.000322

Ranges of values: 0 - 

Length of values: 1 - 3

Description: Lists the number of patients in this facility for Standardized Waitlist Ratio (SWR). The Standardized Waitlist Ratio (SWR) measure tracks the number of incident patients at the dialysis facility under the age of 75 listed on the kidney or kidney-pancreas transplant waitlist or who received a living donor transplant within the first year of initiating dialysis. SWR is calculated to compare the observed waitlisting rate in the facility to the waitlisting rate that was expected. 

In [162]:
set([(len(str(num))) for num in df['Number Of Patients In This Facility For SWR']])

{3, 4, 5}

### Standardized First Kidney Transplant Waitlist Ratio
Data type: float64

Value count: 4111

Number of unique values: 360

Percent unique values: 0.000800

Ranges of values: 0.08 - 

Length of values: 1 - 4

Description: Lists the ratio of standardized waitlist ratio (SWR) in this facility. This measure tracks the number of incident patients at the dialysis facility under the age of 75 listed on the kidney or kidney-pancreas transplant waitlist or who received living donor transplants within the first year of initiating dialysis. 

In [163]:
set([(len(str(num))) for num in df['Standardized First Kidney Transplant Waitlist Ratio']])

{3, 4}

### PPPW Category Text
Data type: object

Value count: 7178

Number of unique values: 3

Percent unique values: 0.000007

Ranges of values: As Expected, Worse than Expected, Better than Expected

Length of values: 3, 11, 19, 20

Description: Percentage of prevalent patients waitlisted (PPPW) category text. The PPPW measure tracks the percentage of patients at each dialysis facility who were on the kidney or kidney-pancreas transplant waiting list. Results are averaged across patients prevalent on the last day of each month during the reporting year, adjusted for age.

Value description:
- As Expected          : Patient percentage of prevalent patients waitlisted (PPPW) categorized as “As Expected”       
- Worse than Expected  :  Patient percentage of prevalent patients waitlisted (PPPW) categorized as “Worse than Expected”
- Better than Expected : Patient percentage of prevalent patients waitlisted (PPPW) categorized as “Better than Expected"

In [164]:
df['PPPW Category Text'].value_counts()

As Expected             6539
Better than Expected     462
Worse than Expected      177
Name: PPPW Category Text, dtype: int64

In [165]:
set([(len(str(num))) for num in df['PPPW Category Text']])

{3, 11, 19, 20}

### Patient Prevalent Transplant Waitlist Data Availability Code
Data type: float64

Value count: 448

Number of unique values: 3

Percent unique values: 0.000007

Ranges of values: 199, 201, 258

Length of values: 3

Description: Lists whether the facility had sufficient patient prevalent transplant waitlist data available or the reason for why the data is not available.

Value description:
- 199 : Not enough patients to report on this measure. Call the dialysis center to discuss this measure.
- 201 : Data not reported. Call the dialysis center to discuss this quality measure. 
- 258 : The dialysis center was not open long enough to supply sufficient measure data.

In [166]:
df['Patient Prevalent Transplant Waitlist Data Availability Code'].value_counts()

258.0    279
199.0    155
201.0     14
Name: Patient Prevalent Transplant Waitlist Data Availability Code, dtype: int64

### 95% C.I. (Upper Limit) For PPPW
Data type: float64

Value count: 7178

Number of unique values: 766

Percent unique values: 0.001702

Ranges of values: 0 - 

Length of values: 1 - 4

Description: Lists the upper confident interval limit (95%) for Percentage of Prevalent Patients Waitlisted (PPPW) rate per 100 patient-years. The PPPW measure tracks the percentage of patients at each dialysis facility who were on the kidney or kidney-pancreas transplant waiting list. Results are averaged across patients prevalent on the last day of each month during the reporting year, adjusted for age.

In [167]:
set([(len(str(num))) for num in df['95% C.I. (Upper Limit) For PPPW']])

{3, 4}

### 95% C.I. (Lower Limit) For PPPW
Data type: float64

Value count: 7178

Number of unique values: 355

Percent unique values: 0.000789

Ranges of values: 0.1 - 

Length of values: 1 - 4

Description: Lists the lower confident interval limit (95%) for Percentage of Prevalent Patients Waitlisted (PPPW) rate per 100 patient-years. The PPPW measure tracks the percentage of patients at each dialysis facility who were on the kidney or kidney-pancreas transplant waiting list. Results are averaged across patients prevalent on the last day of each month during the reporting year, adjusted for age.

In [168]:
set([(len(str(num))) for num in df['95% C.I. (Lower Limit) For PPPW']])

{3, 4}

### Number Of Patients For PPPW
Data type: float64

Value count: 7555

Number of unique values: 269

Percent unique values: 0.000598

Ranges of values: 0 - 

Length of values: 1 - 3

Description: Lists the number of patients included in the facility's percentage of prevalent patients waitlisted (PPPW). The PPPW measure tracks the percentage of patients at each dialysis facility who were on the kidney or kidney-pancreas transplant waiting list. Results are averaged across patients prevalent on the last day of each month during the reporting year, adjusted for age.

In [169]:
set([(len(str(num))) for num in df['Number Of Patients For PPPW']])

{3, 4, 5}

### Percentage Of Prevalent Patients Waitlisted
Data type: float64

Value count: 7178

Number of unique values: 571

Percent unique values: 0.001269

Ranges of values: 0.1 - 

Length of values: 1 - 4

Description: Lists the percentage of prevalent patients waitlisted.

In [170]:
set([(len(str(num))) for num in df['Percentage Of Prevalent Patients Waitlisted']])

{3, 4}

### Location
Data type: object 

Value count: 7162

Number of unique values: 7043 

Percent unique values: 0.015653

Ranges of values: 

Length of values: 3, 23, 25 - 29

Description: Lists the coorinate location that corresponds to the facility listed.

In [171]:
set([(len(str(num))) for num in df['Location']])

{3, 23, 25, 26, 27, 28, 29}

# Data Cleaning

## Fill NA

In [172]:
df['Chain Organization'] = df['Chain Organization'].fillna('INDEPENDENT')

In [173]:
fillwithmean = ['Five Star', 
                'Percentage Of Medicare Patients With Hgb < 10 g/dL', 
                'Percentage Of Medicare Patients With Hgb > 12 g/dL', 
                'Number Of Dialysis Patients With Hgb Data', 
                'Number Of Patients Included In The Transfusion Summary', 
                'Percent Of Adult HD Patients With Kt/V >= 1.2', 
                'Number Of Adult HD Patients With Kt/V Data', 
                'Number Of Adult HD Patient-Months With Kt/V Data', 
                'Number Of Adult PD Patients With Kt/V Data',  
                'Number Of Pediatric HD Patients With Kt/V Data', 
                'Number Of Patients In Hypercalcemia Summary', 
                'Number Of Patient-Months In Hypercalcemia Summary', 
                'Percentage Of Adult Patients With Hypercalcemia (Serum Calcium Greater Than 10.2 mg/dL)', 
                'Number Of Patients In Serum Phosphorus Summary', 
                'Number Of Patient-Months In Serum Phosphorus Summary', 
                'Percentage Of Adult Patients With Serum Phosphorus Less Than 3.5 mg/dL', 
                'Percentage Of Adult Patients With Serum Phosphorus Between 3.5 - 4.5 mg/dL', 
                'Percentage Of Adult Patients With Serum Phosphorus Between 4.6 - 5.5 mg/dL', 
                'Percentage Of Adult Patients With Serum Phosphorus Between 5.6 - 7.0 mg/dL', 
                'Percentage Of Adult Patients With Serum Phosphorus Greater Than 7.0 mg/dL', 
                'Number Of Patients Included In Hospitalization Summary', 
                'Number Of Hospitalizations Included In Hospital Readmission Summary', 
                'Number Of Patients Included In Survival Summary', 
                'Mortality Rate (Facility)', 
                'Mortality Rate: Upper Confidence Limit (97.5%)', 
                'Mortality Rate: Lower Confidence Limit (2.5%)', 
                'Readmission Rate (Facility)', 
                'Readmission Rate: Upper Confidence Limit (97.5%)', 
                'Readmission Rate: Lower Confidence Limit (2.5%)', 
                'Hospitalization Rate (Facility)', 
                'Hospitalization Rate: Upper Confidence Limit (97.5%)', 
                'Hospitalization Rate: Lower Confidence Limit (2.5%)', 
                'Number Of Pediatric PD Patients With Kt/V Data', 
                'Standard Infection Ratio', 
                'SIR: Upper Confidence Limit (97.5%)', 
                'SIR: Lower Confidence Limit (2.5%)', 
                'Transfusion Rate (Facility)', 
                'Transfusion Rate: Upper Confidence Limit (97.5%)', 
                'Transfusion Rate: Lower Confidence Limit (2.5%)', 
                'Number Of Patients Included In Fistula Summary', 
                'Fistula Rate (Facility)', 
                'Fistula Rate: Upper Confidence Limit (97.5%)', 
                'Fistula Rate: Lower Confidence Limit (2.5%)', 
                'Number Of Patients In Long Term Catheter Summary', 
                'Number Of Patient-Months In Long Term Catheter Summary', 
                'Percentage Of Adult Patients With Long Term Catheter In Use', 
                'Number Of Patients In nPCR Summary', 
                '95% C.I. (Upper Limit) For SWR', 
                '95% C.I. (Lower Limit) For SWR', 
                'Number Of Patients In This Facility For SWR', 
                'Standardized First Kidney Transplant Waitlist Ratio', 
                '95% C.I. (Upper Limit) For PPPW', 
                '95% C.I. (Lower Limit) For PPPW', 
                'Number Of Patients For PPPW', 
                'Percentage Of Prevalent Patients Waitlisted']

In [174]:
df[fillwithmean] = df[fillwithmean].fillna(df[fillwithmean].mean())

In [175]:
dropcolumns = ['Five Star Data Availability Code', 
               'Address Line 2', 
               'Hgb < 10 Data Availability Code', 
               'Hgb > 12 Data Availability Code', 
               'Patient Transfusion Data Availability Code', 
               'Adult HD Kt/V Data Availability Code',
               'Percentage Of Adult PD PTS With Kt/V >= 1.7', 
               'Percentage Of Pediatric HD Patients With Kt/V >= 1.2', 
               'Number Of Adult PD Patient-Months With Kt/V Data', 
               'Number Of Pediatric HD Patient-Months With Kt/V Data', 
               'Hypercalcemia Data Availability Code', 
               'Serum Phosphorus Data Availability Code', 
               'Patient Hospitalization Data Availability Code', 
               'Patient Hospital Readmission Data Availability Code', 
               'Patient Survival Data Availability Code', 
               'Number Of Pediatric PD Patient-Months With Kt/V Data', 
               'Percentage Of Pediatric PD Patients With Kt/V >= 1.8', 
               'Patient Infection Data Availability Code', 
               'Fistula Data Availability Code', 
               'Long Term Catheter Data Availability Code', 
               'Number Of Patient-Months In nPCR Summary', 
               'Percentage Of Pediatric HD Patients With nPCR', 
               'Patient Transplant Waitlist Data Availability Code', 
               'Patient Prevalent Transplant Waitlist Data Availability Code', 
               'Location']
               

In [176]:
df.drop(df[dropcolumns], axis=1, inplace=True)

In [177]:
backfill = ['Patient Transfusion Category Text', 
                'Adult PD Kt/V Data Availability Code', 
                'Pediatric HD Kt/V Data Availability Code', 
                'Patient Hospitalization Category Text', 
                'Patient Hospital Readmission Category Text', 
                'Patient Survival Category Text', 
                'Pediatric PD Kt/V Data Availability Code', 
                'Patient Infection Category Text', 
                'Fistula Category Text', 
                'nPCR Data Availability Code', 
                'SWR Category Text', 
                'PPPW Category Text']


In [178]:
df[backfill] = df[backfill].fillna(method='bfill')

In [179]:
df.isnull().values.any()

False

## Change dtypes

In [180]:
df['Five Star Date'].head()

0    01/01/2015 - 12/31/2018
1    01/01/2015 - 12/31/2018
2    01/01/2015 - 12/31/2018
3    01/01/2015 - 12/31/2018
4    01/01/2015 - 12/31/2018
Name: Five Star Date, dtype: object

In [181]:
isdateframe = ['Five Star Date', 'Claims Date', 'CROWNWeb Date', 'STrR Date', 'SHR Date', 'SRR Date', 'SMR Date', 'SIR Date', 'SWR Date']

In [182]:
isdate = ['Certification Or Recertification Date']

In [183]:
df[isdate] = df[isdate].astype('datetime64[ns]')

In [184]:
df.iloc[:, :60].info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7626 entries, 0 to 7625
Data columns (total 60 columns):
 #   Column                                                                                   Non-Null Count  Dtype         
---  ------                                                                                   --------------  -----         
 0   Provider Number                                                                          7626 non-null   int64         
 1   Network                                                                                  7626 non-null   int64         
 2   Facility Name                                                                            7626 non-null   object        
 3   Five Star Date                                                                           7626 non-null   object        
 4   Five Star                                                                                7626 non-null   float64       
 5   Address Line 

In [185]:
df.iloc[:, 60:].info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7626 entries, 0 to 7625
Data columns (total 34 columns):
 #   Column                                                       Non-Null Count  Dtype  
---  ------                                                       --------------  -----  
 0   Hospitalization Rate (Facility)                              7626 non-null   float64
 1   Hospitalization Rate: Upper Confidence Limit (97.5%)         7626 non-null   float64
 2   Hospitalization Rate: Lower Confidence Limit (2.5%)          7626 non-null   float64
 3   Number Of Pediatric PD Patients With Kt/V Data               7626 non-null   float64
 4   Pediatric PD Kt/V Data Availability Code                     7626 non-null   float64
 5   SIR Date                                                     7626 non-null   object 
 6   Patient Infection Category Text                              7626 non-null   object 
 7   Standard Infection Ratio                                     7626 non-null   f

## Fix value format

In [186]:
df['Zip'] = df['Zip'].astype(str).str.zfill(5)

In [187]:
set([(len(str(num))) for num in df['Zip']])

{5}

In [188]:
df['Five Star'] = df['Five Star'].astype('int64')

In [197]:
df['Chain Owned'].unique()

array(['Yes', 'No'], dtype=object)

In [200]:
df['Chain Owned'] = df['Chain Owned'].replace(to_replace=['No', 'Yes'], value=[0, 1])

In [206]:
df['Profit Or Non-Profit'] = df['Profit Or Non-Profit'].replace(to_replace=['Non-Profit', 'Profit'], value=[0, 1])

In [208]:
df.iloc[:5, :60].head().T

Unnamed: 0,0,1,2,3,4
Provider Number,42592,52761,72549,82524,82527
Network,13,18,1,4,4
Facility Name,DCI - LITTLE ROCK RENAL SERVICES LLC,DAVITA-PREMIER DIALYSIS CENTER,WALLINGFORD DIALYSIS CARE LLC,"DSI LAUREL DIALYSIS, LLC","FRESENIUS MEDICAL CARE SOUTHERN DELAWARE, LLC"
Five Star Date,01/01/2015 - 12/31/2018,01/01/2015 - 12/31/2018,01/01/2015 - 12/31/2018,01/01/2015 - 12/31/2018,01/01/2015 - 12/31/2018
Five Star,2,3,5,4,5
Address Line 1,1910 JOHN BARROW RD,7612 ATLANTIC AVENUE,720 N MAIN STREET EXT,LAUREL SQUARE SHOPPING CENTER,9115 ANTIQUE ALLEY
City,LITTLE ROCK,CUDAHY,WALLINGFORD,LAUREL,BRIDGEVILLE
State,AR,CA,CT,DE,DE
Zip,72204,90201,06492,19956,19933
County,PULASKI,LOS ANGELES,NEW HAVEN,SUSSEX,SUSSEX


In [209]:
df.iloc[:5, 60:].head().T

Unnamed: 0,0,1,2,3,4
Hospitalization Rate (Facility),228.3,222.9,158.3,186.1,150.8
Hospitalization Rate: Upper Confidence Limit (97.5%),346.5,323,280.6,321.1,278
Hospitalization Rate: Lower Confidence Limit (2.5%),153.6,161.7,93.7,112.8,86
Number Of Pediatric PD Patients With Kt/V Data,0,0,0,0,0
Pediatric PD Kt/V Data Availability Code,259,259,259,259,259
SIR Date,01/01/2018 - 12/31/2018,01/01/2018 - 12/31/2018,01/01/2018 - 12/31/2018,01/01/2018 - 12/31/2018,01/01/2018 - 12/31/2018
Patient Infection Category Text,As Expected,As Expected,As Expected,As Expected,As Expected
Standard Infection Ratio,1.61,0.75,1.54,1.45,0
SIR: Upper Confidence Limit (97.5%),2.8,1.28,4.2,3.94,1.2
SIR: Lower Confidence Limit (2.5%),0.85,0.41,0.39,0.37,0.30052


# Export to new csv

In [210]:
import os

In [211]:
os.getcwd()

'/Users/nicholehartz/Documents/DataScience/Projects/Capstone2'

In [212]:
df.to_csv('capstone2_dialysis_cleaned_data.csv')