In [1]:
import pandas as pd
import numpy as np
from scipy import stats

In [2]:
# Read the facility listing CSV (path is relative to the notebook's working directory).
facility_listing_csv = 'Dialysis_Facility_Compare_-_Listing_by_Facility.csv'
df = pd.read_csv(facility_listing_csv)

# Replace Missing Values

In [3]:
# Turn the literal 'Not Available' marker into NaN so the later numeric
# coercion and fillna steps treat those cells as missing.
df = df.replace(['Not Available'], np.nan)

In [4]:
# Columns removed from the working frame before any imputation happens.
# NOTE(review): 'Percentage of Pediatric HD patients with Kt/V >= 1.2' is
# still described in the Data Definitions section; any later cell that
# indexes a dropped column will raise KeyError — confirm.
columns_to_drop = [
    'Location',
    'Percentage of Pediatric HD patients with Kt/V >= 1.2',
    'Number of Pediatric HD patient-months with Kt/V data',
    'Number of pediatric PD patient-months with KT/V data',
    'Percentage of pediatric PD patients with Kt/V>=1.8',
    'Number of patient-months in nPCR summary',
    'Percentage of pediatric HD patients with nPCR',
]
df = df.drop(columns=columns_to_drop)

In [5]:
# Normalise three header spellings: two carry a trailing space, and one
# says "patient months" where the sibling columns say "patient-months".
header_fixes = {
    'Number of patient-months in Serum phosphorus summary ': 'Number of patient-months in Serum phosphorus summary',
    'Serum phosphorus Data Availability Code ': 'Serum phosphorus Data Availability Code',
    'Number of patient months in long term catheter summary': 'Number of patient-months in long term catheter summary',
}
df = df.rename(columns=header_fixes)

In [6]:
# Columns that are coerced to numeric and then mean-imputed by the cells below.
fillwithmean = [
    'Five Star',
    'Percentage of Medicare patients with Hgb<10 g/dL',
    'Percentage of Medicare patients with Hgb>12 g/dL',
    'Number of Dialysis Patients with Hgb data',
    'Number of patients included in the transfusion summary',
    'Percent of Adult HD patients with Kt/V >= 1.2',
    'Percentage of Adult PD PTS with Kt/V >= 1.7',
    'Number of Adult HD patients with Kt/V data',
    'Number of Adult HD patient-months with Kt/V data',
    'Number of Adult PD patients with Kt/V data',
    'Number of Adult PD patient-months with Kt/V data',
    'Number of Pediatric HD patients with Kt/V data',
    'Number of patients in hypercalcemia summary',
    'Number of patient-months in hypercalcemia summary',
    'Percentage of Adult patients with hypercalcemia (serum calcium greater than 10.2 mg/dL)',
    'Number of patients in Serum phosphorus summary',
    'Number of patient-months in Serum phosphorus summary',
    'Percentage of Adult patients with serum phosphorus less than 3.5 mg/dL',
    'Percentage of Adult patients with serum phosphorus between 3.5-4.5 mg/dL',
    'Percentage of Adult patients with serum phosphorus between 4.6-5.5 mg/dL',
    'Percentage of Adult patients with serum phosphorus between 5.6-7.0 mg/dL',
    'Percentage of Adult patients with serum phosphorus greater than 7.0 mg/dL',
    'Number of patients included in hospitalization summary',
    'Number of hospitalizations included in hospital readmission summary',
    'Number of patients included in survival summary',
    'Mortality Rate (Facility)',
    'Mortality Rate: Upper Confidence Limit (97.5%)',
    'Mortality Rate: Lower Confidence Limit (2.5%)',
    'Readmission Rate (Facility)',
    'Readmission Rate: Upper Confidence Limit (97.5%)',
    'Readmission Rate: Lower Confidence Limit (2.5%)',
    'Hospitalization Rate (Facility)',
    'Hospitalization Rate: Upper Confidence Limit (97.5%)',
    'Hospitalization Rate: Lower Confidence Limit (2.5%)',
    'Number of pediatric PD patients with Kt/V data',
    'Standard Infection Ratio',
    'SIR: Upper Confidence Limit (97.5%)',
    'SIR: Lower Confidence Limit (2.5%)',
    'Transfusion Rate (Facility)',
    'Transfusion Rate: Upper Confidence Limit (97.5%)',
    'Transfusion Rate: Lower Confidence Limit (2.5%)',
    'Number of Patients included in fistula summary',
    'Fistula Rate (Facility)',
    'Fistula Rate: Upper Confidence Limit (97.5%)',
    'Fistula Rate: Lower Confidence Limit (2.5%)',
    'Number of patients in long term catheter summary',
    'Number of patient-months in long term catheter summary',
    'Percentage of Adult patients with long term catheter in use',
    'Number of patients in nPCR summary',
    'Number of patients in this facility for SWR',
    'Standardized First Kidney Transplant Waitlist Ratio',
    '95% C.I. (upper limit) for PPPW',
    '95% C.I. (lower limit) for PPPW',
    'Number of patients for PPPW',
    'Percentage of Prevalent Patients Waitlisted',
]

In [7]:
# Coerce every mean-imputed column to a numeric dtype; anything that cannot
# be parsed becomes NaN (errors='coerce') and is filled by the next cell.
# Iterate the label list directly instead of materialising df[fillwithmean]
# just to walk its column names.
for column_name in fillwithmean:
    df[column_name] = pd.to_numeric(df[column_name], errors='coerce')

In [8]:
# Replace the remaining NaNs in each numeric column with that column's mean.
# NOTE(review): 'Five Star' is part of fillwithmean, so missing star ratings
# become the (generally fractional) column mean — confirm this is intended.
numeric_block = df[fillwithmean]
column_means = numeric_block.mean()
df[fillwithmean] = numeric_block.fillna(column_means)

In [9]:
# Columns whose missing entries are restored to the text 'Not Available'.
# NOTE(review): the two SWR confidence-limit columns look numeric (the PPPW
# limits are mean-imputed via fillwithmean) — confirm they belong in this list.
fillwithnotavailable = [
    'Patient Transfusion category text',
    'Patient Hospitalization category text',
    'Patient Hospital Readmission Category',
    'Patient Survival Category Text',
    'Patient Infection category text',
    'Fistula Category Text',
    'SWR category text',
    '95% C.I. (upper limit) for SWR',
    '95% C.I. (lower limit) for SWR',
    'PPPW category text',
]

In [10]:
# Put the 'Not Available' label back into the category/text columns.
category_block = df[fillwithnotavailable]
df[fillwithnotavailable] = category_block.fillna('Not Available')

In [14]:
# Optional free-text fields: mark absent values with a dash placeholder.
dash_filled_columns = ['Address Line 2', 'Chain Organization']
df[dash_filled_columns] = df[dash_filled_columns].fillna('-')

In [15]:
# Sanity check: True if any missing value is left anywhere in the frame.
df.isna().to_numpy().any()

False

In [None]:
df.dtypes

In [None]:
df.head()

In [None]:
df.tail()

In [None]:
df.shape

In [None]:
# Print every column label on its own line.
for column_name in df.columns:
    print(column_name)

In [None]:
# NOTE(review): this cell repeats the column listing printed by the
# preceding cell — candidate for removal.
for column_name in df.columns:
    print(column_name)

In [None]:
df.iloc[:, :60].info()

In [None]:
df.iloc[:, 60:].info()

In [None]:
df.iloc[:, :30].describe()

In [None]:
df.iloc[:, 30:90].describe()

In [None]:
df.iloc[:, 90:].describe()

# Data Definitions

### Provider Number
Data type: int64

Value count: 7626

Number of unique values: 7626

Percent unique values: 100% unique

Ranges of values: 12306 - 852568

Length of values: 5, 6

Description: Lists the numeric code used to identify the provider listed.

In [None]:
df['Provider Number'].head()

In [None]:
df['Provider Number'].tail()

In [None]:
# Distinct character lengths of the provider numbers.
{len(str(value)) for value in df['Provider Number']}

In [None]:
df['Provider Number'].describe()

In [None]:
df['Provider Number'].nunique()

### Network
Data type: int64

Value count: 7626 

Number of unique values: 18

Percent unique values: 0% unique

Ranges of values: 1 - 18

Length of values: 1, 2

Description: Lists the numeric code of the ESRD (End Stage Renal Disease) Network, i.e. the geographic area, to which each Medicare-approved facility belongs. 
A map of the networks can be found here:
https://esrdncc.org/en/ESRD-network-map/

Value description:
- 1   : Network 1 (CT, ME, MA, NH, RI, VT)
- 2   : Network 2 (NY)
- 3   : Network 3 (NJ, PR, VI) 
- 4   : Network 4 (DE, PA)
- 5   : Network 5 (DC, MD, VA, WV)
- 6   : Network 6 (GA, NC, SC)
- 7   : Network 7 (FL)
- 8   : Network 8 (AL, MS, TN)
- 9   : Network 9 (IN, KY, OH)
- 10  : Network 10 (IL)
- 11  : Network 11 (MI, MN, ND, SD, WI)
- 12  : Network 12 (IA, KS, MO, NE)
- 13  : Network 13 (AR, LA, OK)
- 14  : Network 14 (TX)
- 15  : Network 15 (AZ, CO, NV, NM, UT, WY)
- 16  : Network 16 (AK, ID, MT, OR, WA)
- 17  : Network 17 (AS, GU, HI, MP, N. CA)
- 18  : Network 18 (S. CA)

In [None]:
df['Network'].head()

In [None]:
df['Network'].tail()

In [None]:
df['Network'].describe()

In [None]:
df['Network'].nunique()

In [None]:
df['Network'].unique()

In [None]:
# Percent unique values
# True = the network code occurs exactly once, False = it occurs more than once
network_frequency = df['Network'].value_counts()
pd.Series(network_frequency.eq(1).to_numpy()).value_counts()

### Facility Name
Data type: object

Value count: 7626 

Number of unique values: 7448

Percent unique values: 96.5% unique

Length of values: 7 - 68

Description: Lists the name of the facility listed.

In [None]:
df['Facility Name'].head()

In [None]:
df['Facility Name'].tail()

In [None]:
df['Facility Name'].describe()

In [None]:
# Percent unique values
# True = the facility name occurs exactly once, False = it is shared by several rows
name_frequency = df['Facility Name'].value_counts()
pd.Series(name_frequency.eq(1).to_numpy()).value_counts()

In [None]:
# Distinct character lengths of the facility names.
{len(str(value)) for value in df['Facility Name']}

### Five Star Date
Data type: object

Value count: 7626

Number of unique values: 1

Percent unique values: 0% unique

Ranges of values: "01/01/2015 - 12/31/2018"

Length of values: 23

Description: Lists the data collection period for the five star rating.

In [None]:
df['Five Star Date'].head()

In [None]:
df['Five Star Date'].tail()

In [None]:
df['Five Star Date'].describe()

In [None]:
# Distinct character lengths of the five-star date strings.
{len(str(value)) for value in df['Five Star Date']}

### Five Star
Data type: object

Value count: 6725

Number of unique values: 5

Percent unique values: 0%

Ranges of values: 1 - 5

Length of values: 1, 13

Description: Lists the 5-star rating for the facility. The “Dialysis Facility Compare (DFC) Star Program” is a rating system developed by Medicare that assigns 1 to 5 stars to dialysis facilities by comparing the health of the patients in their clinics to the patients in other dialysis facilities across the country.

In [None]:
df['Five Star'].head()

In [None]:
df['Five Star'].tail()

In [None]:
df['Five Star'].describe()

In [None]:
df['Five Star'].value_counts()

### Five Star Data Availability Code
Data type: int64

Value count: 7626

Number of unique values: 4

Percent unique values: 0% unique

Ranges of values: 1, 258, 260, 261

Length of values: 1, 3

Description: Lists whether the facility had sufficient five star data available or the reason for why the data is not available.

Value description:
- 1   : N/A
- 258 : The dialysis center was not open long enough to supply sufficient measure data. 
- 260 : Not enough quality measure data to calculate a star rating.
- 261 : Medicare determined that at least one measure included in the star rating calculation was not accurate for this dialysis center.

In [None]:
df['Five Star Data Availability Code'].head()

In [None]:
df['Five Star Data Availability Code'].tail()

In [None]:
df['Five Star Data Availability Code'].value_counts()

In [None]:
df['Five Star Data Availability Code'].count()

### Address Line 1
Data type: object

Value count: 7626

Number of unique values: 7574

Percent unique values: 98.9%

Length of values: 4 - 59

Description: Lists the first line of the address that corresponds to the facility listed.

In [None]:
df['Address Line 1'].head()

In [None]:
df['Address Line 1'].describe()

In [None]:
# Percent unique values
# True = the address occurs exactly once, False = it is shared by several rows
address_frequency = df['Address Line 1'].value_counts()
pd.Series(address_frequency.eq(1).to_numpy()).value_counts()

In [None]:
# Distinct character lengths of the first address line.
{len(str(value)) for value in df['Address Line 1']}

### Address Line 2
Data type: object

Value count: 1087

Number of unique values: 599

Percent unique values: 46.9% unique

Length of values: 2 - 47

Description: Lists the second line of the address that corresponds to the facility listed.

In [None]:
df['Address Line 2'].head()

In [None]:
df['Address Line 2'].tail()

In [None]:
df['Address Line 2'].describe()

In [None]:
# Percent unique values
# True = the second address line occurs exactly once, False = it repeats
address2_frequency = df['Address Line 2'].value_counts()
pd.Series(address2_frequency.eq(1).to_numpy()).value_counts()

In [None]:
# Distinct character lengths of the second address line.
{len(str(value)) for value in df['Address Line 2']}

### State
Data type: object

Value count: 7626

Number of unique values: 56

Percent unique values: 0% unique

Length of values: 2

Description: Lists the alphabetic postal code used to identify the state that corresponds to the facility listed.

In [None]:
df['State'].head()

In [None]:
df['State'].tail()

In [None]:
df['State'].describe()

In [None]:
# Percent unique values
# True = the state code occurs exactly once, False = it occurs more than once
state_frequency = df['State'].value_counts()
pd.Series(state_frequency.eq(1).to_numpy()).value_counts()

### Zip
Data type: int64

Value count: 7626

Number of unique values: 5276

Percent unique values: 47.5% unique

Ranges of values: 603 - 99801

Length of values: 3 - 5

Description: Lists the full postal ZIP code that corresponds to the facility listed.

In [None]:
df['Zip'].head()

In [None]:
df['Zip'].tail()

In [None]:
df['Zip'].describe()

In [None]:
df['Zip'].nunique()

In [None]:
# Percent unique values
# True = the ZIP code occurs exactly once, False = it occurs more than once
zip_frequency = df['Zip'].value_counts()
pd.Series(zip_frequency.eq(1).to_numpy()).value_counts()

In [None]:
# Distinct character lengths of the ZIP codes as parsed.
{len(str(value)) for value in df['Zip']}

### County
Data type: object

Value count: 7626

Number of unique values: 1258

Percent unique values: 6.33% unique

Length of values: 3 - 21

Description: Lists the name of the county that corresponds to the facility listed.

In [None]:
df['County'].head()

In [None]:
df['County'].tail()

In [None]:
df['County'].describe()

In [None]:
# Percent unique values
# True = the county name occurs exactly once, False = it occurs more than once
county_frequency = df['County'].value_counts()
pd.Series(county_frequency.eq(1).to_numpy()).value_counts()

In [None]:
# Distinct character lengths of the county names.
{len(str(value)) for value in df['County']}

### Phone Number
Data type: object

Value count: 7626

Number of unique values: 7581

Percent unique values: 99.0% unique

Length of values: 14

Description: Lists the telephone number that corresponds to the facility listed.

In [None]:
df['Phone Number'].head()

In [None]:
df['Phone Number'].tail()

In [None]:
df['Phone Number'].describe()

In [None]:
# Percent unique values
# True = the phone number occurs exactly once, False = it is shared by several rows
phone_frequency = df['Phone Number'].value_counts()
pd.Series(phone_frequency.eq(1).to_numpy()).value_counts()

In [None]:
# Distinct character lengths of the phone numbers.
{len(str(value)) for value in df['Phone Number']}

### Profit or Non-Profit
Data type: object

Value count: 7626

Number of unique values: 2

Percent unique values: 0% unique

Ranges of values: Profit, Non-Profit

Length of values: 6, 10

Description: Indicates whether the dialysis facility operates as a for-profit or non-profit business.

In [None]:
df['Profit or Non-Profit'].head()

In [None]:
df['Profit or Non-Profit'].tail()

In [None]:
df['Profit or Non-Profit'].describe()

In [None]:
df['Profit or Non-Profit'].value_counts()

In [None]:
# Distinct character lengths of the profit-status labels.
{len(str(value)) for value in df['Profit or Non-Profit']}

### Chain Owned
Data type: object

Value count: 7626

Number of unique values: 2

Percent unique values: 0% unique

Ranges of values: Yes, No

Length of values: 2 - 3

Description: Indicates whether or not the facility is owned or managed by a chain organization.

In [None]:
df['Chain Owned'].head()

In [None]:
df['Chain Owned'].tail()

In [None]:
df['Chain Owned'].describe()

In [None]:
df['Chain Owned'].value_counts()

### Chain Organization
Data type: object

Value count: 7625

Number of unique values: 111

Percent unique values: 0.734% unique values

Length of values: 3 - 39

Description: Lists the name of the chain organization if applicable.

In [None]:
df['Chain Organization'].head()

In [None]:
df['Chain Organization'].tail()

In [None]:
df['Chain Organization'].describe()

In [None]:
df['Chain Organization'].isnull().values.any()

In [None]:
# Percent unique values
# True = the chain organization occurs exactly once, False = it occurs more than once
chain_frequency = df['Chain Organization'].value_counts()
pd.Series(chain_frequency.eq(1).to_numpy()).value_counts()

In [None]:
# Distinct character lengths of the chain organization names.
{len(str(value)) for value in df['Chain Organization']}

### Late Shift
Data type: int64

Value count: 7626

Number of unique values: 2

Percent unique values: 0% unique

Ranges of values: 0 - 1

Length of values: 1

Description: Lists whether or not the facility has a shift starting at 5:00 p.m. or later.

In [None]:
df['Late Shift'].head()

In [None]:
df['Late Shift'].tail()

In [None]:
df['Late Shift'].describe()

In [None]:
df['Late Shift'].value_counts()

###  # of Dialysis Stations
Data type: int64

Value count: 7626

Number of unique values: 68

Percent unique values: 0.157%

Ranges of values: 0 - 80

Mean: 17.4

Length of values: 1 - 2

Description: Indicates the total number of dialysis stations at the dialysis facility. 

In [None]:
df['# of Dialysis Stations'].head()

In [None]:
df['# of Dialysis Stations'].tail()

In [None]:
df['# of Dialysis Stations'].describe()

In [None]:
df['# of Dialysis Stations'].nunique()

In [None]:
# Percent unique values
# True = the station count occurs exactly once, False = it occurs more than once
station_frequency = df['# of Dialysis Stations'].value_counts()
pd.Series(station_frequency.eq(1).to_numpy()).value_counts()

### Offers in-center hemodialysis
Data type: int64

Value count: 7626

Number of unique values: 2

Percent unique values: 0% unique

Ranges of values: 0 - 1

Length of values: 1

Description: Indicates whether the facility offers in-center hemodialysis. 

In [None]:
df['Offers in-center hemodialysis'].head()

In [None]:
df['Offers in-center hemodialysis'].tail()

In [None]:
df['Offers in-center hemodialysis'].describe()

In [None]:
df['Offers in-center hemodialysis'].value_counts()

### Offers peritoneal dialysis
Data type: int64

Value count: 7626

Number of unique values: 2

Percent unique values: 0% unique

Ranges of values: 0 - 1

Length of values: 1

Description: Indicates whether the facility offers peritoneal dialysis.

In [None]:
df['Offers peritoneal dialysis'].head()

In [None]:
df['Offers peritoneal dialysis'].tail()

In [None]:
df['Offers peritoneal dialysis'].describe()

In [None]:
df['Offers peritoneal dialysis'].nunique()

In [None]:
df['Offers peritoneal dialysis'].value_counts()

### Offers home hemodialysis training
Data type: int64

Value count: 7626

Number of unique values: 2

Percent unique values: 0% unique

Ranges of values: 0 - 1

Length of values: 1

Description: Indicates whether the facility offers home hemodialysis training.

In [None]:
df['Offers home hemodialysis training'].head()

In [None]:
df['Offers home hemodialysis training'].tail()

In [None]:
df['Offers home hemodialysis training'].describe()

In [None]:
df['Offers home hemodialysis training'].nunique()

In [None]:
df['Offers home hemodialysis training'].value_counts()

### Certification or Recertification Date
Data type: object

Value count: 7626

Number of unique values: 4800

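Percent unique values: 0.157% (NOTE: this figure matches the one reported for '# of Dialysis Stations' and is inconsistent with 4800 unique values in 7626 rows; recompute it for this column)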

Length of values: 10

Description: Lists the initial or recertification date for the facility listed. These facilities are certified if they pass inspection. Medicare or Medicaid only covers care provided by certified providers. Being certified is not the same as being accredited. 

In [None]:
df['Certification or Recertification Date'].head()

In [None]:
df['Certification or Recertification Date'].tail()

In [None]:
df['Certification or Recertification Date'].describe()

In [None]:
# Percent unique values
# True = the certification date occurs exactly once, False = it is shared by
# several facilities. This section describes 'Certification or
# Recertification Date', so the count must be taken on that column
# (the cell previously queried '# of Dialysis Stations').
cert_date_frequency = df['Certification or Recertification Date'].value_counts()
pd.Series(cert_date_frequency.eq(1).to_numpy()).value_counts()

In [None]:
# Distinct character lengths of the certification date strings.
{len(str(value)) for value in df['Certification or Recertification Date']}

### Claims Date
Data type: object

Value count: 7626

Number of unique values: 1

Percent unique values: 0% unique values

Ranges of values: "07/01/2018 - 06/30/2019"

Length of values: 23

Description: Lists the data collection period for claims-based summaries.

In [None]:
df['Claims Date'].head()

In [None]:
df['Claims Date'].tail()

In [None]:
df['Claims Date'].describe()

In [None]:
# Distinct character lengths of the claims date strings.
{len(str(value)) for value in df['Claims Date']}

### CROWNWeb Date
Data type: object

Value count: 7626

Number of unique values: 1

Percent unique values: 0% unique

Ranges of values: "07/01/2018 - 06/30/2019"

Length of values: 23

Description: Lists the data collection period for CROWNWeb based measures.

In [None]:
df['CROWNWeb Date'].head()

In [None]:
df['CROWNWeb Date'].tail()

In [None]:
df['CROWNWeb Date'].describe()

In [None]:
# Distinct character lengths of the CROWNWeb date strings.
{len(str(value)) for value in df['CROWNWeb Date']}

### STrR Date
Data type: object

Value count: 7626

Number of unique values: 1

Percent unique values: 0% unique

Ranges of values: "01/01/2018 - 12/31/2018"

Length of values: 23

Description: Lists the time period for patient transfusion summary (STrR).

In [None]:
df['STrR Date'].head()

In [None]:
df['STrR Date'].tail()

In [None]:
df['STrR Date'].describe()

In [None]:
# Distinct character lengths of the STrR date strings.
{len(str(value)) for value in df['STrR Date']}

### Percentage of Medicare patients with Hgb<10 g/dL
Data type: object

Value count: 7626

Number of unique values: 91

Percent unique values: 0.0656% unique values

Ranges of values: 0 - 

Length of values: 1-3, 13

Description: Lists the percentage of patients who had average hemoglobin (hgb) less than 10.0 g/dL. Hemoglobin is a protein in red blood cells that carries oxygen from the lungs to the rest of the body. A normal hemoglobin level is 11 to 18 grams per deciliter (g/dL), depending on one's age and gender. But 7 to 8 g/dL is a safe level. 

In [None]:
df['Percentage of Medicare patients with Hgb<10 g/dL'].head()

In [None]:
df['Percentage of Medicare patients with Hgb<10 g/dL'].tail()

In [None]:
df['Percentage of Medicare patients with Hgb<10 g/dL'].describe()

In [None]:
df['Percentage of Medicare patients with Hgb<10 g/dL'].min()

In [None]:
df['Percentage of Medicare patients with Hgb<10 g/dL'].max()

In [None]:
# Percent unique values
# True = the percentage value occurs exactly once, False = it occurs more than once
hgb_low_frequency = df['Percentage of Medicare patients with Hgb<10 g/dL'].value_counts()
pd.Series(hgb_low_frequency.eq(1).to_numpy()).value_counts()

In [None]:
# Distinct character lengths of the values once rendered as strings.
{len(str(value)) for value in df['Percentage of Medicare patients with Hgb<10 g/dL']}

### HGB<10 data availability code
Data type: int64

Value count: 7626

Number of unique values: 4

Percent unique values: 0% unique values

Ranges of values: 1, 199, 201, 258 

Length of values: 1, 3

Description: Lists whether the facility had sufficient hemoglobin (hgb) data available or the reason for why the data is not available. Hemoglobin is a protein in red blood cells that carries oxygen from the lungs to the rest of the body. A normal hemoglobin level is 11 to 18 grams per deciliter (g/dL), depending on one's age and gender. But 7 to 8 g/dL is a safe level. 

Value description:
- 1   : N/A
- 199 : Not enough patients to report on this measure. Call the dialysis center to discuss this measure.
- 201 : Data not reported. Call the dialysis center to discuss this quality measure. 
- 258 : The dialysis center was not open long enough to supply sufficient measure data. 

In [None]:
df['HGB<10 data availability code'].head()

In [None]:
df['HGB<10 data availability code'].tail()

In [None]:
df['HGB<10 data availability code'].count()

In [None]:
df['HGB<10 data availability code'].value_counts()

### Percentage of Medicare patients with Hgb>12 g/dL
Data type: object

Value count: 7626

Number of unique values: 21

Percent unique values: 0.0787% unique values

Ranges of values: 0 - 

Length of values: 1 - 2, 13

Description: Lists the percentage of patients who had average hemoglobin (hgb) greater than 12.0 g/dL. Hemoglobin is a protein in red blood cells that carries oxygen from the lungs to the rest of the body. A normal hemoglobin level is 11 to 18 grams per deciliter (g/dL), depending on one's age and gender. But 7 to 8 g/dL is a safe level. 

In [None]:
df['Percentage of Medicare patients with Hgb>12 g/dL'].head()

In [None]:
df['Percentage of Medicare patients with Hgb>12 g/dL'].tail()

In [None]:
df['Percentage of Medicare patients with Hgb>12 g/dL'].describe()

In [None]:
# Percent unique values
# True = the percentage value occurs exactly once, False = it occurs more than once
hgb_high_frequency = df['Percentage of Medicare patients with Hgb>12 g/dL'].value_counts()
pd.Series(hgb_high_frequency.eq(1).to_numpy()).value_counts()

In [None]:
df['Percentage of Medicare patients with Hgb>12 g/dL'].min()

In [None]:
df['Percentage of Medicare patients with Hgb>12 g/dL'].max()

In [None]:
# Distinct character lengths of the values once rendered as strings.
{len(str(value)) for value in df['Percentage of Medicare patients with Hgb>12 g/dL']}

### Hgb > 12 data availability code
Data type: int64

Value count: 7626

Number of unique values: 4

Percent unique values: 0% unique values

Ranges of values: 1, 199, 201, 258

Length of values: 1, 3

Description: Lists whether the facility had sufficient hemoglobin (hgb) data available or the reason for why the data is not available. Hemoglobin is a protein in red blood cells that carries oxygen from the lungs to the rest of the body. A normal hemoglobin level is 11 to 18 grams per deciliter (g/dL), depending on one's age and gender. But 7 to 8 g/dL is a safe level. 

Value description:
- 1   : N/A
- 199 : Not enough patients to report on this measure. Call the dialysis center to discuss this measure.
- 201 : Data not reported. Call the dialysis center to discuss this quality measure. 
- 258 : The dialysis center was not open long enough to supply sufficient measure data. 

In [None]:
df['Hgb > 12 data availability code'].head()

In [None]:
df['Hgb > 12 data availability code'].tail()

In [None]:
df['Hgb > 12 data availability code'].describe()

In [None]:
df['Hgb > 12 data availability code'].value_counts()

### Number of Dialysis Patients with Hgb data
Data type: object

Value count: 7626

Number of unique values: 160 

Percent unique values: 0.315%

Ranges of values: 0 - 

Length of values: 1-3, 13

Description: Lists the number of patients included in the hemoglobin (hgb) greater than 12.0 g/dL summary. Hemoglobin is a protein in red blood cells that carries oxygen from the lungs to the rest of the body. A normal hemoglobin level is 11 to 18 grams per deciliter (g/dL), depending on one's age and gender. But 7 to 8 g/dL is a safe level. 

In [None]:
df['Number of Dialysis Patients with Hgb data'].head()

In [None]:
df['Number of Dialysis Patients with Hgb data'].tail()

In [None]:
df['Number of Dialysis Patients with Hgb data'].describe()

In [None]:
# Percent unique values
# True = the patient count occurs exactly once, False = it occurs more than once
hgb_patient_frequency = df['Number of Dialysis Patients with Hgb data'].value_counts()
pd.Series(hgb_patient_frequency.eq(1).to_numpy()).value_counts()

In [None]:
df['Number of Dialysis Patients with Hgb data'].min()

In [None]:
df['Number of Dialysis Patients with Hgb data'].max()

In [None]:
# Distinct character lengths of the values once rendered as strings.
{len(str(value)) for value in df['Number of Dialysis Patients with Hgb data']}

### Patient Transfusion data availability Code
Data type: int64

Value count: 7626

Number of unique values: 5

Percent unique values: 0%

Ranges of values: 1, 199, 201, 255, 258

Length of values: 1, 3

Description: Lists whether the facility had sufficient transfusion data available or the reason for why the data is not available.

Value description:
- 1   : N/A
- 199 : Not enough patients to report on this measure. Call the dialysis center to discuss this measure.
- 201 : Data not reported. Call the dialysis center to discuss this quality measure. 
- 255 : Medicare determined that the percentage reported was not accurate.
- 258 : The dialysis center was not open long enough to supply sufficient measure data. 

In [None]:
df['Patient Transfusion data availability Code'].head()

In [None]:
df['Patient Transfusion data availability Code'].tail()

In [None]:
df['Patient Transfusion data availability Code'].count()

In [None]:
df['Patient Transfusion data availability Code'].value_counts()

### Patient Transfusion category text
Data type: object

Value count: 7626

Number of unique values: 4

Percent unique values: 0%

Ranges of values: As Expected, Not Available, Worse than Expected, Better than Expected 

Length of values: 11, 13, 19, 20

Description: Patient transfusion category.

Value description:
- As Expected          : Patient transfusions categorized as “As Expected”       
- Worse than Expected  : Patient transfusions categorized as “Worse than Expected”
- Better than Expected : Patient transfusions categorized as “Better than Expected”
- Not Available 

In [None]:
df['Patient Transfusion category text'].head()

In [None]:
df['Patient Transfusion category text'].tail()

In [None]:
df['Patient Transfusion category text'].describe()

In [None]:
# Percent unique values
# True = the category label occurs exactly once, False = it occurs more than once
category_frequency = df['Patient Transfusion category text'].value_counts()
pd.Series(category_frequency.eq(1).to_numpy()).value_counts()

In [None]:
df['Patient Transfusion category text'].value_counts()

In [None]:
# Distinct character lengths of the transfusion category labels.
{len(str(value)) for value in df['Patient Transfusion category text']}

### Number of patients included in the transfusion summary
Data type: object

Value count: 7626

Number of unique values: 226

Percent unique values: 0.446%

Ranges of values: 0 - 

Length of values: 1 - 3, 13

Description: Lists the number of patients included in the facility’s transfusion summary.

In [None]:
df['Number of patients included in the transfusion summary'].head()

In [None]:
df['Number of patients included in the transfusion summary'].tail()

In [None]:
df['Number of patients included in the transfusion summary'].describe()

In [None]:
# Percent unique values
# True = the patient count occurs exactly once, False = it occurs more than once
transfusion_n_frequency = df['Number of patients included in the transfusion summary'].value_counts()
pd.Series(transfusion_n_frequency.eq(1).to_numpy()).value_counts()

In [None]:
df['Number of patients included in the transfusion summary'].min()

In [None]:
df['Number of patients included in the transfusion summary'].max()

In [None]:
# Distinct character lengths of the values once rendered as strings.
{len(str(value)) for value in df['Number of patients included in the transfusion summary']}

### Percent of Adult HD patients with Kt/V >= 1.2
Data type: object

Value count: 7626 

Number of unique values: 54

Percent unique values: 0.170% 

Ranges of values: 0 - 

Length of values: 1 - 3, 13

Description: Lists the percentage of adult HD patients with Kt/V greater than or equal to 1.2. The parameter Kt/V is a measurement of the efficacy of a hemodialysis session. It identifies the effective removal of a specific solute (clearance K) resulting from a given treatment (characterized by time t) in a given patient (with a specific volume of distribution V for the solute considered).

In [None]:
df['Percent of Adult HD patients with Kt/V >= 1.2'].head()

In [None]:
df['Percent of Adult HD patients with Kt/V >= 1.2'].tail()

In [None]:
df['Percent of Adult HD patients with Kt/V >= 1.2'].describe()

In [None]:
# Percent unique values
# True = the percentage value occurs exactly once, False = it occurs more than once
hd_ktv_frequency = df['Percent of Adult HD patients with Kt/V >= 1.2'].value_counts()
pd.Series(hd_ktv_frequency.eq(1).to_numpy()).value_counts()

In [None]:
df['Percent of Adult HD patients with Kt/V >= 1.2'].min()

In [None]:
df['Percent of Adult HD patients with Kt/V >= 1.2'].max()

In [None]:
# Distinct character lengths of the values once rendered as strings.
{len(str(value)) for value in df['Percent of Adult HD patients with Kt/V >= 1.2']}

### Adult HD Kt/V data availability code
Data type: int64

Value count: 7626

Number of unique values: 5

Percent unique values: 0%

Ranges of values: 1, 199, 201, 256, 258

Length of values: 1, 3

Description: Lists whether the facility had sufficient adult HD Kt/V data available or the reason for why the data is not available. The parameter Kt/V is a measurement of the efficacy of a hemodialysis session. It identifies the effective removal of a specific solute (clearance K) resulting from a given treatment (characterized by time t) in a given patient (with a specific volume of distribution V for the solute considered).

Value description: 
- 1   : N/A
- 199 : Not enough patients to report on this measure. Call the dialysis center to discuss this measure.
- 201 : Data not reported. Call the dialysis center to discuss this quality measure.
- 256 : The dialysis center did not provide hemodialysis during the reporting period. 
- 258 : The dialysis center was not open long enough to supply sufficient measure data. 

In [None]:
df['Adult HD Kt/V data availability code'].head()

In [None]:
df['Adult HD Kt/V data availability code'].tail()

In [None]:
df['Adult HD Kt/V data availability code'].count()

In [None]:
df['Adult HD Kt/V data availability code'].value_counts()

### Percentage of Adult PD PTS with Kt/V >= 1.7
Data type: object

Value count: 7626

Number of unique values: 71

Percent unique values: 0.210%

Ranges of values: 0 - 

Length of values: 1 - 3, 13

Description: Lists the percentage of adult peritoneal dialysis (PD) patients with Kt/V greater than or equal to 1.7. The parameter Kt/V is a measurement of the efficacy of a hemodialysis session. It identifies the effective removal of a specific solute (clearance K) resulting from a given treatment (characterized by time t) in a given patient (with a specific volume of distribution V for the solute considered).

In [None]:
df['Percentage of Adult PD PTS with Kt/V >= 1.7'].head()

In [None]:
df['Percentage of Adult PD PTS with Kt/V >= 1.7'].tail()

In [None]:
df['Percentage of Adult PD PTS with Kt/V >= 1.7'].describe()

In [None]:
# Percent unique values
# True = the percentage value occurs exactly once, False = it occurs more than once
pd_ktv_frequency = df['Percentage of Adult PD PTS with Kt/V >= 1.7'].value_counts()
pd.Series(pd_ktv_frequency.eq(1).to_numpy()).value_counts()

In [None]:
df['Percentage of Adult PD PTS with Kt/V >= 1.7'].min()

In [None]:
df['Percentage of Adult PD PTS with Kt/V >= 1.7'].max()

In [None]:
set([(len(str(num))) for num in df['Percentage of Adult PD PTS with Kt/V >= 1.7']])

### Adult PD Kt/V Data Availability Code
Data type: int64 

Value count: 7626

Number of unique values: 5

Percent unique values: 0%

Ranges of values: 1, 199, 201, 257, 258

Length of values: 1, 3

Description: Lists whether the facility had sufficient adult PD Kt/V data available or the reason for why the data is not available. The parameter Kt/V is a measurement of the efficacy of a hemodialysis session. It identifies the effective removal of a specific solute (clearance K) resulting from a given treatment (characterized by time t) in a given patient (with a specific volume of distribution V for the solute considered).

Value description:
- 1   : N/A
- 199 : Not enough patients to report on this measure. Call the dialysis center to discuss this measure.
- 201 : Data not reported. Call the dialysis center to discuss this quality measure.
- 257 : The dialysis center does not provide peritoneal dialysis during the reporting period. 
- 258 : The dialysis center was not open long enough to supply sufficient measure data.

In [None]:
# Preview the first rows of the availability-code column.
df['Adult PD Kt/V Data Availability Code'].head()

In [None]:
# Preview the last rows.
df['Adult PD Kt/V Data Availability Code'].tail()

In [None]:
# Number of non-missing entries in the column.
df['Adult PD Kt/V Data Availability Code'].count()

In [None]:
# How many rows carry each availability code.
df['Adult PD Kt/V Data Availability Code'].value_counts()

### Percentage of Pediatric HD patients with Kt/V >= 1.2
Data type: object

Value count: 7626

Number of unique values: 12

Percent unique values: 0.105%

Ranges of values: 100 - 

Length of values: 2, 3, 13

Description: Lists the percentage of pediatric HD patients with Kt/V greater than or equal to 1.2. The parameter Kt/V is a measurement of the efficacy of a hemodialysis session. It identifies the effective removal of a specific solute (clearance K) resulting from a given treatment (characterized by time t) in a given patient (with a specific volume of distribution V for the solute considered).

In [None]:
df['Percentage of Pediatric HD patients with Kt/V >= 1.2'].head()

In [None]:
df['Percentage of Pediatric HD patients with Kt/V >= 1.2'].tail()

In [None]:
df['Percentage of Pediatric HD patients with Kt/V >= 1.2'].describe()

In [None]:
# Percent unique values
# True if facility has a value count of 1 (aka unique)
pd.Series([facility == 1 for facility in df['Percentage of Pediatric HD patients with Kt/V >= 1.2'].value_counts()]).value_counts()

In [None]:
df['Percentage of Pediatric HD patients with Kt/V >= 1.2'].min()

In [None]:
df['Percentage of Pediatric HD patients with Kt/V >= 1.2'].max()

In [None]:
set([(len(str(num))) for num in df['Percentage of Pediatric HD patients with Kt/V >= 1.2']])

### Pediatric HD Kt/V Data Availability Code
Data type: int64

Value count: 7626

Number of unique values: 5

Percent unique values: 0%

Ranges of values: 1, 199, 201, 258, 259

Length of values: 1, 3

Description: Lists whether the facility had sufficient Pediatric HD Kt/V data available or the reason why the data is not available. The parameter Kt/V is a measurement of the efficacy of a hemodialysis session. It identifies the effective removal of a specific solute (clearance K) resulting from a given treatment (characterized by time t) in a given patient (with a specific volume of distribution V for the solute considered).

Value description:
- 1   : N/A
- 199 : Not enough patients to report on this measure. Call the dialysis center to discuss this measure.
- 201 : Data not reported. Call the dialysis center to discuss this quality measure.
- 258 : The dialysis center was not open long enough to supply sufficient measure data. 
- 259 : The dialysis center does not provide hemodialysis and/or peritoneal dialysis to pediatric patients during the reporting period.

In [None]:
# Preview the first rows of the availability-code column.
df['Pediatric HD Kt/V Data Availability Code'].head()

In [None]:
# Preview the last rows.
df['Pediatric HD Kt/V Data Availability Code'].tail()

In [None]:
# Number of non-missing entries in the column.
df['Pediatric HD Kt/V Data Availability Code'].count()

In [None]:
# How many rows carry each availability code.
df['Pediatric HD Kt/V Data Availability Code'].value_counts()

### Number of Adult HD patients with Kt/V data
Data type: object

Value count: 7626

Number of unique values: 271

Percent unique values: 0.498%

Ranges of values: 0 - 

Length of values: 1 - 3, 13

Description: Lists the number of adult HD patients included in Kt/V greater than or equal to 1.2 summary. The parameter Kt/V is a measurement of the efficacy of a hemodialysis session. It identifies the effective removal of a specific solute (clearance K) resulting from a given treatment (characterized by time t) in a given patient (with a specific volume of distribution V for the solute considered).

In [None]:
df['Number of Adult HD patients with Kt/V data'].head()

In [None]:
df['Number of Adult HD patients with Kt/V data'].tail()

In [None]:
df['Number of Adult HD patients with Kt/V data'].describe()

In [None]:
# Percent unique values
# True if facility has a value count of 1 (aka unique)
pd.Series([facility == 1 for facility in df['Number of Adult HD patients with Kt/V data'].value_counts()]).value_counts()

In [None]:
df['Number of Adult HD patients with Kt/V data'].min()

In [None]:
df['Number of Adult HD patients with Kt/V data'].max()

In [None]:
set([(len(str(num))) for num in df['Number of Adult HD patients with Kt/V data']])

### Number of Adult HD patient-months with Kt/V data
Data type: object

Value count: 7626

Number of unique values: 1630

Percent unique values: 4.62%

Ranges of values: 0 - 

Length of values: 1 - 4, 13

Description: Lists the number of adult HD patient-months included in Kt/V greater than or equal to 1.2 summary. The parameter Kt/V is a measurement of the efficacy of a hemodialysis session. It identifies the effective removal of a specific solute (clearance K) resulting from a given treatment (characterized by time t) in a given patient (with a specific volume of distribution V for the solute considered).

In [None]:
df['Number of Adult HD patient-months with Kt/V data'].head()

In [None]:
df['Number of Adult HD patient-months with Kt/V data'].tail()

In [None]:
df['Number of Adult HD patient-months with Kt/V data'].describe()

In [None]:
# Percent unique values
# True if facility has a value count of 1 (aka unique)
pd.Series([facility == 1 for facility in df['Number of Adult HD patient-months with Kt/V data'].value_counts()]).value_counts()

In [None]:
df['Number of Adult HD patient-months with Kt/V data'].min()

In [None]:
df['Number of Adult HD patient-months with Kt/V data'].max()

In [None]:
set([(len(str(num))) for num in df['Number of Adult HD patient-months with Kt/V data']])

### Number of Adult PD patients with Kt/V data
Data type: object

Value count: 7626

Number of unique values: 137

Percent unique values: 0.446%

Ranges of values: 0 -

Length of values: 1 - 3, 13

Description: Lists the number of adult PD patients included in Kt/V greater than or equal to 1.7 summary. The parameter Kt/V is a measurement of the efficacy of a hemodialysis session. It identifies the effective removal of a specific solute (clearance K) resulting from a given treatment (characterized by time t) in a given patient (with a specific volume of distribution V for the solute considered).

In [None]:
df['Number of Adult PD patients with Kt/V data'].head()

In [None]:
df['Number of Adult PD patients with Kt/V data'].tail()

In [None]:
df['Number of Adult PD patients with Kt/V data'].describe()

In [None]:
# Percent unique values
# True if facility has a value count of 1 (aka unique)
pd.Series([facility == 1 for facility in df['Number of Adult PD patients with Kt/V data'].value_counts()]).value_counts()

In [None]:
df['Number of Adult PD patients with Kt/V data'].min()

In [None]:
df['Number of Adult PD patients with Kt/V data'].max()

In [None]:
set([(len(str(num))) for num in df['Number of Adult PD patients with Kt/V data']])

### Number of Adult PD patient-months with Kt/V data
Data type: object

Value count: 7626

Number of unique values: 611

Percent unique values: 2.65%

Ranges of values: 0 - 

Length of values: 1 - 4, 13

Description: Lists the number of adult PD patient-months included in Kt/V greater than or equal to 1.7 summary. The parameter Kt/V is a measurement of the efficacy of a hemodialysis session. It identifies the effective removal of a specific solute (clearance K) resulting from a given treatment (characterized by time t) in a given patient (with a specific volume of distribution V for the solute considered). 

In [None]:
df['Number of Adult PD patient-months with Kt/V data'].head()

In [None]:
df['Number of Adult PD patient-months with Kt/V data'].tail()

In [None]:
df['Number of Adult PD patient-months with Kt/V data'].describe()

In [None]:
# Percent unique values
# True if facility has a value count of 1 (aka unique)
pd.Series([facility == 1 for facility in df['Number of Adult PD patient-months with Kt/V data'].value_counts()]).value_counts()

In [None]:
df['Number of Adult PD patient-months with Kt/V data'].unique()

In [None]:
df['Number of Adult PD patient-months with Kt/V data'].max()

In [None]:
set([(len(str(num))) for num in df['Number of Adult PD patient-months with Kt/V data']])

### Number of Pediatric HD patients with Kt/V data
Data type: object

Value count: 7626

Number of unique values: 20

Percent unique values: 0.0393%

Ranges of values: 0 - 

Length of values: 1 - 2, 13

Description: Lists the number of pediatric HD patients included in Kt/V greater than or equal to 1.2 summary. The parameter Kt/V is a measurement of the efficacy of a hemodialysis session. It identifies the effective removal of a specific solute (clearance K) resulting from a given treatment (characterized by time t) in a given patient (with a specific volume of distribution V for the solute considered).

In [None]:
df['Number of Pediatric HD patients with Kt/V data'].head()

In [None]:
df['Number of Pediatric HD patients with Kt/V data'].tail()

In [None]:
df['Number of Pediatric HD patients with Kt/V data'].describe()

In [None]:
# Percent unique values
# True if facility has a value count of 1 (aka unique)
pd.Series([facility == 1 for facility in df['Number of Pediatric HD patients with Kt/V data'].value_counts()]).value_counts()

In [None]:
df['Number of Pediatric HD patients with Kt/V data'].min()

In [None]:
df['Number of Pediatric HD patients with Kt/V data'].max()

In [None]:
set([(len(str(num))) for num in df['Number of Pediatric HD patients with Kt/V data']])

### Number of Pediatric HD patient-months with Kt/V data
Data type: object 

Value count: 7626

Number of unique values: 68

Percent unique values: 0.459%

Ranges of values: 1 - 

Length of values: 1 - 3, 13

Description: Lists the number of pediatric HD patient-months included in Kt/V greater than or equal to 1.2 summary. The parameter Kt/V is a measurement of the efficacy of a hemodialysis session. It identifies the effective removal of a specific solute (clearance K) resulting from a given treatment (characterized by time t) in a given patient (with a specific volume of distribution V for the solute considered).

In [None]:
df['Number of Pediatric HD patient-months with Kt/V data'].head()

In [None]:
df['Number of Pediatric HD patient-months with Kt/V data'].tail()

In [None]:
df['Number of Pediatric HD patient-months with Kt/V data'].describe()

In [None]:
# Percent unique values
# True if facility has a value count of 1 (aka unique)
pd.Series([facility == 1 for facility in df['Number of Pediatric HD patient-months with Kt/V data'].value_counts()]).value_counts()

In [None]:
df['Number of Pediatric HD patient-months with Kt/V data'].min()

In [None]:
df['Number of Pediatric HD patient-months with Kt/V data'].max()

In [None]:
set([(len(str(num))) for num in df['Number of Pediatric HD patient-months with Kt/V data']])

### Hypercalcemia Data Availability Code
Data type: int64

Value count: 7626

Number of unique values: 4

Percent unique values: 0%

Ranges of values: 1, 199, 201, 258

Length of values: 1, 3

Description: Lists whether the facility had sufficient hypercalcemia data available or the reason for why the data is not available. Hypercalcemia is a condition in which the calcium level in one's blood is above normal. Too much calcium in the blood can weaken bones, create kidney stones, and interfere with how the heart and brain work. Hypercalcemia is usually a result of overactive parathyroid glands.
A normal serum calcium level is 8-10 mg/dL (2-2.5 mmol/L) with some interlaboratory variation in the reference range, and hypercalcemia is defined as a serum calcium level greater than 10.5 mg/dL (>2.5 mmol/L).

Value description:
- 1   : N/A
- 199 : Not enough patients to report on this measure. Call the dialysis center to discuss this measure.
- 201 : Data not reported. Call the dialysis center to discuss this quality measure.
- 258 : The dialysis center was not open long enough to supply sufficient measure data. 

In [None]:
# Preview the first rows of the availability-code column.
df['Hypercalcemia Data Availability Code'].head()

In [None]:
# Preview the last rows.
df['Hypercalcemia Data Availability Code'].tail()

In [None]:
# Number of non-missing entries in the column.
df['Hypercalcemia Data Availability Code'].count()

In [None]:
# How many rows carry each availability code.
df['Hypercalcemia Data Availability Code'].value_counts()

### Number of patients in hypercalcemia summary
Data type: object

Value count: 7626

Number of unique values: 311

Percent unique values: 0.656%

Ranges of values: 0 - 

Length of values: 1 - 3, 13

Description: Lists the number of patients included in the facility’s hypercalcemia summary. Hypercalcemia is a condition in which the calcium level in one's blood is above normal. Too much calcium in the blood can weaken bones, create kidney stones, and interfere with how the heart and brain work. Hypercalcemia is usually a result of overactive parathyroid glands.
A normal serum calcium level is 8-10 mg/dL (2-2.5 mmol/L) with some interlaboratory variation in the reference range, and hypercalcemia is defined as a serum calcium level greater than 10.5 mg/dL (>2.5 mmol/L).

In [None]:
df['Number of patients in hypercalcemia summary'].head()

In [None]:
df['Number of patients in hypercalcemia summary'].tail()

In [None]:
df['Number of patients in hypercalcemia summary'].describe()

In [None]:
# Percent unique values
# True if facility has a value count of 1 (aka unique)
pd.Series([facility == 1 for facility in df['Number of patients in hypercalcemia summary'].value_counts()]).value_counts()

In [None]:
df['Number of patients in hypercalcemia summary'].min()

In [None]:
df['Number of patients in hypercalcemia summary'].max()

In [None]:
set([(len(str(num))) for num in df['Number of patients in hypercalcemia summary']])

### Number of patient-months in hypercalcemia summary
Data type: object

Value count: 7626

Number of unique values: 1866

Percent unique values: 5.51%

Ranges of values: 0 - 

Length of values: 1 - 4, 13

Description: Lists the number of patient-months included in the facility’s hypercalcemia summary. Hypercalcemia is a condition in which the calcium level in one's blood is above normal. Too much calcium in the blood can weaken bones, create kidney stones, and interfere with how the heart and brain work. Hypercalcemia is usually a result of overactive parathyroid glands.
A normal serum calcium level is 8-10 mg/dL (2-2.5 mmol/L) with some interlaboratory variation in the reference range, and hypercalcemia is defined as a serum calcium level greater than 10.5 mg/dL (>2.5 mmol/L).

In [None]:
df['Number of patient-months in hypercalcemia summary'].head()

In [None]:
df['Number of patient-months in hypercalcemia summary'].tail()

In [None]:
df['Number of patient-months in hypercalcemia summary'].describe()

In [None]:
# Percent unique values
# True if facility has a value count of 1 (aka unique)
pd.Series([facility == 1 for facility in df['Number of patient-months in hypercalcemia summary'].value_counts()]).value_counts()

In [None]:
df['Number of patient-months in hypercalcemia summary'].min()

In [None]:
df['Number of patient-months in hypercalcemia summary'].max()

In [None]:
set([(len(str(num))) for num in df['Number of patient-months in hypercalcemia summary']])

### Percentage of Adult patients with hypercalcemia (serum calcium greater than 10.2 mg/dL)
Data type: object

Value count: 7626

Number of unique values: 49

Percent unique values: 0.197%

Ranges of values: 0 - 

Length of values: 1 - 2, 13

Description: Lists the percentage of Adult patients with Hypercalcemia (serum calcium greater than 10.2 mg/dL). Hypercalcemia is a condition in which the calcium level in one's blood is above normal. Too much calcium in the blood can weaken bones, create kidney stones, and interfere with how the heart and brain work. Hypercalcemia is usually a result of overactive parathyroid glands.
A normal serum calcium level is 8-10 mg/dL (2-2.5 mmol/L) with some interlaboratory variation in the reference range, and hypercalcemia is defined as a serum calcium level greater than 10.5 mg/dL (>2.5 mmol/L); this measure uses the 10.2 mg/dL cut-off named in the column title.

In [None]:
df['Percentage of Adult patients with hypercalcemia (serum calcium greater than 10.2 mg/dL)'].head()

In [None]:
df['Percentage of Adult patients with hypercalcemia (serum calcium greater than 10.2 mg/dL)'].tail()

In [None]:
df['Percentage of Adult patients with hypercalcemia (serum calcium greater than 10.2 mg/dL)'].describe()

In [None]:
# Percent unique values
# True if facility has a value count of 1 (aka unique)
pd.Series([facility == 1 for facility in df['Percentage of Adult patients with hypercalcemia (serum calcium greater than 10.2 mg/dL)'].value_counts()]).value_counts()

In [None]:
df['Percentage of Adult patients with hypercalcemia (serum calcium greater than 10.2 mg/dL)'].min()

In [None]:
df['Percentage of Adult patients with hypercalcemia (serum calcium greater than 10.2 mg/dL)'].max()

In [None]:
set([(len(str(num))) for num in df['Percentage of Adult patients with hypercalcemia (serum calcium greater than 10.2 mg/dL)']])

### Number of patients in Serum phosphorus summary
Data type: object

Value count: 7626

Number of unique values: 324

Percent unique values: 0.721%

Ranges of values: 0 - 

Length of values: 1 - 3, 13

Description: Lists the number of patients included in the facility’s serum phosphorus summary.
Serum phosphorus is measured in milligrams of phosphorus per deciliter of blood (mg/dL). According to Mayo Medical Laboratories, a normal range for adults is generally 2.5 to 4.5 mg/dL. The normal range varies slightly depending on your age.
Most commonly, a high level of phosphorus is related to a kidney disorder. It shows that your kidneys are having difficulty clearing phosphorus from your blood. A high level of phosphorus can also mean uncontrolled diabetes and other endocrine disorders.

In [None]:
df['Number of patients in Serum phosphorus summary'].head()

In [None]:
df['Number of patients in Serum phosphorus summary'].tail()

In [None]:
df['Number of patients in Serum phosphorus summary'].describe()

In [None]:
# Percent unique values
# True if facility has a value count of 1 (aka unique)
pd.Series([facility == 1 for facility in df['Number of patients in Serum phosphorus summary'].value_counts()]).value_counts()

In [None]:
df['Number of patients in Serum phosphorus summary'].min()

In [None]:
df['Number of patients in Serum phosphorus summary'].max()

In [None]:
set([(len(str(num))) for num in df['Number of patients in Serum phosphorus summary']])

### Number of patient-months in Serum phosphorus summary 
Data type: object

Value count: 7626

Number of unique values: 1904

Percent unique values: 6.22%

Ranges of values: 0 - 

Length of values: 1 - 4, 13

Description: Lists the number of patient-months included in the facility’s serum phosphorus summary.
Serum phosphorus is measured in milligrams of phosphorus per deciliter of blood (mg/dL). According to Mayo Medical Laboratories, a normal range for adults is generally 2.5 to 4.5 mg/dL. The normal range varies slightly depending on your age.
Most commonly, a high level of phosphorus is related to a kidney disorder. It shows that your kidneys are having difficulty clearing phosphorus from your blood. A high level of phosphorus can also mean uncontrolled diabetes and other endocrine disorders.

In [None]:
df['Number of patient-months in Serum phosphorus summary '].head()

In [None]:
df['Number of patient-months in Serum phosphorus summary '].tail()

In [None]:
df['Number of patient-months in Serum phosphorus summary '].describe()

In [None]:
# Percent unique values
# True if facility has a value count of 1 (aka unique)
pd.Series([facility == 1 for facility in df['Number of patient-months in Serum phosphorus summary '].value_counts()]).value_counts()

In [None]:
df['Number of patient-months in Serum phosphorus summary '].min()

In [None]:
df['Number of patient-months in Serum phosphorus summary '].max()

In [None]:
set([(len(str(num))) for num in df['Number of patient-months in Serum phosphorus summary ']])

### Serum phosphorus Data Availability Code 
Data type: int64

Value count: 7626

Number of unique values: 4

Percent unique values: 0%

Ranges of values: 1, 199, 201, 258

Length of values: 1, 3

Description: Lists whether the facility had sufficient serum phosphorus data available or the reason for why the data is not available.
Serum phosphorus is measured in milligrams of phosphorus per deciliter of blood (mg/dL). According to Mayo Medical Laboratories, a normal range for adults is generally 2.5 to 4.5 mg/dL. The normal range varies slightly depending on your age.
Most commonly, a high level of phosphorus is related to a kidney disorder. It shows that your kidneys are having difficulty clearing phosphorus from your blood. A high level of phosphorus can also mean uncontrolled diabetes and other endocrine disorders.

Value description:
- 1   : N/A
- 199 : Not enough patients to report on this measure. Call the dialysis center to discuss this measure.
- 201 : Data not reported. Call the dialysis center to discuss this quality measure.
- 258 : The dialysis center was not open long enough to supply sufficient measure data. 

In [None]:
# NOTE(review): the column name ends with a space here, but the rename cell at the top of the
# notebook strips it - confirm df is the raw frame at this point.
# Preview the first rows of the availability-code column.
df['Serum phosphorus Data Availability Code '].head()

In [None]:
# Preview the last rows.
df['Serum phosphorus Data Availability Code '].tail()

In [None]:
# Number of non-missing entries in the column.
df['Serum phosphorus Data Availability Code '].count()

In [None]:
# How many rows carry each availability code.
df['Serum phosphorus Data Availability Code '].value_counts()

### Percentage of Adult patients with serum phosphorus less than 3.5 mg/dL
Data type: object

Value count: 7626

Number of unique values: 36

Percent unique values:  0.0918%

Ranges of values: 0 - 

Length of values: 1 - 2, 13

Description: Lists the percentage of Adult patients with serum phosphorus less than 3.5 mg/dL.
Serum phosphorus is measured in milligrams of phosphorus per deciliter of blood (mg/dL). According to Mayo Medical Laboratories, a normal range for adults is generally 2.5 to 4.5 mg/dL. The normal range varies slightly depending on your age.
Most commonly, a high level of phosphorus is related to a kidney disorder. It shows that your kidneys are having difficulty clearing phosphorus from your blood. A high level of phosphorus can also mean uncontrolled diabetes and other endocrine disorders.

In [None]:
df['Percentage of Adult patients with serum phosphorus less than 3.5 mg/dL'].head()

In [None]:
df['Percentage of Adult patients with serum phosphorus less than 3.5 mg/dL'].tail()

In [None]:
df['Percentage of Adult patients with serum phosphorus less than 3.5 mg/dL'].describe()

In [None]:
# Percent unique values
# True if facility has a value count of 1 (aka unique)
pd.Series([facility == 1 for facility in df['Percentage of Adult patients with serum phosphorus less than 3.5 mg/dL'].value_counts()]).value_counts()

In [None]:
df['Percentage of Adult patients with serum phosphorus less than 3.5 mg/dL'].min()

In [None]:
df['Percentage of Adult patients with serum phosphorus less than 3.5 mg/dL'].max()

In [None]:
set([(len(str(num))) for num in df['Percentage of Adult patients with serum phosphorus less than 3.5 mg/dL']])

### Percentage of Adult patients with serum phosphorus between 3.5-4.5 mg/dL
Data type: object

Value count: 7626

Number of unique values: 47

Percent unique values: 0.118%

Ranges of values: 10 - 

Length of values: 1 - 2, 13

Description: Lists the percentage of Adult patients with serum phosphorus between 3.5-4.5 mg/dL.
Serum phosphorus is measured in milligrams of phosphorus per deciliter of blood (mg/dL). According to Mayo Medical Laboratories, a normal range for adults is generally 2.5 to 4.5 mg/dL. The normal range varies slightly depending on your age.
Most commonly, a high level of phosphorus is related to a kidney disorder. It shows that your kidneys are having difficulty clearing phosphorus from your blood. A high level of phosphorus can also mean uncontrolled diabetes and other endocrine disorders.

In [None]:
df['Percentage of Adult patients with serum phosphorus between 3.5-4.5 mg/dL'].head()

In [None]:
df['Percentage of Adult patients with serum phosphorus between 3.5-4.5 mg/dL'].tail()

In [None]:
df['Percentage of Adult patients with serum phosphorus between 3.5-4.5 mg/dL'].describe()

In [None]:
# Percent unique values
# True if facility has a value count of 1 (aka unique)
pd.Series([facility == 1 for facility in df['Percentage of Adult patients with serum phosphorus between 3.5-4.5 mg/dL'].value_counts()]).value_counts()

In [None]:
df['Percentage of Adult patients with serum phosphorus between 3.5-4.5 mg/dL'].min()

In [None]:
df['Percentage of Adult patients with serum phosphorus between 3.5-4.5 mg/dL'].max()

In [None]:
set([(len(str(num))) for num in df['Percentage of Adult patients with serum phosphorus between 3.5-4.5 mg/dL']])

### Percentage of Adult patients with serum phosphorus between 4.6-5.5 mg/dL
Data type: object

Value count: 7626

Number of unique values: 45

Percent unique values: 0.0656%

Ranges of values: 12 - 

Length of values: 1 - 2, 13

Description: Lists the percentage of Adult patients with serum phosphorus between 4.6-5.5 mg/dL.
Serum phosphorus is measured in milligrams of phosphorus per deciliter of blood (mg/dL). According to Mayo Medical Laboratories, a normal range for adults is generally 2.5 to 4.5 mg/dL. The normal range varies slightly depending on your age.
Most commonly, a high level of phosphorus is related to a kidney disorder. It shows that your kidneys are having difficulty clearing phosphorus from your blood. A high level of phosphorus can also mean uncontrolled diabetes and other endocrine disorders.

In [None]:
df['Percentage of Adult patients with serum phosphorus between 4.6-5.5 mg/dL'].head()

In [None]:
df['Percentage of Adult patients with serum phosphorus between 4.6-5.5 mg/dL'].tail()

In [None]:
df['Percentage of Adult patients with serum phosphorus between 4.6-5.5 mg/dL'].describe()

In [None]:
# Percent unique values
# True if facility has a value count of 1 (aka unique)
pd.Series([facility == 1 for facility in df['Percentage of Adult patients with serum phosphorus between 4.6-5.5 mg/dL'].value_counts()]).value_counts()

In [None]:
df['Percentage of Adult patients with serum phosphorus between 4.6-5.5 mg/dL'].min()

In [None]:
df['Percentage of Adult patients with serum phosphorus between 4.6-5.5 mg/dL'].max()

In [None]:
set([(len(str(num))) for num in df['Percentage of Adult patients with serum phosphorus between 4.6-5.5 mg/dL']])

### Percentage of Adult patients with serum phosphorus between 5.6-7.0 mg/dL
Data type: object

Value count: 7626

Number of unique values: 45

Percent unique values: 0.0525%

Ranges of values: 10 - 

Length of values: 1 - 2, 13

Description: Lists the percentage of Adult patients with serum phosphorus between 5.6-7.0 mg/dL.
Serum phosphorus is measured in milligrams of phosphorus per deciliter of blood (mg/dL). According to Mayo Medical Laboratories, a normal range for adults is generally 2.5 to 4.5 mg/dL. The normal range varies slightly depending on your age.
Most commonly, a high level of phosphorus is related to a kidney disorder. It shows that your kidneys are having difficulty clearing phosphorus from your blood. A high level of phosphorus can also mean uncontrolled diabetes and other endocrine disorders.

In [None]:
df['Percentage of Adult patients with serum phosphorus between 5.6-7.0 mg/dL'].head()

In [None]:
df['Percentage of Adult patients with serum phosphorus between 5.6-7.0 mg/dL'].tail()

In [None]:
df['Percentage of Adult patients with serum phosphorus between 5.6-7.0 mg/dL'].describe()

In [None]:
# Percent unique values
# True if facility has a value count of 1 (aka unique)
pd.Series([facility == 1 for facility in df['Percentage of Adult patients with serum phosphorus between 5.6-7.0 mg/dL'].value_counts()]).value_counts()

In [None]:
df['Percentage of Adult patients with serum phosphorus between 5.6-7.0 mg/dL'].min()

In [None]:
df['Percentage of Adult patients with serum phosphorus between 5.6-7.0 mg/dL'].max()

In [None]:
set([(len(str(num))) for num in df['Percentage of Adult patients with serum phosphorus between 5.6-7.0 mg/dL']])

### Percentage of Adult patients with serum phosphorus greater than 7.0 mg/dL
Data type: object

Value count: 7626

Number of unique values: 41

Percent unique values: 0.0131%

Ranges of values: 0 - 

Length of values: 1 - 2, 13

Description: Lists the percentage of Adult patients with serum phosphorus greater than 7.0 mg/dL.
Serum phosphorus is measured in milligrams of phosphorus per deciliter of blood (mg/dL). According to Mayo Medical Laboratories, a normal range for adults is generally 2.5 to 4.5 mg/dL. The normal range varies slightly depending on your age.
Most commonly, a high level of phosphorus is related to a kidney disorder. It shows that your kidneys are having difficulty clearing phosphorus from your blood. A high level of phosphorus can also mean uncontrolled diabetes and other endocrine disorders.

In [None]:
df['Percentage of Adult patients with serum phosphorus greater than 7.0 mg/dL'].head()

In [None]:
df['Percentage of Adult patients with serum phosphorus greater than 7.0 mg/dL'].tail()

In [None]:
df['Percentage of Adult patients with serum phosphorus greater than 7.0 mg/dL'].describe()

In [None]:
# Percent unique values
# True if facility has a value count of 1 (aka unique)
pd.Series([facility == 1 for facility in df['Percentage of Adult patients with serum phosphorus greater than 7.0 mg/dL'].value_counts()]).value_counts()

In [None]:
df['Percentage of Adult patients with serum phosphorus greater than 7.0 mg/dL'].min()

In [None]:
df['Percentage of Adult patients with serum phosphorus greater than 7.0 mg/dL'].max()

In [None]:
set([(len(str(num))) for num in df['Percentage of Adult patients with serum phosphorus greater than 7.0 mg/dL']])

### SHR Date
Data type: object

Value count: 7626

Number of unique values: 1

Percent unique values: 0%

Ranges of values: "01/01/2018 - 12/31/2018"

Length of values: 23

Description: Lists the time period for patient hospitalization summary.

In [None]:
df['SHR Date'].head()

In [None]:
df['SHR Date'].tail()

In [None]:
df['SHR Date'].value_counts()

In [None]:
set([(len(str(num))) for num in df['SHR Date']])

### SRR Date
Data type: object

Value count: 7626

Number of unique values: 1

Percent unique values: 0%

Ranges of values: "01/01/2018 - 12/31/2018"

Length of values: 23

Description: Lists the time period for patient readmission summary.

In [None]:
df['SRR Date'].head()

In [None]:
df['SRR Date'].tail()

In [None]:
df['SRR Date'].value_counts()

In [None]:
set([(len(str(num))) for num in df['SRR Date']])

### SMR Date
Data type: object

Value count: 7626

Number of unique values: 1

Percent unique values: 0%

Ranges of values: "01/01/2015 - 12/31/2018"

Length of values: 23

Description: Lists the data collection period for patient survival summary.

In [None]:
df['SMR Date'].head()

In [None]:
df['SMR Date'].tail()

In [None]:
df['SMR Date'].value_counts()

In [None]:
# Distinct string lengths of the values in 'SMR Date' (this section's column;
# the cell previously measured 'SRR Date' by mistake).
{len(str(value)) for value in df['SMR Date']}

### Patient Hospitalization category text
Data type: object

Value count: 7626

Number of unique values: 4

Percent unique values: 0%

Ranges of values: As Expected, Not Available, Worse than Expected, Better than Expected

Length of values: 11, 13, 19, 20

Description: Patient hospitalization category.

In [None]:
df['Patient Hospitalization category text'].head()

In [None]:
df['Patient Hospitalization category text'].tail()

In [None]:
df['Patient Hospitalization category text'].count()

In [None]:
df['Patient Hospitalization category text'].value_counts()

In [None]:
set([(len(str(num))) for num in df['Patient Hospitalization category text']])

### Patient Hospitalization data availability Code
Data type: int64

Value count: 7626

Number of unique values: 5

Percent unique values: 0% 

Ranges of values: 1, 199, 201, 255, 258

Length of values: 1, 3

Description: Lists whether the facility had sufficient hospitalization data available or the reason for why the data is not available.

Value description:
- 1   : N/A
- 199 : Not enough patients to report on this measure. Call the dialysis center to discuss this measure.
- 201 : Data not reported. Call the dialysis center to discuss this quality measure.
- 255 : Medicare determined that the percentage reported was not accurate.
- 258 : The dialysis center was not open long enough to supply sufficient measure data. 

In [None]:
df['Patient Hospitalization data availability Code'].head()

In [None]:
df['Patient Hospitalization data availability Code'].tail()

In [None]:
df['Patient Hospitalization data availability Code'].count()

In [None]:
df['Patient Hospitalization data availability Code'].value_counts()

### Patient Hospital Readmission Category
Data type: object

Value count: 7626

Number of unique values: 4

Percent unique values: 0%

Ranges of values: As Expected, Not Available, Worse than Expected, Better than Expected

Length of values: 11, 13, 19, 20

Description: Patient readmission category.

Value description:
- As Expected          : Patient hospital readmission categorized as “As Expected”       
- Worse than Expected  :  Patient hospital readmission categorized as “Worse than Expected”
- Better than Expected : Patient hospital readmission categorized as “Better than Expected"
- Not Available 

In [None]:
df['Patient Hospital Readmission Category'].head()

In [None]:
df['Patient Hospital Readmission Category'].tail()

In [None]:
df['Patient Hospital Readmission Category'].count()

In [None]:
df['Patient Hospital Readmission Category'].value_counts()

In [None]:
set([(len(str(num))) for num in df['Patient Hospital Readmission Category']])

### Patient Hospital Readmission data availability Code
Data type: int64

Value count: 7626

Number of unique values: 4

Percent unique values: 0%

Ranges of values: 1, 199, 255, 258

Length of values: 1, 3

Description: Lists whether the facility had sufficient readmission data available or the reason for why the data is not available.

Value description:
- 1   : N/A
- 199 : Not enough patients to report on this measure. Call the dialysis center to discuss this measure.
- 255 : Medicare determined that the percentage reported was not accurate.
- 258 : The dialysis center was not open long enough to supply sufficient measure data. 

In [None]:
df['Patient Hospital Readmission data availability Code'].head()

In [None]:
df['Patient Hospital Readmission data availability Code'].tail()

In [None]:
df['Patient Hospital Readmission data availability Code'].count()

In [None]:
df['Patient Hospital Readmission data availability Code'].value_counts()

### Patient Survival Category Text
Data type: object

Value count: 7626

Number of unique values: 4

Percent unique values: 0%

Ranges of values: As Expected, Worse than Expected, Better than Expected, Not Available

Length of values: 11, 13, 19, 20

Description: Patient survival category (Better, Worse or As Expected).

Value description:
- As Expected          : Patient survival categorized as “As Expected”
- Worse than Expected  : Patient survival categorized as “Worse than Expected”
- Better than Expected : Patient survival categorized as “Better than Expected”
- Not Available 

In [None]:
df['Patient Survival Category Text'].head()

In [None]:
df['Patient Survival Category Text'].tail()

In [None]:
df['Patient Survival Category Text'].count()

In [None]:
df['Patient Survival Category Text'].value_counts()

In [None]:
# Distinct string lengths of the values in 'Patient Survival Category Text'
# (this section's column; the cell previously measured
# 'Patient Hospital Readmission Category' by mistake).
{len(str(value)) for value in df['Patient Survival Category Text']}

### Patient Survival data availability code
Data type: int64

Value count: 7626

Number of unique values: 5

Percent unique values: 0%

Ranges of values: 1, 199, 201, 255, 258

Length of values: 1, 3 

Description: Lists whether the facility had sufficient patient survival data available or the reason for why the data is not available.

Value description:
- 1   : N/A
- 199 : Not enough patients to report on this measure. Call the dialysis center to discuss this measure.
- 201 : Data not reported. Call the dialysis center to discuss this quality measure.
- 255 : Medicare determined that the percentage reported was not accurate.
- 258 : The dialysis center was not open long enough to supply sufficient measure data. 

In [None]:
df['Patient Survival data availability code'].head()

In [None]:
df['Patient Survival data availability code'].tail()

In [None]:
df['Patient Survival data availability code'].count()

In [None]:
df['Patient Survival data availability code'].value_counts()

### Number of patients included in hospitalization summary
Data type: object

Value count: 7626

Number of unique values: 250

Percent unique values: 0.485%

Ranges of values: 0 - 

Length of values: 1 - 3, 13

Description: Lists the number of patients included in the facility’s hospitalization summary.

In [None]:
df['Number of patients included in hospitalization summary'].head()

In [None]:
df['Number of patients included in hospitalization summary'].tail()

In [None]:
df['Number of patients included in hospitalization summary'].describe()

In [None]:
# Percent unique values
# True if facility has a value count of 1 (aka unique)
pd.Series([facility == 1 for facility in df['Number of patients included in hospitalization summary'].value_counts()]).value_counts()

In [None]:
df['Number of patients included in hospitalization summary'].min()

In [None]:
df['Number of patients included in hospitalization summary'].max()

In [None]:
set([(len(str(num))) for num in df['Number of patients included in hospitalization summary']])

### Number of hospitalizations included in hospital readmission summary
Data type: object

Value count: 7626

Number of unique values: 296

Percent unique values: 0.564%

Ranges of values: 0 - 

Length of values: 1 - 3, 13

Description: Lists the number of index discharges included in the facility’s readmission summary.

In [None]:
df['Number of hospitalizations included in hospital readmission summary'].head()

In [None]:
df['Number of hospitalizations included in hospital readmission summary'].tail()

In [None]:
df['Number of hospitalizations included in hospital readmission summary'].describe()

In [None]:
# Percent unique values
# True if facility has a value count of 1 (aka unique)
pd.Series([facility == 1 for facility in df['Number of hospitalizations included in hospital readmission summary'].value_counts()]).value_counts()

In [None]:
df['Number of hospitalizations included in hospital readmission summary'].min()

In [None]:
df['Number of hospitalizations included in hospital readmission summary'].max()

In [None]:
set([(len(str(num))) for num in df['Number of hospitalizations included in hospital readmission summary']])

### Number of patients included in survival summary
Data type: object

Value count: 7626

Number of unique values: 808

Percent unique values: 1.95%

Ranges of values: 0 - 

Length of values: 1 - 4, 13

Description: Lists the number of patients included in the facility’s survival summary.

In [None]:
df['Number of patients included in survival summary'].head()

In [None]:
df['Number of patients included in survival summary'].tail()

In [None]:
df['Number of patients included in survival summary'].describe()

In [None]:
# Percent unique values
# True if facility has a value count of 1 (aka unique)
pd.Series([facility == 1 for facility in df['Number of patients included in survival summary'].value_counts()]).value_counts()

In [None]:
df['Number of patients included in survival summary'].min()

In [None]:
df['Number of patients included in survival summary'].max()

In [None]:
set([(len(str(num))) for num in df['Number of patients included in survival summary']])

### Mortality Rate (Facility)
Data type: object

Value count: 7626

Number of unique values: 391

Percent unique values: 1.01%

Ranges of values: 0 - 

Length of values: 1 - 4, 13

Description: Lists the facility’s mortality rate per 100 patient-years.

In [None]:
df['Mortality Rate (Facility)'].head()

In [None]:
df['Mortality Rate (Facility)'].tail()

In [None]:
df['Mortality Rate (Facility)'].describe()

In [None]:
# Percent unique values
# True if facility has a value count of 1 (aka unique)
pd.Series([facility == 1 for facility in df['Mortality Rate (Facility)'].value_counts()]).value_counts()

In [None]:
df['Mortality Rate (Facility)'].min()

In [None]:
df['Mortality Rate (Facility)'].max()

In [None]:
set([(len(str(num))) for num in df['Mortality Rate (Facility)']])

### Mortality Rate: Upper Confidence Limit (97.5%)
Data type: object

Value count: 7626

Number of unique values: 546

Percent unique values: 1.51%

Ranges of values: 10.3 -

Length of values: 2 - 5, 13

Description: Lists the upper confidence limit (97.5%) for mortality rate per 100 patient-years.

In [None]:
df['Mortality Rate: Upper Confidence Limit (97.5%)'].head()

In [None]:
df['Mortality Rate: Upper Confidence Limit (97.5%)'].tail()

In [None]:
df['Mortality Rate: Upper Confidence Limit (97.5%)'].describe()

In [None]:
# Percent unique values
# True if facility has a value count of 1 (aka unique)
pd.Series([facility == 1 for facility in df['Mortality Rate: Upper Confidence Limit (97.5%)'].value_counts()]).value_counts()

In [None]:
df['Mortality Rate: Upper Confidence Limit (97.5%)'].min()

In [None]:
df['Mortality Rate: Upper Confidence Limit (97.5%)'].max()

In [None]:
set([(len(str(num))) for num in df['Mortality Rate: Upper Confidence Limit (97.5%)']])

### Mortality Rate: Lower Confidence Limit (2.5%)
Data type: object

Value count: 7626

Number of unique values: 324

Percent unique values: 0.564%

Ranges of values: 0.1 - 

Length of values: 1 - 4, 13

Description: Lists the lower confidence limit (2.5%) for mortality rate per 100 patient-years.

In [None]:
df['Mortality Rate: Lower Confidence Limit (2.5%)'].head()

In [None]:
df['Mortality Rate: Lower Confidence Limit (2.5%)'].tail()

In [None]:
df['Mortality Rate: Lower Confidence Limit (2.5%)'].describe()

In [None]:
# Percent unique values
# True if facility has a value count of 1 (aka unique)
pd.Series([facility == 1 for facility in df['Mortality Rate: Lower Confidence Limit (2.5%)'].value_counts()]).value_counts()

In [None]:
df['Mortality Rate: Lower Confidence Limit (2.5%)'].min()

In [None]:
df['Mortality Rate: Lower Confidence Limit (2.5%)'].max()

In [None]:
set([(len(str(num))) for num in df['Mortality Rate: Lower Confidence Limit (2.5%)']])

### Readmission Rate (Facility)
Data type: object

Value count: 7626

Number of unique values: 464

Percent unique values: 0.944%

Ranges of values: 0 - 

Length of values: 1 - 4, 13

Description: Lists the facility’s readmission rate as a percentage of hospital discharges.

In [None]:
df['Readmission Rate (Facility)'].head()

In [None]:
df['Readmission Rate (Facility)'].tail()

In [None]:
df['Readmission Rate (Facility)'].describe()

In [None]:
# Percent unique values
# True if facility has a value count of 1 (aka unique)
pd.Series([facility == 1 for facility in df['Readmission Rate (Facility)'].value_counts()]).value_counts()

In [None]:
df['Readmission Rate (Facility)'].min()

In [None]:
df['Readmission Rate (Facility)'].max()

In [None]:
set([(len(str(num))) for num in df['Readmission Rate (Facility)']])

### Readmission Rate: Upper Confidence Limit (97.5%)
Data type: object

Value count: 7626

Number of unique values: 544

Percent unique values: 1.19%

Ranges of values: 104.2

Length of values: 2 - 5, 13

Description: Lists the upper confidence limit (97.5%) for readmission rate as a percentage of hospital discharges.

In [None]:
df['Readmission Rate: Upper Confidence Limit (97.5%)'].head()

In [None]:
df['Readmission Rate: Upper Confidence Limit (97.5%)'].tail()

In [None]:
df['Readmission Rate: Upper Confidence Limit (97.5%)'].describe()

In [None]:
# Percent unique values
# True if facility has a value count of 1 (aka unique)
pd.Series([facility == 1 for facility in df['Readmission Rate: Upper Confidence Limit (97.5%)'].value_counts()]).value_counts()

In [None]:
df['Readmission Rate: Upper Confidence Limit (97.5%)'].min()

In [None]:
df['Readmission Rate: Upper Confidence Limit (97.5%)'].max()

In [None]:
set([(len(str(num))) for num in df['Readmission Rate: Upper Confidence Limit (97.5%)']])

### Readmission Rate: Lower Confidence Limit (2.5%)
Data type: object

Value count: 7626

Number of unique values: 339

Percent unique values: 0.315%

Ranges of values: 0.1 - 

Length of values: 1 - 4, 13

Description: Lists the lower confidence limit (2.5%) for readmission rate as a percentage of hospital discharges.

In [None]:
df['Readmission Rate: Lower Confidence Limit (2.5%)'].head()

In [None]:
df['Readmission Rate: Lower Confidence Limit (2.5%)'].tail()

In [None]:
df['Readmission Rate: Lower Confidence Limit (2.5%)'].describe()

In [None]:
# Percent unique values
# True if facility has a value count of 1 (aka unique)
pd.Series([facility == 1 for facility in df['Readmission Rate: Lower Confidence Limit (2.5%)'].value_counts()]).value_counts()

In [None]:
df['Readmission Rate: Lower Confidence Limit (2.5%)'].min()

In [None]:
df['Readmission Rate: Lower Confidence Limit (2.5%)'].max()

In [None]:
set([(len(str(num))) for num in df['Readmission Rate: Lower Confidence Limit (2.5%)']])

### Hospitalization Rate (Facility)
Data type: object

Value count: 7626

Number of unique values: 2005

Percent unique values: 7.50%

Ranges of values: 0 - 

Length of values: 1 - 5, 13

Description: Lists the facility’s hospitalization rate per 100 patient-years.

In [None]:
df['Hospitalization Rate (Facility)'].head()

In [None]:
df['Hospitalization Rate (Facility)'].tail()

In [None]:
df['Hospitalization Rate (Facility)'].describe()

In [None]:
# Percent unique values
# True if facility has a value count of 1 (aka unique)
pd.Series([facility == 1 for facility in df['Hospitalization Rate (Facility)'].value_counts()]).value_counts()

In [None]:
df['Hospitalization Rate (Facility)'].min()

In [None]:
df['Hospitalization Rate (Facility)'].max()

In [None]:
set([(len(str(num))) for num in df['Hospitalization Rate (Facility)']])

### Hospitalization Rate: Upper Confidence Limit (97.5%)
Data type: object

Value count: 7626

Number of unique values: 2584

Percent unique values: 12.7%

Ranges of values: 1079.9 - 

Length of values: 3, 5, 6, 13

Description: Lists the upper confidence limit (97.5%) for hospitalization rate per 100 patient-years.

In [None]:
df['Hospitalization Rate: Upper Confidence Limit (97.5%)'].head()

In [None]:
df['Hospitalization Rate: Upper Confidence Limit (97.5%)'].tail()

In [None]:
df['Hospitalization Rate: Upper Confidence Limit (97.5%)'].describe()

In [None]:
# Percent unique values
# True if facility has a value count of 1 (aka unique)
pd.Series([facility == 1 for facility in df['Hospitalization Rate: Upper Confidence Limit (97.5%)'].value_counts()]).value_counts()

In [None]:
df['Hospitalization Rate: Upper Confidence Limit (97.5%)'].min()

In [None]:
df['Hospitalization Rate: Upper Confidence Limit (97.5%)'].max()

In [None]:
set([(len(str(num))) for num in df['Hospitalization Rate: Upper Confidence Limit (97.5%)']])

### Hospitalization Rate: Lower Confidence Limit (2.5%)
Data type: object

Value count: 7626

Number of unique values: 1711

Percent unique values: 5.72%

Ranges of values: 0 -

Length of values: 1 - 5, 13

Description: Lists the lower confidence limit (2.5%) for hospitalization rate per 100 patient-years.

In [None]:
df['Hospitalization Rate: Lower Confidence Limit (2.5%)'].head()

In [None]:
df['Hospitalization Rate: Lower Confidence Limit (2.5%)'].tail()

In [None]:
df['Hospitalization Rate: Lower Confidence Limit (2.5%)'].describe()

In [None]:
# Percent unique values
# True if facility has a value count of 1 (aka unique)
pd.Series([facility == 1 for facility in df['Hospitalization Rate: Lower Confidence Limit (2.5%)'].value_counts()]).value_counts()

In [None]:
df['Hospitalization Rate: Lower Confidence Limit (2.5%)'].min()

In [None]:
df['Hospitalization Rate: Lower Confidence Limit (2.5%)'].max()

In [None]:
set([(len(str(num))) for num in df['Hospitalization Rate: Lower Confidence Limit (2.5%)']])

### Number of pediatric PD patients with Kt/V data
Data type: object

Value count: 7626

Number of unique values: 26

Percent unique values: 0.0918%

Ranges of values: 0 - 

Length of values: 1, 2, 13

Description: Lists the number of pediatric PD patients included in Kt/V greater than or equal to 1.8 summary. The parameter Kt/V is a measurement of the efficacy of a hemodialysis session. It identifies the effective removal of a specific solute (clearance K) resulting from a given treatment (characterized by time t) in a given patient (with a specific volume of distribution V for the solute considered).

In [None]:
df['Number of pediatric PD patients with Kt/V data'].head()

In [None]:
df['Number of pediatric PD patients with Kt/V data'].tail()

In [None]:
df['Number of pediatric PD patients with Kt/V data'].describe()

In [None]:
# Percent unique values
# True if facility has a value count of 1 (aka unique)
pd.Series([facility == 1 for facility in df['Number of pediatric PD patients with Kt/V data'].value_counts()]).value_counts()

In [None]:
df['Number of pediatric PD patients with Kt/V data'].min()

In [None]:
df['Number of pediatric PD patients with Kt/V data'].max()

In [None]:
set([(len(str(num))) for num in df['Number of pediatric PD patients with Kt/V data']])

### Pediatric PD Kt/V Data Availability Code
Data type: int64 

Value count: 7626

Number of unique values: 5

Percent unique values: 0%

Ranges of values: 1, 199, 201, 258, 259

Length of values: 1, 3

Description: Lists whether the facility had sufficient pediatric PD Kt/V data available or the reason for why the data is not available. The parameter Kt/V is a measurement of the efficacy of a hemodialysis session. It identifies the effective removal of a specific solute (clearance K) resulting from a given treatment (characterized by time t) in a given patient (with a specific volume of distribution V for the solute considered).

Value description:
- 1   : N/A
- 199 : Not enough patients to report on this measure. Call the dialysis center to discuss this measure.
- 201 : Data not reported. Call the dialysis center to discuss this quality measure.
- 258 : The dialysis center was not open long enough to supply sufficient measure data. 
- 259 : The dialysis center does not provide hemodialysis and/or peritoneal dialysis to pediatric patients during the reporting period.

In [None]:
df['Pediatric PD Kt/V Data Availability Code'].head()

In [None]:
df['Pediatric PD Kt/V Data Availability Code'].tail()

In [None]:
df['Pediatric PD Kt/V Data Availability Code'].count()

In [None]:
df['Pediatric PD Kt/V Data Availability Code'].value_counts()

### Number of pediatric PD patient-months with KT/V data
Data type: object

Value count: 7626

Number of unique values: 82

Percent unique values: 0.433%

Ranges of values: 1 - 

Length of values: 1 - 3, 13

Description: Lists the number of pediatric PD patient months included in Kt/V greater than or equal to 1.8 summary. The parameter Kt/V is a measurement of the efficacy of a hemodialysis session. It identifies the effective removal of a specific solute (clearance K) resulting from a given treatment (characterized by time t) in a given patient (with a specific volume of distribution V for the solute considered).

In [None]:
# Preview the first rows of this column.
# NOTE(review): cell In [4] drops 'Number of pediatric PD patient-months with KT/V data'
# from df in place, so on a fresh Restart & Run All this lookup (and the other cells in
# this section) presumably raises KeyError -- confirm, then either remove this section
# or run it against the frame as it was before the drop.
df['Number of pediatric PD patient-months with KT/V data'].head()

In [None]:
df['Number of pediatric PD patient-months with KT/V data'].tail()

In [None]:
df['Number of pediatric PD patient-months with KT/V data'].describe()

In [None]:
# Percent unique values
# True if facility has a value count of 1 (aka unique)
pd.Series([facility == 1 for facility in df['Number of pediatric PD patient-months with KT/V data'].value_counts()]).value_counts()

In [None]:
df['Number of pediatric PD patient-months with KT/V data'].min()

In [None]:
df['Number of pediatric PD patient-months with KT/V data'].max()

In [None]:
set([(len(str(num))) for num in df['Number of pediatric PD patient-months with KT/V data']])

### Percentage of pediatric PD patients with Kt/V>=1.8
Data type: object

Value count: 7626

Number of unique values: 22

Percent unique values: 0.105%

Ranges of values: 23 - 

Length of values: 2, 13

Description: Lists the percentage of pediatric PD patients with Kt/V greater than or equal to 1.8. The parameter Kt/V is a measurement of the efficacy of a hemodialysis session. It identifies the effective removal of a specific solute (clearance K) resulting from a given treatment (characterized by time t) in a given patient (with a specific volume of distribution V for the solute considered).

In [None]:
# Preview the first rows of this column.
# NOTE(review): cell In [4] drops 'Percentage of pediatric PD patients with Kt/V>=1.8'
# from df in place, so on a fresh Restart & Run All this lookup (and the other cells in
# this section) presumably raises KeyError -- confirm, then either remove this section
# or run it against the frame as it was before the drop.
df['Percentage of pediatric PD patients with Kt/V>=1.8'].head()

In [None]:
df['Percentage of pediatric PD patients with Kt/V>=1.8'].tail()

In [None]:
df['Percentage of pediatric PD patients with Kt/V>=1.8'].describe()

In [None]:
# Percent unique values
# True if facility has a value count of 1 (aka unique)
pd.Series([facility == 1 for facility in df['Percentage of pediatric PD patients with Kt/V>=1.8'].value_counts()]).value_counts()

In [None]:
df['Percentage of pediatric PD patients with Kt/V>=1.8'].min()

In [None]:
df['Percentage of pediatric PD patients with Kt/V>=1.8'].max()

In [None]:
set([(len(str(num))) for num in df['Percentage of pediatric PD patients with Kt/V>=1.8']])

### SIR Date
Data type: object

Value count: 7626

Number of unique values: 1

Percent unique values: 0%

Ranges of values: "01/01/2018 - 12/31/2018"

Length of values: 23

Description: Lists the time period for patient infection summary (SIR).

In [None]:
df['SIR Date'].head()

In [None]:
df['SIR Date'].tail()

In [None]:
df['SIR Date'].count()

In [None]:
df['SIR Date'].value_counts()

In [None]:
set([(len(str(num))) for num in df['SIR Date']])

### Patient Infection Data Availability Code
Data type: int64

Value count: 7626

Number of unique values: 4

Percent unique values: 0%

Ranges of values: 1, 199, 201, 258

Length of values: 1, 3

Description: Lists whether the facility had sufficient infection data available or the reason for why the data is not available.

Value description:
- 1   : N/A
- 199 : Not enough patients to report on this measure. Call the dialysis center to discuss this measure.
- 201 : Data not reported. Call the dialysis center to discuss this quality measure.
- 258 : The dialysis center was not open long enough to supply sufficient measure data. 

In [None]:
df['Patient Infection Data Availability Code'].head()

In [None]:
df['Patient Infection Data Availability Code'].tail()

In [None]:
df['Patient Infection Data Availability Code'].count()

In [None]:
df['Patient Infection Data Availability Code'].value_counts()

### Patient Infection category text
Data type: object

Value count: 7626

Number of unique values: 4

Percent unique values: 0%

Ranges of values: As Expected, Worse than Expected, Better than Expected, Not Available

Length of values: 11, 13, 19, 20

Description: Patient infection category.

Value description:
- As Expected          : Patient infection categorized as “As Expected”       
- Worse than Expected  :  Patient infection categorized as “Worse than Expected”
- Better than Expected : Patient infection categorized as “Better than Expected"
- Not Available 

In [None]:
df['Patient Infection category text'].head()

In [None]:
df['Patient Infection category text'].tail()

In [None]:
df['Patient Infection category text'].count()

In [None]:
df['Patient Infection category text'].value_counts()

In [None]:
set([(len(str(num))) for num in df['Patient Infection category text']])

### Standard Infection Ratio
Data type: object

Value count: 7626

Number of unique values: 337

Percent unique values: 0.957%

Ranges of values: 0.5 - 

Length of values: 1 - 5, 13

Description: Lists the facility’s Standardized Infection Ratio.

In [None]:
df['Standard Infection Ratio'].head()

In [None]:
df['Standard Infection Ratio'].tail()

In [None]:
df['Standard Infection Ratio'].describe()

In [None]:
# Percent unique values
# True if facility has a value count of 1 (aka unique)
pd.Series([facility == 1 for facility in df['Standard Infection Ratio'].value_counts()]).value_counts()

In [None]:
df['Standard Infection Ratio'].min()

In [None]:
df['Standard Infection Ratio'].max()

In [None]:
set([(len(str(num))) for num in df['Standard Infection Ratio']])

### SIR: Upper Confidence Limit (97.5%)
Data type: object

Value count: 7626

Number of unique values: 603

Percent unique values: 1.63%

Ranges of values: 0.16 - 

Length of values: 1 - 5, 13

Description: Lists the upper confidence limit (97.5%) for Standardized Infection Ratio (SIR).

In [None]:
df['SIR: Upper Confidence Limit (97.5%)'].head()

In [None]:
df['SIR: Upper Confidence Limit (97.5%)'].tail()

In [None]:
df['SIR: Upper Confidence Limit (97.5%)'].describe()

In [None]:
# Percent unique values
# True if facility has a value count of 1 (aka unique)
pd.Series([facility == 1 for facility in df['SIR: Upper Confidence Limit (97.5%)'].value_counts()]).value_counts()

In [None]:
df['SIR: Upper Confidence Limit (97.5%)'].min()

In [None]:
df['SIR: Upper Confidence Limit (97.5%)'].max()

In [None]:
set([(len(str(num))) for num in df['SIR: Upper Confidence Limit (97.5%)']])

### SIR: Lower Confidence Limit (2.5%)
Data type: object

Value count: 7626

Number of unique values: 191

Percent unique values: 0.485%

Ranges of values: 0.01 - 

Length of values: 1 - 4, 13

Description: Lists the lower confidence limit (2.5%) for Standardized Infection Ratio (SIR).

In [None]:
df['SIR: Lower Confidence Limit (2.5%)'].head()

In [None]:
df['SIR: Lower Confidence Limit (2.5%)'].tail()

In [None]:
df['SIR: Lower Confidence Limit (2.5%)'].describe()

In [None]:
# Percent unique values
# True if facility has a value count of 1 (aka unique)
pd.Series([facility == 1 for facility in df['SIR: Lower Confidence Limit (2.5%)'].value_counts()]).value_counts()

In [None]:
df['SIR: Lower Confidence Limit (2.5%)'].min()

In [None]:
df['SIR: Lower Confidence Limit (2.5%)'].max()

In [None]:
set([(len(str(num))) for num in df['SIR: Lower Confidence Limit (2.5%)']])

### Transfusion Rate (Facility)
Data type: object

Value count: 7626

Number of unique values: 718

Percent unique values: 2.31%

Ranges of values: 0 - 

Length of values: 1 - 5, 13

Description: Lists the facility’s transfusion rate per 100 patient-years.

In [None]:
df['Transfusion Rate (Facility)'].head()

In [None]:
df['Transfusion Rate (Facility)'].tail()

In [None]:
df['Transfusion Rate (Facility)'].describe()

In [None]:
# Percent unique values
# True if facility has a value count of 1 (aka unique)
pd.Series([facility == 1 for facility in df['Transfusion Rate (Facility)'].value_counts()]).value_counts()

In [None]:
df['Transfusion Rate (Facility)'].min()

In [None]:
df['Transfusion Rate (Facility)'].max()

In [None]:
set([(len(str(num))) for num in df['Transfusion Rate (Facility)']])

### Transfusion Rate: Upper Confidence Limit (97.5%)
Data type: object

Value count: 7626

Number of unique values: 1605

Percent unique values: 4.51% 

Ranges of values: 100 - 

Length of values: 2 - 5, 13

Description: Lists the upper confidence limit (97.5%) for transfusion rate per 100 patient-years.

In [None]:
df['Transfusion Rate: Upper Confidence Limit (97.5%)'].head()

In [None]:
df['Transfusion Rate: Upper Confidence Limit (97.5%)'].tail()

In [None]:
df['Transfusion Rate: Upper Confidence Limit (97.5%)'].describe()

In [None]:
# Percent unique values
# True if facility has a value count of 1 (aka unique)
pd.Series([facility == 1 for facility in df['Transfusion Rate: Upper Confidence Limit (97.5%)'].value_counts()]).value_counts()

In [None]:
df['Transfusion Rate: Upper Confidence Limit (97.5%)'].min()

In [None]:
df['Transfusion Rate: Upper Confidence Limit (97.5%)'].max()

In [None]:
set([(len(str(num))) for num in df['Transfusion Rate: Upper Confidence Limit (97.5%)']])

### Transfusion Rate: Lower Confidence Limit (2.5%)
Data type: object

Value count: 7626

Number of unique values: 414

Percent unique values: 1.18%

Ranges of values: 0.1 - 

Length of values: 1 - 5, 13

Description: Lists the lower confidence limit (2.5%) for transfusion rate per 100 patient-years.

In [None]:
df['Transfusion Rate: Lower Confidence Limit (2.5%)'].head()

In [None]:
df['Transfusion Rate: Lower Confidence Limit (2.5%)'].tail()

In [None]:
df['Transfusion Rate: Lower Confidence Limit (2.5%)'].describe()

In [None]:
# Percent unique values
# True if facility has a value count of 1 (aka unique)
pd.Series([facility == 1 for facility in df['Transfusion Rate: Lower Confidence Limit (2.5%)'].value_counts()]).value_counts()

In [None]:
df['Transfusion Rate: Lower Confidence Limit (2.5%)'].min()

In [None]:
df['Transfusion Rate: Lower Confidence Limit (2.5%)'].max()

In [None]:
set([(len(str(num))) for num in df['Transfusion Rate: Lower Confidence Limit (2.5%)']])

### Fistula data availability code
Data type: int64

Value count: 7626

Number of unique values: 5

Percent unique values: 0%

Ranges of values: 1, 199, 201, 256, 258

Length of values: 1, 3

Description: Lists whether the facility had sufficient fistula data available or the reason for why the data is not available. The best type of long-term access is an AV fistula. A surgeon connects an artery to a vein, usually in your arm, to create an AV fistula. An artery is a blood vessel that carries blood away from your heart. A vein is a blood vessel that carries blood back toward your heart. When the surgeon connects an artery to a vein, the vein grows wider and thicker, making it easier to place the needles for dialysis. The AV fistula also has a large diameter that allows your blood to flow out and back into your body quickly. The goal is to allow high blood flow so that the largest amount of blood can pass through the dialyzer.

Value description: 
- 1   : N/A
- 199 : Not enough patients to report on this measure. Call the dialysis center to discuss this measure.
- 201 : Data not reported. Call the dialysis center to discuss this quality measure.
- 256 : The dialysis center does not provide hemodialysis during the reporting period. 
- 258 : The dialysis center was not open long enough to supply sufficient measure data. 

In [None]:
df['Fistula data availability code'].head()

In [None]:
df['Fistula data availability code'].tail()

In [None]:
df['Fistula data availability code'].count()

In [None]:
df['Fistula data availability code'].value_counts()

### Fistula Category Text
Data type: object

Value count: 7626

Number of unique values: 4

Percent unique values: 0%

Ranges of values: As Expected, Worse than Expected, Better than Expected, Not Available

Length of values: 11, 13, 19, 20

Description: Fistula category. The best type of long-term access is an AV fistula. A surgeon connects an artery to a vein, usually in your arm, to create an AV fistula. An artery is a blood vessel that carries blood away from your heart. A vein is a blood vessel that carries blood back toward your heart. When the surgeon connects an artery to a vein, the vein grows wider and thicker, making it easier to place the needles for dialysis. The AV fistula also has a large diameter that allows your blood to flow out and back into your body quickly. The goal is to allow high blood flow so that the largest amount of blood can pass through the dialyzer.

Value description:
- As Expected          : Patient fistulas categorized as “As Expected”       
- Worse than Expected  : Patient fistulas categorized as “Worse than Expected”
- Better than Expected : Patient fistulas categorized as “Better than Expected”
- Not Available 

In [None]:
df['Fistula Category Text'].head()

In [None]:
df['Fistula Category Text'].tail()

In [None]:
df['Fistula Category Text'].count()

In [None]:
df['Fistula Category Text'].value_counts()

In [None]:
set([(len(str(num))) for num in df['Fistula Category Text']])

### Number of Patients included in fistula summary
Data type: object

Value count: 7626

Number of unique values: 293

Percent unique values: 0.564%

Ranges of values: 0 - 

Length of values: 1 - 3, 13

Description: Lists the number of patients included in the facility’s fistula summary. The best type of long-term access is an AV fistula. A surgeon connects an artery to a vein, usually in your arm, to create an AV fistula. An artery is a blood vessel that carries blood away from your heart. A vein is a blood vessel that carries blood back toward your heart. When the surgeon connects an artery to a vein, the vein grows wider and thicker, making it easier to place the needles for dialysis. The AV fistula also has a large diameter that allows your blood to flow out and back into your body quickly. The goal is to allow high blood flow so that the largest amount of blood can pass through the dialyzer.

In [None]:
df['Number of Patients included in fistula summary'].head()

In [None]:
df['Number of Patients included in fistula summary'].tail()

In [None]:
df['Number of Patients included in fistula summary'].describe()

In [None]:
# Percent unique values
# True if facility has a value count of 1 (aka unique)
pd.Series([facility == 1 for facility in df['Number of Patients included in fistula summary'].value_counts()]).value_counts()

In [None]:
df['Number of Patients included in fistula summary'].min()

In [None]:
df['Number of Patients included in fistula summary'].max()

In [None]:
set([(len(str(num))) for num in df['Number of Patients included in fistula summary']])

### Fistula Rate (Facility)
Data type: object

Value count: 7626

Number of unique values: 588

Percent unique values: 1.17%

Ranges of values: 0 - 

Length of values: 1 - 4, 13

Description: Lists the facility’s fistula rate per 100 patient-years. The best type of long-term access is an AV fistula. A surgeon connects an artery to a vein, usually in your arm, to create an AV fistula. An artery is a blood vessel that carries blood away from your heart. A vein is a blood vessel that carries blood back toward your heart. When the surgeon connects an artery to a vein, the vein grows wider and thicker, making it easier to place the needles for dialysis. The AV fistula also has a large diameter that allows your blood to flow out and back into your body quickly. The goal is to allow high blood flow so that the largest amount of blood can pass through the dialyzer.

In [None]:
df['Fistula Rate (Facility)'].head()

In [None]:
df['Fistula Rate (Facility)'].tail()

In [None]:
df['Fistula Rate (Facility)'].describe()

In [None]:
# Percent unique values
# True if facility has a value count of 1 (aka unique)
pd.Series([facility == 1 for facility in df['Fistula Rate (Facility)'].value_counts()]).value_counts()

In [None]:
df['Fistula Rate (Facility)'].min()

In [None]:
df['Fistula Rate (Facility)'].max()

In [None]:
set([(len(str(num))) for num in df['Fistula Rate (Facility)']])

### Fistula Rate: Upper Confidence Limit (97.5%)
Data type: object

Value count: 7626

Number of unique values: 488

Percent unique values: 0.905%

Ranges of values: 0 - 

Length of values: 1 - 4, 13

Description: Lists the upper confidence limit (97.5%) for fistula rate per 100 patient-years. The best type of long-term access is an AV fistula. A surgeon connects an artery to a vein, usually in your arm, to create an AV fistula. An artery is a blood vessel that carries blood away from your heart. A vein is a blood vessel that carries blood back toward your heart. When the surgeon connects an artery to a vein, the vein grows wider and thicker, making it easier to place the needles for dialysis. The AV fistula also has a large diameter that allows your blood to flow out and back into your body quickly. The goal is to allow high blood flow so that the largest amount of blood can pass through the dialyzer.

In [None]:
df['Fistula Rate: Upper Confidence Limit (97.5%)'].head()

In [None]:
df['Fistula Rate: Upper Confidence Limit (97.5%)'].tail()

In [None]:
df['Fistula Rate: Upper Confidence Limit (97.5%)'].describe()

In [None]:
# Percent unique values
# True if facility has a value count of 1 (aka unique)
pd.Series([facility == 1 for facility in df['Fistula Rate: Upper Confidence Limit (97.5%)'].value_counts()]).value_counts()

In [None]:
df['Fistula Rate: Upper Confidence Limit (97.5%)'].min()

In [None]:
df['Fistula Rate: Upper Confidence Limit (97.5%)'].max()

In [None]:
set([(len(str(num))) for num in df['Fistula Rate: Upper Confidence Limit (97.5%)']])

### Fistula Rate: Lower Confidence Limit (2.5%)
Data type: object

Value count: 7626

Number of unique values: 728

Percent unique values: 1.31%

Ranges of values: 0.1 - 

Length of values: 1 - 4, 13

Description: Lists the lower confidence limit (2.5%) for fistula rate per 100 patient-years. The best type of long-term access is an AV fistula. A surgeon connects an artery to a vein, usually in your arm, to create an AV fistula. An artery is a blood vessel that carries blood away from your heart. A vein is a blood vessel that carries blood back toward your heart. When the surgeon connects an artery to a vein, the vein grows wider and thicker, making it easier to place the needles for dialysis. The AV fistula also has a large diameter that allows your blood to flow out and back into your body quickly. The goal is to allow high blood flow so that the largest amount of blood can pass through the dialyzer.

In [None]:
df['Fistula Rate: Lower Confidence Limit (2.5%)'].head()

In [None]:
df['Fistula Rate: Lower Confidence Limit (2.5%)'].tail()

In [None]:
df['Fistula Rate: Lower Confidence Limit (2.5%)'].describe()

In [None]:
# Percent unique values
# True if facility has a value count of 1 (aka unique)
pd.Series([facility == 1 for facility in df['Fistula Rate: Lower Confidence Limit (2.5%)'].value_counts()]).value_counts()

In [None]:
df['Fistula Rate: Lower Confidence Limit (2.5%)'].min()

In [None]:
df['Fistula Rate: Lower Confidence Limit (2.5%)'].max()

In [None]:
set([(len(str(num))) for num in df['Fistula Rate: Lower Confidence Limit (2.5%)']])

### Number of patients in long term catheter summary
Data type: object

Value count: 7626

Number of unique values: 293

Percent unique values: 0.564%

Ranges of values: 0 - 

Length of values: 1 - 3, 13

Description: Lists the number of patients included in the facility’s long term catheter summary. 

In [None]:
df['Number of patients in long term catheter summary'].head()

In [None]:
df['Number of patients in long term catheter summary'].tail()

In [None]:
df['Number of patients in long term catheter summary'].describe()

In [None]:
# Percent unique values
# True if facility has a value count of 1 (aka unique)
pd.Series([facility == 1 for facility in df['Number of patients in long term catheter summary'].value_counts()]).value_counts()

In [None]:
df['Number of patients in long term catheter summary'].min()

In [None]:
df['Number of patients in long term catheter summary'].max()

In [None]:
set([(len(str(num))) for num in df['Number of patients in long term catheter summary']])

### Number of patient months in long term catheter summary
Data type: object

Value count: 7626

Number of unique values: 1781

Percent unique values: 5.55%

Ranges of values: 1 - 

Length of values: 1 - 4, 13

Description: Lists the number of patient-months included in the facility’s long term catheter summary. 

In [None]:
df['Number of patient months in long term catheter summary'].head()

In [None]:
df['Number of patient months in long term catheter summary'].tail()

In [None]:
df['Number of patient months in long term catheter summary'].describe()

In [None]:
# Percent unique values
# True if facility has a value count of 1 (aka unique)
pd.Series([facility == 1 for facility in df['Number of patient months in long term catheter summary'].value_counts()]).value_counts()

In [None]:
df['Number of patient months in long term catheter summary'].min()

In [None]:
df['Number of patient months in long term catheter summary'].max()

In [None]:
set([(len(str(num))) for num in df['Number of patient months in long term catheter summary']])

### Long term catheter Data Availability Code
Data type: int64

Value count: 7626

Number of unique values: 5

Percent unique values: 0%

Ranges of values: 1, 199, 201, 256, 258

Length of values: 1, 3

Description: Lists whether the facility had sufficient long term catheter data available or the reason for why the data is not available.

In [None]:
df['Long term catheter Data Availability Code'].head()

In [None]:
df['Long term catheter Data Availability Code'].tail()

In [None]:
df['Long term catheter Data Availability Code'].count()

In [None]:
df['Long term catheter Data Availability Code'].value_counts()

### Percentage of Adult patients with long term catheter in use
Data type: object

Value count: 7626

Number of unique values: 68

Percent unique values: 0.210%

Ranges of values: 0 - 

Length of values: 1, 2, 13

Description: Lists the percentage of adult patients with long term catheter in use.

In [None]:
df['Percentage of Adult patients with long term catheter in use'].head()

In [None]:
df['Percentage of Adult patients with long term catheter in use'].tail()

In [None]:
df['Percentage of Adult patients with long term catheter in use'].describe()

In [None]:
# Percent unique values
# True if facility has a value count of 1 (aka unique)
pd.Series([facility == 1 for facility in df['Percentage of Adult patients with long term catheter in use'].value_counts()]).value_counts()

In [None]:
df['Percentage of Adult patients with long term catheter in use'].min()

In [None]:
df['Percentage of Adult patients with long term catheter in use'].max()

In [None]:
set([(len(str(num))) for num in df['Percentage of Adult patients with long term catheter in use']])

### Number of patients in nPCR summary
Data type: object

Value count: 7626 

Number of unique values: 26

Percent unique values: 0.105%

Ranges of values: 0 - 

Length of values: 1, 2, 13

Description: 
Lists the number of patients included in the facility's normalized protein catabolic rate (nPCR) summary. The normalized protein catabolic rate (nPCR) is a formula commonly used to assess dietary protein intake in dialysis patients, as a means towards determining nutritional adequacy, a major problem in many ESRD patients. 
The nPCR is reported in grams of urea nitrogen per kilogram per day.
Most guidelines specify maintaining the protein intake above 1.0 – 1.2 g/kg/day in dialysis patients, with values less than 0.8 g/kg/day being equated with malnutrition.

In [None]:
df['Number of patients in nPCR summary'].head()

In [None]:
df['Number of patients in nPCR summary'].tail()

In [None]:
df['Number of patients in nPCR summary'].describe()

In [None]:
# Percent unique values
# True if facility has a value count of 1 (aka unique)
pd.Series([facility == 1 for facility in df['Number of patients in nPCR summary'].value_counts()]).value_counts()

In [None]:
df['Number of patients in nPCR summary'].min()

In [None]:
df['Number of patients in nPCR summary'].max()

In [None]:
set([(len(str(num))) for num in df['Number of patients in nPCR summary']])

### Number of patient-months in nPCR summary
Data type: object

Value count: 7626

Number of unique values: 74

Percent unique values: 0.525%

Ranges of values: 1 - 

Length of values: 1 - 3, 13

Description: Lists the number of patient-months included in the facility's normalized protein catabolic rate (nPCR) summary. The normalized protein catabolic rate (nPCR) is a formula commonly used to assess dietary protein intake in dialysis patients, as a means towards determining nutritional adequacy, a major problem in many ESRD patients.
The nPCR is reported in grams of urea nitrogen per kilogram per day.
Most guidelines specify maintaining the protein intake above 1.0 – 1.2 g/kg/day in dialysis patients, with values less than 0.8 g/kg/day being equated with malnutrition.

In [None]:
df['Number of patient-months in nPCR summary'].head()

In [None]:
df['Number of patient-months in nPCR summary'].tail()

In [None]:
df['Number of patient-months in nPCR summary'].describe()

In [None]:
# Percent unique values
# True if facility has a value count of 1 (aka unique)
pd.Series([facility == 1 for facility in df['Number of patient-months in nPCR summary'].value_counts()]).value_counts()

In [None]:
df['Number of patient-months in nPCR summary'].min()

In [None]:
df['Number of patient-months in nPCR summary'].max()

In [None]:
set([(len(str(num))) for num in df['Number of patient-months in nPCR summary']])

### nPCR Data Availability Code
Data type: int64

Value count: 7626

Number of unique values: 5

Percent unique values: 0%

Ranges of values: 1, 199, 201, 258, 259

Length of values: 1, 3

Description: Lists whether the facility had sufficient normalized protein catabolic rate (nPCR) data available or the reason for why the data is not available. The normalized protein catabolic rate (nPCR) is a formula commonly used to assess dietary protein intake in dialysis patients, as a means towards determining nutritional adequacy, a major problem in many ESRD patients. 
The nPCR is reported in grams of urea nitrogen per kilogram per day.
Most guidelines specify maintaining the protein intake above 1.0 – 1.2 g/kg/day in dialysis patients, with values less than 0.8 g/kg/day being equated with malnutrition.

In [None]:
df['nPCR Data Availability Code'].head()

In [None]:
df['nPCR Data Availability Code'].tail()

In [None]:
df['nPCR Data Availability Code'].count()

In [None]:
df['nPCR Data Availability Code'].value_counts()

### Percentage of pediatric HD patients with nPCR
Data type: object

Value count: 7626

Number of unique values: 15

Percent unique values: 0.118%

Ranges of values: 100 -

Length of values: 2, 3, 13

Description: Lists the percentage of pediatric HD patients with normalized protein catabolic rate (nPCR). The normalized protein catabolic rate (nPCR) is a formula commonly used to assess dietary protein intake in dialysis patients, as a means towards determining nutritional adequacy, a major problem in many ESRD patients. 
The nPCR is reported in grams of urea nitrogen per kilogram per day.
Most guidelines specify maintaining the protein intake above 1.0 – 1.2 g/kg/day in dialysis patients, with values less than 0.8 g/kg/day being equated with malnutrition.

In [None]:
df['Percentage of pediatric HD patients with nPCR'].head()

In [None]:
df['Percentage of pediatric HD patients with nPCR'].tail()

In [None]:
df['Percentage of pediatric HD patients with nPCR'].describe()

In [None]:
# Percent unique values
# True if facility has a value count of 1 (aka unique)
pd.Series([facility == 1 for facility in df['Percentage of pediatric HD patients with nPCR'].value_counts()]).value_counts()

In [None]:
df['Percentage of pediatric HD patients with nPCR'].min()

In [None]:
df['Percentage of pediatric HD patients with nPCR'].max()

In [None]:
set([(len(str(num))) for num in df['Percentage of pediatric HD patients with nPCR']])

### DATE_SWR
Data type: object

Value count: 7626

Number of unique values: 1

Percent unique values: 0%

Ranges of values: "01/01/2015 - 12/31/2017"

Length of values: 23

Description: Years Patient Transplant Waitlist Is Based Upon. Standardized Waitlist Ratio (SWR) measure tracks the number of incident patients at the dialysis facility under the age of 75 listed on the kidney or kidney-pancreas transplant waitlist or who received a living donor transplant within the first year of initiating dialysis. SWR is calculated to compare the observed waitlisting rate in the facility to the waitlisting rate that was expected.

In [None]:
df['DATE_SWR'].head()

In [None]:
df['DATE_SWR'].tail()

In [None]:
df['DATE_SWR'].value_counts()

In [None]:
set([(len(str(num))) for num in df['DATE_SWR']])

### SWR category text
Data type: object

Value count: 7626

Number of unique values: 4

Percent unique values: 0%

Ranges of values: As Expected, Worse than Expected, Better than Expected, Not Available

Length of values: 11, 13, 19, 20

Description: Standardized waitlist ratio (SWR) category text. The standardized waitlist ratio (SWR) measure tracks the number of incident patients at the dialysis facility under the age of 75 listed on the kidney or kidney-pancreas transplant waitlist or who received a living donor transplant within the first year of initiating dialysis. SWR is calculated to compare the observed waitlisting rate in the facility to the waitlisting rate that was expected. 

Value description:
- As Expected          : Patient standardized waitlist ratio (SWR) categorized as “As Expected”       
- Worse than Expected  : Patient standardized waitlist ratio (SWR) categorized as “Worse than Expected”
- Better than Expected : Patient standardized waitlist ratio (SWR) categorized as “Better than Expected”
- Not Available 

In [None]:
df['SWR category text'].head()

In [None]:
df['SWR category text'].tail()

In [None]:
df['SWR category text'].count()

In [None]:
df['SWR category text'].value_counts()

In [None]:
set([(len(str(num))) for num in df['SWR category text']])

### Patient transplant waitlist data availability code
Data type: int64

Value count: 7626

Number of unique values: 5

Percent unique values: 0.0131%

Ranges of values: 1, 199, 201, 255, 258

Length of values: 1, 3

Description: Lists whether the facility had sufficient patient transplant waitlist data available or the reason why the data is not available.

Value description:
- 1   : N/A
- 199 : Not enough patients to report on this measure. Call the dialysis center to discuss this measure.
- 201 : Data not reported. Call the dialysis center to discuss this quality measure. 
- 255 : Medicare determined that the percentage reported was not accurate.
- 258 : The dialysis center was not open long enough to supply sufficient measure data.

In [None]:
df['Patient transplant waitlist data availability code'].head()

In [None]:
df['Patient transplant waitlist data availability code'].tail()

In [None]:
df['Patient transplant waitlist data availability code'].count()

In [None]:
df['Patient transplant waitlist data availability code'].value_counts()

### 95% C.I. (upper limit) for SWR
Data type: object

Value count: 7626

Number of unique values: 594

Percent unique values: 1.39%

Ranges of values: 0.23 - 

Length of values: 1 - 5, 13

Description: Lists the upper confidence interval limit (95%) for Standardized Waitlist Ratio (SWR) rate per 100 patient-years. The Standardized Waitlist Ratio (SWR) measure tracks the number of incident patients at the dialysis facility under the age of 75 listed on the kidney or kidney-pancreas transplant waitlist or who received a living donor transplant within the first year of initiating dialysis. SWR is calculated to compare the observed waitlisting rate in the facility to the waitlisting rate that was expected.

In [None]:
df['95% C.I. (upper limit) for SWR'].head()

In [None]:
df['95% C.I. (upper limit) for SWR'].tail()

In [None]:
df['95% C.I. (upper limit) for SWR'].describe()

In [None]:
# Percent unique values
# True if facility has a value count of 1 (aka unique)
pd.Series([facility == 1 for facility in df['95% C.I. (upper limit) for SWR'].value_counts()]).value_counts()

In [None]:
df['95% C.I. (upper limit) for SWR'].min()

In [None]:
df['95% C.I. (upper limit) for SWR'].max()

In [None]:
set([(len(str(num))) for num in df['95% C.I. (upper limit) for SWR']])

### 95% C.I. (lower limit) for SWR
Data type: object

Value count: 7626

Number of unique values: 215

Percent unique values: 0.511%

Ranges of values: 0.01 - 

Length of values: 1 - 4, 13

Description: Lists the lower confidence interval limit (95%) for Standardized Waitlist Ratio (SWR) rate per 100 patient-years. The Standardized Waitlist Ratio (SWR) measure tracks the number of incident patients at the dialysis facility under the age of 75 listed on the kidney or kidney-pancreas transplant waitlist or who received a living donor transplant within the first year of initiating dialysis. SWR is calculated to compare the observed waitlisting rate in the facility to the waitlisting rate that was expected.

In [None]:
df['95% C.I. (lower limit) for SWR'].head()

In [None]:
df['95% C.I. (lower limit) for SWR'].tail()

In [None]:
df['95% C.I. (lower limit) for SWR'].describe()

In [None]:
# Percent unique values
# True if facility has a value count of 1 (aka unique)
pd.Series([facility == 1 for facility in df['95% C.I. (lower limit) for SWR'].value_counts()]).value_counts()

In [None]:
df['95% C.I. (lower limit) for SWR'].min()

In [None]:
df['95% C.I. (lower limit) for SWR'].max()

In [None]:
set([(len(str(num))) for num in df['95% C.I. (lower limit) for SWR']])

### Number of patients in this facility for SWR
Data type: object

Value count: 7626

Number of unique values: 146

Percent unique values: 0.236%

Ranges of values: 0 - 

Length of values: 1 - 3, 13 

Description: Lists the number of patients in this facility for Standardized Waitlist Ratio (SWR). The Standardized Waitlist Ratio (SWR) measure tracks the number of incident patients at the dialysis facility under the age of 75 listed on the kidney or kidney-pancreas transplant waitlist or who received a living donor transplant within the first year of initiating dialysis. SWR is calculated to compare the observed waitlisting rate in the facility to the waitlisting rate that was expected. 

In [None]:
df['Number of patients in this facility for SWR'].head()

In [None]:
df['Number of patients in this facility for SWR'].tail()

In [None]:
df['Number of patients in this facility for SWR'].describe()

In [None]:
# Percent unique values
# True if facility has a value count of 1 (aka unique)
pd.Series([facility == 1 for facility in df['Number of patients in this facility for SWR'].value_counts()]).value_counts()

In [None]:
df['Number of patients in this facility for SWR'].min()

In [None]:
df['Number of patients in this facility for SWR'].max()

In [None]:
set([(len(str(num))) for num in df['Number of patients in this facility for SWR']])

### Standardized First Kidney Transplant Waitlist Ratio
Data type: object

Value count: 7626

Number of unique values: 361

Percent unique values: 0.813%

Ranges of values: 0.08 - 

Length of values: 1 - 4, 13

Description: Lists the standardized waitlist ratio (SWR) for this facility. This measure tracks the number of incident patients at the dialysis facility under the age of 75 listed on the kidney or kidney-pancreas transplant waitlist or who received living donor transplants within the first year of initiating dialysis.

In [None]:
df['Standardized First Kidney Transplant Waitlist Ratio'].head()

In [None]:
df['Standardized First Kidney Transplant Waitlist Ratio'].tail()

In [None]:
df['Standardized First Kidney Transplant Waitlist Ratio'].describe()

In [None]:
# Percent unique values
# True if facility has a value count of 1 (aka unique)
pd.Series([facility == 1 for facility in df['Standardized First Kidney Transplant Waitlist Ratio'].value_counts()]).value_counts()

In [None]:
df['Standardized First Kidney Transplant Waitlist Ratio'].min()

In [None]:
df['Standardized First Kidney Transplant Waitlist Ratio'].max()

In [None]:
set([(len(str(num))) for num in df['Standardized First Kidney Transplant Waitlist Ratio']])

### PPPW category text
Data type: object

Value count: 7626

Number of unique values: 4

Percent unique values: 0%

Ranges of values: As Expected, Worse than Expected, Better than Expected, Not Available

Length of values: 11, 13, 19, 20

Description: Percentage of prevalent patients waitlisted (PPPW) category text. The PPPW measure tracks the percentage of patients at each dialysis facility who were on the kidney or kidney-pancreas transplant waiting list. Results are averaged across patients prevalent on the last day of each month during the reporting year, adjusted for age.

Value description:
- As Expected          : Patient percentage of prevalent patients waitlisted (PPPW) categorized as “As Expected”       
- Worse than Expected  : Patient percentage of prevalent patients waitlisted (PPPW) categorized as “Worse than Expected”
- Better than Expected : Patient percentage of prevalent patients waitlisted (PPPW) categorized as “Better than Expected”
- Not Available 

In [None]:
df['PPPW category text'].head()

In [None]:
df['PPPW category text'].tail()

In [None]:
df['PPPW category text'].count()

In [None]:
df['PPPW category text'].value_counts()

In [None]:
set([(len(str(num))) for num in df['PPPW category text']])

### Patient prevalent transplant waitlist data availability code
Data type: int64

Value count: 7626

Number of unique values: 4

Percent unique values: 0%

Ranges of values: 1, 199, 201, 258

Length of values: 1, 3

Description: Lists whether the facility had sufficient patient prevalent transplant waitlist data available or the reason for why the data is not available.

In [None]:
df['Patient prevalent transplant waitlist data availability code'].head()

In [None]:
df['Patient prevalent transplant waitlist data availability code'].tail()

In [None]:
df['Patient prevalent transplant waitlist data availability code'].count()

In [None]:
df['Patient prevalent transplant waitlist data availability code'].value_counts()

### 95% C.I. (upper limit) for PPPW
Data type: object

Value count: 7626

Number of unique values: 767

Percent unique values: 1.22%

Ranges of values: 0 - 

Length of values: 1 - 4, 13

Description: Lists the upper confidence interval limit (95%) for Percentage of Prevalent Patients Waitlisted (PPPW) rate per 100 patient-years. The PPPW measure tracks the percentage of patients at each dialysis facility who were on the kidney or kidney-pancreas transplant waiting list. Results are averaged across patients prevalent on the last day of each month during the reporting year, adjusted for age.

In [None]:
df['95% C.I. (upper limit) for PPPW'].head()

In [None]:
df['95% C.I. (upper limit) for PPPW'].tail()

In [None]:
df['95% C.I. (upper limit) for PPPW'].describe()

In [None]:
# Percent unique values
# True if facility has a value count of 1 (aka unique)
pd.Series([facility == 1 for facility in df['95% C.I. (upper limit) for PPPW'].value_counts()]).value_counts()

In [None]:
df['95% C.I. (upper limit) for PPPW'].min()

In [None]:
df['95% C.I. (upper limit) for PPPW'].max()

In [None]:
set([(len(str(num))) for num in df['95% C.I. (upper limit) for PPPW']])

### 95% C.I. (lower limit) for PPPW
Data type: object

Value count: 7626

Number of unique values: 356

Percent unique values: 0.918%

Ranges of values: 0.1 - 

Length of values: 1 - 4, 13

Description: Lists the lower confidence interval limit (95%) for Percentage of Prevalent Patients Waitlisted (PPPW) rate per 100 patient-years. The PPPW measure tracks the percentage of patients at each dialysis facility who were on the kidney or kidney-pancreas transplant waiting list. Results are averaged across patients prevalent on the last day of each month during the reporting year, adjusted for age.

In [None]:
df['95% C.I. (lower limit) for PPPW'].head()

In [None]:
df['95% C.I. (lower limit) for PPPW'].tail()

In [None]:
df['95% C.I. (lower limit) for PPPW'].describe()

In [None]:
# Percent unique values
# True if facility has a value count of 1 (aka unique)
pd.Series([facility == 1 for facility in df['95% C.I. (lower limit) for PPPW'].value_counts()]).value_counts()

In [None]:
df['95% C.I. (lower limit) for PPPW'].min()

In [None]:
df['95% C.I. (lower limit) for PPPW'].max()

In [None]:
set([(len(str(num))) for num in df['95% C.I. (lower limit) for PPPW']])

### Number of patients for PPPW
Data type: object

Value count: 7626

Number of unique values: 270

Percent unique values: 0.603%

Ranges of values: 0 - 

Length of values: 1 - 3, 13

Description: Lists the number of patients included in the facility's percentage of prevalent patients waitlisted (PPPW). The PPPW measure tracks the percentage of patients at each dialysis facility who were on the kidney or kidney-pancreas transplant waiting list. Results are averaged across patients prevalent on the last day of each month during the reporting year, adjusted for age.

In [None]:
df['Number of patients for PPPW'].head()

In [None]:
df['Number of patients for PPPW'].tail()

In [None]:
df['Number of patients for PPPW'].describe()

In [None]:
# Percent unique values
# True if facility has a value count of 1 (aka unique)
pd.Series([facility == 1 for facility in df['Number of patients for PPPW'].value_counts()]).value_counts()

In [None]:
df['Number of patients for PPPW'].min()

In [None]:
df['Number of patients for PPPW'].max()

In [None]:
set([(len(str(num))) for num in df['Number of patients for PPPW']])

### Percentage of Prevalent Patients Waitlisted
Data type: object

Value count: 7626

Number of unique values: 572

Percent unique values: 1.13%

Ranges of values: 0.1 - 

Length of values: 1 - 4, 13

Description: Lists the percentage of prevalent patients waitlisted.

In [None]:
df['Percentage of Prevalent Patients Waitlisted'].head()

In [None]:
df['Percentage of Prevalent Patients Waitlisted'].tail()

In [None]:
df['Percentage of Prevalent Patients Waitlisted'].describe()

In [None]:
# Percent unique values
# True if facility has a value count of 1 (aka unique)
pd.Series([facility == 1 for facility in df['Percentage of Prevalent Patients Waitlisted'].value_counts()]).value_counts()

In [None]:
df['Percentage of Prevalent Patients Waitlisted'].min()

In [None]:
df['Percentage of Prevalent Patients Waitlisted'].max()

In [None]:
set([(len(str(num))) for num in df['Percentage of Prevalent Patients Waitlisted']])