# Getting Started with the Python Polars Library: Analyzing the U.S. Hospital General Information Dataset

## Loading necessary libraries

In [None]:
# Load Libraries
import polars as pl
import polars.selectors as cs
from plotnine import *
from mizani.labels import label_number

import sys 

# Display system and polars versions
print(f'My system version is {sys.version};\npolars version is {pl.__version__}')

## Loading dataset

In [95]:
url = 'data/Hospital_General_Information.csv'

hos_raw = pl.read_csv(url, null_values='NA', ignore_errors=True)

# inspect output
print(hos_raw)

shape: (5_398, 39)
┌─────────────┬───────────────────────────────────┬──────────────────────────────────┬───────────┬───┬───────────────────────┬────────────────────────┬──────────────────────┬───────────────────┐
│ Facility ID ┆ Facility Name                     ┆ Address                          ┆ City/Town ┆ … ┆ Pt Exp Group Footnote ┆ TE Group Measure Count ┆ Count of Facility TE ┆ TE Group Footnote │
│ ---         ┆ ---                               ┆ ---                              ┆ ---       ┆   ┆ ---                   ┆ ---                    ┆ Measures             ┆ ---               │
│ str         ┆ str                               ┆ str                              ┆ str       ┆   ┆ i64                   ┆ str                    ┆ ---                  ┆ i64               │
│             ┆                                   ┆                                  ┆           ┆   ┆                       ┆                        ┆ str                  ┆                   │
╞═════

## Pulling out column names

In [96]:
hos_raw.columns

['Facility ID',
 'Facility Name',
 'Address',
 'City/Town',
 'State',
 'ZIP Code',
 'County/Parish',
 'Telephone Number',
 'Hospital Type',
 'Hospital Ownership',
 'Emergency Services',
 'Meets criteria for promoting interoperability of EHRs',
 'Meets criteria for birthing friendly designation',
 'Hospital overall rating',
 'Hospital overall rating footnote',
 'MORT Group Measure Count',
 'Count of Facility MORT Measures',
 'Count of MORT Measures Better',
 'Count of MORT Measures No Different',
 'Count of MORT Measures Worse',
 'MORT Group Footnote',
 'Safety Group Measure Count',
 'Count of Facility Safety Measures',
 'Count of Safety Measures Better',
 'Count of Safety Measures No Different',
 'Count of Safety Measures Worse',
 'Safety Group Footnote',
 'READM Group Measure Count',
 'Count of Facility READM Measures',
 'Count of READM Measures Better',
 'Count of READM Measures No Different',
 'Count of READM Measures Worse',
 'READM Group Footnote',
 'Pt Exp Group Measure Count',
 

## Verifying missing values

In [97]:
(
    hos_raw
    .select(cs.all().null_count())
)

Facility ID,Facility Name,Address,City/Town,State,ZIP Code,County/Parish,Telephone Number,Hospital Type,Hospital Ownership,Emergency Services,Meets criteria for promoting interoperability of EHRs,Meets criteria for birthing friendly designation,Hospital overall rating,Hospital overall rating footnote,MORT Group Measure Count,Count of Facility MORT Measures,Count of MORT Measures Better,Count of MORT Measures No Different,Count of MORT Measures Worse,MORT Group Footnote,Safety Group Measure Count,Count of Facility Safety Measures,Count of Safety Measures Better,Count of Safety Measures No Different,Count of Safety Measures Worse,Safety Group Footnote,READM Group Measure Count,Count of Facility READM Measures,Count of READM Measures Better,Count of READM Measures No Different,Count of READM Measures Worse,READM Group Footnote,Pt Exp Group Measure Count,Count of Facility Pt Exp Measures,Pt Exp Group Footnote,TE Group Measure Count,Count of Facility TE Measures,TE Group Footnote
u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32
0,0,0,0,0,0,0,0,0,0,0,1500,3173,0,2789,0,0,0,0,0,3548,0,0,0,0,0,3426,0,0,0,0,0,4343,0,0,3219,0,0,4544


In [99]:
(
    hos_raw
    .select(cs.all().is_null().sum())
)

Facility ID,Facility Name,Address,City/Town,State,ZIP Code,County/Parish,Telephone Number,Hospital Type,Hospital Ownership,Emergency Services,Meets criteria for promoting interoperability of EHRs,Meets criteria for birthing friendly designation,Hospital overall rating,Hospital overall rating footnote,MORT Group Measure Count,Count of Facility MORT Measures,Count of MORT Measures Better,Count of MORT Measures No Different,Count of MORT Measures Worse,MORT Group Footnote,Safety Group Measure Count,Count of Facility Safety Measures,Count of Safety Measures Better,Count of Safety Measures No Different,Count of Safety Measures Worse,Safety Group Footnote,READM Group Measure Count,Count of Facility READM Measures,Count of READM Measures Better,Count of READM Measures No Different,Count of READM Measures Worse,READM Group Footnote,Pt Exp Group Measure Count,Count of Facility Pt Exp Measures,Pt Exp Group Footnote,TE Group Measure Count,Count of Facility TE Measures,TE Group Footnote
u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32
0,0,0,0,0,0,0,0,0,0,0,1500,3173,0,2789,0,0,0,0,0,3548,0,0,0,0,0,3426,0,0,0,0,0,4343,0,0,3219,0,0,4544


## Selecting columns

In [105]:
(
    hos_raw
    .select(cs.ends_with('note'))
)

Hospital overall rating footnote,MORT Group Footnote,Safety Group Footnote,READM Group Footnote,Pt Exp Group Footnote,TE Group Footnote
i64,i64,i64,i64,i64,i64
,,,,,
,,,,,
,,,,,
,,,,,
16,5,5,,5,
…,…,…,…,…,…
19,19,19,19,19,19
19,19,19,19,19,19
5,5,5,5,5,5
16,5,5,5,5,5


In [107]:
(
    hos_raw
    .select(cs.all() - (cs.ends_with('note') | pl.col('Facility ID')))
)

Facility Name,Address,City/Town,State,ZIP Code,County/Parish,Telephone Number,Hospital Type,Hospital Ownership,Emergency Services,Meets criteria for promoting interoperability of EHRs,Meets criteria for birthing friendly designation,Hospital overall rating,MORT Group Measure Count,Count of Facility MORT Measures,Count of MORT Measures Better,Count of MORT Measures No Different,Count of MORT Measures Worse,Safety Group Measure Count,Count of Facility Safety Measures,Count of Safety Measures Better,Count of Safety Measures No Different,Count of Safety Measures Worse,READM Group Measure Count,Count of Facility READM Measures,Count of READM Measures Better,Count of READM Measures No Different,Count of READM Measures Worse,Pt Exp Group Measure Count,Count of Facility Pt Exp Measures,TE Group Measure Count,Count of Facility TE Measures
str,str,str,str,i64,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str
"""SOUTHEAST HEALTH MEDICAL CENTE…","""1108 ROSS CLARK CIRCLE""","""DOTHAN""","""AL""",36301,"""HOUSTON""","""(334) 793-8701""","""Acute Care Hospitals""","""Government - Hospital District…","""Yes""","""Y""","""Y""","""3""","""7""","""7""","""1""","""6""","""0""","""8""","""7""","""2""","""5""","""0""","""11""","""11""","""1""","""8""","""2""","""8""","""8""","""12""","""10"""
"""MARSHALL MEDICAL CENTERS""","""2505 U S HIGHWAY 431 NORTH""","""BOAZ""","""AL""",35957,"""MARSHALL""","""(256) 593-8310""","""Acute Care Hospitals""","""Government - Hospital District…","""Yes""","""Y""",,"""2""","""7""","""6""","""0""","""5""","""1""","""8""","""7""","""0""","""7""","""0""","""11""","""9""","""0""","""8""","""1""","""8""","""8""","""12""","""12"""
"""NORTH ALABAMA MEDICAL CENTER""","""1701 VETERANS DRIVE""","""FLORENCE""","""AL""",35630,"""LAUDERDALE""","""(256) 629-1000""","""Acute Care Hospitals""","""Proprietary""","""Yes""","""Y""","""Y""","""1""","""7""","""7""","""0""","""6""","""1""","""8""","""7""","""3""","""4""","""0""","""11""","""9""","""0""","""7""","""2""","""8""","""8""","""12""","""11"""
"""MIZELL MEMORIAL HOSPITAL""","""702 N MAIN ST""","""OPP""","""AL""",36467,"""COVINGTON""","""(334) 493-3541""","""Acute Care Hospitals""","""Voluntary non-profit - Private""","""Yes""","""Y""",,"""1""","""7""","""3""","""0""","""2""","""1""","""8""","""2""","""0""","""2""","""0""","""11""","""7""","""0""","""7""","""0""","""8""","""8""","""12""","""7"""
"""CRENSHAW COMMUNITY HOSPITAL""","""101 HOSPITAL CIRCLE""","""LUVERNE""","""AL""",36049,"""CRENSHAW""","""(334) 335-3374""","""Acute Care Hospitals""","""Proprietary""","""Yes""","""Y""",,"""Not Available""","""7""","""Not Available""","""Not Available""","""Not Available""","""Not Available""","""8""","""Not Available""","""Not Available""","""Not Available""","""Not Available""","""11""","""2""","""0""","""2""","""0""","""8""","""Not Available""","""12""","""6"""
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
"""TRINITY REGIONAL HOSPITAL SACH…","""4750 PRESIDENT GEORGE BUSH HIG…","""SACHSE""","""TX""",75048,"""DALLAS""","""(469) 298-2599""","""Acute Care Hospitals""","""Voluntary non-profit - Private""","""Yes""",,,"""Not Available""","""Not Available""","""Not Available""","""Not Available""","""Not Available""","""Not Available""","""Not Available""","""Not Available""","""Not Available""","""Not Available""","""Not Available""","""Not Available""","""Not Available""","""Not Available""","""Not Available""","""Not Available""","""Not Available""","""Not Available""","""Not Available""","""Not Available"""
"""EAST HOUSTON MEDICAL CENTER""","""15149 WALLISVILLE ROAD""","""HOUSTON""","""TX""",77049,"""HARRIS""","""(281) 988-9800""","""Acute Care Hospitals""","""Proprietary""","""Yes""",,,"""Not Available""","""Not Available""","""Not Available""","""Not Available""","""Not Available""","""Not Available""","""Not Available""","""Not Available""","""Not Available""","""Not Available""","""Not Available""","""Not Available""","""Not Available""","""Not Available""","""Not Available""","""Not Available""","""Not Available""","""Not Available""","""Not Available""","""Not Available"""
"""ADVANCED DALLAS HOSPITALS AND …","""7502 GREENVILLE AVENUE""","""DALLAS""","""TX""",75231,"""DALLAS""","""(214) 221-6000""","""Acute Care Hospitals""","""Proprietary""","""Yes""",,,"""Not Available""","""Not Available""","""Not Available""","""Not Available""","""Not Available""","""Not Available""","""Not Available""","""Not Available""","""Not Available""","""Not Available""","""Not Available""","""Not Available""","""Not Available""","""Not Available""","""Not Available""","""Not Available""","""Not Available""","""Not Available""","""Not Available""","""Not Available"""
"""LEGENT SURGICAL HOSPITAL PLANO""","""4100 MAPLESHADE LANE""","""PLANO""","""TX""",75075,"""COLLIN""","""(972) 265-1050""","""Acute Care Hospitals""","""Proprietary""","""Yes""",,,"""Not Available""","""7""","""Not Available""","""Not Available""","""Not Available""","""Not Available""","""8""","""Not Available""","""Not Available""","""Not Available""","""Not Available""","""11""","""Not Available""","""Not Available""","""Not Available""","""Not Available""","""8""","""Not Available""","""12""","""Not Available"""


In [108]:
hos_raw.columns

['Facility ID',
 'Facility Name',
 'Address',
 'City/Town',
 'State',
 'ZIP Code',
 'County/Parish',
 'Telephone Number',
 'Hospital Type',
 'Hospital Ownership',
 'Emergency Services',
 'Meets criteria for promoting interoperability of EHRs',
 'Meets criteria for birthing friendly designation',
 'Hospital overall rating',
 'Hospital overall rating footnote',
 'MORT Group Measure Count',
 'Count of Facility MORT Measures',
 'Count of MORT Measures Better',
 'Count of MORT Measures No Different',
 'Count of MORT Measures Worse',
 'MORT Group Footnote',
 'Safety Group Measure Count',
 'Count of Facility Safety Measures',
 'Count of Safety Measures Better',
 'Count of Safety Measures No Different',
 'Count of Safety Measures Worse',
 'Safety Group Footnote',
 'READM Group Measure Count',
 'Count of Facility READM Measures',
 'Count of READM Measures Better',
 'Count of READM Measures No Different',
 'Count of READM Measures Worse',
 'READM Group Footnote',
 'Pt Exp Group Measure Count',
 

In [109]:

cols = ['Facility ID',
 'Facility Name',
 'Address',
 'City/Town',
 'State',
 'ZIP Code',
 'County/Parish',
 'Telephone Number',
 'Hospital Type',
 'Hospital Ownership',
 'Emergency Services',
 'Meets criteria for promoting interoperability of EHRs',
 'Meets criteria for birthing friendly designation',
 'Hospital overall rating',
 'Hospital overall rating footnote',
 'MORT Group Measure Count',
 'Count of Facility MORT Measures',
 'Count of MORT Measures Better',
'Count of MORT Measures No Different',
 'Count of MORT Measures Worse',
 'MORT Group Footnote',
 'Safety Group Measure Count',
 'Count of Facility Safety Measures',
 'Count of Safety Measures Better',
 'Count of Safety Measures No Different',
 'Count of Facility Pt Exp Measures',
 'Pt Exp Group Footnote',
 'TE Group Measure Count',
 'Count of Facility TE Measures',
 'TE Group Footnote']

(
    hos_raw
    .select(cols)
)

Facility ID,Facility Name,Address,City/Town,State,ZIP Code,County/Parish,Telephone Number,Hospital Type,Hospital Ownership,Emergency Services,Meets criteria for promoting interoperability of EHRs,Meets criteria for birthing friendly designation,Hospital overall rating,Hospital overall rating footnote,MORT Group Measure Count,Count of Facility MORT Measures,Count of MORT Measures Better,Count of MORT Measures No Different,Count of MORT Measures Worse,MORT Group Footnote,Safety Group Measure Count,Count of Facility Safety Measures,Count of Safety Measures Better,Count of Safety Measures No Different,Count of Facility Pt Exp Measures,Pt Exp Group Footnote,TE Group Measure Count,Count of Facility TE Measures,TE Group Footnote
str,str,str,str,str,i64,str,str,str,str,str,str,str,str,i64,str,str,str,str,str,i64,str,str,str,str,str,i64,str,str,i64
"""010001""","""SOUTHEAST HEALTH MEDICAL CENTE…","""1108 ROSS CLARK CIRCLE""","""DOTHAN""","""AL""",36301,"""HOUSTON""","""(334) 793-8701""","""Acute Care Hospitals""","""Government - Hospital District…","""Yes""","""Y""","""Y""","""3""",,"""7""","""7""","""1""","""6""","""0""",,"""8""","""7""","""2""","""5""","""8""",,"""12""","""10""",
"""010005""","""MARSHALL MEDICAL CENTERS""","""2505 U S HIGHWAY 431 NORTH""","""BOAZ""","""AL""",35957,"""MARSHALL""","""(256) 593-8310""","""Acute Care Hospitals""","""Government - Hospital District…","""Yes""","""Y""",,"""2""",,"""7""","""6""","""0""","""5""","""1""",,"""8""","""7""","""0""","""7""","""8""",,"""12""","""12""",
"""010006""","""NORTH ALABAMA MEDICAL CENTER""","""1701 VETERANS DRIVE""","""FLORENCE""","""AL""",35630,"""LAUDERDALE""","""(256) 629-1000""","""Acute Care Hospitals""","""Proprietary""","""Yes""","""Y""","""Y""","""1""",,"""7""","""7""","""0""","""6""","""1""",,"""8""","""7""","""3""","""4""","""8""",,"""12""","""11""",
"""010007""","""MIZELL MEMORIAL HOSPITAL""","""702 N MAIN ST""","""OPP""","""AL""",36467,"""COVINGTON""","""(334) 493-3541""","""Acute Care Hospitals""","""Voluntary non-profit - Private""","""Yes""","""Y""",,"""1""",,"""7""","""3""","""0""","""2""","""1""",,"""8""","""2""","""0""","""2""","""8""",,"""12""","""7""",
"""010008""","""CRENSHAW COMMUNITY HOSPITAL""","""101 HOSPITAL CIRCLE""","""LUVERNE""","""AL""",36049,"""CRENSHAW""","""(334) 335-3374""","""Acute Care Hospitals""","""Proprietary""","""Yes""","""Y""",,"""Not Available""",16,"""7""","""Not Available""","""Not Available""","""Not Available""","""Not Available""",5,"""8""","""Not Available""","""Not Available""","""Not Available""","""Not Available""",5,"""12""","""6""",
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
"""670319""","""TRINITY REGIONAL HOSPITAL SACH…","""4750 PRESIDENT GEORGE BUSH HIG…","""SACHSE""","""TX""",75048,"""DALLAS""","""(469) 298-2599""","""Acute Care Hospitals""","""Voluntary non-profit - Private""","""Yes""",,,"""Not Available""",19,"""Not Available""","""Not Available""","""Not Available""","""Not Available""","""Not Available""",19,"""Not Available""","""Not Available""","""Not Available""","""Not Available""","""Not Available""",19,"""Not Available""","""Not Available""",19
"""670320""","""EAST HOUSTON MEDICAL CENTER""","""15149 WALLISVILLE ROAD""","""HOUSTON""","""TX""",77049,"""HARRIS""","""(281) 988-9800""","""Acute Care Hospitals""","""Proprietary""","""Yes""",,,"""Not Available""",19,"""Not Available""","""Not Available""","""Not Available""","""Not Available""","""Not Available""",19,"""Not Available""","""Not Available""","""Not Available""","""Not Available""","""Not Available""",19,"""Not Available""","""Not Available""",19
"""670321""","""ADVANCED DALLAS HOSPITALS AND …","""7502 GREENVILLE AVENUE""","""DALLAS""","""TX""",75231,"""DALLAS""","""(214) 221-6000""","""Acute Care Hospitals""","""Proprietary""","""Yes""",,,"""Not Available""",5,"""Not Available""","""Not Available""","""Not Available""","""Not Available""","""Not Available""",5,"""Not Available""","""Not Available""","""Not Available""","""Not Available""","""Not Available""",5,"""Not Available""","""Not Available""",5
"""670322""","""LEGENT SURGICAL HOSPITAL PLANO""","""4100 MAPLESHADE LANE""","""PLANO""","""TX""",75075,"""COLLIN""","""(972) 265-1050""","""Acute Care Hospitals""","""Proprietary""","""Yes""",,,"""Not Available""",16,"""7""","""Not Available""","""Not Available""","""Not Available""","""Not Available""",5,"""8""","""Not Available""","""Not Available""","""Not Available""","""Not Available""",5,"""12""","""Not Available""",5


## Transforming column names

In [112]:
hos_gen_info = (
    hos_raw
    .rename(lambda col: col.lower().replace(' ', '_').replace('/', '_'))
)

# inspect output
print(hos_gen_info)

shape: (5_398, 39)
┌─────────────┬───────────────────────────────┬──────────────────────────────┬───────────┬───┬───────────────────────┬────────────────────────┬──────────────────────────────┬───────────────────┐
│ facility_id ┆ facility_name                 ┆ address                      ┆ city_town ┆ … ┆ pt_exp_group_footnote ┆ te_group_measure_count ┆ count_of_facility_te_measure ┆ te_group_footnote │
│ ---         ┆ ---                           ┆ ---                          ┆ ---       ┆   ┆ ---                   ┆ ---                    ┆ s                            ┆ ---               │
│ str         ┆ str                           ┆ str                          ┆ str       ┆   ┆ i64                   ┆ str                    ┆ ---                          ┆ i64               │
│             ┆                               ┆                              ┆           ┆   ┆                       ┆                        ┆ str                          ┆                   │
╞═════

In [121]:
hospital = (
    hos_gen_info
    .select('facility_name','hospital_overall_rating' , cs.contains('count').exclude('county_parish'))
)

print(hospital.head())

shape: (5, 21)
┌───────────────────────┬───────────────────────┬───────────────────────┬───────────────────────┬───┬───────────────────────┬──────────────────────┬──────────────────────┬──────────────────────┐
│ facility_name         ┆ hospital_overall_rati ┆ mort_group_measure_co ┆ count_of_facility_mor ┆ … ┆ pt_exp_group_measure_ ┆ count_of_facility_pt ┆ te_group_measure_cou ┆ count_of_facility_te │
│ ---                   ┆ ng                    ┆ unt                   ┆ t_measure…            ┆   ┆ count                 ┆ _exp_measu…          ┆ nt                   ┆ _measures            │
│ str                   ┆ ---                   ┆ ---                   ┆ ---                   ┆   ┆ ---                   ┆ ---                  ┆ ---                  ┆ ---                  │
│                       ┆ str                   ┆ str                   ┆ str                   ┆   ┆ str                   ┆ str                  ┆ str                  ┆ str                  │
╞═════════

In [124]:
hospital = (
    hospital
    .with_columns(cs.string().str.to_titlecase())
)

# inspect output
print(hospital)

shape: (5_398, 21)
┌───────────────────────┬───────────────────────┬───────────────────────┬───────────────────────┬───┬───────────────────────┬──────────────────────┬──────────────────────┬──────────────────────┐
│ facility_name         ┆ hospital_overall_rati ┆ mort_group_measure_co ┆ count_of_facility_mor ┆ … ┆ pt_exp_group_measure_ ┆ count_of_facility_pt ┆ te_group_measure_cou ┆ count_of_facility_te │
│ ---                   ┆ ng                    ┆ unt                   ┆ t_measure…            ┆   ┆ count                 ┆ _exp_measu…          ┆ nt                   ┆ _measures            │
│ str                   ┆ ---                   ┆ ---                   ┆ ---                   ┆   ┆ ---                   ┆ ---                  ┆ ---                  ┆ ---                  │
│                       ┆ str                   ┆ str                   ┆ str                   ┆   ┆ str                   ┆ str                  ┆ str                  ┆ str                  │
╞═════

In [129]:
(
    hospital
    .with_columns(hospital_overall_rating=pl.col('hospital_overall_rating').cast(pl.Int64, strict=False))
)

facility_name,hospital_overall_rating,mort_group_measure_count,count_of_facility_mort_measures,count_of_mort_measures_better,count_of_mort_measures_no_different,count_of_mort_measures_worse,safety_group_measure_count,count_of_facility_safety_measures,count_of_safety_measures_better,count_of_safety_measures_no_different,count_of_safety_measures_worse,readm_group_measure_count,count_of_facility_readm_measures,count_of_readm_measures_better,count_of_readm_measures_no_different,count_of_readm_measures_worse,pt_exp_group_measure_count,count_of_facility_pt_exp_measures,te_group_measure_count,count_of_facility_te_measures
str,i64,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str
"""Southeast Health Medical Cente…",3,"""7""","""7""","""1""","""6""","""0""","""8""","""7""","""2""","""5""","""0""","""11""","""11""","""1""","""8""","""2""","""8""","""8""","""12""","""10"""
"""Marshall Medical Centers""",2,"""7""","""6""","""0""","""5""","""1""","""8""","""7""","""0""","""7""","""0""","""11""","""9""","""0""","""8""","""1""","""8""","""8""","""12""","""12"""
"""North Alabama Medical Center""",1,"""7""","""7""","""0""","""6""","""1""","""8""","""7""","""3""","""4""","""0""","""11""","""9""","""0""","""7""","""2""","""8""","""8""","""12""","""11"""
"""Mizell Memorial Hospital""",1,"""7""","""3""","""0""","""2""","""1""","""8""","""2""","""0""","""2""","""0""","""11""","""7""","""0""","""7""","""0""","""8""","""8""","""12""","""7"""
"""Crenshaw Community Hospital""",,"""7""","""Not Available""","""Not Available""","""Not Available""","""Not Available""","""8""","""Not Available""","""Not Available""","""Not Available""","""Not Available""","""11""","""2""","""0""","""2""","""0""","""8""","""Not Available""","""12""","""6"""
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
"""Trinity Regional Hospital Sach…",,"""Not Available""","""Not Available""","""Not Available""","""Not Available""","""Not Available""","""Not Available""","""Not Available""","""Not Available""","""Not Available""","""Not Available""","""Not Available""","""Not Available""","""Not Available""","""Not Available""","""Not Available""","""Not Available""","""Not Available""","""Not Available""","""Not Available"""
"""East Houston Medical Center""",,"""Not Available""","""Not Available""","""Not Available""","""Not Available""","""Not Available""","""Not Available""","""Not Available""","""Not Available""","""Not Available""","""Not Available""","""Not Available""","""Not Available""","""Not Available""","""Not Available""","""Not Available""","""Not Available""","""Not Available""","""Not Available""","""Not Available"""
"""Advanced Dallas Hospitals And …",,"""Not Available""","""Not Available""","""Not Available""","""Not Available""","""Not Available""","""Not Available""","""Not Available""","""Not Available""","""Not Available""","""Not Available""","""Not Available""","""Not Available""","""Not Available""","""Not Available""","""Not Available""","""Not Available""","""Not Available""","""Not Available""","""Not Available"""
"""Legent Surgical Hospital Plano""",,"""7""","""Not Available""","""Not Available""","""Not Available""","""Not Available""","""8""","""Not Available""","""Not Available""","""Not Available""","""Not Available""","""11""","""Not Available""","""Not Available""","""Not Available""","""Not Available""","""8""","""Not Available""","""12""","""Not Available"""


In [138]:
hospital_df = (
    hospital
    .with_columns(
        (cs.contains('count') | pl.col('hospital_overall_rating')).cast(pl.Int64, strict=False)
    )
    
)

## Pulling out a single column

In [134]:
(
    hospital_df
    .get_column('hospital_overall_rating')
    .unique()
)

hospital_overall_rating
i64
""
1.0
2.0
3.0
4.0
5.0


## Filtering the data

In [141]:
stars = (
    hospital_df
    .filter(pl.col('hospital_overall_rating').is_not_null())
)


print(stars)

shape: (2_834, 21)
┌───────────────────────┬───────────────────────┬───────────────────────┬───────────────────────┬───┬───────────────────────┬──────────────────────┬──────────────────────┬──────────────────────┐
│ facility_name         ┆ hospital_overall_rati ┆ mort_group_measure_co ┆ count_of_facility_mor ┆ … ┆ pt_exp_group_measure_ ┆ count_of_facility_pt ┆ te_group_measure_cou ┆ count_of_facility_te │
│ ---                   ┆ ng                    ┆ unt                   ┆ t_measure…            ┆   ┆ count                 ┆ _exp_measu…          ┆ nt                   ┆ _measures            │
│ str                   ┆ ---                   ┆ ---                   ┆ ---                   ┆   ┆ ---                   ┆ ---                  ┆ ---                  ┆ ---                  │
│                       ┆ i64                   ┆ i64                   ┆ i64                   ┆   ┆ i64                   ┆ i64                  ┆ i64                  ┆ i64                  │
╞═════

In [146]:
(
    hospital_df
    .filter(pl.col('hospital_overall_rating').is_between(3, 5))
)

facility_name,hospital_overall_rating,mort_group_measure_count,count_of_facility_mort_measures,count_of_mort_measures_better,count_of_mort_measures_no_different,count_of_mort_measures_worse,safety_group_measure_count,count_of_facility_safety_measures,count_of_safety_measures_better,count_of_safety_measures_no_different,count_of_safety_measures_worse,readm_group_measure_count,count_of_facility_readm_measures,count_of_readm_measures_better,count_of_readm_measures_no_different,count_of_readm_measures_worse,pt_exp_group_measure_count,count_of_facility_pt_exp_measures,te_group_measure_count,count_of_facility_te_measures
str,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64
"""Southeast Health Medical Cente…",3,7,7,1,6,0,8,7,2,5,0,11,11,1,8,2,8,8,12,10
"""St. Vincent's East""",3,7,7,0,7,0,8,7,1,6,0,11,9,0,8,1,8,8,12,8
"""Shelby Baptist Medical Center""",3,7,7,0,7,0,8,7,1,6,0,11,9,0,8,1,8,8,12,6
"""University Of Alabama Hospital""",3,7,7,1,5,1,8,8,2,5,1,11,11,1,9,1,8,8,12,10
"""Cullman Regional Medical Cente…",3,7,6,0,5,1,8,7,1,6,0,11,8,0,8,0,8,8,12,11
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
"""Baylor Scott & White Medical C…",5,7,4,0,4,0,8,3,1,2,0,11,9,1,8,0,8,8,12,11
"""Houston Methodist The Woodland…",5,7,7,4,3,0,8,8,4,4,0,11,11,0,11,0,8,8,12,10
"""Texas Health Hospital Frisco""",3,7,2,0,2,0,8,5,0,5,0,11,5,1,4,0,8,8,12,8
"""Methodist Midlothian Medical C…",4,7,,,,,8,3,0,3,0,11,3,0,3,0,8,8,12,7


In [150]:
(
    hospital_df
    .filter(pl.col('hospital_overall_rating') >= 3, pl.col('mort_group_measure_count') >= 5)
)

facility_name,hospital_overall_rating,mort_group_measure_count,count_of_facility_mort_measures,count_of_mort_measures_better,count_of_mort_measures_no_different,count_of_mort_measures_worse,safety_group_measure_count,count_of_facility_safety_measures,count_of_safety_measures_better,count_of_safety_measures_no_different,count_of_safety_measures_worse,readm_group_measure_count,count_of_facility_readm_measures,count_of_readm_measures_better,count_of_readm_measures_no_different,count_of_readm_measures_worse,pt_exp_group_measure_count,count_of_facility_pt_exp_measures,te_group_measure_count,count_of_facility_te_measures
str,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64
"""Southeast Health Medical Cente…",3,7,7,1,6,0,8,7,2,5,0,11,11,1,8,2,8,8,12,10
"""St. Vincent's East""",3,7,7,0,7,0,8,7,1,6,0,11,9,0,8,1,8,8,12,8
"""Shelby Baptist Medical Center""",3,7,7,0,7,0,8,7,1,6,0,11,9,0,8,1,8,8,12,6
"""University Of Alabama Hospital""",3,7,7,1,5,1,8,8,2,5,1,11,11,1,9,1,8,8,12,10
"""Cullman Regional Medical Cente…",3,7,6,0,5,1,8,7,1,6,0,11,8,0,8,0,8,8,12,11
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
"""Baylor Scott & White Medical C…",5,7,4,0,4,0,8,3,1,2,0,11,9,1,8,0,8,8,12,11
"""Houston Methodist The Woodland…",5,7,7,4,3,0,8,8,4,4,0,11,11,0,11,0,8,8,12,10
"""Texas Health Hospital Frisco""",3,7,2,0,2,0,8,5,0,5,0,11,5,1,4,0,8,8,12,8
"""Methodist Midlothian Medical C…",4,7,,,,,8,3,0,3,0,11,3,0,3,0,8,8,12,7


In [153]:
(
    hospital_df
    .filter((pl.col('hospital_overall_rating') >= 3) & (pl.col('mort_group_measure_count') >= 5))
)

facility_name,hospital_overall_rating,mort_group_measure_count,count_of_facility_mort_measures,count_of_mort_measures_better,count_of_mort_measures_no_different,count_of_mort_measures_worse,safety_group_measure_count,count_of_facility_safety_measures,count_of_safety_measures_better,count_of_safety_measures_no_different,count_of_safety_measures_worse,readm_group_measure_count,count_of_facility_readm_measures,count_of_readm_measures_better,count_of_readm_measures_no_different,count_of_readm_measures_worse,pt_exp_group_measure_count,count_of_facility_pt_exp_measures,te_group_measure_count,count_of_facility_te_measures
str,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64
"""Southeast Health Medical Cente…",3,7,7,1,6,0,8,7,2,5,0,11,11,1,8,2,8,8,12,10
"""St. Vincent's East""",3,7,7,0,7,0,8,7,1,6,0,11,9,0,8,1,8,8,12,8
"""Shelby Baptist Medical Center""",3,7,7,0,7,0,8,7,1,6,0,11,9,0,8,1,8,8,12,6
"""University Of Alabama Hospital""",3,7,7,1,5,1,8,8,2,5,1,11,11,1,9,1,8,8,12,10
"""Cullman Regional Medical Cente…",3,7,6,0,5,1,8,7,1,6,0,11,8,0,8,0,8,8,12,11
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
"""Baylor Scott & White Medical C…",5,7,4,0,4,0,8,3,1,2,0,11,9,1,8,0,8,8,12,11
"""Houston Methodist The Woodland…",5,7,7,4,3,0,8,8,4,4,0,11,11,0,11,0,8,8,12,10
"""Texas Health Hospital Frisco""",3,7,2,0,2,0,8,5,0,5,0,11,5,1,4,0,8,8,12,8
"""Methodist Midlothian Medical C…",4,7,,,,,8,3,0,3,0,11,3,0,3,0,8,8,12,7


In [157]:
(
    stars
    # .select('facility_name', 'hospital_overall_rating')
    .group_by('hospital_overall_rating')
    .agg(total=pl.col('facility_name').count())
    .sort('total', descending=True)
)

hospital_overall_rating,total
i64,u32
3,824
4,763
2,590
5,381
1,276


In [168]:
hospital_quality_metrics_1 = (
    hos_raw
    # clean and transform column names
    .rename(lambda col: col.lower().replace(' ', '_').replace('/', '_'))
    # convert string row values to title case
    .with_columns(cs.string().str.to_titlecase())
    # working with columns
    .select(cs.all() - (cs.ends_with('note') | pl.col('facility_id') | pl.col('address')))
)

# inspect output
print(hospital_quality_metrics_1)

shape: (5_398, 31)
┌─────────────────────────────────┬───────────┬───────┬──────────┬───┬────────────────────────────┬─────────────────────────────────┬────────────────────────┬───────────────────────────────┐
│ facility_name                   ┆ city_town ┆ state ┆ zip_code ┆ … ┆ pt_exp_group_measure_count ┆ count_of_facility_pt_exp_measu… ┆ te_group_measure_count ┆ count_of_facility_te_measures │
│ ---                             ┆ ---       ┆ ---   ┆ ---      ┆   ┆ ---                        ┆ ---                             ┆ ---                    ┆ ---                           │
│ str                             ┆ str       ┆ str   ┆ i64      ┆   ┆ str                        ┆ str                             ┆ str                    ┆ str                           │
╞═════════════════════════════════╪═══════════╪═══════╪══════════╪═══╪════════════════════════════╪═════════════════════════════════╪════════════════════════╪═══════════════════════════════╡
│ Southeast Health Medical

In [182]:
hospital_quality_metrics_2 = (
    hos_raw
    # clean and transform column names
    .rename(lambda col: col.lower().replace(' ', '_').replace('/', '_'))
    # convert string row values to title case
    .with_columns(cs.string().str.to_titlecase())
    # select desired columns
    .select(
        'facility_name', 'state', 'hospital_type', 'emergency_services',
        'meets_criteria_for_promoting_interoperability_of_ehrs',
        'meets_criteria_for_birthing_friendly_designation',
        'hospital_overall_rating',
        # select all column names containing 'count' and remove county_parish; convert to integer
        cs.contains('count').exclude('county_parish').cast(pl.Int64, strict=False)
    )
    # clean and trasform column names further
    .rename(lambda col: col.replace('count_of_', '').replace('_count', ''))
    .rename(
        {
        'facility_name': 'hospital',
        'meets_criteria_for_promoting_interoperability_of_ehrs': 'promoting_interoperability',
        'meets_criteria_for_birthing_friendly_designation': 'birthing_friendly'
        }
    )
)

# inspect output
print(hospital_quality_metrics_2.head())

shape: (5, 26)
┌─────────────────────────────────┬───────┬──────────────────────┬────────────────────┬───┬──────────────────────┬──────────────────────────┬──────────────────┬──────────────────────┐
│ hospital                        ┆ state ┆ hospital_type        ┆ emergency_services ┆ … ┆ pt_exp_group_measure ┆ facility_pt_exp_measures ┆ te_group_measure ┆ facility_te_measures │
│ ---                             ┆ ---   ┆ ---                  ┆ ---                ┆   ┆ ---                  ┆ ---                      ┆ ---              ┆ ---                  │
│ str                             ┆ str   ┆ str                  ┆ str                ┆   ┆ i64                  ┆ i64                      ┆ i64              ┆ i64                  │
╞═════════════════════════════════╪═══════╪══════════════════════╪════════════════════╪═══╪══════════════════════╪══════════════════════════╪══════════════════╪══════════════════════╡
│ Southeast Health Medical Cente… ┆ Al    ┆ Acute Care Hospitals 