### Hospital Financial Data Preparation

This notebook cleans and validates hospital level financial and operational data.
The output is a clean dataset used for margin and cost driver analysis.

In [1]:
import pandas as pd
import numpy as np

pd.set_option("display.max_columns", None)
pd.set_option("display.float_format", lambda x: f"{x:,.2f}")


Load raw hospital data from Excel.


In [3]:
file_path = "data/hospital_data.xlsx"
df = pd.read_excel("hafd2024pivot.xlsm", sheet_name="Data")

df.shape


(438, 247)

In [5]:
df.head()

Unnamed: 0,FAC_NO,FAC_NAME,BEG_DATE,END_DATE,DAY_PER,county name,HSA name,HFPA,TYPE_CNTRL,SYSTEM (≥ 3 HOSP),TYPE_HOSP,TYPE_CARE,TEACH_RURL,DSH hosp,OWNER,lic bed size,ltc range,PHONE,ADDRESS,CITY,ZIP_CODE,CEO,ER_DESIG,BED_LIC,lic bed days,BED_AVL,avail bed days,BED_STF,occ beds,adj occ beds,DAY_MCAR_TR,DAY_MCAR_MC,DAY_MCAL_TR,DAY_MCAL_MC,DAY_CNTY,DAY_THRD_TR,DAY_THRD_MC,DAY_OTH_IND,DAY_OTH,DAY_TOT,adj pat days,mcar t adj day,mcar mc adj day,mcal t adj day,mcal mc adj day,co adj day,third t adj day,third mc adj day,oth ind adj day,oth adj day,DIS_MCAR_TR,DIS_MCAR_MC,DIS_MCAL_TR,DIS_MCAL_MC,DIS_CNTY,DIS_THRD_TR,DIS_THRD_MC,DIS_OTH_IND,DIS_OTH,DIS_TOT,BED_ACUTE,BED_PSYCH,BED_CHEM,BED_REHAB,BED_LTC,BED_RESDNT,acute bed days,psych bed days,chem bed days,rehab bed days,ltc bed days,res bed days,DAY_ACUTE,DAY_PSYCH,DAY_CHEM,DAY_REHAB,DAY_LTC,% ltc days,DAY_RESDNT,DIS_ACUTE,DIS_PSYCH,DIS_CHEM,DIS_REHAB,DIS_LTC,DIS_RESDNT,BAS_NURSRY,DAY_NURSRY,DIS_NURSRY,VIS_MCAR_TR,VIS_MCAR_MC,VIS_MCAL_TR,VIS_MCAL_MC,VIS_CNTY,VIS_THRD_TR,VIS_THRD_MC,VIS_OTH_IND,VIS_OTH,VIS_TOT,VIS_ER,VIS_CLIN,VIS_HOME,VIS_REF_OP,DAY_PIPS,OP_ROOM,OP_MIN_IP,OP_MIN_OP,SURG_IP,SURG_OP,NAT_BIRTHS,C_SECTIONS,GR_PT_REV,DED_FR_REV,TOT_CAP_REV,NET_PT_REV,OTH_OP_REV,total op rev,TOT_OP_EXP,NET_FRM_OP,NONOP_REV,NONOP_EXP,INC_TAX,EXT_ITEM,NET_INCOME,GR_IP_MCAR_TR,GR_IP_MCAR_MC,GR_IP_MCAL_TR,GR_IP_MCAL_MC,GR_IP_CNTY,GR_IP_THRD_TR,GR_IP_THRD_MC,GR_IP_OTH_IND,GR_IP_OTH,GR_IP_TOT,GR_OP_MCAR_TR,GR_OP_MCAR_MC,GR_OP_MCAL_TR,GR_OP_MCAL_MC,GR_OP_CNTY,GR_OP_THRD_TR,GR_OP_THRD_MC,GR_OP_OTH_IND,GR_OP_OTH,GR_OP_TOT,C_ADJ_MCAR_TR,C_ADJ_MCAR_MC,C_ADJ_MCAL_TR,C_ADJ_MCAL_MC,DISP_855,C_ADJ_CNTY,C_ADJ_THRD_TR,C_ADJ_THRD_MC,BAD_DEBT,CHAR_HB,CHAR_OTH,SUB_INDGNT,DED_OTH,CAP_REV_MCAR,CAP_REV_MCAL,CAP_REV_CNTY,CAP_REV_THRD,NETRV_MCAR_TR,NETRV_MCAR_MC,NETRV_MCAL_TR,NETRV_MCAL_MC,NETRV_CNTY,NETRV_THRD_TR,NETRV_THRD_MC,NETRV_OTH_IND,NETRV_OTH,DISP_TRNFR,INTER_TFR,CONTRIBTNS,INC_INVEST,DIST_REV,CNTY_APPRO,EXP_DLY,EXP_AMB,EXP_ANC,EXP_PIP,EXP_POP,EXP_RES,EXP_ED,EXP_GEN,EXP_FISC,EXP_ADM,EXP_UNASSG,EXP_SAL,EXP_BEN,EXP_PHYS,EXP_OTHPRO,EXP_SUPP,EXP_PURCH,EXP_DEPRE,EXP_LEASES,EXP_INSUR,EXP_INTRST,EXP_OTH,CUR_ASST,ASST_LIMTD,NET_PPE,CONST_PROG,INV_OTH,INTAN_ASST,TOT_ASST,CUR_LIAB,DEF_CRED,NET_LTDEBT,EQUITY,LIAB_EQ,CASH,ACCTS_REC,ALLOW_UNCOLL,TOT_PPE,MORT_PAY,BOND_PAY,INTER_REC,INTER_PAY,PROD_HRS,PAID_HRS,HOSP_FTE,STDNT_FTE,PRD_HR_MGT,PRD_HR_TCH,PRD_HR_RN,PRD_HR_LVN,PRD_HR_AID,PRD_HR_CLR,PRD_HR_ENV,PRD_HR_OTH,CNT_HR_RN,CNT_HR_OTH,PRD_HR_DLY,PRD_HR_AMB,PRD_HR_ANC,PRD_HR_ED,PRD_HR_GEN,PRD_HR_FIS,PRD_HR_ADM,exp mcare tr,exp mcare mc,exp mcal tr,exp mcal mc,exp cip,exp 3rd tr,exp 3rd mc,exp other ind,exp other
0,106580996,ADVENTIST HEALTH AND RIDEOUT,2024-01-01,2024-12-31,366,Yuba,02 - GOLDEN EMPIRE,227,Non-Profit,ADVENTIST HEALTH SYSTEMS,Comparable,General Acute,,DSH,ADVENTIST HEALTH,200 - 299,No LTC,530-749-4545,726 4TH STREET,MARYSVILLE,95901,CHRIS CHAMPLIN,3,221,80886,221,80886,151,147.89,269.05,23029,6401,2783,14208,0,2073,4460,91,1081,54126,98473.06,37620.01,10152.33,4297.55,29205.43,0.0,3574.58,11019.82,232.01,2460.95,4239,1181,534,3346,0,532,1254,23,263,11372,221,0,0,0,0,0,80886,0,0,0,0,0,54126,0,0,0,0,0.0,0,11372,0,0,0,0,0,14,2688,1438,56128,14863,6837,63239,0,13876,28299,610,5270,189122,73221,14000,0,103860,0,6,230655,258390,1492,2582,1320,560,2257980642,1775129360,0,482851282,12788204,495639486,525727747,-30088261,1293142,19948822,0,0,-48743941,531428648,143954791,57581847,307510385,0,69234499,110640254,1799163,18955929,1241105516,336709489,84365136,31336918,324596322,0,50150251,162730966,2787871,24198173,1016875126,680582369,150827262,73492358,522443290,-1054326,0,89375197,204654947,11680517,0,13604871,0,29522875,0,0,0,0,183705533,76345628,15722692,103294772,0,28327610,61919170,1,13535876,0,-10282003,0,15347,0,0,100566904,48712983,174503039,0,0,0,0,54313300,8038230,88720625,50872666,180714442,59700766,43212586,4087808,70780954,66034712,13035374,6662952,1811008,26732503,52954642,159130127,0,287919481,3835801,3703790,3420921,458010120,271009841,0,240701145,-53700866,458010120,188772,375334363,-301068087,583256567,0,226870131,8814864,29633227,2434976,2788984,1280,0,174854,595198,1054901,0,235371,200654,46584,0,0,127414,724202,389829,745482,0,274766,20867,279830,197212664.7,51866839.26,20199442.74,143594023.52,0.0,27120320.68,62101023.39,1042024.49,9803204.22
1,106150788,ADVENTIST HEALTH BAKERSFIELD,2024-01-01,2024-12-31,366,Kern,09 - CENTRAL,617,Non-Profit,ADVENTIST HEALTH SYSTEMS,Comparable,General Acute,,Non-DSH,ADVENTIST HEALTH,300 - 499,No LTC,661-395-3000,2615 CHESTER AVENUE,BAKERSFIELD,93301,JASON WELLS,0,301,110166,301,110166,198,198.63,384.2,17196,17343,1997,16351,0,5226,13647,108,831,72699,140617.32,31025.02,29331.86,2609.38,32188.12,0.0,10144.94,33947.78,211.52,1693.67,3771,4176,510,4196,0,123,5029,27,113,17945,301,0,0,0,0,0,110166,0,0,0,0,0,72699,0,0,0,0,0.0,0,17945,0,0,0,0,0,0,0,0,54886,42494,3436,54229,0,14014,66615,403,5716,241793,75608,129318,0,48350,0,4,487230,755460,3427,7118,1634,721,3847929036,3149464298,0,698464738,6127516,704592254,704834525,-242271,18318494,23273315,0,0,-5197092,480473043,505830770,54541270,427222804,0,132916824,364444544,1680343,22265470,1989375068,386396450,349670474,16725012,413795986,0,125107111,542134361,1610569,23114005,1858553968,721779666,728087704,53897869,701213631,0,0,207731478,672764430,12717254,0,5349268,0,45922998,0,0,0,0,139223180,121763170,17052596,137648801,0,48313476,217315033,0,17148482,0,0,11499645,4359786,0,0,109687096,58130118,284089603,0,0,201453,216989,77669465,7838539,143726180,23275082,195824566,87356837,58597046,13504436,147942720,120292983,16389210,6663075,2778853,6018053,49466746,477045267,0,212663915,2990093,20514539,287182,713500996,365386488,0,189032918,159081590,713500996,90072238,556082396,-463934094,457826024,487984,0,203669525,481442616,3442116,4165462,1954,0,225763,1263194,1112411,35151,227066,289419,152808,36150,87567,12587,924792,565900,1075160,6753,482486,42800,344225,157406174.85,155341928.03,12940532.5,152712203.83,0.0,46851989.78,164616610.44,597563.85,8240005.71
2,106171049,ADVENTIST HEALTH CLEARLAKE,2024-01-01,2024-12-31,366,Lake,01 - NORTHERN CALIFORNIA,115,Non-Profit,ADVENTIST HEALTH SYSTEMS,Comparable,General Acute,Rural,DSH,ADVENTIST HEALTH,1 - 49,0.1% - 14.9%,707-995-5705,15630 18TH AVENUE,CLEARLAKE,95422,CHARLES KASSIS,0,25,9150,25,9150,18,15.66,75.5,2378,928,25,1554,0,365,347,13,121,5731,27634.42,9171.05,3326.94,651.79,8975.55,0.0,1165.83,3380.79,84.1,497.54,560,214,21,480,0,9,189,4,20,1497,23,0,0,0,2,0,8418,0,0,0,732,0,5523,0,0,0,208,0.04,0,1467,0,0,0,30,0,0,0,0,64694,18789,29330,78459,0,3852,35329,527,2383,233363,21091,211175,0,0,0,2,18420,143700,177,2189,117,27,556262782,379210974,6280305,183332113,6773931,190106044,209729803,-19623759,3442900,830279,0,0,-17011138,47151574,17219828,633538,31666764,0,7863921,7623273,299846,2902540,115361284,134694230,44514337,15883844,151233239,0,17253883,66649667,1639981,9032317,440901498,115131482,41841862,11227859,136345410,-710457,0,13992459,40740588,8892569,0,2679161,0,9070041,0,6280305,0,0,61779132,19484985,5103837,51624975,0,10426453,31044351,-1,3868381,0,17128904,0,3235606,0,0,15422319,76406276,40000405,0,0,0,0,14713229,13011522,40682203,9493849,59368619,32815623,35304696,3930722,13076555,46802610,3359415,4969223,728340,2665817,6708183,99507777,0,42447574,5791940,0,0,147747291,29402837,602456,75291290,42450708,147747291,58739279,83648464,-64190757,103194047,0,59133080,2041466,18891564,1248470,1431103,670,0,114972,145007,134385,68211,272745,475193,0,0,18135,19822,92458,664748,210167,0,102271,60014,118812,66347551.76,22524086.99,6026467.66,66732182.7,0.0,9164384.13,27098935.6,707757.72,4354505.44
3,106150706,ADVENTIST HEALTH DELANO,2024-01-01,2024-12-31,366,Kern,09 - CENTRAL,617,Non-Profit,ADVENTIST HEALTH SYSTEMS,Comparable,General Acute,,DSH,ADVENTIST HEALTH,150 - 199,75.0% - 100.0%,661-725-4800,1401 GARCES HIGHWAY,DELANO,93215,JASON WELLS,0,156,57096,156,57096,69,64.2,126.29,5567,1155,1583,14109,0,647,351,9,75,23496,46223.22,7997.2,2428.01,2386.7,27621.01,0.0,2388.97,1844.69,50.64,495.71,557,181,232,1362,0,38,230,1,13,2614,97,0,0,0,59,0,35502,0,0,0,21594,0,5461,0,0,0,18035,0.77,0,1919,0,0,0,695,0,16,831,552,8352,4418,5124,63546,0,5612,12934,309,3197,103492,29023,28615,0,45823,0,4,26655,69435,238,862,401,181,263969813,163266365,0,100703448,650666,101354114,111157919,-9803805,17183936,7159780,0,0,220351,33642095,7986571,9640403,74739780,0,3801065,3572464,83411,714281,134180070,14685982,8802611,4894471,71577355,0,10233931,15202750,385948,4006695,129789743,34636722,11138571,370368,96361257,-3305794,0,8248612,10000217,991328,0,668461,0,4156623,0,0,0,0,13263860,5545420,16092819,50570277,0,5645539,8482153,-1,1103381,0,0,3018970,8169820,0,0,19727926,12301810,29180362,0,0,0,0,14636649,3963875,25697622,5649675,34239848,13475064,9324105,1857824,8754308,25346302,4281985,536411,508444,1897959,10935669,157490125,0,39911709,663375,0,219300,198284509,15035078,23965,42762206,140463260,198284509,110234712,47628474,-34249212,135919639,0,14590196,1554009,33503170,730774,822646,389,0,28797,124311,183388,57045,184408,93433,37364,8828,11896,1304,246275,140596,201418,0,57638,35716,49131,20231870.35,7028555.13,6084820.76,61253612.57,0.0,5875553.86,7859979.52,196490.55,1976370.26
4,106190323,ADVENTIST HEALTH GLENDALE,2024-01-01,2024-12-31,366,Los Angeles,11 - LOS ANGELES COUNTY,909,Non-Profit,ADVENTIST HEALTH SYSTEMS,Comparable,General Acute,,Non-DSH,ADVENTIST HEALTH,500 +,0.1% - 14.9%,818-409-8000,1509 WILSON TERRACE,GLENDALE,91206,ALICE ISSAI,0,515,188490,441,161406,313,310.87,497.52,43062,11901,16052,15726,0,13948,10479,43,2566,113777,182091.83,60630.37,16853.38,20685.86,31943.53,0.0,16478.31,23657.22,141.84,4074.95,7489,2375,1251,4028,0,1113,2527,5,248,19036,367,80,0,28,40,0,134322,29280,0,10248,14640,0,74129,19989,0,6517,13142,0.12,0,16329,1236,0,531,940,0,30,3108,1842,75888,22958,6166,64276,0,2329,53148,322,4872,229959,61431,36199,0,134129,0,8,477150,397200,3087,3724,1119,786,3867036948,3303944699,0,563092249,9726353,572818602,615072638,-42254036,37666473,40238908,0,0,-44826471,1057432099,358852479,163749632,420096151,0,92350225,270158801,888176,52725083,2416252646,431409609,149329671,47270876,433226572,0,16753246,339747346,2041637,31005345,1450784302,1313523794,441665055,165063070,748894686,0,0,83749268,471837894,22085132,0,5730228,0,51395572,0,0,0,0,166729882,64333347,38945979,102250363,0,20536273,127840357,1,42456047,0,0,8901839,385685,0,0,128410202,40384174,175937589,0,0,684024,9129012,72814242,10204952,139809215,37699228,190712332,82187425,33249008,17151312,87586255,99249944,14495909,5011180,6591949,14550050,64287274,156406192,0,165614131,12834447,0,5636140,340490910,214153587,0,235646807,-109309484,340490910,1669408,653022110,-565764675,541668098,0,0,13711926,245249307,3870926,4364388,1937,22,312342,1031773,1130403,21808,487356,303988,84817,162787,64579,271073,1241458,534161,874195,127552,702123,17403,374034,233063404.62,79550875.97,33033167.85,133579209.93,0.0,17079066.41,95474758.9,458633.17,13107168.15


In [10]:
df.info

<bound method DataFrame.info of         FAC_NO                                           FAC_NAME   BEG_DATE  \
0    106580996                       ADVENTIST HEALTH AND RIDEOUT 2024-01-01   
1    106150788                       ADVENTIST HEALTH BAKERSFIELD 2024-01-01   
2    106171049                         ADVENTIST HEALTH CLEARLAKE 2024-01-01   
3    106150706                            ADVENTIST HEALTH DELANO 2024-01-01   
4    106190323                          ADVENTIST HEALTH GLENDALE 2024-01-01   
..         ...                                                ...        ...   
433  106444013                     WATSONVILLE COMMUNITY HOSPITAL 2024-01-01   
434  106301379                        WEST ANAHEIM MEDICAL CENTER 2024-01-01   
435  106190883                   WHITTIER HOSPITAL MEDICAL CENTER 2023-07-01   
436  106571086                         WOODLAND MEMORIAL HOSPITAL 2023-07-01   
437  106380939  ZUCKERBERG SAN FRANCISCO GENERAL HOSPITAL & TR... 2023-07-01   

      E

Keep only columns needed for cost, volume, and margin analysis.


In [11]:
cols_to_keep = [
    "FAC_NO",
    "FAC_NAME",
    "county name",
    "TYPE_HOSP",
    "TEACH_RURL",
    "lic bed size",
    "BED_LIC",
    "BED_STF",
    "occ beds",
    "DAY_TOT",
    "DIS_TOT",
    "TOT_OP_EXP",
    "NET_PT_REV",
    "NET_INCOME"
]

df_clean = df[cols_to_keep].copy()
df_clean.shape

(438, 14)

In [12]:
df_clean.columns = [
    "facility_id",
    "facility_name",
    "county",
    "hospital_type",
    "teaching_rural",
    "bed_size_category",
    "licensed_beds",
    "staffed_beds",
    "occupied_beds",
    "patient_days",
    "discharges",
    "operating_expense",
    "net_patient_revenue",
    "net_income"
]

df_clean.head()


Unnamed: 0,facility_id,facility_name,county,hospital_type,teaching_rural,bed_size_category,licensed_beds,staffed_beds,occupied_beds,patient_days,discharges,operating_expense,net_patient_revenue,net_income
0,106580996,ADVENTIST HEALTH AND RIDEOUT,Yuba,Comparable,,200 - 299,221,151,147.89,54126,11372,525727747,482851282,-48743941
1,106150788,ADVENTIST HEALTH BAKERSFIELD,Kern,Comparable,,300 - 499,301,198,198.63,72699,17945,704834525,698464738,-5197092
2,106171049,ADVENTIST HEALTH CLEARLAKE,Lake,Comparable,Rural,1 - 49,25,18,15.66,5731,1497,209729803,183332113,-17011138
3,106150706,ADVENTIST HEALTH DELANO,Kern,Comparable,,150 - 199,156,69,64.2,23496,2614,111157919,100703448,220351
4,106190323,ADVENTIST HEALTH GLENDALE,Los Angeles,Comparable,,500 +,515,313,310.87,113777,19036,615072638,563092249,-44826471


Convert numeric columns and handle invalid values.


In [13]:
numeric_cols = [
    "licensed_beds",
    "staffed_beds",
    "occupied_beds",
    "patient_days",
    "discharges",
    "operating_expense",
    "net_patient_revenue",
    "net_income"
]

for col in numeric_cols:
    df_clean[col] = pd.to_numeric(df_clean[col], errors="coerce")

df_clean.dtypes


facility_id              int64
facility_name           object
county                  object
hospital_type           object
teaching_rural          object
bed_size_category       object
licensed_beds            int64
staffed_beds             int64
occupied_beds          float64
patient_days             int64
discharges               int64
operating_expense        int64
net_patient_revenue      int64
net_income               int64
dtype: object

In [14]:
df_clean.head()

Unnamed: 0,facility_id,facility_name,county,hospital_type,teaching_rural,bed_size_category,licensed_beds,staffed_beds,occupied_beds,patient_days,discharges,operating_expense,net_patient_revenue,net_income
0,106580996,ADVENTIST HEALTH AND RIDEOUT,Yuba,Comparable,,200 - 299,221,151,147.89,54126,11372,525727747,482851282,-48743941
1,106150788,ADVENTIST HEALTH BAKERSFIELD,Kern,Comparable,,300 - 499,301,198,198.63,72699,17945,704834525,698464738,-5197092
2,106171049,ADVENTIST HEALTH CLEARLAKE,Lake,Comparable,Rural,1 - 49,25,18,15.66,5731,1497,209729803,183332113,-17011138
3,106150706,ADVENTIST HEALTH DELANO,Kern,Comparable,,150 - 199,156,69,64.2,23496,2614,111157919,100703448,220351
4,106190323,ADVENTIST HEALTH GLENDALE,Los Angeles,Comparable,,500 +,515,313,310.87,113777,19036,615072638,563092249,-44826471


In [15]:
df_clean.isna().sum()


facility_id              0
facility_name            0
county                   2
hospital_type            0
teaching_rural         338
bed_size_category        2
licensed_beds            0
staffed_beds             0
occupied_beds            0
patient_days             0
discharges               0
operating_expense        0
net_patient_revenue      0
net_income               0
dtype: int64

Standardize missing categorical values.


In [16]:
df_clean["county"] = df_clean["county"].fillna("Unknown")
df_clean["bed_size_category"] = df_clean["bed_size_category"].fillna("Unknown")
df_clean["teaching_rural"] = df_clean["teaching_rural"].fillna("Non-Teaching")

In [17]:
df_clean.describe()

Unnamed: 0,facility_id,licensed_beds,staffed_beds,occupied_beds,patient_days,discharges,operating_expense,net_patient_revenue,net_income
count,438.0,438.0,438.0,438.0,438.0,438.0,438.0,438.0,438.0
mean,106285623.14,218.53,148.1,140.27,50929.27,7677.73,479345593.45,467574434.07,40629398.27
std,136355.93,219.09,180.46,168.77,61872.31,8575.97,1120944173.57,1080874419.91,167580959.9
min,106010735.0,0.0,0.0,0.0,0.0,0.0,4614654.0,0.0,-309897047.0
25%,106190394.0,64.0,41.0,37.22,13546.25,932.75,52933003.25,49229818.5,-2226205.0
50%,106301290.0,153.0,96.0,87.6,31202.5,4066.5,175928872.0,171871521.0,3461539.5
75%,106374084.25,308.5,197.75,186.89,67536.75,12076.25,561685730.0,548821133.75,34697527.5
max,106580996.0,1500.0,1450.0,1339.14,490125.0,45014.0,14465420610.0,14087743886.0,2123717585.0


Check ranges and flag obvious data issues.


In [18]:
df_clean["flag_zero_patient_days"] = df_clean["patient_days"] == 0
df_clean["flag_zero_beds"] = df_clean["licensed_beds"] == 0
df_clean["flag_negative_income"] = df_clean["net_income"] < 0

df_clean[
    ["flag_zero_patient_days", "flag_zero_beds", "flag_negative_income"]
].sum()


flag_zero_patient_days      2
flag_zero_beds              2
flag_negative_income      136
dtype: int64

Save cleaned dataset for downstream analysis.


In [20]:
df_clean.to_csv("hospital_clean_base.csv", index=False)
