In [30]:
## This notebook compares Palliative Care spending (2024) and Total Medicare spending
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import statsmodels.api as sm

# Load the source table once; later cells read from `df`.
DATA_PATH = 'Foundry Data.csv'
df = pd.read_csv(DATA_PATH)

In [88]:
from statsmodels.stats.outliers_influence import variance_inflation_factor
import numpy as np
import pandas as pd
import statsmodels.api as sm

from sklearn.preprocessing import StandardScaler

# ---------------------------------------------------------------------------
# OLS of hospice-adjusted total Medicare spending on hospice spending,
# a set of county-level controls, and STATE fixed effects.
#
# Outputs left in the namespace: X, Y, Y_adjusted, combined, state_dummies,
# low_freq_states, numeric_columns, scaler, X_scaled, vif_data, model, and the
# new df column 'TOT_MDCR_STDZD_PYMT_PC_ADJ'.
# ---------------------------------------------------------------------------

# A state dummy is kept only if at least this many rows belong to that state.
MIN_STATE_OBS = 10

# Prepare data
predictor_columns = [
    'HOSPC_MDCR_STDZD_PYMT_PC',
    'EstimateHouseholdsMedian_income_dollars',
    'AMBLNC_MDCR_STDZD_PYMT_PC',
    'TRTMNTS_MDCR_PYMT_PC',
    'BENES_WTH_PTAPTB_CNT',
    'SNF_CVRD_STAYS_PER_1000_BENES',
    'HH_EPISODES_PER_1000_BENES',
    'ASC_MDCR_STDZD_PYMT_PER_USER',
    'BENE_AVG_RISK_SCRE',
    'mode_medicare_pricing_for_new_patient',
    'FQHC_RHC_MDCR_STDZD_PYMT_PC',
]
X = df[predictor_columns]

Y = df['TOT_MDCR_STDZD_PYMT_PC']

# Coerce once so every later use of these two columns sees the same numeric
# values that the design matrix and outcome see (non-numeric entries -> NaN).
hospc_numeric = pd.to_numeric(df['HOSPC_MDCR_STDZD_PYMT_PC'], errors='coerce')
total_numeric = pd.to_numeric(df['TOT_MDCR_STDZD_PYMT_PC'], errors='coerce')

# Calculate the standard deviation of HOSPC_MDCR_STDZD_PYMT_PC
# NOTE(review): this is the std over all rows of df (pandas default ddof=1),
# not over the post-dropna estimation sample that StandardScaler is fit on
# below, so it will not exactly match the scaler's own scale_ value.
hospc_std = hospc_numeric.std()
print(f"Standard Deviation of HOSPC_MDCR_STDZD_PYMT_PC: {hospc_std}")

# Add STATE fixed effects (first category dropped as the reference level)
state_dummies = pd.get_dummies(df['STATE'], prefix="STATE", drop_first=True).astype(int)

X = pd.concat([X, state_dummies], axis=1)

# Drop dummies for sparsely represented states; their rows stay in the sample
# and are therefore absorbed into the reference category.
state_counts = state_dummies.sum(axis=0)
low_freq_states = state_counts[state_counts < MIN_STATE_OBS].index
X = X.drop(columns=low_freq_states)

# Ensure numeric and handle missing values.
# Infinite values are treated as missing in BOTH the predictors and the
# outcome, so a non-finite outcome can no longer slip through dropna().
X = X.apply(pd.to_numeric, errors='coerce').replace([np.inf, -np.inf], np.nan)
Y = pd.to_numeric(Y, errors='coerce').replace([np.inf, -np.inf], np.nan)

# Drop rows with missing values (listwise deletion across predictors + outcome)
combined = pd.concat([X, Y], axis=1).dropna()
if combined.empty:
    raise ValueError("No complete rows remain after dropping missing values; cannot fit the model.")
X = combined[X.columns]
Y = combined[Y.name]

# Outcome used by the model: total spending net of hospice spending.
# Built from the coerced columns so it is consistent with X and Y above.
# NOTE(review): HOSPC_MDCR_STDZD_PYMT_PC is subtracted from the outcome here
# and is also a regressor in X -- confirm this is the intended specification.
df['TOT_MDCR_STDZD_PYMT_PC_ADJ'] = total_numeric - hospc_numeric
Y_adjusted = df.loc[combined.index, 'TOT_MDCR_STDZD_PYMT_PC_ADJ']

# Select numeric columns excluding dummy variables
numeric_columns = X.select_dtypes(include=[np.number]).columns.difference(state_dummies.columns)

# Scale only the numeric predictors (state dummies stay 0/1)
scaler = StandardScaler()
X_scaled = X.copy()
X_scaled[numeric_columns] = scaler.fit_transform(X[numeric_columns])

# Add constant
X_scaled = sm.add_constant(X_scaled)

print(X_scaled.dtypes)


# Check VIF (computed for every column of the design matrix, including 'const')
vif_data = pd.DataFrame()
vif_data["feature"] = X_scaled.columns
vif_data["VIF"] = [variance_inflation_factor(X_scaled.values, i) for i in range(X_scaled.shape[1])]
print(vif_data)

# Fit the OLS model with HC3 heteroskedasticity-robust standard errors
model = sm.OLS(Y_adjusted, X_scaled).fit(cov_type='HC3')
print(model.summary())

Standard Deviation of HOSPC_MDCR_STDZD_PYMT_PC: 202.01025438260447
const                                      float64
HOSPC_MDCR_STDZD_PYMT_PC                   float64
EstimateHouseholdsMedian_income_dollars    float64
AMBLNC_MDCR_STDZD_PYMT_PC                  float64
TRTMNTS_MDCR_PYMT_PC                       float64
BENES_WTH_PTAPTB_CNT                       float64
SNF_CVRD_STAYS_PER_1000_BENES              float64
HH_EPISODES_PER_1000_BENES                 float64
ASC_MDCR_STDZD_PYMT_PER_USER               float64
BENE_AVG_RISK_SCRE                         float64
mode_medicare_pricing_for_new_patient      float64
FQHC_RHC_MDCR_STDZD_PYMT_PC                float64
STATE_AL                                     int64
STATE_AR                                     int64
STATE_AZ                                     int64
STATE_CA                                     int64
STATE_CO                                     int64
STATE_FL                                     int64
STATE_GA       