In [None]:
# Medicare Advantage vs Traditional Medicare: CSPUF 2022 Analysis

import pandas as pd
import numpy as np

# Read the survey extract from the working directory
file_path = "cspuf2022.csv"
df = pd.read_csv(file_path)

# Force these columns to numeric dtype; unparseable entries become NaN
numeric_columns = ('CSP_NCHRNCND', 'CSP_AGE', 'MPAEVNTS')
df = df.assign(
    **{name: pd.to_numeric(df[name], errors='coerce') for name in numeric_columns}
)

# Derived columns:
#   traditional_medicare / medicare_advantage - 1 when the matching payment
#   column (PAMTCARE / PAMTMADV) is strictly positive, otherwise 0
#   chronic_conditions - copy of the (now numeric) CSP_NCHRNCND column
df = df.assign(
    traditional_medicare=df["PAMTCARE"].gt(0).astype(int),
    medicare_advantage=df["PAMTMADV"].gt(0).astype(int),
    chronic_conditions=df["CSP_NCHRNCND"],
)

# Weighted mean function
def weighted_mean(series, weights):
    """Return the weighted mean of ``series`` using ``weights``.

    Rows where ``series`` is null are left out of the denominator, and
    their (NaN) products are skipped by ``Series.sum`` in the numerator,
    so missing observations do not dilute the result.

    Args:
        series: pandas Series of values to average.
        weights: pandas Series of weights sharing ``series``' index.

    Returns:
        The weighted mean, or NaN when the usable weights sum to zero
        (for example an empty subgroup), where the mean is undefined.
    """
    observed = series.notnull()
    total_weight = weights[observed].sum()
    if total_weight == 0:
        # Avoid a 0/0 (or x/0) division: the mean is undefined here.
        return float("nan")
    return (series * weights).sum() / total_weight

# Weighted out-of-pocket spending for the two CSP_INCOME codes
# (code 1 is reported as "poor", code 2 as "richer" in the output below)
poor_mask = df['CSP_INCOME'].eq(1)
rich_mask = df['CSP_INCOME'].eq(2)

poor_oop, rich_oop = (
    weighted_mean(df.loc[rows, 'PAMTOOP'], df.loc[rows, 'CSPUFWGT'])
    for rows in (poor_mask, rich_mask)
)

print(f"Poor seniors weighted OOP: ${poor_oop:.2f}")
print(f"Richer seniors weighted OOP: ${rich_oop:.2f}")

# Weighted out-of-pocket spending split by plan type and income code.
# A row counts toward a plan type when its payment column is positive.
pt = df['PAMTCARE'].gt(0)
ma = df['PAMTMADV'].gt(0)

poor_traditional, poor_advantage, rich_traditional, rich_advantage = (
    weighted_mean(df.loc[rows, 'PAMTOOP'], df.loc[rows, 'CSPUFWGT'])
    for rows in (poor_mask & pt, poor_mask & ma, rich_mask & pt, rich_mask & ma)
)

print("\nWeighted Average Out-of-Pocket Costs:")
print(f"Poor Traditional: ${poor_traditional:.2f}, Poor MA: ${poor_advantage:.2f}")
print(f"Rich Traditional: ${rich_traditional:.2f}, Rich MA: ${rich_advantage:.2f}")

# Utilization patterns
def weighted_event_means(column):
    """Return ``(traditional, advantage)`` weighted means of ``column``.

    Reads the module-level ``df`` together with the ``pt`` and ``ma`` row
    masks, and weights every row by ``CSPUFWGT``.
    """
    return tuple(
        weighted_mean(df.loc[rows, column], df.loc[rows, 'CSPUFWGT'])
        for rows in (pt, ma)
    )

# Report each event-count column for both plan types
event_columns = ('IPAEVNTS', 'MPAEVNTS', 'OPAEVNTS', 'PMAEVNTS')
for event in event_columns:
    trad, adv = weighted_event_means(event)
    summary = f"\n{event}: Traditional = {trad:.2f}, MA = {adv:.2f}"
    print(summary)

# Weighted distributions: total CSPUFWGT per chronic-condition value and
# per age value, computed separately for each plan-type mask
traditional_rows = df.loc[pt]
advantage_rows = df.loc[ma]

chronic_dist_trad = traditional_rows.groupby('CSP_NCHRNCND')['CSPUFWGT'].sum()
chronic_dist_ma = advantage_rows.groupby('CSP_NCHRNCND')['CSPUFWGT'].sum()

print("\nChronic Conditions Distribution (weighted):")
print("Traditional Medicare:")
print(chronic_dist_trad)
print("\nMedicare Advantage:")
print(chronic_dist_ma)

age_dist_trad = traditional_rows.groupby('CSP_AGE')['CSPUFWGT'].sum()
age_dist_ma = advantage_rows.groupby('CSP_AGE')['CSPUFWGT'].sum()

print("\nAge Distribution (weighted):")
print("Traditional Medicare:")
print(age_dist_trad)
print("\nMedicare Advantage:")
print(age_dist_ma)

# Weighted mean of MPAEVNTS per plan type, for each chronic-condition value
is_traditional = df["traditional_medicare"].eq(1)
is_advantage = df["medicare_advantage"].eq(1)

for conditions in (1, 2, 3):
    at_level = df["chronic_conditions"].eq(conditions)
    trad_mask = is_traditional & at_level
    ma_mask = is_advantage & at_level

    trad_subset = df.loc[trad_mask]
    ma_subset = df.loc[ma_mask]
    trad_visits = weighted_mean(trad_subset["MPAEVNTS"], trad_subset["CSPUFWGT"])
    ma_visits = weighted_mean(ma_subset["MPAEVNTS"], ma_subset["CSPUFWGT"])

    print(f"\n=== Patients with {conditions} chronic condition(s) ===")
    print(f"Traditional Medicare visits: {trad_visits:.1f}")
    print(f"Medicare Advantage visits: {ma_visits:.1f}")
