In [1]:
# 1. Imports & Settings
import pandas as pd
import numpy as np
import os
import glob
import locale
from dme_dictionary import DATA_DICTIONARY  # Assuming you have a Python file that defines DATA_DICTIONARY

import matplotlib.pyplot as plt
plt.rcParams["figure.figsize"] = (8, 5)

# Set locale for currency formatting if desired
locale.setlocale(locale.LC_ALL, '')

# Pandas display options
pd.set_option('display.max_columns', None)  # Show all columns
pd.set_option('display.width', None)        # Avoid wrapping output
pd.set_option('display.expand_frame_repr', False)  # Single-line output for wide DataFrames

# Medicare DME Supplier Analysis

This notebook demonstrates how to:
1. Load Medicare Durable Medical Equipment (DME) supplier data spanning multiple years (2018–2022).
2. Analyze key metrics (submitted charges, Medicare payments, beneficiary counts) over time.
3. Compute year-over-year growth rates and identify significant spikes.
4. Examine high submitted vs. low allowed or paid amounts.
5. Perform peer-group analyses by specialty, state, and combined specialty–state.

We'll highlight outliers that may be worth investigating for potential fraud or anomalies.

## 2. Data Loading
We'll load each year's CSV file from 2018 to 2022, then combine them into a single DataFrame.

In [2]:
data_dir = 'data'       # Adjust if your data folder is elsewhere
years = range(2018, 2023)  # 2018 to 2022

dfs = []

for year in years:
    csv_files = glob.glob(f"{data_dir}/{year}/*.csv")
    if not csv_files:
        print(f"No CSV files found for year {year}")
        continue
    
    # Take the first CSV found
    csv_file = csv_files[0]
    print(f"Loading data from {csv_file}")
    
    # Read the CSV, then add a 'year' column
    df = pd.read_csv(csv_file, low_memory=False)
    df['year'] = year
    
    dfs.append(df)

if dfs:
    combined_df = pd.concat(dfs, ignore_index=True)
    print(f"\nCombined DataFrame shape: {combined_df.shape}")
else:
    combined_df = pd.DataFrame()
    print("No data was loaded.")

Loading data from data/2018/mup_dme_ry24_p05_v10_dy18_supr.csv
Loading data from data/2019/mup_dme_ry24_p05_v10_dy19_supr.csv
Loading data from data/2020/mup_dme_ry24_p05_v10_dy20_supr.csv
Loading data from data/2021/mup_dme_ry24_p05_v10_dy21_supr.csv
Loading data from data/2022/mup_dme_ry24_p05_v10_dy22_supr.csv

Combined DataFrame shape: (352611, 95)


### Basic Exploration
Let's do a quick look at the combined DataFrame's structure, and ensure we have the columns we expect.

In [3]:
if not combined_df.empty:
    print("\nFirst few rows:")
    display(combined_df.head())

    print("\nColumn Names:")
    print(combined_df.columns.tolist())

    print(f"\nNumber of unique suppliers: {combined_df['Suplr_NPI'].nunique()}")
    print("\nSummary of numeric columns:")
    display(combined_df.describe(include=[np.number]))


First few rows:


Unnamed: 0,Suplr_NPI,Suplr_Prvdr_Last_Name_Org,Suplr_Prvdr_First_Name,Suplr_Prvdr_MI,Suplr_Prvdr_Crdntls,Suplr_Prvdr_Gndr,Suplr_Prvdr_Ent_Cd,Suplr_Prvdr_St1,Suplr_Prvdr_St2,Suplr_Prvdr_City,Suplr_Prvdr_State_Abrvtn,Suplr_Prvdr_State_FIPS,Suplr_Prvdr_Zip5,Suplr_Prvdr_RUCA,Suplr_Prvdr_RUCA_Desc,Suplr_Prvdr_Cntry,Suplr_Prvdr_Spclty_Desc,Suplr_Prvdr_Spclty_Srce,Tot_Suplr_HCPCS_Cds,Tot_Suplr_Benes,Tot_Suplr_Clms,Tot_Suplr_Srvcs,Suplr_Sbmtd_Chrgs,Suplr_Mdcr_Alowd_Amt,Suplr_Mdcr_Pymt_Amt,Suplr_Mdcr_Stdzd_Pymt_Amt,DME_Sprsn_Ind,DME_Tot_Suplr_HCPCS_Cds,DME_Tot_Suplr_Benes,DME_Tot_Suplr_Clms,DME_Tot_Suplr_Srvcs,DME_Suplr_Sbmtd_Chrgs,DME_Suplr_Mdcr_Alowd_Amt,DME_Suplr_Mdcr_Pymt_Amt,DME_Suplr_Mdcr_Stdzd_Pymt_Amt,POS_Sprsn_Ind,POS_Tot_Suplr_HCPCS_Cds,POS_Tot_Suplr_Benes,POS_Tot_Suplr_Clms,POS_Tot_Suplr_Srvcs,POS_Suplr_Sbmtd_Chrgs,POS_Suplr_Mdcr_Alowd_Amt,POS_Suplr_Mdcr_Pymt_Amt,POS_Suplr_Mdcr_Stdzd_Pymt_Amt,Drug_Sprsn_Ind,Drug_Tot_Suplr_HCPCS_Cds,Drug_Tot_Suplr_Benes,Drug_Tot_Suplr_Clms,Drug_Tot_Suplr_Srvcs,Drug_Suplr_Sbmtd_Chrgs,Drug_Suplr_Mdcr_Alowd_Amt,Drug_Suplr_Mdcr_Pymt_Amt,Drug_Suplr_Mdcr_Stdzd_Pymt_Amt,Bene_Avg_Age,Bene_Age_LT_65_Cnt,Bene_Age_65_74_Cnt,Bene_Age_75_84_Cnt,Bene_Age_GT_84_Cnt,Bene_Feml_Cnt,Bene_Male_Cnt,Bene_Race_Wht_Cnt,Bene_Race_Black_Cnt,Bene_Race_Api_Cnt,Bene_Race_Hspnc_Cnt,Bene_Race_Natind_Cnt,Bene_Race_Othr_Cnt,Bene_Ndual_Cnt,Bene_Dual_Cnt,Bene_CC_BH_ADHD_OthCD_V1_Pct,Bene_CC_BH_Alcohol_Drug_V1_Pct,Bene_CC_BH_Tobacco_V1_Pct,Bene_CC_BH_Alz_NonAlzdem_V2_Pct,Bene_CC_BH_Anxiety_V1_Pct,Bene_CC_BH_Bipolar_V1_Pct,Bene_CC_BH_Mood_V2_Pct,Bene_CC_BH_Depress_V1_Pct,Bene_CC_BH_PD_V1_Pct,Bene_CC_BH_PTSD_V1_Pct,Bene_CC_BH_Schizo_OthPsy_V1_Pct,Bene_CC_PH_Asthma_V2_Pct,Bene_CC_PH_Afib_V2_Pct,Bene_CC_PH_Cancer6_V2_Pct,Bene_CC_PH_CKD_V2_Pct,Bene_CC_PH_COPD_V2_Pct,Bene_CC_PH_Diabetes_V2_Pct,Bene_CC_PH_HF_NonIHD_V2_Pct,Bene_CC_PH_Hyperlipidemia_V2_Pct,Bene_CC_PH_Hypertension_V2_Pct,Bene_CC_PH_IschemicHeart_V2_Pct,Bene_CC_PH_Osteoporosis_V2_Pct,Bene_CC_PH_Parkinson_V2_Pct,Bene_CC_PH_Arthritis_V2_Pct,Bene_CC_PH_Stroke_TIA_V2_Pct,Bene_Avg_Risk_Scre,year
0,1003000399,"Reconstructive Hand To Shoulder Of Indiana, Llc",,,,,O,13431 Old Meridian Street,Suite 225,Carmel,IN,18,46032,1.0,Metropolitan area core: primary flow within an...,US,General Surgery,Claim-Specialty,15,235.0,301,340,83033.0,70600.4,54545.85,56320.86,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,15.0,235.0,301.0,340.0,83033.0,70600.4,54545.85,56320.86,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,72.862661,20.0,120.0,74.0,21.0,148.0,87.0,222.0,,,,0.0,,220.0,15.0,,0.051064,0.102128,,0.2,,0.255319,0.234043,,,,0.085106,0.085106,0.131915,0.102128,0.165957,0.217021,0.093617,0.676596,0.668085,0.229787,0.144681,0.0,0.646809,0.046809,0.975801,2018
1,1003000845,James D.Schlenker Mdsc,,,,,O,6311 W 95th St,,Oak Lawn,IL,17,60453,1.0,Metropolitan area core: primary flow within an...,US,Plastic and Reconstructive Surgery,Claim-Specialty,8,19.0,22,22,4168.0,4034.22,3138.12,4635.72,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,8.0,19.0,22.0,22.0,4168.0,4034.22,3138.12,4635.72,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,74.631579,0.0,11.0,,,,,16.0,,0.0,,0.0,,,,0.0,0.0,0.0,,,0.0,,,0.0,0.0,0.0,0.0,,,,,,0.0,0.842105,0.736842,,,,0.736842,,1.065053,2018
2,1003001934,Yi Rui International Corp,,,,,O,4307 8th Ave,,Brooklyn,NY,36,11232,1.0,Metropolitan area core: primary flow within an...,US,Pharmacy,Claim-Specialty,5,,37,796,2739.6,549.08,339.39,407.47,,4.0,,35.0,46.0,2448.28,512.46,321.75,389.83,#,,,,,,,,,*,,,,,,,,,75.285714,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,3.668578,2018
3,1003002254,Walgreen Co.,,,,,O,5104 Bobby Hicks Hwy,,Gray,TN,47,37615,1.0,Metropolitan area core: primary flow within an...,US,Centralized Flu,Claim-Specialty,10,56.0,150,3681,31078.36,5276.87,3699.85,3835.41,,6.0,56.0,148.0,390.0,26475.28,3226.01,2111.05,2246.61,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,4.0,,12.0,3291.0,4603.08,2050.86,1588.8,1588.8,71.24,,28.0,16.0,,37.0,19.0,56.0,0.0,0.0,0.0,0.0,0.0,45.0,11.0,0.0,,,,0.214286,,0.285714,0.267857,0.0,0.0,0.0,0.196429,,,0.196429,,0.821429,,0.714286,0.839286,0.232143,,0.0,0.446429,,1.171945,2018
4,1003002767,Thomas J Mcelligott Md Pc,,,,,O,2415 Wall St Se,Suite B,Conyers,GA,13,30013,1.0,Metropolitan area core: primary flow within an...,US,Orthopedic Surgery,Claim-Specialty,10,38.0,44,45,4920.71,4808.81,3344.82,3420.32,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,10.0,38.0,44.0,45.0,4920.71,4808.81,3344.82,3420.32,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,73.727273,,15.0,13.0,,26.0,12.0,26.0,11.0,,0.0,0.0,,,,0.0,,,,,,,,0.0,,,,,,,,,,0.631579,0.631579,,,0.0,0.447368,,1.302857,2018



Column Names:
['Suplr_NPI', 'Suplr_Prvdr_Last_Name_Org', 'Suplr_Prvdr_First_Name', 'Suplr_Prvdr_MI', 'Suplr_Prvdr_Crdntls', 'Suplr_Prvdr_Gndr', 'Suplr_Prvdr_Ent_Cd', 'Suplr_Prvdr_St1', 'Suplr_Prvdr_St2', 'Suplr_Prvdr_City', 'Suplr_Prvdr_State_Abrvtn', 'Suplr_Prvdr_State_FIPS', 'Suplr_Prvdr_Zip5', 'Suplr_Prvdr_RUCA', 'Suplr_Prvdr_RUCA_Desc', 'Suplr_Prvdr_Cntry', 'Suplr_Prvdr_Spclty_Desc', 'Suplr_Prvdr_Spclty_Srce', 'Tot_Suplr_HCPCS_Cds', 'Tot_Suplr_Benes', 'Tot_Suplr_Clms', 'Tot_Suplr_Srvcs', 'Suplr_Sbmtd_Chrgs', 'Suplr_Mdcr_Alowd_Amt', 'Suplr_Mdcr_Pymt_Amt', 'Suplr_Mdcr_Stdzd_Pymt_Amt', 'DME_Sprsn_Ind', 'DME_Tot_Suplr_HCPCS_Cds', 'DME_Tot_Suplr_Benes', 'DME_Tot_Suplr_Clms', 'DME_Tot_Suplr_Srvcs', 'DME_Suplr_Sbmtd_Chrgs', 'DME_Suplr_Mdcr_Alowd_Amt', 'DME_Suplr_Mdcr_Pymt_Amt', 'DME_Suplr_Mdcr_Stdzd_Pymt_Amt', 'POS_Sprsn_Ind', 'POS_Tot_Suplr_HCPCS_Cds', 'POS_Tot_Suplr_Benes', 'POS_Tot_Suplr_Clms', 'POS_Tot_Suplr_Srvcs', 'POS_Suplr_Sbmtd_Chrgs', 'POS_Suplr_Mdcr_Alowd_Amt', 'POS_Suplr_Mdcr

Unnamed: 0,Suplr_NPI,Suplr_Prvdr_Zip5,Suplr_Prvdr_RUCA,Tot_Suplr_HCPCS_Cds,Tot_Suplr_Benes,Tot_Suplr_Clms,Tot_Suplr_Srvcs,Suplr_Sbmtd_Chrgs,Suplr_Mdcr_Alowd_Amt,Suplr_Mdcr_Pymt_Amt,Suplr_Mdcr_Stdzd_Pymt_Amt,DME_Tot_Suplr_HCPCS_Cds,DME_Tot_Suplr_Benes,DME_Tot_Suplr_Clms,DME_Tot_Suplr_Srvcs,DME_Suplr_Sbmtd_Chrgs,DME_Suplr_Mdcr_Alowd_Amt,DME_Suplr_Mdcr_Pymt_Amt,DME_Suplr_Mdcr_Stdzd_Pymt_Amt,POS_Tot_Suplr_HCPCS_Cds,POS_Tot_Suplr_Benes,POS_Tot_Suplr_Clms,POS_Tot_Suplr_Srvcs,POS_Suplr_Sbmtd_Chrgs,POS_Suplr_Mdcr_Alowd_Amt,POS_Suplr_Mdcr_Pymt_Amt,POS_Suplr_Mdcr_Stdzd_Pymt_Amt,Drug_Tot_Suplr_HCPCS_Cds,Drug_Tot_Suplr_Benes,Drug_Tot_Suplr_Clms,Drug_Tot_Suplr_Srvcs,Drug_Suplr_Sbmtd_Chrgs,Drug_Suplr_Mdcr_Alowd_Amt,Drug_Suplr_Mdcr_Pymt_Amt,Drug_Suplr_Mdcr_Stdzd_Pymt_Amt,Bene_Avg_Age,Bene_Age_LT_65_Cnt,Bene_Age_65_74_Cnt,Bene_Age_75_84_Cnt,Bene_Age_GT_84_Cnt,Bene_Feml_Cnt,Bene_Male_Cnt,Bene_Race_Wht_Cnt,Bene_Race_Black_Cnt,Bene_Race_Api_Cnt,Bene_Race_Hspnc_Cnt,Bene_Race_Natind_Cnt,Bene_Race_Othr_Cnt,Bene_Ndual_Cnt,Bene_Dual_Cnt,Bene_CC_BH_ADHD_OthCD_V1_Pct,Bene_CC_BH_Alcohol_Drug_V1_Pct,Bene_CC_BH_Tobacco_V1_Pct,Bene_CC_BH_Alz_NonAlzdem_V2_Pct,Bene_CC_BH_Anxiety_V1_Pct,Bene_CC_BH_Bipolar_V1_Pct,Bene_CC_BH_Mood_V2_Pct,Bene_CC_BH_Depress_V1_Pct,Bene_CC_BH_PD_V1_Pct,Bene_CC_BH_PTSD_V1_Pct,Bene_CC_BH_Schizo_OthPsy_V1_Pct,Bene_CC_PH_Asthma_V2_Pct,Bene_CC_PH_Afib_V2_Pct,Bene_CC_PH_Cancer6_V2_Pct,Bene_CC_PH_CKD_V2_Pct,Bene_CC_PH_COPD_V2_Pct,Bene_CC_PH_Diabetes_V2_Pct,Bene_CC_PH_HF_NonIHD_V2_Pct,Bene_CC_PH_Hyperlipidemia_V2_Pct,Bene_CC_PH_Hypertension_V2_Pct,Bene_CC_PH_IschemicHeart_V2_Pct,Bene_CC_PH_Osteoporosis_V2_Pct,Bene_CC_PH_Parkinson_V2_Pct,Bene_CC_PH_Arthritis_V2_Pct,Bene_CC_PH_Stroke_TIA_V2_Pct,Bene_Avg_Risk_Scre,year
count,352611.0,352611.0,352575.0,352611.0,331904.0,352611.0,352611.0,352611.0,352611.0,352611.0,352611.0,334378.0,312252.0,334378.0,334378.0,334378.0,334378.0,334378.0,334378.0,292989.0,271386.0,292989.0,292989.0,292989.0,292989.0,292989.0,292989.0,279663.0,195592.0,279663.0,279663.0,279663.0,279663.0,279663.0,279663.0,352549.0,112648.0,257838.0,210441.0,90651.0,253106.0,253106.0,297628.0,135898.0,152090.0,153492.0,281658.0,130667.0,179575.0,179575.0,210454.0,110545.0,128558.0,103844.0,184461.0,121693.0,194930.0,182561.0,170846.0,180278.0,151709.0,133002.0,157471.0,131889.0,231256.0,189497.0,289577.0,176141.0,311898.0,315415.0,227186.0,115383.0,163625.0,264873.0,102463.0,352548.0,352611.0
mean,1499823000.0,47761.974436,1.938816,19.162681,180.003712,723.630647,28944.72,432742.4,155850.4,119885.2,118916.6,8.810921,145.596758,642.879322,3905.078,256528.2,85918.94,65502.21,64560.93,6.513569,48.193319,80.856425,3355.593,65016.89,44708.13,34544.46,34604.0,2.651927,15.184762,53.643879,15545.91,39285.6,16972.34,13264.74,13149.45,72.135987,70.165835,93.109103,83.160492,68.880751,126.068106,103.751768,157.496126,38.625859,6.402137,20.363218,0.683773,6.168627,216.677667,81.802339,0.001365,0.056437,0.137417,0.075832,0.270268,0.023278,0.295474,0.268735,0.003625,0.003438,0.011297,0.154784,0.20562,0.17063,0.364954,0.296001,0.738303,0.2454,0.803431,0.854808,0.356955,0.132541,0.004815,0.491985,0.085324,1.75954,2019.933791
std,287777800.0,28443.077792,2.593615,25.02395,1318.464715,5759.18413,1137952.0,6107114.0,2104732.0,1642990.0,1640688.0,17.959107,1210.207291,5472.581913,118374.4,4536824.0,1222667.0,953650.8,952061.6,18.036465,582.654243,1658.046524,177420.9,1399145.0,755867.3,580198.2,596988.0,2.654579,508.361346,2599.424059,1233497.0,2072189.0,987795.6,773439.1,766566.4,4.203287,362.180891,576.926665,551.356612,375.786927,859.590068,663.652059,1086.588625,283.259321,53.643838,181.647129,9.812065,45.984446,1322.832122,533.498517,0.008951,0.076374,0.100486,0.102199,0.102902,0.044075,0.108544,0.102275,0.012957,0.01312,0.044755,0.087742,0.084632,0.135977,0.153952,0.140641,0.251118,0.108972,0.145535,0.146975,0.112958,0.083479,0.017198,0.137264,0.070467,0.65541,1.417299
min,1003000000.0,601.0,1.0,1.0,11.0,11.0,11.0,19.6,16.62,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.176,2018.0
25%,1255346000.0,25504.0,1.0,8.0,28.0,60.0,477.0,15077.08,4234.21,3093.515,3221.385,3.0,15.0,26.0,49.0,2726.097,650.14,446.2525,478.3675,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,70.28,13.0,18.0,17.0,11.0,24.0,19.0,23.0,0.0,0.0,0.0,0.0,0.0,29.0,15.0,0.0,0.0,0.074721,0.0,0.20595,0.0,0.225806,0.205128,0.0,0.0,0.0,0.110497,0.15625,0.118056,0.277108,0.203704,0.5375,0.180812,0.714286,0.769231,0.282609,0.091703,0.0,0.4,0.0,1.363045,2019.0
50%,1497926000.0,44125.0,1.0,12.0,53.0,146.0,3592.0,39647.52,11716.18,8781.31,8958.79,5.0,39.0,112.0,243.0,14374.0,2624.245,1843.84,1960.37,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,15.0,1954.0,3273.83,312.95,223.1,221.22,72.557692,24.0,30.0,26.0,19.0,39.0,31.0,45.0,11.0,0.0,0.0,0.0,0.0,59.0,25.0,0.0,0.04071,0.135802,0.057075,0.259615,0.0,0.282353,0.258065,0.0,0.0,0.0,0.147059,0.201835,0.151899,0.351351,0.277108,0.811594,0.236842,0.806452,0.857143,0.349057,0.130435,0.0,0.474729,0.086397,1.665859,2020.0
75%,1740658000.0,74104.0,1.0,18.0,105.0,315.0,10503.5,97338.4,34440.66,26289.54,26456.89,6.0,81.0,256.0,578.0,35350.16,6448.807,4567.375,4816.86,4.0,19.0,23.0,58.0,8737.85,6100.79,4672.27,4877.14,5.0,16.0,42.0,6352.0,17230.49,5056.03,3889.745,3861.795,74.583333,54.0,55.0,49.0,48.0,74.0,59.0,93.0,25.0,0.0,15.0,0.0,0.0,125.0,54.0,0.0,0.094395,0.193682,0.109195,0.322835,0.042476,0.35,0.320755,0.0,0.0,0.0,0.193548,0.254902,0.19044,0.432432,0.366667,0.92,0.302857,0.894118,0.9375,0.421053,0.173554,0.0,0.564815,0.123711,2.018267,2021.0
max,1993000000.0,99901.0,99.0,538.0,237906.0,685662.0,329072800.0,1836805000.0,399188700.0,311692900.0,311785200.0,313.0,237906.0,685662.0,23161980.0,1830416000.0,208285500.0,162222500.0,162221600.0,277.0,94760.0,255217.0,31566660.0,229118200.0,134360000.0,105564700.0,112616000.0,14.0,98084.0,619016.0,328632700.0,457846800.0,197952800.0,155093500.0,153022000.0,108.0,46481.0,82335.0,81159.0,27931.0,142147.0,95759.0,170138.0,44508.0,4326.0,28959.0,740.0,2859.0,158443.0,79463.0,0.565217,1.636364,1.636364,1.068421,1.727273,1.545455,1.727273,1.636364,0.833333,0.444444,1.636364,1.454545,2.461538,2.380952,3.601562,1.636364,2.589844,3.153846,3.261719,3.753906,2.367188,1.272727,1.144068,1.75,0.846154,16.340466,2022.0


## 3. Mapping Columns to Data Dictionary
We've got a `DATA_DICTIONARY` that provides definitions for each column. Let's map them to the DataFrame's columns.

In [5]:
if not combined_df.empty:
    column_info = {}
    for column in combined_df.columns:
        if column in DATA_DICTIONARY:
            column_info[column] = DATA_DICTIONARY[column]
        else:
            column_info[column] = "Description not available"
    
    # Optionally store in DataFrame attributes (just for reference, not required)
    combined_df.attrs['column_descriptions'] = column_info

    # Display an overview
    print("Data Dictionary Mapping:\n")
    for col in combined_df.columns:
        desc = column_info[col]
        print(f"- {col}: {desc}")

Data Dictionary Mapping:

- Suplr_NPI: Supplier NPI - NPI for the Supplier on the DMEPOS claim
- Suplr_Prvdr_Last_Name_Org: Supplier Last Name/Organization Name - When registered as individual, the Supplier's last name. When registered as organization, this is the organization name
- Suplr_Prvdr_First_Name: Supplier First Name - When registered as individual, the Supplier's first name
- Suplr_Prvdr_MI: Supplier Middle Initial - When registered as individual, the Supplier's middle initial
- Suplr_Prvdr_Crdntls: Supplier Credentials - When registered as individual, these are the Supplier's credentials
- Suplr_Prvdr_Gndr: Supplier Gender - When registered as individual, this is the Supplier's gender
- Suplr_Prvdr_Ent_Cd: Supplier Entity Code - 'I' identifies Suppliers registered as individuals, 'O' identifies Suppliers registered as organizations
- Suplr_Prvdr_St1: Supplier Street 1 - First line of the Supplier's street address
- Suplr_Prvdr_St2: Supplier Street 2 - Second line of the Sup

## 4. Helper: Format Dollar Amounts
A small function to display large numbers with K/M suffixes.

In [6]:
def format_dollar_amount(amount):
    """Return a string formatted with $ and K/M if needed."""
    if amount >= 1_000_000:
        return f"${amount/1_000_000:.1f}M"
    elif amount >= 1_000:
        return f"${amount/1_000:.1f}K"
    else:
        return f"${amount:,.0f}"

# 5. Year-over-Year Growth Analysis
We'll look at *Medicare Payment Amount* by Supplier (NPI) across years, and compute YOY growth.
- Filter for suppliers that appear in all relevant years (2018–2022).
- Only consider suppliers with a meaningful (>= 100k) total in 2022 to focus on large-volume providers.
- Identify top 10 by average growth rate.

In [7]:
if not combined_df.empty:
    # 5.1 Group by (Supplier, year), then sum relevant metrics
    supplier_yearly = combined_df.groupby([
        'Suplr_NPI',
        'Suplr_Prvdr_Last_Name_Org',
        'year'
    ], as_index=False).agg({
        'Suplr_Sbmtd_Chrgs': 'sum',
        'Suplr_Mdcr_Pymt_Amt': 'sum',
        'Tot_Suplr_Benes': 'mean',  # average across rows
        'Tot_Suplr_Clms': 'sum'
    })

    # Create a pivot where columns are years, values are 'Suplr_Mdcr_Pymt_Amt'
    pivot_charges = supplier_yearly.pivot_table(
        index=['Suplr_NPI', 'Suplr_Prvdr_Last_Name_Org'],
        columns='year',
        values='Suplr_Mdcr_Pymt_Amt',
        fill_value=0
    )

    # We'll calculate YOY growth for (2019 vs 2018), (2020 vs 2019), etc.
    growth_rates = pd.DataFrame(index=pivot_charges.index)
    for year_pair in [(2019, 2018), (2020, 2019), (2021, 2020), (2022, 2021)]:
        current, previous = year_pair
        growth_column = f'growth_{current}'
        growth_rates[growth_column] = (
            (pivot_charges[current] - pivot_charges[previous]) /
            pivot_charges[previous].replace(0, np.nan)
        ) * 100

    growth_cols = [col for col in growth_rates.columns if col.startswith('growth_')]
    growth_rates['avg_growth'] = growth_rates[growth_cols].mean(axis=1)

    # Filter: Supplier must have >0 in all years, and >=100k in 2022
    filter_mask = (
        (pivot_charges[2018] > 0) &
        (pivot_charges[2019] > 0) &
        (pivot_charges[2020] > 0) &
        (pivot_charges[2021] > 0) &
        (pivot_charges[2022] >= 100000)
    )

    valid_suppliers = pivot_charges[filter_mask]
    valid_growth = growth_rates.loc[valid_suppliers.index].reset_index()

    # Merge with aggregated totals (all years combined) just for more reporting info
    supplier_totals = supplier_yearly.groupby([
        'Suplr_NPI',
        'Suplr_Prvdr_Last_Name_Org'
    ], as_index=False).agg({
        'Suplr_Sbmtd_Chrgs': 'sum',
        'Suplr_Mdcr_Pymt_Amt': 'sum',
        'Tot_Suplr_Benes': 'mean',
        'Tot_Suplr_Clms': 'sum'
    })

    growth_merged = pd.merge(
        valid_growth,
        supplier_totals,
        on=['Suplr_NPI', 'Suplr_Prvdr_Last_Name_Org'],
        how='left'
    )

    # Sort by average growth descending
    top_growth = growth_merged.sort_values('avg_growth', ascending=False).head(10)
    
    print("\nTop 10 Suppliers by Average Year-over-Year Growth (2018–2022), \n\n",
          "Filtered to those with >= $100K in 2022 payments:")
    display(top_growth)



Top 10 Suppliers by Average Year-over-Year Growth (2018–2022), 

 Filtered to those with >= $100K in 2022 payments:


Unnamed: 0,Suplr_NPI,Suplr_Prvdr_Last_Name_Org,growth_2019,growth_2020,growth_2021,growth_2022,avg_growth,Suplr_Sbmtd_Chrgs,Suplr_Mdcr_Pymt_Amt,Tot_Suplr_Benes,Tot_Suplr_Clms
500,1063967768,"P-Cares Medical Supplies, Llc",216.76299,-18.27912,-1.438596,59704.308031,14975.338326,29969860.0,15894839.27,3029.6,21002
5958,1891275590,Lincare Inc,43427.677155,40.042523,9.862999,1.397273,10869.744987,135110000.0,19498239.26,8006.6,250598
3078,1457837080,"Respiratory Services Of Western New York, Inc.",24521.584413,489.458451,59.869097,46.621208,6279.383292,1742520.0,636034.24,442.0,8743
5538,1821424789,"Vohra Post Acute Care Physicians Of Texas, Pllc",24557.093269,105.801915,24.58983,1.304009,6172.197256,19543720.0,7851147.07,1268.25,21386
3389,1508938127,Aahi St Joseph Mercy Hospital Inc,-73.766346,-96.932529,23208.805119,139.646393,5794.438159,1052284.0,477838.88,,421
1197,1174553804,"Care One Medical Equipment And Supplies, Inc.",19205.127235,50.732207,-6.81621,34.941358,4820.996148,3241600.0,1038009.02,529.25,11495
3365,1508826199,"The Home Health Store Of Tomball, Inc.",70.638944,23.053685,53.07501,17908.424045,4513.797921,29148110.0,15253804.77,5578.4,56914
5022,1750391751,"Amerihealth Medical Group, Inc.",18039.35785,-46.804741,6.478163,-16.942326,4495.522236,2947875.0,1106496.2,873.6,19592
3923,1598044208,"Scooter Chair Repair Georgia, Llc",16495.684706,46.102762,-17.790577,-18.34884,4126.412013,10784920.0,4825597.72,219.6,3544
2865,1437108214,"Christian Home Health Services, Inc",16471.811385,-19.951771,10.856242,4.742256,4116.864528,1948006.0,573413.07,391.75,7344


### Display Year-by-Year Payment Patterns for Top 10
We'll show each supplier's biggest jump and beneficiary growth, if available.

In [9]:
if not combined_df.empty:
    # Create a function to display details for the top-10
    def show_top_10_growth_details(top_df, supplier_yearly_df):
        print("\nDetailed Patterns for Top 10 Growth Suppliers:\n")
        top_npi = top_df['Suplr_NPI'].tolist()

        # Filter original groupby results for just these suppliers
        subset = supplier_yearly_df[supplier_yearly_df['Suplr_NPI'].isin(top_npi)].copy()
        subset.sort_values(['Suplr_NPI', 'year'], inplace=True)

        for i, row in enumerate(top_df.itertuples(), start=1):
            npi = row.Suplr_NPI
            name = row.Suplr_Prvdr_Last_Name_Org
            avg_growth = row.avg_growth
            total_pay = row.Suplr_Mdcr_Pymt_Amt

            # Grab the subset for this supplier
            data = subset[subset['Suplr_NPI'] == npi]
            data.sort_values('year', inplace=True)

            print(f"{i}. {name} (NPI: {npi})")
            print(f"   - Average Growth: {avg_growth:.2f}%")
            print(f"   - Total Medicare Payments (2018–2022): {format_dollar_amount(total_pay)}")

            # Show year-by-year
            year_strs = []
            for y in range(2018, 2023):
                row_y = data[data['year'] == y]
                if not row_y.empty:
                    pay = row_y.iloc[0]['Suplr_Mdcr_Pymt_Amt']
                    year_strs.append(f"{y}: {format_dollar_amount(pay)}")
                else:
                    year_strs.append(f"{y}: $0")
            print("   - Year-by-year Payments: " + ", ".join(year_strs))

            # Identify the largest yoy jump
            data_list = data[['year', 'Suplr_Mdcr_Pymt_Amt']].sort_values('year').values.tolist()
            max_jump = 0
            jump_year = None
            for idx in range(1, len(data_list)):
                prev_amt = data_list[idx-1][1]
                curr_amt = data_list[idx][1]
                if prev_amt > 0:
                    yoy_pct = (curr_amt - prev_amt) / prev_amt * 100
                    if yoy_pct > max_jump:
                        max_jump = yoy_pct
                        jump_year = (data_list[idx-1][0], data_list[idx][0])

            if jump_year:
                print(f"   - Largest Jump: {jump_year[0]} to {jump_year[1]} (+{max_jump:.2f}%)")

            # Check beneficiary growth
            benes = data[['year', 'Tot_Suplr_Benes']].dropna()
            if len(benes) > 1:
                benes.sort_values('year', inplace=True)
                first_benes = benes.iloc[0]['Tot_Suplr_Benes']
                last_benes = benes.iloc[-1]['Tot_Suplr_Benes']
                if first_benes > 0:
                    bene_growth = (last_benes - first_benes) / first_benes * 100
                    print(f"   - Beneficiary Growth: {bene_growth:.1f}% ")

            print("")

    show_top_10_growth_details(top_growth, supplier_yearly)


Detailed Patterns for Top 10 Growth Suppliers:

1. P-Cares Medical Supplies, Llc (NPI: 1063967768)
   - Average Growth: 14975.34%
   - Total Medicare Payments (2018–2022): $15.9M
   - Year-by-year Payments: 2018: $10.4K, 2019: $32.8K, 2020: $26.8K, 2021: $26.4K, 2022: $15.8M
   - Largest Jump: 2021.0 to 2022.0 (+59704.31%)
   - Beneficiary Growth: 52782.1% 

2. Lincare Inc (NPI: 1891275590)
   - Average Growth: 10869.74%
   - Total Medicare Payments (2018–2022): $19.5M
   - Year-by-year Payments: 2018: $8.1K, 2019: $3.5M, 2020: $5.0M, 2021: $5.5M, 2022: $5.5M
   - Largest Jump: 2018.0 to 2019.0 (+43427.68%)
   - Beneficiary Growth: 10389.0% 

3. Respiratory Services Of Western New York, Inc. (NPI: 1457837080)
   - Average Growth: 6279.38%
   - Total Medicare Payments (2018–2022): $636.0K
   - Year-by-year Payments: 2018: $86, 2019: $21.1K, 2020: $124.4K, 2021: $198.9K, 2022: $291.6K
   - Largest Jump: 2018.0 to 2019.0 (+24521.58%)
   - Beneficiary Growth: 480.9% 

4. Vohra Post Acute 

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data.sort_values('year', inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data.sort_values('year', inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data.sort_values('year', inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data.sort_values('year', inplace=True)
A value is t

# 6. Analysis of High Submitted vs. Low Allowed/Paid Amounts
We check each supplier's total submitted charges vs. the allowed and paid amounts across **all** years.

In [10]:
if not combined_df.empty:
    supplier_totals_ap = combined_df.groupby([
        'Suplr_NPI',
        'Suplr_Prvdr_Last_Name_Org'
    ], as_index=False).agg({
        'Suplr_Sbmtd_Chrgs': 'sum',
        'Suplr_Mdcr_Alowd_Amt': 'sum',
        'Suplr_Mdcr_Pymt_Amt': 'sum',
        'Tot_Suplr_Benes': 'mean',
        'Tot_Suplr_Clms': 'sum'
    })

    supplier_totals_ap['submitted_allowed_ratio'] = (
        supplier_totals_ap['Suplr_Sbmtd_Chrgs'] / (supplier_totals_ap['Suplr_Mdcr_Alowd_Amt'] + 1e-9)
    )
    supplier_totals_ap['submitted_paid_ratio'] = (
        supplier_totals_ap['Suplr_Sbmtd_Chrgs'] / (supplier_totals_ap['Suplr_Mdcr_Pymt_Amt'] + 1e-9)
    )

    # Focus on those with at least $100K submitted charges to reduce noise
    significant_ap = supplier_totals_ap[supplier_totals_ap['Suplr_Sbmtd_Chrgs'] >= 100000]

    # Highest submitted-to-allowed ratio
    top_allowed = significant_ap.sort_values(
        'submitted_allowed_ratio', ascending=False
    ).head(10)

    print("Top 10 Suppliers: Highest Submitted Charges vs. Allowed Amount Ratio\n")
    for i, row in top_allowed.iterrows():
        npi = row['Suplr_NPI']
        name = row['Suplr_Prvdr_Last_Name_Org']
        submitted = row['Suplr_Sbmtd_Chrgs']
        allowed = row['Suplr_Mdcr_Alowd_Amt']
        paid = row['Suplr_Mdcr_Pymt_Amt']
        ratio = row['submitted_allowed_ratio']

        print(f"- {name} (NPI: {npi})")
        print(f"  Submitted: {format_dollar_amount(submitted)}, Allowed: {format_dollar_amount(allowed)}, Paid: {format_dollar_amount(paid)}")
        print(f"  Submitted : Allowed = {ratio:.2f}x\n")

    # Highest submitted-to-paid ratio
    top_paid = significant_ap.sort_values(
        'submitted_paid_ratio', ascending=False
    ).head(10)

    print("\nTop 10 Suppliers: Highest Submitted Charges vs. Paid Amount Ratio\n")
    for i, row in top_paid.iterrows():
        npi = row['Suplr_NPI']
        name = row['Suplr_Prvdr_Last_Name_Org']
        submitted = row['Suplr_Sbmtd_Chrgs']
        allowed = row['Suplr_Mdcr_Alowd_Amt']
        paid = row['Suplr_Mdcr_Pymt_Amt']
        ratio = row['submitted_paid_ratio']

        print(f"- {name} (NPI: {npi})")
        print(f"  Submitted: {format_dollar_amount(submitted)}, Allowed: {format_dollar_amount(allowed)}, Paid: {format_dollar_amount(paid)}")
        print(f"  Submitted : Paid = {ratio:.2f}x\n")

Top 10 Suppliers: Highest Submitted Charges vs. Allowed Amount Ratio

- Flatbush Rx Corp (NPI: 1669839536)
  Submitted: $252.8K, Allowed: $1.1K, Paid: $616
  Submitted : Allowed = 221.97x

- Arooba Corp (NPI: 1649225152)
  Submitted: $312.0K, Allowed: $1.7K, Paid: $1.1K
  Submitted : Allowed = 182.41x

- Mingocare Inc (NPI: 1003228156)
  Submitted: $702.7K, Allowed: $4.0K, Paid: $2.4K
  Submitted : Allowed = 177.83x

- Nile City Pharmacy Inc (NPI: 1578076212)
  Submitted: $106.4K, Allowed: $702, Paid: $524
  Submitted : Allowed = 151.50x

- Farmacia Julia Discount #2 Llc (NPI: 1457430274)
  Submitted: $410.4K, Allowed: $3.4K, Paid: $2.1K
  Submitted : Allowed = 122.31x

- Gamer Pharmacy Inc (NPI: 1588697692)
  Submitted: $9.3M, Allowed: $76.9K, Paid: $56.8K
  Submitted : Allowed = 120.95x

- Madina Pharmacy Inc (NPI: 1538525316)
  Submitted: $427.3K, Allowed: $4.3K, Paid: $2.7K
  Submitted : Allowed = 99.99x

- Colonial Pharmacy Inc (NPI: 1255438198)
  Submitted: $407.8K, Allowed: $4.1

# 7. Peer Group Analysis
Analyze suppliers in the context of their **specialty**, **state**, or combined specialty–state. 
Outliers are flagged if they exceed 3× the peer group's median in more than one of these metrics:
- Total Claims
- Total Submitted Charges
- Total Payments

# 8. Conclusions & Next Steps
We've combined multi-year DME data, identified year-over-year outliers, analyzed high submitted vs. allowed/paid ratios, and performed peer-group checks.

### Potential Enhancements
1. **Additional Metrics**: Incorporate DME-specific categories (e.g., prosthetics vs. drug/nutrition) and investigate outliers in each.
2. **Machine Learning**: Replace threshold-based outlier detection with algorithms (Isolation Forest, DBSCAN, etc.).
3. **Visualization**: Plot distributions, boxplots, or time-series charts for top suspicious suppliers.
4. **Interactive Dashboards**: Provide an interface for users to adjust thresholds and instantly see flagged suppliers.


In [11]:
if not combined_df.empty:
    # Ensure we have columns needed for specialty/state analysis
    required_cols = [
        'Suplr_NPI', 'Suplr_Prvdr_Last_Name_Org',
        'Suplr_Prvdr_Spclty_Desc', 'Suplr_Prvdr_State_Abrvtn',
        'Suplr_Sbmtd_Chrgs', 'Suplr_Mdcr_Pymt_Amt',
        'Tot_Suplr_Clms', 'Tot_Suplr_Srvcs'
    ]
    missing_cols = [c for c in required_cols if c not in combined_df.columns]
    if missing_cols:
        print(f"Missing columns for Peer Group Analysis: {missing_cols}")
    else:
        supplier_metrics = combined_df.groupby([
            'Suplr_NPI', 'Suplr_Prvdr_Last_Name_Org',
            'Suplr_Prvdr_Spclty_Desc', 'Suplr_Prvdr_State_Abrvtn'
        ], as_index=False).agg({
            'Suplr_Sbmtd_Chrgs': 'sum',
            'Suplr_Mdcr_Pymt_Amt': 'sum',
            'Tot_Suplr_Clms': 'sum',
            'Tot_Suplr_Srvcs': 'sum'
        })

        # Add derived metrics
        supplier_metrics['Avg_Chrg_Per_Clm'] = supplier_metrics['Suplr_Sbmtd_Chrgs'] / supplier_metrics['Tot_Suplr_Clms'].replace(0, np.nan)
        supplier_metrics['Avg_Pymt_Per_Clm'] = supplier_metrics['Suplr_Mdcr_Pymt_Amt'] / supplier_metrics['Tot_Suplr_Clms'].replace(0, np.nan)
        supplier_metrics['Avg_Srvcs_Per_Clm'] = supplier_metrics['Tot_Suplr_Srvcs'] / supplier_metrics['Tot_Suplr_Clms'].replace(0, np.nan)

        print("\n## Peer Group Analysis by Specialty\n")
        specialty_counts = supplier_metrics['Suplr_Prvdr_Spclty_Desc'].value_counts()
        valid_specialties = specialty_counts[specialty_counts >= 5].index  # at least 5 suppliers

        if len(valid_specialties) > 0:
            peer_specialty_metrics = supplier_metrics[supplier_metrics['Suplr_Prvdr_Spclty_Desc'].isin(valid_specialties)].groupby('Suplr_Prvdr_Spclty_Desc').agg({
                'Suplr_Sbmtd_Chrgs': ['median'],
                'Suplr_Mdcr_Pymt_Amt': ['median'],
                'Tot_Suplr_Clms': ['median'],
                'Tot_Suplr_Srvcs': ['median']
            })
            peer_specialty_metrics.columns = ["_".join(col) for col in peer_specialty_metrics.columns]

            outliers_by_specialty = []

            for specialty in valid_specialties:
                group = supplier_metrics[supplier_metrics['Suplr_Prvdr_Spclty_Desc'] == specialty]
                med_clms = peer_specialty_metrics.loc[specialty, 'Tot_Suplr_Clms_median']
                med_chrg = peer_specialty_metrics.loc[specialty, 'Suplr_Sbmtd_Chrgs_median']
                med_pay = peer_specialty_metrics.loc[specialty, 'Suplr_Mdcr_Pymt_Amt_median']

                # Compare each supplier to 3x median
                claim_outliers = group[group['Tot_Suplr_Clms'] > 3 * med_clms]
                charge_outliers = group[group['Suplr_Sbmtd_Chrgs'] > 3 * med_chrg]
                payment_outliers = group[group['Suplr_Mdcr_Pymt_Amt'] > 3 * med_pay]

                # Combine
                all_out = pd.concat([
                    claim_outliers[['Suplr_NPI']].assign(flag='claims'),
                    charge_outliers[['Suplr_NPI']].assign(flag='charges'),
                    payment_outliers[['Suplr_NPI']].assign(flag='payments')
                ], ignore_index=True)
                # We want suppliers that appear at least in 2 out of 3 categories
                outlier_counts = all_out.groupby('Suplr_NPI').size()
                multi_flags = outlier_counts[outlier_counts >= 2].index
                multi_outliers = group[group['Suplr_NPI'].isin(multi_flags)]

                for idx, row in multi_outliers.iterrows():
                    outliers_by_specialty.append({
                        'NPI': row['Suplr_NPI'],
                        'Name': row['Suplr_Prvdr_Last_Name_Org'],
                        'Specialty': row['Suplr_Prvdr_Spclty_Desc'],
                        'State': row['Suplr_Prvdr_State_Abrvtn'],
                        'Total_Claims': row['Tot_Suplr_Clms'],
                        'Total_Charges': row['Suplr_Sbmtd_Chrgs'],
                        'Total_Payments': row['Suplr_Mdcr_Pymt_Amt']
                    })

            if len(outliers_by_specialty) > 0:
                # Just show top 10 by total charges
                outliers_by_specialty = sorted(
                    outliers_by_specialty,
                    key=lambda x: x['Total_Charges'],
                    reverse=True
                )

                print("Significant Specialty Outliers (exceeding 3× median in >=2 metrics):")
                for outlier in outliers_by_specialty[:10]:
                    print(f"- {outlier['Name']} (NPI: {outlier['NPI']})")
                    print(f"  Specialty: {outlier['Specialty']} | State: {outlier['State']}")
                    print(f"  Claims: {outlier['Total_Claims']:,}, Charges: {format_dollar_amount(outlier['Total_Charges'])}, Payments: {format_dollar_amount(outlier['Total_Payments'])}\n")
            else:
                print("No multi-metric outliers by specialty.")
        else:
            print("No specialty with >=5 suppliers.")

        print("\n## Peer Group Analysis by State\n")
        state_counts = supplier_metrics['Suplr_Prvdr_State_Abrvtn'].value_counts()
        valid_states = state_counts[state_counts >= 5].index

        if len(valid_states) > 0:
            peer_state_metrics = supplier_metrics[supplier_metrics['Suplr_Prvdr_State_Abrvtn'].isin(valid_states)].groupby('Suplr_Prvdr_State_Abrvtn').agg({
                'Suplr_Sbmtd_Chrgs': ['median'],
                'Suplr_Mdcr_Pymt_Amt': ['median'],
                'Tot_Suplr_Clms': ['median'],
                'Tot_Suplr_Srvcs': ['median']
            })
            peer_state_metrics.columns = ["_".join(col) for col in peer_state_metrics.columns]

            outliers_by_state = []

            for st in valid_states:
                group = supplier_metrics[supplier_metrics['Suplr_Prvdr_State_Abrvtn'] == st]
                med_clms = peer_state_metrics.loc[st, 'Tot_Suplr_Clms_median']
                med_chrg = peer_state_metrics.loc[st, 'Suplr_Sbmtd_Chrgs_median']
                med_pay = peer_state_metrics.loc[st, 'Suplr_Mdcr_Pymt_Amt_median']

                # Compare to 3x
                claim_outliers = group[group['Tot_Suplr_Clms'] > 3 * med_clms]
                charge_outliers = group[group['Suplr_Sbmtd_Chrgs'] > 3 * med_chrg]
                payment_outliers = group[group['Suplr_Mdcr_Pymt_Amt'] > 3 * med_pay]

                all_out = pd.concat([
                    claim_outliers[['Suplr_NPI']].assign(flag='claims'),
                    charge_outliers[['Suplr_NPI']].assign(flag='charges'),
                    payment_outliers[['Suplr_NPI']].assign(flag='payments')
                ], ignore_index=True)
                outlier_counts = all_out.groupby('Suplr_NPI').size()
                multi_flags = outlier_counts[outlier_counts >= 2].index
                multi_outliers = group[group['Suplr_NPI'].isin(multi_flags)]

                for idx, row in multi_outliers.iterrows():
                    outliers_by_state.append({
                        'NPI': row['Suplr_NPI'],
                        'Name': row['Suplr_Prvdr_Last_Name_Org'],
                        'Specialty': row['Suplr_Prvdr_Spclty_Desc'],
                        'State': st,
                        'Total_Claims': row['Tot_Suplr_Clms'],
                        'Total_Charges': row['Suplr_Sbmtd_Chrgs'],
                        'Total_Payments': row['Suplr_Mdcr_Pymt_Amt']
                    })

            if len(outliers_by_state) > 0:
                outliers_by_state = sorted(
                    outliers_by_state,
                    key=lambda x: x['Total_Charges'],
                    reverse=True
                )
                print("Significant State Outliers (>= 3× median in >=2 metrics):")
                for outlier in outliers_by_state[:10]:
                    print(f"- {outlier['Name']} (NPI: {outlier['NPI']})")
                    print(f"  State: {outlier['State']} | Specialty: {outlier['Specialty']}")
                    print(f"  Claims: {outlier['Total_Claims']:,}, Charges: {format_dollar_amount(outlier['Total_Charges'])}, Payments: {format_dollar_amount(outlier['Total_Payments'])}\n")
            else:
                print("No multi-metric outliers by state.")
        else:
            print("No states with >=5 suppliers.")

        print("\n## Peer Group Analysis by Combined Specialty–State\n")
        supplier_metrics['SpecState'] = supplier_metrics['Suplr_Prvdr_Spclty_Desc'].astype(str) + ' - ' + supplier_metrics['Suplr_Prvdr_State_Abrvtn'].astype(str)
        combo_counts = supplier_metrics['SpecState'].value_counts()
        valid_specstates = combo_counts[combo_counts >= 5].index

        if len(valid_specstates) > 0:
            # Calculate medians for each group
            combo_medians = supplier_metrics[supplier_metrics['SpecState'].isin(valid_specstates)].groupby('SpecState').agg({
                'Suplr_Sbmtd_Chrgs': 'median',
                'Suplr_Mdcr_Pymt_Amt': 'median',
                'Tot_Suplr_Clms': 'median',
                'Tot_Suplr_Srvcs': 'median'
            })
            outliers_combined = []
            
            for cs in valid_specstates:
                group = supplier_metrics[supplier_metrics['SpecState'] == cs]
                med_clms = combo_medians.loc[cs, 'Tot_Suplr_Clms']
                med_chrg = combo_medians.loc[cs, 'Suplr_Sbmtd_Chrgs']
                med_pay = combo_medians.loc[cs, 'Suplr_Mdcr_Pymt_Amt']

                claim_outliers = group[group['Tot_Suplr_Clms'] > 3 * med_clms]
                charge_outliers = group[group['Suplr_Sbmtd_Chrgs'] > 3 * med_chrg]
                payment_outliers = group[group['Suplr_Mdcr_Pymt_Amt'] > 3 * med_pay]

                all_out = pd.concat([
                    claim_outliers[['Suplr_NPI']].assign(flag='claims'),
                    charge_outliers[['Suplr_NPI']].assign(flag='charges'),
                    payment_outliers[['Suplr_NPI']].assign(flag='payments')
                ], ignore_index=True)
                outlier_counts = all_out.groupby('Suplr_NPI').size()
                multi_flags = outlier_counts[outlier_counts >= 2].index

                multi_outliers = group[group['Suplr_NPI'].isin(multi_flags)]
                for idx, row in multi_outliers.iterrows():
                    outliers_combined.append({
                        'NPI': row['Suplr_NPI'],
                        'Name': row['Suplr_Prvdr_Last_Name_Org'],
                        'SpecState': cs,
                        'Specialty': row['Suplr_Prvdr_Spclty_Desc'],
                        'State': row['Suplr_Prvdr_State_Abrvtn'],
                        'Total_Claims': row['Tot_Suplr_Clms'],
                        'Total_Charges': row['Suplr_Sbmtd_Chrgs'],
                        'Total_Payments': row['Suplr_Mdcr_Pymt_Amt']
                    })
            
            if outliers_combined:
                # Sort by total charges just as a quick way to highlight big outliers
                outliers_combined = sorted(
                    outliers_combined,
                    key=lambda x: x['Total_Charges'],
                    reverse=True
                )
                print("Significant Combined Specialty–State Outliers (>= 3× median in >=2 metrics):")
                for outlier in outliers_combined[:10]:
                    print(f"- {outlier['Name']} (NPI: {outlier['NPI']})")
                    print(f"  Specialty: {outlier['Specialty']} | State: {outlier['State']}")
                    print(f"  Claims: {outlier['Total_Claims']:,}, Charges: {format_dollar_amount(outlier['Total_Charges'])}, Payments: {format_dollar_amount(outlier['Total_Payments'])}\n")
            else:
                print("No multi-metric outliers at the combined specialty–state level.")
        else:
            print("No combined specialty–state groups with >=5 suppliers.")


## Peer Group Analysis by Specialty

Significant Specialty Outliers (exceeding 3× median in >=2 metrics):
- Accredo Health Group Inc (NPI: 1417915653)
  Specialty: Pharmacy | State: PA
  Claims: 209,938, Charges: $3464.6M, Payments: $1267.1M

- North Coast Medical Supply, Llc (NPI: 1245259282)
  Specialty: Pharmacy | State: CA
  Claims: 1,236,598, Charges: $3458.1M, Payments: $245.3M

- Lincare Pharmacy Services Inc. (NPI: 1780748939)
  Specialty: Pharmacy | State: FL
  Claims: 2,533,531, Charges: $2178.0M, Payments: $644.5M

- Zoll Services Llc (NPI: 1164535274)
  Specialty: Other Medical Supply Company | State: PA
  Claims: 345,064, Charges: $1365.1M, Payments: $738.0M

- Degc Enterprises (U.S.), Inc. (NPI: 1295827780)
  Specialty: Pharmacy | State: FL
  Claims: 1,329,923, Charges: $1291.7M, Payments: $325.9M

- United States Medical Supply, Llc (NPI: 1700889227)
  Specialty: Other Medical Supply Company | State: FL
  Claims: 3,296,437, Charges: $1103.4M, Payments: $297.8M

- 180 Me