# Overview
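The purpose of this analysis is to compare the number and percentage of unique synthetic beneficiaries across service types (inpatient, outpatient, carrier, DME, HHA, hospice, and SNF) for 2022. <br>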
Visit Data.gov to download the data used for this analysis.

# Load libraries

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
from pathlib import Path
import matplotlib

%matplotlib inline
matplotlib.rcParams['figure.figsize'] = (12,8)

# Load data

In [2]:
def _read_pipe_delimited(csv_path):
    """Read one pipe-delimited CSV file into a DataFrame.

    Every input file in this notebook is read with the same options:
    ``sep="|"`` and ``low_memory=False`` (the latter tells pandas not to
    parse the file in internal chunks, which avoids mixed-type inference).
    """
    return pd.read_csv(csv_path, sep="|", low_memory=False)


# Beneficiary file (its shape is not printed here).
synthetic_bene_df = _read_pipe_delimited('data/beneficiary_2022.csv')

# Claim files, one per service type; each shape is reported right after the
# file is loaded. The label strings are kept exactly as they were so the
# printed output does not change.
synthetic_inpatient_df = _read_pipe_delimited('data/inpatient.csv')
print(f"inpatient shape {synthetic_inpatient_df.shape}")

synthetic_outpatient_df = _read_pipe_delimited('data/outpatient.csv')
print(f"outpatient shape{synthetic_outpatient_df.shape}")

synthetic_carrier_df = _read_pipe_delimited('data/carrier.csv')
print(f"carrier shape{synthetic_carrier_df.shape}")

synthetic_dme_df = _read_pipe_delimited('data/dme.csv')
print(f"dme shape {synthetic_dme_df.shape}")

synthetic_hha_df = _read_pipe_delimited('data/hha.csv')
print(f"hha shape {synthetic_hha_df.shape}")

synthetic_hospice_df = _read_pipe_delimited('data/hospice.csv')
print(f"hospice shape {synthetic_hospice_df.shape}")

synthetic_snf_df = _read_pipe_delimited('data/snf.csv')
print(f"snf shape {synthetic_snf_df.shape}")

inpatient shape (58066, 275)
outpatient shape(575092, 234)
carrier shape(1121004, 100)
dme shape (103828, 95)
hha shape (6215, 132)
hospice shape (12107, 130)
snf shape (12548, 230)


In [3]:
# Distinct BENE_ID values found in the beneficiary file
unique_ids = synthetic_bene_df['BENE_ID'].unique()

# Report the distinct-ID count next to the row count; when the two numbers
# match, no BENE_ID value is repeated across rows.
unique_id_count = len(unique_ids)
total_row_count = synthetic_bene_df.shape[0]
print(f"{unique_id_count} unique ids and {total_row_count} total rows")

8671 unique ids and 8671 total rows


# Calculate number of unique synthetic beneficiaries in 2022 by service type

In [4]:
# evaluate 2022 only
values = [2022]


def _claims_in_years(claims_df, years):
    """Return the rows of ``claims_df`` whose claim start year is in ``years``.

    ``CLM_FROM_DT`` is parsed with ``pd.to_datetime`` and its year is stored
    in a ``CLM_FROM_YR`` column. The result is a new DataFrame and
    ``claims_df`` itself is left untouched, so re-running this cell never
    assigns columns into an already-filtered slice (the pattern that raises
    pandas' SettingWithCopyWarning).

    Parameters
    ----------
    claims_df : pandas.DataFrame
        Frame containing a ``CLM_FROM_DT`` column.
    years : list-like
        Years to keep.

    Returns
    -------
    pandas.DataFrame
        The matching rows, with ``CLM_FROM_DT`` converted to datetime and
        ``CLM_FROM_YR`` holding the extracted year.
    """
    claim_dates = pd.to_datetime(claims_df['CLM_FROM_DT'])
    # Extract the year from the full column before filtering so the column's
    # dtype is the same as a whole-column `.dt.year` would produce.
    claim_years = claim_dates.dt.year
    in_scope = claim_years.isin(years)
    return claims_df.loc[in_scope].assign(
        CLM_FROM_DT=claim_dates[in_scope],
        CLM_FROM_YR=claim_years[in_scope],
    )


# Restrict every claim frame to the selected year(s); the variable names are
# kept so later cells keep working with the filtered frames.
synthetic_inpatient_df = _claims_in_years(synthetic_inpatient_df, values)
synthetic_outpatient_df = _claims_in_years(synthetic_outpatient_df, values)
synthetic_carrier_df = _claims_in_years(synthetic_carrier_df, values)
synthetic_dme_df = _claims_in_years(synthetic_dme_df, values)
synthetic_hha_df = _claims_in_years(synthetic_hha_df, values)
synthetic_hospice_df = _claims_in_years(synthetic_hospice_df, values)
synthetic_snf_df = _claims_in_years(synthetic_snf_df, values)

# print count of unique BENE_IDs in each service type
# (order: inpatient, outpatient, carrier, dme, hha, hospice, snf)
for service_df in (
    synthetic_inpatient_df,
    synthetic_outpatient_df,
    synthetic_carrier_df,
    synthetic_dme_df,
    synthetic_hha_df,
    synthetic_hospice_df,
    synthetic_snf_df,
):
    print(service_df[['BENE_ID']].nunique())

BENE_ID    1684
dtype: int64
BENE_ID    6053
dtype: int64
BENE_ID    7581
dtype: int64
BENE_ID    3653
dtype: int64
BENE_ID    61
dtype: int64
BENE_ID    150
dtype: int64
BENE_ID    235
dtype: int64
