In [2]:
from datetime import timedelta

import numpy as np
import pandas as pd

# Claim Table Generator

In [3]:
# Synthetic claim-line inputs: one service date per calendar day from
# 2017-01-01 through today.
# pd.Timestamp.now() and np replace the pd.datetime / pd.np aliases, which
# were deprecated in pandas 1.0 and removed in pandas 2.0.
# NOTE: np.random is not seeded here, so every run produces different values.
service_dates = pd.date_range('2017-01-01', pd.Timestamp.now(), freq='D')
paid_dates = service_dates + timedelta(days=30)      # 30 days after the service date
received_dates = service_dates - timedelta(days=10)  # 10 days before the service date
allowed_amounts = np.random.random(len(service_dates)) * 1000  # uniform in [0, 1000)
# Claim numbers come from a pool one tenth the size of the line count, so most
# numbers are shared by several lines; // keeps the upper bound an integer.
claim_numbers = np.random.randint(10000000, 10000000 + len(service_dates) // 10, len(service_dates))
paid_amounts = allowed_amounts * .14

In [6]:
# Build the claim table in one chain:
#   1. one row per generated service date,
#   2. sum the amounts over rows sharing claim number and all three dates,
#   3. number the lines of each claim by service-date order,
#   4. put the columns in their output order.
claim_keys = ['CLM_NO', 'RCV_DT', 'SERV_DT', 'RESOLVED_DT']
claim_columns = ['CLM_NO', 'CLM_NO_LN', 'RCV_DT', 'SERV_DT', 'RESOLVED_DT', 'ALLOW_AMT', 'RESOLVED_AMT']

claim_data = (
    pd.DataFrame({
        'CLM_NO': claim_numbers,
        'RCV_DT': received_dates,
        'SERV_DT': service_dates,
        'RESOLVED_DT': paid_dates,
        'ALLOW_AMT': allowed_amounts,
        'RESOLVED_AMT': paid_amounts,
    })
    .groupby(claim_keys, as_index=False)
    .sum()
    .assign(CLM_NO_LN=lambda lines: lines.groupby('CLM_NO')['SERV_DT'].rank().astype(int))
    .loc[:, claim_columns]
)

In [7]:
# Write the claim table to CSV, leaving out the positional row index.
claim_data.to_csv(path_or_buf='ClaimDataExample1.csv', index=False)

In [8]:
# Write the same claim table to a second example file (no row index).
claim_data.to_csv(path_or_buf='ClaimDataExample2.csv', index=False)

In [9]:
# Write the same claim table to a third example file (no row index).
claim_data.to_csv(path_or_buf='ClaimDataExample3.csv', index=False)

# Control Table Generator

In [10]:
# Control totals: one row per claim_data column, holding the column's sum and
# its non-null count, in the column order ColumnName, Sum, Count.
# Sums are taken over numeric columns only. Asking agg(['sum', 'count']) to sum
# the datetime columns is expected to raise TypeError on pandas 2.x (older
# releases silently dropped them), so date columns get a blank (NaN) Sum here.
control_data = (
    pd.DataFrame({
        'Sum': claim_data.select_dtypes('number').sum(),
        'Count': claim_data.count(),
    })
    .reindex(claim_data.columns)   # restore claim_data's column order as row order
    .rename_axis('ColumnName')
    .reset_index()
)

In [11]:
# Write the control totals to CSV, leaving out the positional row index.
control_data.to_csv(path_or_buf='ControlDataExample.csv', index=False)

# Membership Table Generator

In [12]:
# Month-start dates from 2017-01-01 through today, each paired with a random
# membership quantity in [0, 2500000).
# pd.Timestamp.now() and np replace the pd.datetime / pd.np aliases, which
# were deprecated in pandas 1.0 and removed in pandas 2.0.
# NOTE: np.random is not seeded here, so every run produces different values.
member_dates = pd.date_range('2017-01-01', pd.Timestamp.now(), freq='MS')
member_quantity = np.random.randint(0, 2500000, len(member_dates))

In [13]:
# Membership table: one row per month start with its generated quantity.
member_data = pd.DataFrame({
    'MEM_MONTH': member_dates,
    'MEM_QTY': member_quantity,
})

In [14]:
# Write the membership table to CSV, leaving out the positional row index.
member_data.to_csv(path_or_buf='MemberDataExample.csv', index=False)

# Header Table Generator

In [15]:
# Preview the first five claim lines before deriving the header table.
claim_data.head(n=5)

Unnamed: 0,CLM_NO,CLM_NO_LN,RCV_DT,SERV_DT,RESOLVED_DT,ALLOW_AMT,RESOLVED_AMT
0,10000000,1,2017-02-04,2017-02-14,2017-03-16,63.067049,8.829387
1,10000000,2,2017-07-08,2017-07-18,2017-08-17,25.395067,3.555309
2,10000000,3,2017-09-15,2017-09-25,2017-10-25,901.775762,126.248607
3,10000000,4,2018-03-03,2018-03-13,2018-04-12,967.199097,135.407874
4,10000000,5,2018-08-04,2018-08-14,2018-09-13,325.940441,45.631662


In [16]:
# Header table: one row per distinct claim number, each assigned a line of
# business drawn at random (with replacement) from lob_names.
# np replaces the pd.np alias, which was removed in pandas 2.0; the list of
# choices has its own name so `lob` only ever refers to the sampled array.
# NOTE: np.random is not seeded here, so every run produces different values.
header_claim_number = claim_data.CLM_NO.unique()
lob_names = ['Broad PPO', 'Narrow', 'Centers of Excellence', 'Value-Based Care']
lob = np.random.choice(lob_names, len(header_claim_number))
header_data = pd.DataFrame(dict(CLM_NO=header_claim_number, LOB_NAME=lob))

In [17]:
# Write the header table to CSV, leaving out the positional row index.
header_data.to_csv(path_or_buf='HeaderDataExample.csv', index=False)