## Setup and Data Import

In [1]:
import sys
sys.path.insert(0, '..')

from joblib import load

import Sita_Functions as fxns
from Sita_Functions import np, pd

pd.set_option('display.max_columns', None)

from datetime import timedelta

import plotly.express as px
import plotly.graph_objects as go

## Claims DF

In [2]:
# Run the preprocessing script as a shell command before the data is loaded.
# NOTE(review): presumably this script writes 'claims.pkl', which the next
# cell reads — confirm against Sita_Preprocessing.py.
!python Sita_Preprocessing.py

In [3]:
# Deserialize the claims object from disk with joblib; the cells below use
# it as a pandas DataFrame.
# NOTE(review): joblib.load unpickles its input, which can execute arbitrary
# code — only load files from a trusted source.
claims = load('claims.pkl')

### New Columns

In [4]:
# Age in whole years at the start of the claim, computed from calendar
# dates: take the difference in years, then subtract one when the claim
# starts before that year's birthday. Dividing the elapsed days by a
# 365-day year ignores leap days and overstates the age by one year for
# claims that start shortly before a birthday.
birthday_pending = (
    (claims.ClaimStartDt.dt.month < claims.DOB.dt.month)
    | ( (claims.ClaimStartDt.dt.month == claims.DOB.dt.month)
        & (claims.ClaimStartDt.dt.day < claims.DOB.dt.day) )
)
claims['AgeAtService'] = (
    claims.ClaimStartDt.dt.year
    - claims.DOB.dt.year
    - birthday_pending.astype(int)
).astype(int)

# Claim cost: the amount reimbursed plus the deductible paid.
claims['ClaimCost'] = \
    claims.InscClaimAmtReimbursed + claims.DeductibleAmtPaid

# Whole days between admission and discharge.
claims['IPDuration'] = \
    (claims.DischargeDt - claims.AdmissionDt).dt.days

# Whole days between the start and the end of the claim.
claims['ClaimDuration'] = \
    (claims.ClaimEndDt - claims.ClaimStartDt).dt.days

### Variables

In [5]:
# Group the column names by dtype with one call and index into the result,
# rather than repeating the same call once per group.
# NOTE(review): positions 0, 1 and 2 are taken to be the numeric,
# categorical and date groups, as the variable names suggest — confirm
# against fxns.cols_by_dtype.
dtype_groups = fxns.cols_by_dtype(claims)
numeric_cols = dtype_groups[0]
categorical_cols = dtype_groups[1]
date_cols = dtype_groups[2]

# Column names containing 'Physician' and 'Chronic', respectively.
physician_cols = claims.columns[claims.columns.str.contains('Physician')].to_list()
chronic_cols = claims.columns[claims.columns.str.contains('Chronic')].to_list()

In [6]:
# Split the claims on the IsOutpatient flag, which is compared against the
# string codes '1' and '0'; a row holding any other value lands in neither
# subset.
outpatient_claims = claims[claims['IsOutpatient'].eq('1')]
inpatient_claims = claims[claims['IsOutpatient'].eq('0')]

### Summary Data

In [7]:
providers = claims.groupby('Provider')

In [8]:
groupby_size = providers.size()
groupby_size.name = 'Num_Claims'

In [9]:
groupby_mean = providers.mean().add_suffix('_mean').reset_index()

In [10]:
groupby_sum = providers.sum().add_suffix('_sum').reset_index()

In [11]:
providers = pd.merge(groupby_mean, groupby_sum, on='Provider')
providers = pd.merge(providers, groupby_size, on='Provider')

In [12]:
# Display the column names of the provider-level summary table.
providers.columns

Index(['Provider', 'InscClaimAmtReimbursed_mean', 'DeductibleAmtPaid_mean',
       'NoOfMonths_PartACov_mean', 'NoOfMonths_PartBCov_mean',
       'IPAnnualReimbursementAmt_mean', 'IPAnnualDeductibleAmt_mean',
       'OPAnnualReimbursementAmt_mean', 'OPAnnualDeductibleAmt_mean',
       'AgeAtService_mean', 'ClaimCost_mean', 'IPDuration_mean',
       'ClaimDuration_mean', 'InscClaimAmtReimbursed_sum',
       'DeductibleAmtPaid_sum', 'NoOfMonths_PartACov_sum',
       'NoOfMonths_PartBCov_sum', 'IPAnnualReimbursementAmt_sum',
       'IPAnnualDeductibleAmt_sum', 'OPAnnualReimbursementAmt_sum',
       'OPAnnualDeductibleAmt_sum', 'AgeAtService_sum', 'ClaimCost_sum',
       'IPDuration_sum', 'ClaimDuration_sum', 'Num_Claims'],
      dtype='object')

## New Columns

### Beneficiaries

### Doctors

### Codes

### Money

### Time

### Location