## Setup and Data Import

In [1]:
import sys
# Put the parent directory first on the module search path — presumably so
# project-local modules can be imported from this notebook; confirm it is needed.
sys.path.insert(0, '..')

from joblib import dump, load

import numpy as np

import pandas as pd
# None lifts the cap on how many DataFrame columns pandas will display.
pd.set_option('display.max_columns', None)

import plotly.express as px

In [2]:
# Read the pickled provider table and key it by the 'Provider' column.
# NOTE(review): joblib's load() unpickles the file, which can run arbitrary
# code — only use it on a pickle from a trusted source.
providers = load('./data/Providers_Final.pkl').set_index('Provider')

# Displayed as the cell output: (rows, columns) of the indexed table.
providers.shape

(5410, 87)

## Variables

In [14]:
# Columns this notebook groups as "binary" — presumably two-valued flags;
# verify against the data.
binary_cols = [
    'PotentialFraud',
    'DualPatientProvider',
]

# Provider-level columns whose names carry neither an IP_ nor an OP_ prefix.
general_cols = [
    'Perc_Outpatient',
    'Perc_DualPatientType',
    'Ratio_ClaimsPerPatient',
    'Ratio_ClaimsPerAttPhys',
    'Perc_ClaimsPerTopFraudState',
    'Perc_HasTop5AdmtCode',
    'PatientsPerAttPhys',
    'PatientsPerOperPhys',
    'PatientsPerOthPhys',
    'Perc_MultHospAttPhys',
    'Perc_MultHospOperPhys',
    'Perc_MultHospOtherPhys',
]

# Collect every column whose name contains 'IP_' / 'OP_', in table order.
# This is a substring match anywhere in the name, not only at the start.
ip_cols = [name for name in providers.columns if 'IP_' in name]
op_cols = [name for name in providers.columns if 'OP_' in name]

In [4]:
# Hand-maintained groupings of the IP_-prefixed columns.

# Duplicate share, unique counts and mean chronic-condition count.
ip_general_cols = [
    'IP_Perc_Duplicates',
    'IP_Count_UniquePatients',
    'IP_Count_UniqueState',
    'IP_Mean_NumChronicConds',
]

ip_patient_cols = [
    # Age, death, gender and race columns.
    'IP_Mean_AgeAtClaim',
    'IP_Perc_HasDied',
    'IP_Perc_GenderZero',
    'IP_Perc_RaceOne',
    'IP_Perc_RaceTwo',
    'IP_Perc_RaceThree',
    # Columns named after individual chronic conditions.
    'IP_Perc_Alzheimers_Chronic',
    'IP_Perc_Cancer_Chronic',
    'IP_Perc_Depression_Chronic',
    'IP_Perc_Diabetes_Chronic',
    'IP_Perc_HeartFailure_Chronic',
    'IP_Perc_IschemicHeart_Chronic',
    'IP_Perc_KidneyDisease_Chronic',
    'IP_Perc_ObstrPulmonary_Chronic',
    'IP_Perc_Osteoporosis_Chronic',
    'IP_Perc_RheumatoidArthritis_Chronic',
    'IP_Perc_Stroke_Chronic',
    'IP_Perc_HasRenalDisease',
    # Physician-presence and multi-hospital columns.
    'IP_Perc_HasAllPhys',
    'IP_Perc_HasNoPhys',
    'IP_Perc_MultHosp',
]

ip_claim_cols = [
    # Duration and coverage-month columns.
    'IP_Mean_ClaimDuration',
    'IP_Mean_AdmitDuration',
    'IP_Mean_NoOfMonths_PartACov',
    'IP_Mean_NoOfMonths_PartBCov',
    # Cost, deductible and reimbursement columns.
    'IP_Mean_ClaimCost',
    'IP_Mean_DailyClaimCost',
    'IP_Mean_DeductibleAmtPaid',
    'IP_Mean_InscClaimAmtReimbursed',
    'IP_Mean_InsReimbursementRatio',
    'IP_Mean_AnnualDeductibleAmt',
    'IP_Mean_AnnualReimbursementAmt',
    'IP_Perc_No_ProcCode',
]

In [5]:
# Hand-maintained groupings of the OP_-prefixed columns.

# Duplicate share, unique counts and mean chronic-condition count.
op_general_cols = [
    'OP_Perc_Duplicates',
    'OP_Count_UniquePatients',
    'OP_Count_UniqueState',
    'OP_Mean_NumChronicConds',
]

op_patient_cols = [
    # Age, death, gender and race columns.
    'OP_Mean_AgeAtClaim',
    'OP_Perc_HasDied',
    'OP_Perc_GenderZero',
    'OP_Perc_RaceOne',
    'OP_Perc_RaceTwo',
    'OP_Perc_RaceThree',
    # Columns named after individual chronic conditions.
    'OP_Perc_Alzheimers_Chronic',
    'OP_Perc_Cancer_Chronic',
    'OP_Perc_Depression_Chronic',
    'OP_Perc_Diabetes_Chronic',
    'OP_Perc_HeartFailure_Chronic',
    'OP_Perc_IschemicHeart_Chronic',
    'OP_Perc_KidneyDisease_Chronic',
    'OP_Perc_ObstrPulmonary_Chronic',
    'OP_Perc_Osteoporosis_Chronic',
    'OP_Perc_RheumatoidArthritis_Chronic',
    'OP_Perc_Stroke_Chronic',
    'OP_Perc_HasRenalDisease',
    # Physician-presence and multi-hospital columns.
    'OP_Perc_HasAllPhys',
    'OP_Perc_HasNoPhys',
    'OP_Perc_MultHosp',
]

op_claim_cols = [
    # Duration and coverage-month columns.
    'OP_Mean_ClaimDuration',
    'OP_Mean_AdmitDuration',
    'OP_Mean_NoOfMonths_PartACov',
    'OP_Mean_NoOfMonths_PartBCov',
    # Cost, deductible and reimbursement columns.
    'OP_Mean_ClaimCost',
    'OP_Mean_DailyClaimCost',
    'OP_Mean_DeductibleAmtPaid',
    'OP_Mean_InscClaimAmtReimbursed',
    'OP_Mean_InsReimbursementRatio',
    'OP_Mean_AnnualDeductibleAmt',
    'OP_Mean_AnnualReimbursementAmt',
    'OP_Perc_No_ProcCode',
]

## Scatter Matrices

In [23]:
# Render one Plotly histogram per column listed in ip_patient_cols.
for col in ip_patient_cols:
    column_values = providers[col]
    fig = px.histogram(column_values)
    fig.show()