## Setup and Data Import

In [1]:
import sys
sys.path.insert(0, '..')

from joblib import dump, load

import Functions as fxns
from Functions import np, pd

pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

from datetime import timedelta

In [2]:
# !python ../Preprocessing.py

In [3]:
claims = load('../claims.pkl')

## Claims DF

### New Columns

In [4]:
claims['AgeAtClaim'] = ((claims.ClaimStartDt - claims.DOB)
                              / timedelta(days=365)).astype(int)

claims['HasDied'] = claims.DOD.notna()
fxns.re_encode_bool(claims, ['HasDied'])

In [5]:
chronic_cols     = \
    claims.columns[claims.columns.str.contains('Chronic')].to_list()
claims['NumConds'] = claims[chronic_cols].sum(axis=1)

In [6]:
physician_cols = \
    claims.columns[claims.columns.str.contains('Phys')].to_list()
phys_count_cols  = ['HasAllPhys', 'HasNoPhys']

claims['HasAllPhys'] = claims[physician_cols].notna().all(axis=1)
claims['HasNoPhys']  = claims[physician_cols].isna().all(axis=1)

fxns.re_encode_bool(claims, phys_count_cols)

In [7]:
claims['ClaimDuration'] = \
    (claims.ClaimEndDt  - claims.ClaimStartDt).dt.days + 1
claims['AdmitDuration']    = \
    (claims.DischargeDt - claims.AdmissionDt).dt.days + 1

In [8]:
claims['ClaimCost'] = \
    claims.InscClaimAmtReimbursed + claims.DeductibleAmtPaid
claims['DailyClaimCost'] = \
    claims.ClaimCost / claims.ClaimDuration
claims['InsReimbursementRatio'] = \
    claims.InscClaimAmtReimbursed / claims.ClaimCost

### Variables

In [9]:
# lists:
numeric_cols     = fxns.cols_by_dtype(claims)[0]
categorical_cols = fxns.cols_by_dtype(claims)[1]
date_cols        = fxns.cols_by_dtype(claims)[2]

physician_cols = \
    claims.columns[claims.columns.str.contains('Phys')].to_list()
chronic_cols     = \
    claims.columns[claims.columns.str.contains('Chronic')].to_list()
diagnosis_cols   = \
    claims.columns[claims.columns.str.contains('Diagnosis')].to_list()
procedure_cols   = \
    claims.columns[claims.columns.str.contains('Procedure')].to_list()

# dataframes:
outpatient_claims = claims.loc[claims.IsOutpatient == 1]
inpatient_claims  = claims.loc[claims.IsOutpatient == 0]

## Providers DF

In [30]:
providers = claims.groupby(['Provider', 'PotentialFraud', 'IsOutpatient'
                           ]).size().reset_index().drop(0, axis=1)

### Beneficiaries

In [31]:
providers['Perc_GenderZero'] = \
    claims.groupby(['Provider', 'IsOutpatient']).Gender.mean().values
providers['Perc_HasRenalDisease'] = \
    claims.groupby(['Provider', 'IsOutpatient']).RenalDisease.mean().values
providers['Mean_AgeAtClaim'] = \
    claims.groupby(['Provider', 'IsOutpatient']).AgeAtClaim.mean().values
providers['Perc_HasDied'] = \
    claims.groupby(['Provider', 'IsOutpatient']).HasDied.mean().values
providers['Mean_NumChronicConds'] = \
    claims.groupby(['Provider', 'IsOutpatient']).NumConds.mean().values

In [32]:
race_by_provider = \
    claims.groupby(['Provider', 'IsOutpatient', 'Race']).ClaimID.count().reset_index()

race = pd.DataFrame()
race['race1'] = race_by_provider[race_by_provider.Race == 1
                                ].ClaimID.to_list()
race['race2'] = race_by_provider[race_by_provider.Race == 2
                                ].ClaimID.to_list()
race['race3'] = race_by_provider[race_by_provider.Race == 3
                                ].ClaimID.to_list()
race['race5'] = race_by_provider[race_by_provider.Race == 5
                                ].ClaimID.to_list()

providers['Perc_RaceOne']   = race.race1 / np.sum(race, axis=1)
providers['Perc_RaceTwo']   = race.race2 / np.sum(race, axis=1)
providers['Perc_RaceThree'] = race.race3 / np.sum(race, axis=1)

In [33]:
for col in chronic_cols:
    colname = f'Perc_{col}'
    providers[colname] = claims.groupby(['Provider', 'IsOutpatient'
                                        ])[col].mean().values

### Doctors

In [34]:
for col in phys_count_cols:
    colname = f'Perc_{col}'
    providers[colname] = claims.groupby(['Provider', 'IsOutpatient'
                                        ])[col].mean().values

### Money

In [35]:
providers['Mean_InsReimbursementRatio'] = \
    claims.groupby(['Provider', 'IsOutpatient']).InsReimbursementRatio.mean().values

In [36]:
money_cols = ['InscClaimAmtReimbursed', 'DeductibleAmtPaid',
              'ClaimCost', 'DailyClaimCost',
              'IPAnnualReimbursementAmt', 'IPAnnualDeductibleAmt',
              'OPAnnualReimbursementAmt', 'OPAnnualDeductibleAmt']

for col in money_cols:
    colname = f'Mean_{col}'
    providers[colname] = claims.groupby(['Provider', 'IsOutpatient'
                                        ])[col].mean().values

### Time

In [37]:
mean_cols = ['NoOfMonths_PartACov', 'NoOfMonths_PartBCov',
 'ClaimDuration', 'AdmitDuration']

for col in mean_cols:
    colname = f'Mean_{col}'
    providers[colname] = claims.groupby(['Provider', 'IsOutpatient'
                                        ])[col].mean().values

## Pivot

In [38]:
already_ip_op_cols = ['Mean_IPAnnualReimbursementAmt', 'Mean_IPAnnualDeductibleAmt',
                'Mean_OPAnnualReimbursementAmt', 'Mean_OPAnnualDeductibleAmt']
providers.drop(already_ip_op_cols, axis=1, inplace=True)

In [39]:
ip = providers[providers.IsOutpatient == 0].add_prefix('IP_')
ip.rename(columns={'IP_Provider': 'Provider',
                   'IP_PotentialFraud': 'PotentialFraud',
                   'IP_IsOutpatient': 'IsOutpatient'},
          inplace=True)

op = providers[providers.IsOutpatient == 1].add_prefix('OP_')
op.rename(columns={'OP_Provider': 'Provider',
                   'OP_PotentialFraud': 'PotentialFraud',
                   'OP_IsOutpatient': 'IsOutpatient'},
          inplace=True)

In [40]:
already_ip_op_cols = ['IPAnnualReimbursementAmt', 'IPAnnualDeductibleAmt',
                'OPAnnualReimbursementAmt', 'OPAnnualDeductibleAmt']
already_ip_op = claims.groupby('Provider')[already_ip_op_cols].mean()

In [41]:
providers = claims.groupby('Provider').size().reset_index().drop(0, axis=1)
providers = providers.merge(op, on='Provider', how='left')
providers = providers.merge(ip, on='Provider', how='left')
providers = providers.merge(already_ip_op, on='Provider', how='left')

providers.drop(['PotentialFraud_x', 'IsOutpatient_x',
                'PotentialFraud_y', 'IsOutpatient_y',
                'OP_Mean_AdmitDuration'], axis=1, inplace=True)

pf = claims.groupby(['Provider', 'PotentialFraud']).size().reset_index().drop(0, axis=1)

providers = providers.merge(pf, on='Provider')

### Data Import

In [42]:
lucas = pd.read_csv('./data/Lucas_Providers.csv')
lucas.drop(['percentage_InOutpatients', 'percentage_noPhysician'],
           axis=1, inplace=True)
lucas.columns = \
    ['Provider', 'Perc_HasTop5AdmtCode']

In [43]:
ryan = pd.read_csv('./data/Ryan_Providers.csv')
ryan.drop(['Unnamed: 0', 'AllPhy_mean_IP', 'AllPhy_mean_OP',
           'NoPhy_mean_IP', 'NoPhy_mean_OP',
           'ClaimDuration_mean_IP', 'ClaimDuration_mean_OP',
           'InscClaimAmtReimbursed_mean_IP', 'InscClaimAmtReimbursed_mean_OP',
           'AdmisDuration_mean_IP', 'AdmisDuration_mean_OP',
           'AgeAtClm_mean_IP', 'AgeAtClm_mean_OP',
           'Chronic_Sum_mean_IP', 'Chronic_Sum_mean_OP',
           'DeductibleAmtPaid_mean_IP', 'DeductibleAmtPaid_mean_OP',
           'InsCovRatio_mean_IP', 'InsCovRatio_mean_OP',
           'RevPerDay_mean_IP', 'RevPerDay_mean_OP'],
          axis=1, inplace=True)

In [44]:
providers = providers.merge(ryan).merge(lucas)

### Post-processing

In [45]:
providers.fillna(0, inplace=True)

In [46]:
providers.rename(columns={
    'IPAnnualReimbursementAmt': 'IP_Mean_AnnualReimbursementAmt',
    'IPAnnualDeductibleAmt': 'IP_Mean_AnnualDeductibleAmt',
    'OPAnnualReimbursementAmt': 'OP_Mean_AnnualReimbursementAmt',
    'OPAnnualDeductibleAmt': 'OP_Mean_AnnualDeductibleAmt',
    'Claim_Patient_Ratio': 'Ratio_ClaimsPerPatient',
    'Patient_Attphy_Ratio': 'PatientsPerAttPhys',
    'Patient_Operphy_Ratio': 'PatientsPerOperPhys',
    'Patient_Otherphy_Ratio': 'PatientsPerOthPhys',
    'Claim_AttPhy_Ratio': 'Ratio_ClaimsPerAttPhys',
    'IsOutpatient_Perc': 'Perc_Outpatient',
    'BeneID_Nunique_IP': 'IP_Count_UniquePatients',
    'State_Nunique_IP': 'IP_Count_UniqueState',
    'BeneID_Nunique_OP': 'OP_Count_UniquePatients',
    'State_Nunique_OP': 'OP_Count_UniqueState',
    'Att_Phy_Mult_Prec': 'Perc_MultHospAttPhys',
    'Oper_Phy_Mult_Prec': 'Perc_MultHospOperPhys',
    'Other_Phy_Mult_Prec': 'Perc_MultHospOtherPhys',
    'IP_Multiple_Hospital_Prec': 'IP_Perc_MultHosp',
    'OP_Multiple_Hospital_Prec': 'OP_Perc_MultHosp',
    'Provider_Serve_BothIO': 'DualPatientProvider',
    'Bene_Receive_Both_IO_Perc': 'Perc_DualPatientType',
    'IP_Dup_Perc': 'IP_Perc_Duplicates',
    'OP_Dup_Perc': 'OP_Perc_Duplicates',
    'IP_No_Proc_Perc': 'IP_Perc_No_ProcCode',
    'OP_No_Diag_Perc': 'OP_Perc_No_DiagCode',
    'In_Top5_State_Perc': 'Perc_ClaimsPerTopFraudState'}, inplace=True);

In [47]:
providers = providers[[
    'Provider',
    'PotentialFraud',
    'Perc_Outpatient',
    'DualPatientProvider',
    'Perc_DualPatientType',
    'Ratio_ClaimsPerPatient',
    'Ratio_ClaimsPerAttPhys',
    'Perc_ClaimsPerTopFraudState',
    'Perc_HasTop5AdmtCode',
    'PatientsPerAttPhys',
    'PatientsPerOperPhys',
    'PatientsPerOthPhys',
    'Perc_MultHospAttPhys',
    'Perc_MultHospOperPhys',
    'Perc_MultHospOtherPhys',
    'IP_Perc_Duplicates',
    'IP_Count_UniquePatients',
    'IP_Count_UniqueState',
    'IP_Mean_AgeAtClaim',
    'IP_Perc_HasDied',
    'IP_Perc_GenderZero',
    'IP_Perc_RaceOne',
    'IP_Perc_RaceTwo',
    'IP_Perc_RaceThree',
    'IP_Mean_NumChronicConds',
    'IP_Perc_Alzheimers_Chronic',
    'IP_Perc_Cancer_Chronic',
    'IP_Perc_Depression_Chronic',
    'IP_Perc_Diabetes_Chronic',
    'IP_Perc_HeartFailure_Chronic',
    'IP_Perc_IschemicHeart_Chronic',
    'IP_Perc_KidneyDisease_Chronic',
    'IP_Perc_ObstrPulmonary_Chronic',
    'IP_Perc_Osteoporosis_Chronic',
    'IP_Perc_RheumatoidArthritis_Chronic',
    'IP_Perc_Stroke_Chronic',
    'IP_Perc_HasRenalDisease',
    'IP_Mean_ClaimDuration',
    'IP_Mean_AdmitDuration',
    'IP_Mean_NoOfMonths_PartACov',
    'IP_Mean_NoOfMonths_PartBCov',
    'IP_Mean_ClaimCost',
    'IP_Mean_DailyClaimCost',
    'IP_Mean_DeductibleAmtPaid',
    'IP_Mean_InscClaimAmtReimbursed',
    'IP_Mean_InsReimbursementRatio',
    'IP_Mean_AnnualDeductibleAmt',
    'IP_Mean_AnnualReimbursementAmt',
    'IP_Perc_No_ProcCode',
    'IP_Perc_HasAllPhys',
    'IP_Perc_HasNoPhys',
    'IP_Perc_MultHosp',
    'OP_Perc_Duplicates',
    'OP_Count_UniquePatients',
    'OP_Count_UniqueState',
    'OP_Mean_AgeAtClaim',
    'OP_Perc_HasDied',
    'OP_Perc_GenderZero',
    'OP_Perc_RaceOne',
    'OP_Perc_RaceTwo',
    'OP_Perc_RaceThree',
    'OP_Mean_NumChronicConds',
    'OP_Perc_Alzheimers_Chronic',
    'OP_Perc_Cancer_Chronic',
    'OP_Perc_Depression_Chronic',
    'OP_Perc_Diabetes_Chronic',
    'OP_Perc_HeartFailure_Chronic',
    'OP_Perc_IschemicHeart_Chronic',
    'OP_Perc_KidneyDisease_Chronic',
    'OP_Perc_ObstrPulmonary_Chronic',
    'OP_Perc_Osteoporosis_Chronic',
    'OP_Perc_RheumatoidArthritis_Chronic',
    'OP_Perc_Stroke_Chronic',
    'OP_Perc_HasRenalDisease',
    'OP_Mean_ClaimDuration',
    'OP_Mean_NoOfMonths_PartACov',
    'OP_Mean_NoOfMonths_PartBCov',
    'OP_Mean_ClaimCost',
    'OP_Mean_DailyClaimCost',
    'OP_Mean_DeductibleAmtPaid',
    'OP_Mean_InscClaimAmtReimbursed',
    'OP_Mean_InsReimbursementRatio',
    'OP_Mean_AnnualDeductibleAmt',
    'OP_Mean_AnnualReimbursementAmt',
    'OP_Perc_No_DiagCode',
    'OP_Perc_HasAllPhys',
    'OP_Perc_HasNoPhys',
    'OP_Perc_MultHosp']]

In [48]:
providers.sample(10)

Unnamed: 0,Provider,PotentialFraud,Perc_Outpatient,DualPatientProvider,Perc_DualPatientType,Ratio_ClaimsPerPatient,Ratio_ClaimsPerAttPhys,Perc_ClaimsPerTopFraudState,Perc_HasTop5AdmtCode,PatientsPerAttPhys,PatientsPerOperPhys,PatientsPerOthPhys,Perc_MultHospAttPhys,Perc_MultHospOperPhys,Perc_MultHospOtherPhys,IP_Perc_Duplicates,IP_Count_UniquePatients,IP_Count_UniqueState,IP_Mean_AgeAtClaim,IP_Perc_HasDied,IP_Perc_GenderZero,IP_Perc_RaceOne,IP_Perc_RaceTwo,IP_Perc_RaceThree,IP_Mean_NumChronicConds,IP_Perc_Alzheimers_Chronic,IP_Perc_Cancer_Chronic,IP_Perc_Depression_Chronic,IP_Perc_Diabetes_Chronic,IP_Perc_HeartFailure_Chronic,IP_Perc_IschemicHeart_Chronic,IP_Perc_KidneyDisease_Chronic,IP_Perc_ObstrPulmonary_Chronic,IP_Perc_Osteoporosis_Chronic,IP_Perc_RheumatoidArthritis_Chronic,IP_Perc_Stroke_Chronic,IP_Perc_HasRenalDisease,IP_Mean_ClaimDuration,IP_Mean_AdmitDuration,IP_Mean_NoOfMonths_PartACov,IP_Mean_NoOfMonths_PartBCov,IP_Mean_ClaimCost,IP_Mean_DailyClaimCost,IP_Mean_DeductibleAmtPaid,IP_Mean_InscClaimAmtReimbursed,IP_Mean_InsReimbursementRatio,IP_Mean_AnnualDeductibleAmt,IP_Mean_AnnualReimbursementAmt,IP_Perc_No_ProcCode,IP_Perc_HasAllPhys,IP_Perc_HasNoPhys,IP_Perc_MultHosp,OP_Perc_Duplicates,OP_Count_UniquePatients,OP_Count_UniqueState,OP_Mean_AgeAtClaim,OP_Perc_HasDied,OP_Perc_GenderZero,OP_Perc_RaceOne,OP_Perc_RaceTwo,OP_Perc_RaceThree,OP_Mean_NumChronicConds,OP_Perc_Alzheimers_Chronic,OP_Perc_Cancer_Chronic,OP_Perc_Depression_Chronic,OP_Perc_Diabetes_Chronic,OP_Perc_HeartFailure_Chronic,OP_Perc_IschemicHeart_Chronic,OP_Perc_KidneyDisease_Chronic,OP_Perc_ObstrPulmonary_Chronic,OP_Perc_Osteoporosis_Chronic,OP_Perc_RheumatoidArthritis_Chronic,OP_Perc_Stroke_Chronic,OP_Perc_HasRenalDisease,OP_Mean_ClaimDuration,OP_Mean_NoOfMonths_PartACov,OP_Mean_NoOfMonths_PartBCov,OP_Mean_ClaimCost,OP_Mean_DailyClaimCost,OP_Mean_DeductibleAmtPaid,OP_Mean_InscClaimAmtReimbursed,OP_Mean_InsReimbursementRatio,OP_Mean_AnnualDeductibleAmt,OP_Mean_AnnualReimbursementAmt,OP_Perc_No_DiagCode,OP_Perc_HasAllPhys,OP_Perc_HasNoPhys,OP_Perc_MultHosp
5241,PRV57560,0,0.5,1.0,1.0,1.0,1.0,0.0,0.0,0,1,2,0.0,0.5,0.0,0.0,1.0,1.0,77.0,0.0,1.0,0.705882,0.27451,0.019608,5.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,3.0,3.0,12.0,12.0,10068.0,3356.0,1068.0,9000.0,0.893921,1068.0,10000.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,50.0,0.0,0.0,0.0,0.0,0.0,8.0,1.0,0.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,12.0,12.0,0.0,0.0,0.0,0.0,0.0,485.0,1210.0,0.0,0.0,0.0,1.0
1530,PRV52897,0,1.0,0.0,0.377778,1.153846,1.875,0.0,0.0,15,33,21,0.088889,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,593.333333,5517.777778,0.0,0.0,0.0,0.111111,0.340909,39.0,3.0,73.177778,0.0,0.377778,0.75,0.25,0.0,5.022222,0.466667,0.177778,0.533333,0.755556,0.666667,0.888889,0.444444,0.244444,0.355556,0.355556,0.133333,0.088889,1.666667,12.0,12.0,209.777778,195.155363,6.666667,203.111111,0.96686,602.0,2193.555556,0.022222,0.088889,0.0,0.933333
400,PRV51504,0,1.0,0.0,0.210938,1.04065,2.285714,0.890625,0.039062,67,108,90,0.03125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,374.40625,3299.84375,0.0,0.0,0.0,0.054688,0.464567,123.0,8.0,73.71875,0.007812,0.359375,0.8,0.0,0.1,4.125,0.351562,0.109375,0.421875,0.671875,0.5625,0.75,0.335938,0.296875,0.289062,0.273438,0.0625,0.203125,2.546875,12.0,12.0,279.140625,220.359766,3.4375,275.703125,0.981989,619.84375,2144.765625,0.007812,0.039062,0.0,0.921875
1590,PRV52976,0,0.0,0.0,1.0,1.0,4.0,0.0,0.25,3,2,4,1.0,0.0,0.0,0.0,4.0,1.0,75.5,0.0,0.75,0.737931,0.193103,0.041379,5.0,0.5,0.0,0.25,0.25,0.75,0.75,0.5,0.5,0.25,0.75,0.5,0.25,3.75,3.75,12.0,12.0,8318.0,2443.983333,1068.0,7250.0,0.842697,1335.0,9535.0,0.5,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,647.5,1455.0,0.0,0.0,0.0,0.75
1197,PRV52493,0,1.0,0.0,0.125,1.0,5.333333,0.0,0.0,13,15,14,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,467.25,3700.0,0.0,0.0,0.0,0.0625,0.3125,16.0,1.0,74.9375,0.0,0.4375,0.934783,0.065217,0.0,5.0,0.5,0.125,0.5625,0.5625,0.625,0.8125,0.5,0.5,0.3125,0.375,0.125,0.125,1.0,11.25,12.0,248.125,248.125,0.0,248.125,1.0,392.5,1996.875,0.0,0.0,0.0,1.0
3365,PRV55218,0,1.0,0.0,0.340426,1.146341,2.764706,0.0,0.031915,48,69,65,0.308511,0.031915,0.138298,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,522.638298,4163.93617,0.0,0.0,0.0,0.085106,0.419355,82.0,5.0,70.787234,0.0,0.393617,0.828508,0.091314,0.040089,4.797872,0.382979,0.085106,0.43617,0.797872,0.702128,0.765957,0.489362,0.255319,0.340426,0.457447,0.085106,0.265957,2.840426,12.0,12.0,226.276596,134.640831,3.829787,222.446809,0.965967,759.893617,2455.0,0.010638,0.095745,0.010638,0.946809
4070,PRV56093,0,1.0,0.0,0.285714,1.0,1.75,0.0,0.0,3,7,6,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,610.285714,6577.142857,0.0,0.0,0.0,0.0,0.571429,7.0,1.0,83.428571,0.0,0.714286,0.824176,0.153846,0.010989,5.0,0.714286,0.0,0.428571,0.714286,1.0,0.714286,0.714286,0.285714,0.0,0.285714,0.142857,0.285714,6.714286,12.0,12.0,295.714286,244.013605,0.0,295.714286,1.0,580.0,1705.714286,0.0,0.0,0.0,1.0
3515,PRV55412,0,1.0,0.0,0.090909,2.75,3.666667,0.0,0.0,1,3,1,0.090909,0.0,0.090909,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,291.272727,1364.545455,0.0,0.0,0.0,0.0,0.272727,4.0,1.0,71.636364,0.0,0.090909,0.939394,0.015152,0.030303,4.545455,0.090909,0.0,0.090909,0.818182,1.0,0.818182,0.727273,0.090909,0.181818,0.636364,0.090909,0.090909,3.363636,12.0,12.0,176.363636,119.666667,0.0,176.363636,1.0,618.181818,1262.727273,0.0,0.0,0.0,0.727273
2233,PRV53776,1,0.765,1.0,0.46,1.04712,2.409639,0.455,0.025,108,165,153,0.03,0.0,0.02,0.021277,45.0,2.0,75.404255,0.0,0.382979,0.939394,0.045455,0.015152,5.06383,0.531915,0.255319,0.382979,0.787234,0.638298,0.851064,0.489362,0.425532,0.276596,0.297872,0.12766,0.212766,6.06383,6.06383,12.0,12.0,12344.595745,3178.651603,1068.0,11276.595745,0.846533,833.96,7182.3,0.425532,0.021277,0.0,0.115,0.427632,148.0,6.0,73.326797,0.006536,0.424837,0.0,0.0,0.0,4.27451,0.333333,0.143791,0.411765,0.666667,0.607843,0.75817,0.418301,0.27451,0.320261,0.248366,0.091503,0.202614,2.300654,12.0,11.960784,277.777778,177.75712,3.529412,274.248366,0.986452,522.85,1800.95,0.006536,0.052288,0.006536,0.875
4451,PRV56570,0,0.566667,1.0,0.533333,1.111111,1.304348,0.0,0.033333,4,22,19,0.066667,0.033333,0.0,0.076923,12.0,1.0,79.307692,0.0,0.153846,0.925,0.0,0.075,7.230769,0.692308,0.538462,0.846154,0.923077,0.846154,0.923077,0.692308,0.615385,0.538462,0.384615,0.230769,0.538462,5.615385,5.615385,12.0,12.0,12452.615385,2433.479853,1068.0,11384.615385,0.864165,1412.666667,15079.666667,0.538462,0.153846,0.0,0.266667,0.3125,15.0,4.0,75.352941,0.0,0.352941,0.966667,0.0,0.033333,4.705882,0.529412,0.235294,0.411765,0.647059,0.647059,0.764706,0.588235,0.294118,0.235294,0.294118,0.058824,0.117647,1.176471,12.0,12.0,172.941176,147.058824,0.0,172.941176,1.0,964.0,2567.666667,0.058824,0.0,0.0,0.8


## Export

In [49]:
dump(providers, './data/Providers_Final.pkl')

['./data/Providers_Final.pkl']