## Setup and Data Import

In [1]:
import sys
sys.path.insert(0, '..')

from joblib import dump, load

import Functions as fxns
from Functions import np, pd

# Lift pandas' display truncation so wide/long frames are shown in full.
for display_option in ('display.max_columns', 'display.max_rows'):
    pd.set_option(display_option, None)

from datetime import timedelta

In [2]:
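# Optional: uncomment to re-run the preprocessing script before loading.
# (Presumably it regenerates ../claims.pkl, which the next cell loads - TODO confirm.)
# !python ../Preprocessing.py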

In [3]:
# Load the claims table from a joblib pickle. joblib.load unpickles the
# file, which can execute arbitrary code, so only load a claims.pkl that you
# produced yourself.
claims = load('../claims.pkl')

## Claims DF

### New Columns

In [4]:
# Whole years between date of birth and claim start, using a fixed 365-day
# year (leap days are ignored, so the age is a close approximation).
days_per_year = timedelta(days=365)
age_in_years = (claims['ClaimStartDt'] - claims['DOB']) / days_per_year
claims['AgeAtClaim'] = age_in_years.astype(int)

# Flag claims whose beneficiary has a recorded date of death, then hand the
# boolean column to the project's re-encoding helper.
claims['HasDied'] = claims['DOD'].notna()
fxns.re_encode_bool(claims, ['HasDied'])

# Row-wise total across every column whose name contains "Chronic".
chronic_cols = [name for name in claims.columns if 'Chronic' in name]
claims['NumConds'] = claims[chronic_cols].sum(axis=1)

In [5]:
# Flag columns created by this cell. Their names also contain "Phys", so they
# are explicitly excluded from `physician_cols`; otherwise re-running the cell
# would feed the flags back into their own computation.
phys_count_cols = ['HasAllPhys', 'HasNoPhys']
physician_cols = [
    name for name in claims.columns
    if 'Phys' in name and name not in phys_count_cols
]

# HasAllPhys: every physician column is populated on the claim.
# HasNoPhys:  every physician column is missing on the claim.
claims['HasAllPhys'] = claims[physician_cols].notna().all(axis=1)
claims['HasNoPhys'] = claims[physician_cols].isna().all(axis=1)

# Pass the two boolean flags through the project's re-encoding helper.
fxns.re_encode_bool(claims, phys_count_cols)

In [6]:
# Day counts with both endpoints included (hence the +1).
claim_span = claims['ClaimEndDt'] - claims['ClaimStartDt']
admit_span = claims['DischargeDt'] - claims['AdmissionDt']

claims['ClaimDuration'] = claim_span.dt.days + 1
claims['AdmitDuration'] = admit_span.dt.days + 1

In [7]:
# Total claim cost = insurer reimbursement + deductible paid.
reimbursed = claims['InscClaimAmtReimbursed']
claims['ClaimCost'] = reimbursed + claims['DeductibleAmtPaid']

# Cost per day of the claim, and the insurer-reimbursed share of the cost.
claims['DailyClaimCost'] = claims['ClaimCost'] / claims['ClaimDuration']
claims['InsReimbursementRatio'] = reimbursed / claims['ClaimCost']

### Variables

In [8]:
# lists:
# Call the dtype helper once and index into its result instead of recomputing
# it for each list.
dtype_groups = fxns.cols_by_dtype(claims)
numeric_cols     = dtype_groups[0]
categorical_cols = dtype_groups[1]
date_cols        = dtype_groups[2]

# Column families selected by a name fragment. The derived HasAllPhys /
# HasNoPhys flags also contain "Phys", so they are filtered out to keep
# `physician_cols` limited to the original physician columns.
physician_cols = [
    name for name in claims.columns
    if 'Phys' in name and name not in phys_count_cols
]
chronic_cols   = [name for name in claims.columns if 'Chronic' in name]
diagnosis_cols = [name for name in claims.columns if 'Diagnosis' in name]
procedure_cols = [name for name in claims.columns if 'Procedure' in name]

# dataframes:
# outpatient_claims = claims.loc[claims.IsOutpatient == 1]
# inpatient_claims  = claims.loc[claims.IsOutpatient == 0]

## Providers DF

In [9]:
# One row per (Provider, PotentialFraud, IsOutpatient) combination, in the
# groupby's sorted key order; only the key columns are kept.
provider_key_cols = ['Provider', 'PotentialFraud', 'IsOutpatient']
providers = (
    claims.groupby(provider_key_cols)
          .size()
          .index
          .to_frame(index=False)
)

### Beneficiaries

In [10]:
# Per (Provider, IsOutpatient) means of beneficiary attributes.
# Values are assigned positionally, so this relies on the groupby order
# matching the row order of `providers`.
by_provider_setting = claims.groupby(['Provider', 'IsOutpatient'])

beneficiary_features = {
    'Perc_GenderZero': 'Gender',
    'Perc_HasRenalDisease': 'RenalDisease',
    'Mean_AgeAtClaim': 'AgeAtClaim',
    'Perc_HasDied': 'HasDied',
    'Mean_NumChronicConds': 'NumConds',
}

for feature_name, source_col in beneficiary_features.items():
    providers[feature_name] = by_provider_setting[source_col].mean().values

In [11]:
# Share of each provider/setting's claims by beneficiary Race code.
#
# The shares are joined to `providers` on (Provider, IsOutpatient) rather than
# assigned by row label. The earlier list-based approach built a frame with a
# fresh 0..n-1 index that was not guaranteed to describe the same groups as
# the rows of `providers`, so shares could land on the wrong provider.
provider_keys = ['Provider', 'IsOutpatient']
race_codes = [1, 2, 3, 5]

# Claim counts per (Provider, IsOutpatient, Race). observed=True keeps only
# combinations that actually occur when a grouping key is categorical.
race_by_provider = (
    claims.groupby(provider_keys + ['Race'], observed=True)
          .ClaimID.count()
          .reset_index()
)
# Plain integer codes so the pivoted column labels are ordinary ints.
race_by_provider['Race'] = race_by_provider['Race'].astype(int)

# One row per (Provider, IsOutpatient), one column per race code; a race with
# no claims for that group counts as 0.
race_counts = (
    race_by_provider.set_index(provider_keys + ['Race'])['ClaimID']
                    .unstack('Race', fill_value=0)
                    .reindex(columns=race_codes, fill_value=0)
)

# Left-join on the keys so row i of `race` describes row i of `providers`.
race = (
    providers[provider_keys]
    .merge(race_counts.reset_index(), on=provider_keys, how='left')
    [race_codes]
    .fillna(0)
)
race.columns = ['race1', 'race2', 'race3', 'race5']

# Denominator covers all four race codes; only three shares are stored.
race_total = race.sum(axis=1)
providers['Perc_RaceOne']   = (race['race1'] / race_total).values
providers['Perc_RaceTwo']   = (race['race2'] / race_total).values
providers['Perc_RaceThree'] = (race['race3'] / race_total).values

In [12]:
# Mean of each chronic-condition column per (Provider, IsOutpatient),
# stored as Perc_<column>. Assigned positionally, like the cells above.
chronic_grouped = claims.groupby(['Provider', 'IsOutpatient'])
for chronic_col in chronic_cols:
    providers[f'Perc_{chronic_col}'] = chronic_grouped[chronic_col].mean().values

### Doctors

In [13]:
# Mean of the HasAllPhys / HasNoPhys flags per (Provider, IsOutpatient),
# stored as Perc_<flag>.
phys_grouped = claims.groupby(['Provider', 'IsOutpatient'])
for flag_col in phys_count_cols:
    providers[f'Perc_{flag_col}'] = phys_grouped[flag_col].mean().values

### Money

In [14]:
# Mean insurer-reimbursement ratio per (Provider, IsOutpatient).
reimbursement_ratio_mean = (
    claims.groupby(['Provider', 'IsOutpatient'])['InsReimbursementRatio'].mean()
)
providers['Perc_InsReimbursementRatio'] = reimbursement_ratio_mean.values

In [15]:
# Monetary columns averaged per (Provider, IsOutpatient) as Mean_<column>.
money_cols = [
    'InscClaimAmtReimbursed',
    'DeductibleAmtPaid',
    'ClaimCost',
    'DailyClaimCost',
    'IPAnnualReimbursementAmt',
    'IPAnnualDeductibleAmt',
    'OPAnnualReimbursementAmt',
    'OPAnnualDeductibleAmt',
]

money_grouped = claims.groupby(['Provider', 'IsOutpatient'])
for money_col in money_cols:
    providers[f'Mean_{money_col}'] = money_grouped[money_col].mean().values

### Time

In [16]:
# Coverage-month and duration columns averaged per (Provider, IsOutpatient)
# as Mean_<column>.
mean_cols = [
    'NoOfMonths_PartACov',
    'NoOfMonths_PartBCov',
    'ClaimDuration',
    'AdmitDuration',
]

time_grouped = claims.groupby(['Provider', 'IsOutpatient'])
for time_col in mean_cols:
    providers[f'Mean_{time_col}'] = time_grouped[time_col].mean().values

## Pivot

In [17]:
# The annual IP/OP amounts are already split by setting in their names, so
# their per-setting means are removed before the IP/OP pivot below.
already_ip_op_cols = [
    'Mean_IPAnnualReimbursementAmt',
    'Mean_IPAnnualDeductibleAmt',
    'Mean_OPAnnualReimbursementAmt',
    'Mean_OPAnnualDeductibleAmt',
]
providers = providers.drop(columns=already_ip_op_cols)

In [18]:
# Split the provider rows by setting and prefix every feature with IP_/OP_.
# The three key columns get their plain names back after prefixing.
ip_key_names = {
    'IP_Provider': 'Provider',
    'IP_PotentialFraud': 'PotentialFraud',
    'IP_IsOutpatient': 'IsOutpatient',
}
op_key_names = {
    'OP_Provider': 'Provider',
    'OP_PotentialFraud': 'PotentialFraud',
    'OP_IsOutpatient': 'IsOutpatient',
}

is_outpatient_row = providers['IsOutpatient'] == 1
is_inpatient_row = providers['IsOutpatient'] == 0

ip = providers[is_inpatient_row].add_prefix('IP_').rename(columns=ip_key_names)
op = providers[is_outpatient_row].add_prefix('OP_').rename(columns=op_key_names)

In [19]:
# Provider-level means (over all of a provider's claims) of the annual
# amounts that are already split into IP/OP by name. Indexed by Provider.
already_ip_op_cols = [
    'IPAnnualReimbursementAmt',
    'IPAnnualDeductibleAmt',
    'OPAnnualReimbursementAmt',
    'OPAnnualDeductibleAmt',
]
claims_by_provider = claims.groupby('Provider')
already_ip_op = claims_by_provider[already_ip_op_cols].mean()

In [20]:
# Rebuild `providers` with one row per Provider and attach the OP_, IP_ and
# annual-amount features as columns.
#
# PotentialFraud / IsOutpatient are removed from the OP and IP frames before
# merging (so no _x/_y suffixed duplicates are created), along with
# OP_Mean_AdmitDuration. PotentialFraud is re-attached from `pf` at the end.
setting_key_cols = ['PotentialFraud', 'IsOutpatient']
op_features = op.drop(columns=setting_key_cols + ['OP_Mean_AdmitDuration'])
ip_features = ip.drop(columns=setting_key_cols)

providers = claims.groupby('Provider').size().index.to_frame(index=False)
for feature_frame in (op_features, ip_features, already_ip_op):
    # Left joins keep providers that lack claims in one of the settings.
    providers = providers.merge(feature_frame, on='Provider', how='left')

# One row per observed (Provider, PotentialFraud) pair.
pf = (
    claims.groupby(['Provider', 'PotentialFraud'])
          .size()
          .index
          .to_frame(index=False)
)

providers = providers.merge(pf, on='Provider')

### Data Import

In [21]:
# Teammate feature file: keep Provider plus the one remaining feature column.
lucas = pd.read_csv('./data/Lucas_Providers.csv')
print(lucas.columns)

lucas_unused_cols = ['percentage_InOutpatients', 'percentage_noPhysician']
lucas = lucas.drop(columns=lucas_unused_cols)

# Positional rename of the two columns that are left after the drop.
lucas.columns = ['Provider', 'HasTop5AdmtCode']
lucas.head()

Index(['Provider', 'percentage_InOutpatients', 'percentage_noPhysician',
       'isTop5admtcode'],
      dtype='object')


Unnamed: 0,Provider,HasTop5AdmtCode
0,PRV51001,0.04
1,PRV51003,0.060606
2,PRV51004,0.013423
3,PRV51005,0.032618
4,PRV51007,0.027778


In [22]:
# Teammate feature file: drop the saved index column and the per-setting
# means listed below, keeping the remaining columns.
ryan = pd.read_csv('./data/Ryan_Providers.csv')
print(ryan.columns)

ryan_dropped_cols = [
    'Unnamed: 0',
    'AllPhy_mean_IP', 'AllPhy_mean_OP',
    'NoPhy_mean_IP', 'NoPhy_mean_OP',
    'ClaimDuration_mean_IP', 'ClaimDuration_mean_OP',
    'InscClaimAmtReimbursed_mean_IP', 'InscClaimAmtReimbursed_mean_OP',
    'AdmisDuration_mean_IP', 'AdmisDuration_mean_OP',
    'AgeAtClm_mean_IP', 'AgeAtClm_mean_OP',
    'Chronic_Sum_mean_IP', 'Chronic_Sum_mean_OP',
    'DeductibleAmtPaid_mean_IP', 'DeductibleAmtPaid_mean_OP',
    'InsCovRatio_mean_IP', 'InsCovRatio_mean_OP',
    'RevPerDay_mean_IP', 'RevPerDay_mean_OP',
]
ryan = ryan.drop(columns=ryan_dropped_cols)

Index(['Unnamed: 0', 'Provider', 'Patient_Attphy_Ratio',
       'Patient_Operphy_Ratio', 'Patient_Otherphy_Ratio',
       'Claim_Patient_Ratio', 'Claim_AttPhy_Ratio', 'IsOutpatient_Perc',
       'BeneID_Nunique_IP', 'State_Nunique_IP', 'BeneID_Nunique_OP',
       'State_Nunique_OP', 'AllPhy_mean_IP', 'NoPhy_mean_IP',
       'ClaimDuration_mean_IP', 'InscClaimAmtReimbursed_mean_IP',
       'AdmisDuration_mean_IP', 'AgeAtClm_mean_IP',
       'DeductibleAmtPaid_mean_IP', 'Chronic_Sum_mean_IP',
       'InsCovRatio_mean_IP', 'RevPerDay_mean_IP', 'AllPhy_mean_OP',
       'NoPhy_mean_OP', 'ClaimDuration_mean_OP',
       'InscClaimAmtReimbursed_mean_OP', 'AdmisDuration_mean_OP',
       'AgeAtClm_mean_OP', 'DeductibleAmtPaid_mean_OP', 'Chronic_Sum_mean_OP',
       'InsCovRatio_mean_OP', 'RevPerDay_mean_OP', 'Att_Phy_Mult_Prec',
       'Oper_Phy_Mult_Prec', 'Other_Phy_Mult_Prec',
       'IP_Multiple_Hospital_Prec', 'OP_Multiple_Hospital_Prec',
       'Bene_Receive_Both_IO_Perc', 'Provider_Serve_

In [23]:
# Inner-join the teammates' feature tables onto `providers`. No `on=` is
# given, so pandas joins on whatever column names the frames share.
providers_with_ryan = providers.merge(ryan)
providers = providers_with_ryan.merge(lucas)

### Post-processing

In [24]:
# Replace every remaining missing value with 0 (e.g. features left empty by
# the left joins above).
providers = providers.fillna(0)

In [25]:
# Map source column names onto the notebook's naming scheme
# (IP_/OP_ prefixes, Perc_/Ratio_/Count_ stems).
provider_column_names = {
    # annual amounts
    'IPAnnualReimbursementAmt': 'IP_AnnualReimbursementAmt',
    'IPAnnualDeductibleAmt': 'IP_AnnualDeductibleAmt',
    'OPAnnualReimbursementAmt': 'OP_AnnualReimbursementAmt',
    'OPAnnualDeductibleAmt': 'OP_AnnualDeductibleAmt',
    # ratios and counts
    'Claim_Patient_Ratio': 'Ratio_ClaimsPerPatient',
    'Patient_Attphy_Ratio': 'PatientsPerAttPhys',
    'Patient_Operphy_Ratio': 'PatientsPerOperPhys',
    'Patient_Otherphy_Ratio': 'PatientsPerOthPhys',
    'Claim_AttPhy_Ratio': 'Ratio_ClaimsPerAttPhys',
    'IsOutpatient_Perc': 'Perc_Outpatient',
    'BeneID_Nunique_IP': 'IP_Count_UniquePatients',
    'State_Nunique_IP': 'IP_Count_UniqueState',
    'BeneID_Nunique_OP': 'OP_Count_UniquePatients',
    'State_Nunique_OP': 'OP_Count_UniqueState',
    # multi-hospital shares
    'Att_Phy_Mult_Prec': 'Perc_MultHospAttPhys',
    'Oper_Phy_Mult_Prec': 'Perc_MultHospOperPhys',
    'Other_Phy_Mult_Prec': 'Perc_MultHospOtherPhys',
    'IP_Multiple_Hospital_Prec': 'IP_Perc_MultHosp',
    'OP_Multiple_Hospital_Prec': 'OP_Perc_MultHosp',
    # remaining shares
    'Provider_Serve_BothIO': 'DualPatientProvider',
    'Bene_Receive_Both_IO_Perc': 'Perc_DualPatientType',
    'IP_Dup_Perc': 'IP_Perc_Duplicates',
    'OP_Dup_Perc': 'OP_Perc_Duplicates',
    'IP_No_Proc_Perc': 'IP_Perc_No_ProcCode',
    'OP_No_Diag_Perc': 'OP_Perc_No_DiagCode',
    'In_Top5_State_Perc': 'Perc_ClaimsPerTopFraudState',
}
providers = providers.rename(columns=provider_column_names)

In [26]:
# Final column selection and order: identifiers and provider-wide features
# first, then the inpatient (IP_) block, then the outpatient (OP_) block.
provider_wide_cols = [
    'Provider',
    'PotentialFraud',
    'Perc_Outpatient',
    'HasTop5AdmtCode',
    'PatientsPerAttPhys',
    'PatientsPerOperPhys',
    'PatientsPerOthPhys',
    'Perc_MultHospAttPhys',
    'Perc_MultHospOperPhys',
    'Perc_MultHospOtherPhys',
    'Perc_ClaimsPerTopFraudState',
    'DualPatientProvider',
    'Perc_DualPatientType',
    'Ratio_ClaimsPerAttPhys',
    'Ratio_ClaimsPerPatient',
]

inpatient_cols = [
    'IP_AnnualDeductibleAmt',
    'IP_AnnualReimbursementAmt',
    'IP_Count_UniquePatients',
    'IP_Count_UniqueState',
    'IP_Mean_AdmitDuration',
    'IP_Mean_ClaimCost',
    'IP_Mean_ClaimDuration',
    'IP_Mean_DailyClaimCost',
    'IP_Mean_DeductibleAmtPaid',
    'IP_Mean_InscClaimAmtReimbursed',
    'IP_Mean_NoOfMonths_PartACov',
    'IP_Mean_NoOfMonths_PartBCov',
    'IP_Perc_Alzheimers_Chronic',
    'IP_Perc_Cancer_Chronic',
    'IP_Perc_Depression_Chronic',
    'IP_Perc_Diabetes_Chronic',
    'IP_Perc_Duplicates',
    'IP_Perc_GenderZero',
    'IP_Perc_HasAllPhys',
    'IP_Perc_HasDied',
    'IP_Perc_HasNoPhys',
    'IP_Perc_HasRenalDisease',
    'IP_Perc_HeartFailure_Chronic',
    'IP_Perc_InsReimbursementRatio',
    'IP_Perc_IschemicHeart_Chronic',
    'IP_Perc_KidneyDisease_Chronic',
    'IP_Mean_AgeAtClaim',
    'IP_Mean_NumChronicConds',
    'IP_Perc_MultHosp',
    'IP_Perc_No_ProcCode',
    'IP_Perc_ObstrPulmonary_Chronic',
    'IP_Perc_Osteoporosis_Chronic',
    'IP_Perc_RaceOne',
    'IP_Perc_RaceThree',
    'IP_Perc_RaceTwo',
    'IP_Perc_RheumatoidArthritis_Chronic',
    'IP_Perc_Stroke_Chronic',
]

outpatient_cols = [
    'OP_AnnualDeductibleAmt',
    'OP_AnnualReimbursementAmt',
    'OP_Count_UniquePatients',
    'OP_Count_UniqueState',
    'OP_Mean_ClaimCost',
    'OP_Mean_ClaimDuration',
    'OP_Mean_DailyClaimCost',
    'OP_Mean_DeductibleAmtPaid',
    'OP_Mean_InscClaimAmtReimbursed',
    'OP_Mean_NoOfMonths_PartACov',
    'OP_Mean_NoOfMonths_PartBCov',
    'OP_Perc_Alzheimers_Chronic',
    'OP_Perc_Cancer_Chronic',
    'OP_Perc_Depression_Chronic',
    'OP_Perc_Diabetes_Chronic',
    'OP_Perc_Duplicates',
    'OP_Perc_GenderZero',
    'OP_Perc_HasAllPhys',
    'OP_Perc_HasDied',
    'OP_Perc_HasNoPhys',
    'OP_Perc_HasRenalDisease',
    'OP_Perc_HeartFailure_Chronic',
    'OP_Perc_InsReimbursementRatio',
    'OP_Perc_IschemicHeart_Chronic',
    'OP_Perc_KidneyDisease_Chronic',
    'OP_Mean_AgeAtClaim',
    'OP_Mean_NumChronicConds',
    'OP_Perc_MultHosp',
    'OP_Perc_No_DiagCode',
    'OP_Perc_ObstrPulmonary_Chronic',
    'OP_Perc_Osteoporosis_Chronic',
    'OP_Perc_RaceOne',
    'OP_Perc_RaceThree',
    'OP_Perc_RaceTwo',
    'OP_Perc_RheumatoidArthritis_Chronic',
    'OP_Perc_Stroke_Chronic',
]

final_cols = provider_wide_cols + inpatient_cols + outpatient_cols
providers = providers[final_cols]

In [27]:
# Spot-check ten random providers. A fixed random_state makes the displayed
# rows the same on every Restart & Run All.
providers.sample(10, random_state=42)

Unnamed: 0,Provider,PotentialFraud,Perc_Outpatient,HasTop5AdmtCode,PatientsPerAttPhys,PatientsPerOperPhys,PatientsPerOthPhys,Perc_MultHospAttPhys,Perc_MultHospOperPhys,Perc_MultHospOtherPhys,Perc_ClaimsPerTopFraudState,DualPatientProvider,Perc_DualPatientType,Ratio_ClaimsPerAttPhys,Ratio_ClaimsPerPatient,IP_AnnualDeductibleAmt,IP_AnnualReimbursementAmt,IP_Count_UniquePatients,IP_Count_UniqueState,IP_Mean_AdmitDuration,IP_Mean_ClaimCost,IP_Mean_ClaimDuration,IP_Mean_DailyClaimCost,IP_Mean_DeductibleAmtPaid,IP_Mean_InscClaimAmtReimbursed,IP_Mean_NoOfMonths_PartACov,IP_Mean_NoOfMonths_PartBCov,IP_Perc_Alzheimers_Chronic,IP_Perc_Cancer_Chronic,IP_Perc_Depression_Chronic,IP_Perc_Diabetes_Chronic,IP_Perc_Duplicates,IP_Perc_GenderZero,IP_Perc_HasAllPhys,IP_Perc_HasDied,IP_Perc_HasNoPhys,IP_Perc_HasRenalDisease,IP_Perc_HeartFailure_Chronic,IP_Perc_InsReimbursementRatio,IP_Perc_IschemicHeart_Chronic,IP_Perc_KidneyDisease_Chronic,IP_Mean_AgeAtClaim,IP_Mean_NumChronicConds,IP_Perc_MultHosp,IP_Perc_No_ProcCode,IP_Perc_ObstrPulmonary_Chronic,IP_Perc_Osteoporosis_Chronic,IP_Perc_RaceOne,IP_Perc_RaceThree,IP_Perc_RaceTwo,IP_Perc_RheumatoidArthritis_Chronic,IP_Perc_Stroke_Chronic,OP_AnnualDeductibleAmt,OP_AnnualReimbursementAmt,OP_Count_UniquePatients,OP_Count_UniqueState,OP_Mean_ClaimCost,OP_Mean_ClaimDuration,OP_Mean_DailyClaimCost,OP_Mean_DeductibleAmtPaid,OP_Mean_InscClaimAmtReimbursed,OP_Mean_NoOfMonths_PartACov,OP_Mean_NoOfMonths_PartBCov,OP_Perc_Alzheimers_Chronic,OP_Perc_Cancer_Chronic,OP_Perc_Depression_Chronic,OP_Perc_Diabetes_Chronic,OP_Perc_Duplicates,OP_Perc_GenderZero,OP_Perc_HasAllPhys,OP_Perc_HasDied,OP_Perc_HasNoPhys,OP_Perc_HasRenalDisease,OP_Perc_HeartFailure_Chronic,OP_Perc_InsReimbursementRatio,OP_Perc_IschemicHeart_Chronic,OP_Perc_KidneyDisease_Chronic,OP_Mean_AgeAtClaim,OP_Mean_NumChronicConds,OP_Perc_MultHosp,OP_Perc_No_DiagCode,OP_Perc_ObstrPulmonary_Chronic,OP_Perc_Osteoporosis_Chronic,OP_Perc_RaceOne,OP_Perc_RaceThree,OP_Perc_RaceTwo,OP_Perc_RheumatoidArt
hritis_Chronic,OP_Perc_Stroke_Chronic
3322,PRV55166,0,1.0,0.013889,42,63,53,0.069444,0.0,0.027778,0.0,0.0,0.152778,2.571429,1.028571,360.166667,2901.805556,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.083333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,446.527778,1467.777778,70.0,2.0,277.083333,1.875,242.220294,2.916667,274.166667,11.833333,12.0,0.486111,0.111111,0.472222,0.736111,0.5,0.347222,0.083333,0.013889,0.0,0.152778,0.666667,0.979762,0.763889,0.361111,68.611111,4.597222,0.916667,0.027778,0.277778,0.208333,0.955556,0.0,0.044444,0.361111,0.152778
2573,PRV54204,0,0.0,0.25,2,3,4,0.0,0.0,0.0,0.0,0.0,0.75,2.0,1.0,4716.5,46325.0,4.0,1.0,10.0,16618.0,10.0,1220.874444,1068.0,15550.0,12.0,12.0,0.25,0.0,0.0,0.75,0.0,0.5,0.0,0.0,0.0,0.5,0.75,0.694342,0.75,0.75,72.0,5.0,0.75,0.5,0.5,0.5,0.8,0.0,0.2,0.5,0.25,1070.0,3317.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.75,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3807,PRV55767,0,1.0,0.0,0,1,2,0.5,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,80.0,450.0,2.0,1.0,40.0,1.0,40.0,0.0,40.0,12.0,12.0,0.5,0.0,0.0,1.0,0.5,0.0,0.0,0.0,0.0,0.0,0.5,1.0,0.5,0.0,70.0,2.5,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2284,PRV53845,0,1.0,0.0,17,17,18,1.0,0.2,0.0,1.0,0.0,0.25,20.0,1.111111,987.4,7932.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,365.0,1511.0,18.0,1.0,475.0,1.4,458.428571,17.0,458.0,12.0,12.0,0.4,0.1,0.4,0.85,0.473684,0.4,0.0,0.0,0.0,0.2,0.65,0.915714,0.85,0.45,75.95,4.65,1.0,0.05,0.35,0.35,0.895833,0.020833,0.083333,0.2,0.05
3289,PRV55128,0,0.0,0.0,5,6,6,0.0,0.0,0.0,0.0,0.0,1.0,2.666667,1.0,4519.5,23568.75,8.0,1.0,7.875,14943.0,7.875,2922.740842,1068.0,13875.0,12.0,12.0,0.875,0.375,0.5,1.0,0.0,0.375,0.375,0.0,0.0,0.625,1.0,0.89493,0.75,0.875,81.0,6.625,0.75,0.125,0.25,0.5,0.0,0.0,0.0,0.25,0.25,991.25,3231.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.875,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2101,PRV53622,0,1.0,0.0,1,5,2,0.5,0.166667,0.0,0.0,0.0,0.166667,1.2,1.0,534.0,11333.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,486.666667,2063.333333,6.0,2.0,76.666667,1.0,76.666667,0.0,76.666667,10.0,12.0,0.666667,0.166667,0.666667,0.666667,0.8,0.333333,0.166667,0.0,0.0,0.0,0.5,1.0,0.5,0.333333,70.5,4.5,0.833333,0.166667,0.166667,0.333333,1.0,0.0,0.0,0.166667,0.333333
2273,PRV53832,0,1.0,0.083333,11,11,11,0.0,0.0,0.0,1.0,0.0,0.416667,12.0,1.0,890.0,9260.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.166667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,920.833333,3967.5,12.0,1.0,409.166667,3.083333,141.289683,0.0,409.166667,12.0,12.0,0.583333,0.166667,0.583333,0.916667,0.416667,0.416667,0.083333,0.0,0.0,0.333333,0.75,1.0,0.833333,0.583333,71.5,5.666667,1.0,0.0,0.25,0.083333,0.947368,0.0,0.052632,0.583333,0.333333
2378,PRV53969,0,1.0,0.025424,56,59,61,0.271186,0.025424,0.194915,0.0,0.0,0.279661,7.375,1.638889,334.881356,2539.830508,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,492.627119,1519.237288,72.0,3.0,307.457627,2.194915,240.31477,2.457627,305.0,11.898305,12.0,0.389831,0.135593,0.398305,0.635593,0.452174,0.381356,0.101695,0.008475,0.0,0.194915,0.508475,0.980858,0.601695,0.29661,71.618644,3.805085,0.567797,0.025424,0.237288,0.313559,0.921409,0.01355,0.065041,0.228814,0.059322
3695,PRV55626,0,1.0,0.0,5,8,8,0.111111,0.0,0.0,0.0,0.0,0.333333,2.25,1.0,356.0,2791.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,322.222222,938.888889,9.0,1.0,90.0,1.0,90.0,0.0,90.0,12.0,12.0,0.444444,0.111111,0.111111,0.666667,0.333333,0.333333,0.222222,0.0,0.0,0.222222,0.555556,1.0,1.0,0.555556,78.444444,4.444444,1.0,0.0,0.333333,0.333333,0.878788,0.0,0.121212,0.333333,0.0
4223,PRV56281,0,1.0,0.037037,22,22,22,1.0,0.0,0.407407,0.0,0.0,0.407407,27.0,2.25,474.666667,8259.259259,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,972.777778,3898.148148,24.0,1.0,450.740741,3.259259,229.831349,3.888889,446.851852,12.0,12.0,0.296296,0.111111,0.388889,0.740741,0.351852,0.425926,0.111111,0.0,0.0,0.240741,0.740741,0.966422,0.777778,0.592593,70.574074,4.537037,0.833333,0.0,0.148148,0.277778,0.0,0.0,0.0,0.314815,0.148148


## Export

In [28]:
# Write the final provider-level feature table to CSV.
# NOTE(review): to_csv defaults to index=True, so the row index is written as
# an extra unnamed first column (it reads back as 'Unnamed: 0'). Pass
# index=False if downstream consumers do not rely on that column - confirm.
providers.to_csv('./data/Providers_Final.csv')