# Physician & Other Supplier Payments 2015 - bene_day_srvc_cnt mean, max

### Import libraries

In [155]:
import pandas as pd 
import numpy as np  
import matplotlib.pyplot as plt  
import seaborn as sns

### Read in csv for 2015 & 2017

In [162]:
# Load only the five columns used below from the tab-separated CY2015 file;
# nppes_entity_code and place_of_service are read with the category dtype.
# The row labelled 0 is then discarded and the index renumbered from 0
# (presumably that first row is not a provider record -- TODO confirm).
prov_pmnt_2015 = (
    pd.read_csv(
        '../data/Medicare_Provider_Util_Payment_PUF_CY2015.txt',
        sep='\t',
        usecols=[
            'nppes_entity_code',
            'medicare_participation_indicator',
            'place_of_service',
            'hcpcs_code',
            'bene_day_srvc_cnt',
        ],
        dtype={'nppes_entity_code': 'category', 'place_of_service': 'category'},
    )
    .drop([0])
    .reset_index(drop=True)
)

In [164]:
# Load only the five columns used below from the tab-separated CY2017 file;
# nppes_entity_code and place_of_service are read with the category dtype.
# The row labelled 0 is then discarded and the index renumbered from 0
# (presumably that first row is not a provider record -- TODO confirm).
prov_pmnt_2017 = (
    pd.read_csv(
        '../data/Medicare_Provider_Util_Payment_PUF_CY2017.txt',
        sep='\t',
        usecols=[
            'nppes_entity_code',
            'medicare_participation_indicator',
            'place_of_service',
            'hcpcs_code',
            'bene_day_srvc_cnt',
        ],
        dtype={'nppes_entity_code': 'category', 'place_of_service': 'category'},
    )
    .drop([0])
    .reset_index(drop=True)
)

### Functions

In [140]:
def getlist(dict):
    """Return the keys of a dictionary as a list.

    Parameters
    ----------
    dict : mapping
        Object exposing ``.keys()``. The parameter keeps its original name so
        keyword callers still work, even though it hides the built-in ``dict``
        inside this function.

    Returns
    -------
    list
        The keys, in the mapping's own iteration order.
    """
    # No local is named ``list`` any more, so the built-in constructor can be
    # used directly instead of a manual append loop.
    return list(dict.keys())

# Extract top 10 HCPCS codes for each entity type/place of service combination - I:O, I:F, O:O, O:F

# Then find the bene_day_srvc_cnt mean and max for each HCPCS code for each combination

## Individual - Non Facility or I:O

#### Filter data based on entity code, place of service, and medicare indicator, then pass top ten HCPCS codes into a dictionary

In [146]:
# 2015: keep rows with entity code "I", place of service "O" and Medicare
# participation indicator "Y". Despite its name, this frame holds every
# matching row, not just ten of them.
top_10_io_codes_2015 = prov_pmnt_2015.loc[
    prov_pmnt_2015['nppes_entity_code'].isin(["I"])
    & prov_pmnt_2015['place_of_service'].isin(["O"])
    & prov_pmnt_2015['medicare_participation_indicator'].isin(["Y"])
]
# value_counts() orders codes by how many rows carry them, so head(10) keeps
# the ten most frequent codes; dict() maps each code to its row count.
top_10_io_codes_2015_dict = dict(
    top_10_io_codes_2015['hcpcs_code'].value_counts().head(10)
)

# 2017: identical filter and top-ten extraction on the 2017 frame.
top_10_io_codes_2017 = prov_pmnt_2017.loc[
    prov_pmnt_2017['nppes_entity_code'].isin(["I"])
    & prov_pmnt_2017['place_of_service'].isin(["O"])
    & prov_pmnt_2017['medicare_participation_indicator'].isin(["Y"])
]
top_10_io_codes_2017_dict = dict(
    top_10_io_codes_2017['hcpcs_code'].value_counts().head(10)
)

#### Use getlist() function (defined above) to turn dictionary to a list

In [151]:
# HCPCS codes (the dictionary keys) for each year, in the dictionaries' order.
top_10_io_codes_2015_list, top_10_io_codes_2017_list = (
    getlist(top_10_io_codes_2015_dict),
    getlist(top_10_io_codes_2017_dict),
)

#### Create df filtered on entity code = I, place of service = O, and HCPCS code

In [116]:
# Rows with entity code "I" and place of service "O" whose HCPCS code is one of
# the ten listed. Unlike the top-ten extraction, this filter does not test the
# Medicare participation indicator.
# NOTE(review): the codes are hard-coded; presumably a pasted copy of
# top_10_io_codes_2015_list -- TODO confirm they still match.
top_10_io_2015 = prov_pmnt_2015.loc[
    prov_pmnt_2015['nppes_entity_code'].isin(["I"])
    & prov_pmnt_2015['place_of_service'].isin(["O"])
    & prov_pmnt_2015['hcpcs_code'].isin([
        '99213', '99214', '99203', '99204', '99212',
        'G0008', '36415', '99215', '93000', '96372',
    ])
]

#### Pivot data frame to reflect the mean of bene_day_srvc_cnt for each HCPCS code

In [117]:
# Mean bene_day_srvc_cnt per HCPCS code. The aggregation is named by the string
# 'mean' instead of np.mean: pandas >= 2.1 emits a FutureWarning when a NumPy
# reducer is passed as aggfunc and recommends the string alias, which keeps the
# current result.
io_2015_pivot_mean = top_10_io_2015.pivot_table(
    index='hcpcs_code',
    values='bene_day_srvc_cnt',
    aggfunc='mean',
)
# Show the per-code means, largest first.
io_2015_pivot_mean['bene_day_srvc_cnt'].sort_values(ascending=False)

hcpcs_code
36415    282.624990
99214    261.038394
99213    243.657230
93000    131.538328
96372    112.228931
99212     99.582081
99215     83.043327
G0008     69.168324
99203     61.824450
99204     54.142617
Name: bene_day_srvc_cnt, dtype: float64

#### Repeat the same steps for max of bene_day_srvc_cnt

In [118]:
# Maximum bene_day_srvc_cnt per HCPCS code. The aggregation is named by the
# string 'max' instead of np.max: pandas >= 2.1 emits a FutureWarning when a
# NumPy reducer is passed as aggfunc and recommends the string alias, which
# keeps the current result.
io_2015_pivot_max = top_10_io_2015.pivot_table(
    index='hcpcs_code',
    values='bene_day_srvc_cnt',
    aggfunc='max',
)
# Show the per-code maxima, largest first.
io_2015_pivot_max['bene_day_srvc_cnt'].sort_values(ascending=False)

hcpcs_code
36415    117129.0
99213     12572.0
99214     12326.0
G0008     11567.0
99212     10804.0
96372      9178.0
99203      7262.0
93000      6908.0
99215      4765.0
99204      3436.0
Name: bene_day_srvc_cnt, dtype: float64

In [81]:
#messing around with pd.melt()
#io_melt = pd.melt(frame = top_10_IO, id_vars = ['hcpcs_code', 'bene_day_srvc_cnt'])

## Individual - Facility or I:F

#### Filter data based on entity code, place of service, and medicare indicator, then pass top ten HCPCS codes into a dictionary

In [119]:
# Keep 2015 rows with entity code "I", place of service "F" and Medicare
# participation indicator "Y". Despite its name, this frame holds every
# matching row, not just ten of them.
top_10_if_codes_2015 = prov_pmnt_2015.loc[
    prov_pmnt_2015['nppes_entity_code'].isin(["I"])
    & prov_pmnt_2015['place_of_service'].isin(["F"])
    & prov_pmnt_2015['medicare_participation_indicator'].isin(["Y"])
]
# value_counts() orders codes by how many rows carry them, so head(10) keeps
# the ten most frequent codes; dict() maps each code to its row count.
top_10_if_codes_dict = dict(
    top_10_if_codes_2015['hcpcs_code'].value_counts().head(10)
)

#### Use getlist() function (defined above) to turn dictionary to a list

In [131]:
# Keys of the top-ten dictionary as a list; same result as
# getlist(top_10_if_codes_dict), written inline.
top_10_if_codes_list = list(top_10_if_codes_dict.keys())
top_10_if_codes_list

['99232',
 '99223',
 '99233',
 '99222',
 '99213',
 '99231',
 '99214',
 '99291',
 '99238',
 '99284']

#### Create df filtered on entity code = I, place of service = F, and HCPCS code

In [132]:
# Rows with entity code "I" and place of service "F" whose HCPCS code is in the
# computed top-ten list. The list variable replaces a pasted literal of the same
# ten codes (the displayed value of top_10_if_codes_list matches that literal),
# so the filter can no longer drift from the computed ranking.
# Unlike the top-ten extraction, this filter does not test the Medicare
# participation indicator.
top_10_if = prov_pmnt_2015.loc[
    prov_pmnt_2015['nppes_entity_code'].isin(["I"])
    & prov_pmnt_2015['place_of_service'].isin(["F"])
    & prov_pmnt_2015['hcpcs_code'].isin(top_10_if_codes_list)
]

#### Pivot data frame to reflect the mean of bene_day_srvc_cnt for each HCPCS code

In [133]:
# Mean bene_day_srvc_cnt per HCPCS code. The aggregation is named by the string
# 'mean' instead of np.mean: pandas >= 2.1 emits a FutureWarning when a NumPy
# reducer is passed as aggfunc and recommends the string alias, which keeps the
# current result.
if_pivot_mean = top_10_if.pivot_table(
    index='hcpcs_code',
    values='bene_day_srvc_cnt',
    aggfunc='mean',
)
# Show the per-code means, largest first.
if_pivot_mean['bene_day_srvc_cnt'].sort_values(ascending=False)

hcpcs_code
99232    273.051716
99233    185.270433
99214    145.713713
99213    129.797603
99231    107.599192
99284    103.187672
99291     85.350227
99223     79.706693
99222     58.970767
99238     55.652469
Name: bene_day_srvc_cnt, dtype: float64

#### Repeat the same steps for max of bene_day_srvc_cnt

In [123]:
# Maximum bene_day_srvc_cnt per HCPCS code. The aggregation is named by the
# string 'max' instead of np.max: pandas >= 2.1 emits a FutureWarning when a
# NumPy reducer is passed as aggfunc and recommends the string alias, which
# keeps the current result.
if_pivot_max = top_10_if.pivot_table(
    index='hcpcs_code',
    values='bene_day_srvc_cnt',
    aggfunc='max',
)
# Show the per-code maxima, largest first.
if_pivot_max['bene_day_srvc_cnt'].sort_values(ascending=False)

hcpcs_code
99233    13013.0
99231    11884.0
99232    11475.0
99213     5123.0
99214     3935.0
99291     2858.0
99223     2105.0
99284     1710.0
99222     1707.0
99238     1632.0
Name: bene_day_srvc_cnt, dtype: float64

## Organizations - Non Facility or O:O

#### Filter data based on entity code, place of service, and medicare indicator, then pass top ten HCPCS codes into a dictionary

In [124]:
# Keep 2015 rows with entity code "O", place of service "O" and Medicare
# participation indicator "Y". Despite its name, this frame holds every
# matching row, not just ten of them.
top_10_oo_codes_2015 = prov_pmnt_2015.loc[
    prov_pmnt_2015['nppes_entity_code'].isin(["O"])
    & prov_pmnt_2015['place_of_service'].isin(["O"])
    & prov_pmnt_2015['medicare_participation_indicator'].isin(["Y"])
]
# value_counts() orders codes by how many rows carry them, so head(10) keeps
# the ten most frequent codes; dict() maps each code to its row count.
top_10_oo_codes_dict = dict(
    top_10_oo_codes_2015['hcpcs_code'].value_counts().head(10)
)

#### Use getlist() function (defined above) to turn dictionary to a list

In [125]:
# Keys of the top-ten dictionary as a list; same result as
# getlist(top_10_oo_codes_dict), written inline.
top_10_oo_codes_list = list(top_10_oo_codes_dict.keys())
top_10_oo_codes_list

['G0008',
 '90662',
 'G0009',
 '90670',
 '90656',
 'Q2037',
 '90686',
 '90688',
 '90732',
 'Q2038']

#### Create df filtered on entity code = O, place of service = O, and HCPCS code

In [127]:
# Rows with entity code "O" and place of service "O" whose HCPCS code is in the
# computed top-ten list. The list variable replaces a pasted literal of the same
# ten codes (the displayed value of top_10_oo_codes_list matches that literal),
# so the filter can no longer drift from the computed ranking.
# Unlike the top-ten extraction, this filter does not test the Medicare
# participation indicator.
top_10_oo = prov_pmnt_2015.loc[
    prov_pmnt_2015['nppes_entity_code'].isin(["O"])
    & prov_pmnt_2015['place_of_service'].isin(["O"])
    & prov_pmnt_2015['hcpcs_code'].isin(top_10_oo_codes_list)
]

#### Pivot data frame to reflect the mean of bene_day_srvc_cnt for each HCPCS code

In [129]:
# Mean bene_day_srvc_cnt per HCPCS code. The aggregation is named by the string
# 'mean' instead of np.mean: pandas >= 2.1 emits a FutureWarning when a NumPy
# reducer is passed as aggfunc and recommends the string alias, which keeps the
# current result.
oo_pivot_mean = top_10_oo.pivot_table(
    index='hcpcs_code',
    values='bene_day_srvc_cnt',
    aggfunc='mean',
)
# Show the per-code means, largest first.
oo_pivot_mean['bene_day_srvc_cnt'].sort_values(ascending=False)

hcpcs_code
G0008    149.097115
90662    116.362172
90688     82.927224
Q2037     76.606824
Q2038     55.672808
G0009     49.731813
90670     49.170990
90686     34.403776
90656     31.777177
90732     22.951206
Name: bene_day_srvc_cnt, dtype: float64

#### Repeat the same steps for max of bene_day_srvc_cnt

In [130]:
# Maximum bene_day_srvc_cnt per HCPCS code. The aggregation is named by the
# string 'max' instead of np.max: pandas >= 2.1 emits a FutureWarning when a
# NumPy reducer is passed as aggfunc and recommends the string alias, which
# keeps the current result.
oo_pivot_max = top_10_oo.pivot_table(
    index='hcpcs_code',
    values='bene_day_srvc_cnt',
    aggfunc='max',
)
# Show the per-code maxima, largest first.
oo_pivot_max['bene_day_srvc_cnt'].sort_values(ascending=False)

hcpcs_code
G0008    88812.0
90662    61072.0
G0009    24309.0
90670    22351.0
Q2037    21904.0
90686     9566.0
90688     7103.0
90732     3533.0
90656     3159.0
Q2038     1428.0
Name: bene_day_srvc_cnt, dtype: float64

## Organizations - Facility or O:F

#### Filter data based on entity code, place of service, and medicare indicator, then pass top ten HCPCS codes into a dictionary

In [134]:
# Keep 2015 rows with entity code "O", place of service "F" and Medicare
# participation indicator "Y". Despite its name, this frame holds every
# matching row, not just ten of them.
top_10_of_codes_2015 = prov_pmnt_2015.loc[
    prov_pmnt_2015['nppes_entity_code'].isin(["O"])
    & prov_pmnt_2015['place_of_service'].isin(["F"])
    & prov_pmnt_2015['medicare_participation_indicator'].isin(["Y"])
]
# value_counts() orders codes by how many rows carry them, so head(10) keeps
# the ten most frequent codes; dict() maps each code to its row count.
top_10_of_codes_dict = dict(
    top_10_of_codes_2015['hcpcs_code'].value_counts().head(10)
)

#### Use getlist() function (defined above) to turn dictionary to a list

In [135]:
# Keys of the top-ten dictionary as a list; same result as
# getlist(top_10_of_codes_dict), written inline.
top_10_of_codes_list = list(top_10_of_codes_dict.keys())
top_10_of_codes_list

['A0425',
 'A0429',
 'A0427',
 'A0428',
 'A0433',
 '66984',
 'A0426',
 '43239',
 '45380',
 '45385']

#### Create df filtered on entity code = O, place of service = F, and HCPCS code

In [137]:
# Rows with entity code "O" and place of service "F" whose HCPCS code is in the
# computed top-ten list. The list variable replaces a pasted literal of the same
# ten codes (the displayed value of top_10_of_codes_list matches that literal),
# so the filter can no longer drift from the computed ranking.
# Unlike the top-ten extraction, this filter does not test the Medicare
# participation indicator.
top_10_of = prov_pmnt_2015.loc[
    prov_pmnt_2015['nppes_entity_code'].isin(["O"])
    & prov_pmnt_2015['place_of_service'].isin(["F"])
    & prov_pmnt_2015['hcpcs_code'].isin(top_10_of_codes_list)
]

#### Pivot data frame to reflect the mean of bene_day_srvc_cnt for each HCPCS code

In [138]:
# Mean bene_day_srvc_cnt per HCPCS code. The aggregation is named by the string
# 'mean' instead of np.mean: pandas >= 2.1 emits a FutureWarning when a NumPy
# reducer is passed as aggfunc and recommends the string alias, which keeps the
# current result.
of_pivot_mean = top_10_of.pivot_table(
    index='hcpcs_code',
    values='bene_day_srvc_cnt',
    aggfunc='mean',
)
# Show the per-code means, largest first.
of_pivot_mean['bene_day_srvc_cnt'].sort_values(ascending=False)

hcpcs_code
A0425    1327.194171
A0428    1324.258462
A0427     687.103817
66984     635.836921
A0429     352.753011
43239     286.003829
45380     237.089266
45385     201.699541
A0426     183.146104
A0433      48.286857
Name: bene_day_srvc_cnt, dtype: float64

#### Repeat the same steps for max of bene_day_srvc_cnt

In [139]:
# Maximum bene_day_srvc_cnt per HCPCS code. The aggregation is named by the
# string 'max' instead of np.max: pandas >= 2.1 emits a FutureWarning when a
# NumPy reducer is passed as aggfunc and recommends the string alias, which
# keeps the current result.
of_pivot_max = top_10_of.pivot_table(
    index='hcpcs_code',
    values='bene_day_srvc_cnt',
    aggfunc='max',
)
# Show the per-code maxima, largest first.
of_pivot_max['bene_day_srvc_cnt'].sort_values(ascending=False)

hcpcs_code
A0425    97329.0
A0427    56730.0
A0428    51431.0
A0429    45764.0
66984     5796.0
A0426     3773.0
45380     3043.0
43239     2636.0
45385     1915.0
A0433     1316.0
Name: bene_day_srvc_cnt, dtype: float64

# Collect, chunk by chunk, the rows whose entity code is "I" and whose place of
# service is "O".
# NOTE(review): prov_pmnt_chunks is not defined anywhere in the visible
# notebook; presumably it is an iterable of DataFrames (e.g. from
# pd.read_csv(..., chunksize=...)) -- TODO confirm before running this cell.
filtered_dataset = []

for i, chunk in enumerate(prov_pmnt_chunks):
    print(i + 1)  # progress: 1-based number of the chunk being processed
    # Combine both conditions into a single boolean row mask. The previous code
    # passed ``['place_of_service'] == 'O'`` (a list compared with a string,
    # which is simply False) as the column selector of .loc, so the
    # place_of_service column was never tested.
    is_individual_office = (
        (chunk['nppes_entity_code'] == 'I')
        & (chunk['place_of_service'] == 'O')
    )
    filtered_dataset.append(chunk.loc[is_individual_office])


#prov_pmnt_2015 = prov_pmnt_2015.drop([0])
#prov_pmnt_2015 = prov_pmnt_2015.reset_index(drop = True)
#prov_pmnt_2015['nppes_entity_code'] = prov_pmnt_2015['nppes_entity_code'].astype('category')
#prov_pmnt_2015['place_of_service'] = prov_pmnt_2015['place_of_service'].astype('category')
