In [1]:
from hcc import Diagnosis, Beneficiary, ICDType, score,regvars, EntitlementReason
from pyDatalog import pyDatalog
import pandas as pd
import random

In [2]:
# Locations of the CSV inputs, relative to the notebook's working directory.
diag_f = "test_data/diag.csv"
person_f = "test_data/person.csv"
output_f = "test_data/output.csv"
hcc_coeff_f = "hcc_coefficients_cleaned.csv"

# The three test-data files are read with pandas' defaults (header row taken
# from the file).
df_diag, df_person, df_output = (
    pd.read_csv(csv_path) for csv_path in (diag_f, person_f, output_f)
)

# Column names are supplied explicitly here, so pandas treats every line of
# the coefficients file as data rather than as a header.
hcc_coeff = pd.read_csv(hcc_coeff_f, names=["coeff", "value"])

In [3]:
# RAF_Type code -> name of the variable set later handed to regvars().
# Every code is 'C' + one of F/N/P + a final letter; the final letter alone
# decides the variable set ('A' -> aged, 'D' -> disabled).
_variable_set_by_suffix = {
    'A': 'valid_community_aged_variables',
    'D': 'valid_community_disabled_variables',
}
raf_type = {
    'C' + middle + suffix: variable_set
    for middle in 'FNP'
    for suffix, variable_set in _variable_set_by_suffix.items()
}
raf_type

{'CFA': 'valid_community_aged_variables',
 'CFD': 'valid_community_disabled_variables',
 'CNA': 'valid_community_aged_variables',
 'CND': 'valid_community_disabled_variables',
 'CPA': 'valid_community_aged_variables',
 'CPD': 'valid_community_disabled_variables'}

In [4]:
# Preview the first five rows loaded from the diagnosis CSV.
df_diag.head(n=5)


Unnamed: 0,Patient_ID,ICD10,month,year
0,1132385403999,C760,9,17
1,1132385403999,C760,10,17
2,1132385403999,I270,10,17
3,1132385403999,C760,11,17
4,1132385403999,I270,11,17


In [5]:
# Preview the first five rows loaded from the person CSV.
df_person.head(n=5)


Unnamed: 0,Month_of_eligibility,Patient_DOB,Gender,Patient_ID,RAF_Type
0,2017-03-01,1951-08-08,M,1132385403999,CFA
1,2017-01-01,1951-09-04,F,1125322747999,CNA
2,2018-05-01,1938-12-13,M,1113060059999,CPA
3,2017-07-01,1940-08-11,F,1963247143999,CNA
4,2017-09-01,1945-03-25,F,153951827999,CNA


In [6]:
# Preview the first five rows loaded from the output CSV.
df_output.head(n=5)

Unnamed: 0,Run_date,year_of_eligibility,Patient_ID,DOB,Gender,Group_code,Plan_type,Observation,RAF coefficient,RAF_type
0,2018-09-24,2017-01-01,100231512200308033401999,1927-11-08,F,AE430044,MEDICARE ADVANTAGE,CNA_F85_89,0.664,CNA
1,2018-09-24,2017-02-01,100231512200308033401999,1927-11-08,F,AE430044,MEDICARE ADVANTAGE,CNA_F85_89,0.664,CNA
2,2018-09-24,2017-03-01,100231512200308033401999,1927-11-08,F,AE430044,MEDICARE ADVANTAGE,CNA_F85_89,0.664,CNA
3,2018-09-24,2017-04-01,100231512200308033401999,1927-11-08,F,AE430044,MEDICARE ADVANTAGE,CNA_F85_89,0.664,CNA
4,2018-09-24,2017-05-01,100231512200308033401999,1927-11-08,F,AE430044,MEDICARE ADVANTAGE,CNA_F85_89,0.664,CNA


In [7]:
print('selecting random row from the person df')
# Pick one of the labels that actually exist in the index. Drawing an integer
# between the first and last label only works while the index is a contiguous
# integer range, and binding the result to `id` would shadow the builtin.
row_label = random.choice(df_person.index.tolist())
# Turn the selected row into a plain {column name: value} dict.
temp_patient = dict(df_person.loc[row_label])
print(temp_patient)

selecting random row from the person df
{'Month_of_eligibility': '2017-08-01', 'Patient_DOB': '1962-05-25', 'Gender': 'F', 'Patient_ID': '1132685531999', 'RAF_Type': 'CPD'}


In [8]:
print("getting diagnosis data for {}: {}".format('Patient_ID', temp_patient['Patient_ID'] ))
# Select this patient's rows with a boolean mask instead of formatting the ID
# into a query() expression string: an ID containing a quote would break the
# expression, and the interpolated literal is always compared as a string.
# Both sides are normalised to str so the match does not depend on how pandas
# inferred the column dtype.
patient_id = str(temp_patient['Patient_ID'])
temp_diag = df_diag[df_diag['Patient_ID'].astype(str) == patient_id]
temp_diag

getting diagnosis data for Patient_ID: 1132685531999


Unnamed: 0,Patient_ID,ICD10,month,year
2018,1132685531999,E119,3,17
2019,1132685531999,E119,4,17
2020,1132685531999,E119,5,17
2021,1132685531999,E119,6,17
2022,1132685531999,E119,7,17
2023,1132685531999,E119,8,17
2024,1132685531999,Z6843,8,17
2025,1132685531999,E119,9,17
2026,1132685531999,Z6843,9,17
2027,1132685531999,E119,10,17


In [9]:
from hcc import Diagnosis, Beneficiary, ICDType, score,regvars, EntitlementReason
from pyDatalog import pyDatalog

# Single-letter gender code (lower-cased) -> the word passed to Beneficiary.
sex = {'f': 'female', 'm': 'male'}
temp_sex = sex[temp_patient['Gender'].lower()]

# Drop the dashes from the date strings, e.g. '1962-05-25' -> '19620525'.
temp_dob = temp_patient['Patient_DOB'].replace('-', '')
temp_age_upto = temp_patient['Month_of_eligibility'].replace('-', '')

# RAF_Type is unpacked into exactly three characters; only the first and the
# last are used below.
char1, char2, char3 = temp_patient['RAF_Type']

# 'E..D' codes get 3; otherwise the last character decides ('A' -> 0, 'D' -> 1)
# and any other last character raises KeyError.
orec = 3 if (char1 == 'E' and char3 == 'D') else {'A': 0, 'D': 1}[char3]

print(temp_sex, temp_dob, temp_age_upto, orec)

# NOTE(review): medicaid is hard-coded to True for every patient, regardless
# of RAF_Type -- confirm this is intended for all codes.
person = Beneficiary(
    hicno=temp_patient['Patient_ID'],
    sex=temp_sex,
    dob=temp_dob,
    age_upto=temp_age_upto,
    original_reason_entitlement=orec,
    medicaid=True,
)
print(person)

female 19620525 20170801 1
ID:1132685531999,DOB:1962-05-25 00:00:00,age_upto:2017-08-01 00:00:00


In [10]:
# Attach one Diagnosis per row of temp_diag to the beneficiary, in row order.
# The column label is ' ICD10' with a leading space, exactly as written here;
# presumably it mirrors the CSV header -- verify against the file.
# NOTE(review): repeated codes are added once per row, and re-running this
# cell adds them all again.
icd10_codes = temp_diag[' ICD10'].values
for icd10_code in icd10_codes:
    diagnosis = Diagnosis(person, icd10_code, ICDType.TEN)
    person.add_diagnosis(diagnosis)

In [11]:
# `Vars` is not assigned anywhere in this cell; it is expected to exist after
# this create_terms call and is passed to regvars() below as a query variable.
pyDatalog.create_terms("Vars")

# Name of the variable set that corresponds to this patient's RAF_Type.
temp_raf_type = raf_type[temp_patient['RAF_Type']]

# The first element of the first result row is a comma-separated string;
# split it into a list of category names.
regvars_result = regvars(person, temp_raf_type, Vars)
first_row = regvars_result[0]
conditiion_categories = first_row[0].split(',')

print("conditiion_categories: {} for RAF_Type: {}".format(conditiion_categories,temp_patient['RAF_Type']))

conditiion_categories: ['HCC19', 'HCC22'] for RAF_Type: CPD


In [12]:
def func1(c):
    """Return the coefficient key for category `c`: '<RAF_Type>_<c>', lower-cased.

    The RAF_Type is read from the module-level `temp_patient` at call time.
    """
    return '_'.join([temp_patient['RAF_Type'], c]).lower()


def func2(x):
    """Return the 'value' of the first `hcc_coeff` row whose 'coeff' equals `x`.

    The value is rounded to 3 decimal places. Raises IndexError when no row
    matches `x`.
    """
    # Boolean-mask lookup instead of formatting `x` into a query() expression,
    # so a key containing a quote cannot break the expression.
    matches = hcc_coeff.loc[hcc_coeff['coeff'] == x, 'value']
    # tolist() yields plain Python scalars, so the rounded result displays as
    # a bare number.
    return round(matches.tolist()[0], 3)


# One single-entry dict per condition category: {coefficient key: value}.
[{key: func2(key)} for key in map(func1, conditiion_categories)]

[{'cpd_hcc19': 0.136}, {'cpd_hcc22': 0.242}]

In [13]:
# Rows of the output table for the selected patient. A boolean mask replaces
# the string-formatted query() expression, which would break on an ID that
# contains a quote; both sides are compared as str so the match does not
# depend on the inferred column dtype.
df_output[df_output['Patient_ID'].astype(str) == str(temp_patient['Patient_ID'])]

Unnamed: 0,Run_date,year_of_eligibility,Patient_ID,DOB,Gender,Group_code,Plan_type,Observation,RAF coefficient,RAF_type
2593,2018-09-24,2017-01-01,1132685531999,1962-05-25,F,,MEDICARE ADVANTAGE,CPD_F45_54,0.374,CPD
2594,2018-09-24,2017-02-01,1132685531999,1962-05-25,F,,MEDICARE ADVANTAGE,CPD_F45_54,0.374,CPD
2595,2018-09-24,2017-03-01,1132685531999,1962-05-25,F,,MEDICARE ADVANTAGE,CPD_F45_54,0.374,CPD
2596,2018-09-24,2017-04-01,1132685531999,1962-05-25,F,,MEDICARE ADVANTAGE,CPD_F45_54,0.374,CPD
2597,2018-09-24,2017-05-01,1132685531999,1962-05-25,F,,MEDICARE ADVANTAGE,CPD_F45_54,0.374,CPD
2598,2018-09-24,2017-06-01,1132685531999,1962-05-25,F,,MEDICARE ADVANTAGE,CPD_F45_54,0.374,CPD
2599,2018-09-24,2017-07-01,1132685531999,1962-05-25,F,,MEDICARE ADVANTAGE,CPD_F45_54,0.374,CPD
2600,2018-09-24,2017-08-01,1132685531999,1962-05-25,F,,MEDICARE ADVANTAGE,CPD_F45_54,0.374,CPD
2601,2018-09-24,2017-09-01,1132685531999,1962-05-25,F,,MEDICARE ADVANTAGE,CPD_F45_54,0.374,CPD
2602,2018-09-24,2017-10-01,1132685531999,1962-05-25,F,,MEDICARE ADVANTAGE,CPD_F45_54,0.374,CPD
