In [None]:
##### REQUIRES THE DATAFRAME FOLDER TO BE NAMED 'Cohorts', WHICH INCLUDES ALL PRECOMPUTED DATAFRAMES #####
import fiber
from fiber.cohort import Cohort
from fiber.condition import Patient, MRNs
from fiber.condition import Diagnosis
from fiber.condition import Measurement, Encounter, Drug
from fiber.storage import yaml as fiberyaml
import pandas as pd
import pyarrow.parquet as pq
import numpy as np
import os
from functools import reduce 

In [None]:
# Load the precomputed case and control cohorts from the 'Cohorts' folder.
# The paths are relative, so the notebook must be run from the directory
# that contains 'Cohorts'.
Case_filtered_15_540 = pq.read_table('Cohorts/Case/Case_filtered_15_540.parquet').to_pandas()
Control_filtered_15_540 = pq.read_table('Cohorts/Control/Control_filtered_15_540.parquet').to_pandas()

In [None]:
# Small test cohorts for quick experiments: the last five rows of the case
# cohort and the first five rows of the control cohort.
Test_Case_filtered_15_540 = Case_filtered_15_540.tail(5)
Test_Control_filtered_15_540 = Control_filtered_15_540.head(5)
print(Test_Case_filtered_15_540)

In [None]:
#generic function get cohort 
def df_to_cohort(df):
    """Build a fiber ``Cohort`` from the index values of ``df``.

    Args:
        df: DataFrame whose index holds the medical record numbers.

    Returns:
        A ``Cohort`` constructed from an ``MRNs`` condition over those values.
    """
    return Cohort(MRNs(list(df.index.values)))

In [None]:
# Example for cardiovascular diseases: OR together one Diagnosis condition per
# ICD-9 / ICD-10 code pattern, folding left to right.
# NOTE(review): the ICD-9 prefix "41" and the ICD-10 prefix "I1" are not in
# these lists -- confirm the omission is intentional.
conditions_cv = reduce(
    lambda combined, diagnosis: combined | diagnosis,
    [Diagnosis(prefix + "%", "ICD-9")
     for prefix in ("39", "40", "42", "43", "44", "45")]
    + [Diagnosis(prefix + "%", "ICD-10")
       for prefix in ("I0", "I2", "I3", "I4", "I5", "I6", "I7", "I8", "I9")],
)
#Control_filtered_15_540_Cardiovascular = cohort.get(conditions)

In [None]:
# Flag whether a patient has at least one matching record before a cutoff age.
def get_mrn_has_certain_condition_EVER(mrn, df, max_age_in_days):
    """Return 1 if ``mrn`` has any row in ``df`` before ``max_age_in_days``, else 0.

    Args:
        mrn: Value compared against the ``medical_record_number`` column.
        df: DataFrame with ``medical_record_number`` and ``age_in_days`` columns.
        max_age_in_days: Exclusive upper bound on ``age_in_days``.

    Returns:
        int: 1 when at least one row of the patient lies before the cutoff,
        otherwise 0 (also when the patient is absent or ``df`` is empty).
    """
    # An empty frame may not even carry the expected columns.
    if df.empty:
        return 0
    # Filter on the MRN column directly instead of scanning every cell of the
    # frame; a patient without rows simply yields an all-False mask.
    is_patient = df["medical_record_number"] == mrn
    is_before_cutoff = df["age_in_days"] < max_age_in_days
    return int((is_patient & is_before_cutoff).any())

In [None]:
# Count how often a patient has a matching record before a cutoff age.
def get_mrn_has_certain_condition_COUNT(mrn, df, max_age_in_days):
    """Count the rows of ``mrn`` in ``df`` that lie before ``max_age_in_days``.

    Args:
        mrn: Value compared against the ``medical_record_number`` column.
        df: DataFrame with ``medical_record_number`` and ``age_in_days`` columns.
        max_age_in_days: Exclusive upper bound on ``age_in_days``.

    Returns:
        int: Number of the patient's rows before the cutoff; 0 when the
        patient is absent or ``df`` is empty.
    """
    # An empty frame may not even carry the expected columns.
    if df.empty:
        return 0
    # Filter on the MRN column directly instead of scanning every cell of the
    # frame; a patient without rows simply yields an all-False mask.
    is_patient = df["medical_record_number"] == mrn
    is_before_cutoff = df["age_in_days"] < max_age_in_days
    return int((is_patient & is_before_cutoff).sum())

In [None]:
# Count how often a patient has a matching record inside a time window that
# ends at the cutoff age. The window length defaults to 10000 days and can be
# overridden through ``window_in_days``.
def get_mrn_has_certain_condition_WINDOW(mrn, df, max_age_in_days, window_in_days=10000):
    """Count the rows of ``mrn`` whose age falls inside the window before the cutoff.

    A row is counted when
    ``max_age_in_days - window_in_days < age_in_days < max_age_in_days``
    (both bounds exclusive).

    Args:
        mrn: Value compared against the ``medical_record_number`` column.
        df: DataFrame with ``medical_record_number`` and ``age_in_days`` columns.
        max_age_in_days: Exclusive upper bound on ``age_in_days`` (window end).
        window_in_days: Length of the window in days; defaults to 10000.

    Returns:
        int: Number of the patient's rows inside the window; 0 when the
        patient is absent or ``df`` is empty.
    """
    # An empty frame may not even carry the expected columns.
    if df.empty:
        return 0
    patient_ages = df.loc[df["medical_record_number"] == mrn, "age_in_days"]
    # Both bounds must hold: the upper bound used to be computed but never
    # applied, so records after the cutoff were counted as well.
    is_before_cutoff = patient_ages < max_age_in_days
    is_after_window_start = patient_ages > (max_age_in_days - window_in_days)
    return int((is_before_cutoff & is_after_window_start).sum())

In [None]:
def get_has_certain_condition(condition, df_mrn, name_cohort,name_feature,gap_in_days,name_df, frequency):
    """Add a per-patient condition feature to ``df_mrn`` and persist the results.

    The patients in ``df_mrn`` are turned into a cohort, the cohort is queried
    for ``condition``, and for every patient one feature value is computed from
    the records before ``HT_Onset - gap_in_days``.

    Args:
        condition: fiber condition passed to ``cohort.get``.
        df_mrn: DataFrame indexed by medical record number with an
            ``HT_Onset`` column (presumably the onset age in days -- TODO confirm).
        name_cohort: Path the queried condition records are written to (parquet).
        name_feature: Suffix of the new column ``has_condition_<name_feature>``.
        gap_in_days: Number of days subtracted from ``HT_Onset`` to obtain the
            cutoff age.
        name_df: Path the merged result frame is written to (parquet).
        frequency: Feature type, one of ``'EVER'``, ``'COUNT'``, ``'WINDOW'``.

    Returns:
        ``df_mrn`` merged on its index with the new feature column.

    Raises:
        ValueError: If ``frequency`` is not one of the supported values. The
            check runs before anything is queried or written.
    """
    feature_functions = {
        'EVER': get_mrn_has_certain_condition_EVER,
        'COUNT': get_mrn_has_certain_condition_COUNT,
        'WINDOW': get_mrn_has_certain_condition_WINDOW,
    }
    # Validate first: previously an unknown frequency only printed a message
    # and then wrote an empty result, while 'EVER' and 'COUNT' wrongly printed
    # the same message because the else belonged to the 'WINDOW' check only.
    if frequency not in feature_functions:
        raise ValueError(
            "function not available: frequency must be one of "
            + ", ".join(feature_functions) + "; got " + repr(frequency)
        )
    feature_function = feature_functions[frequency]

    # Query the condition records for all patients in df_mrn and save them.
    cohort = df_to_cohort(df_mrn)
    cohort_condition = cohort.get(condition)
    print(cohort_condition)
    cohort_condition.to_parquet(name_cohort)

    # One [mrn, feature value] row per patient; the cutoff age is the
    # patient's HT_Onset minus the gap.
    cohort_mrn_diagnosis = [
        [mrn, feature_function(mrn, cohort_condition, df_mrn.loc[mrn, "HT_Onset"] - gap_in_days)]
        for mrn in df_mrn.index
    ]

    # Convert the rows to a frame indexed by MRN and attach them to the input.
    condition_name = 'has_condition_' + name_feature
    df_feature = pd.DataFrame(cohort_mrn_diagnosis, columns=['medical_record_number', condition_name])
    df_feature = df_feature.set_index('medical_record_number')
    df_final = df_mrn.merge(df_feature, left_index=True, right_index=True)
    df_final.to_parquet(name_df)
    print(df_final)
    return df_final

In [None]:
# Cardiovascular diagnosis condition: OR together one Diagnosis condition per
# ICD-9 / ICD-10 code pattern, folding left to right. The code list is the
# same as the one used for conditions_cv earlier in this notebook.
# NOTE(review): the ICD-9 prefix "41" and the ICD-10 prefix "I1" are not in
# these lists -- confirm the omission is intentional.
conditions_CV = reduce(
    lambda combined, diagnosis: combined | diagnosis,
    [Diagnosis(prefix + "%", "ICD-9")
     for prefix in ("39", "40", "42", "43", "44", "45")]
    + [Diagnosis(prefix + "%", "ICD-10")
       for prefix in ("I0", "I2", "I3", "I4", "I5", "I6", "I7", "I8", "I9")],
)

In [None]:
# Test condition for diabetes: one ICD-9 pattern OR one ICD-10 pattern.
# NOTE(review): in ICD-9-CM, category 249 is secondary diabetes mellitus and
# type 2 diabetes is coded under 250 -- confirm that "249%" is the intended
# pattern.
conditions_diabetes_type_2 = Diagnosis("249%", "ICD-9") | Diagnosis("E11%", "ICD-10")

In [None]:
# Test condition on the "Diastolic Blood Pressure" measurement compared
# against 70 (presumably selects measurements with a value above 70; the unit
# is not stated here -- TODO confirm).
conditions_Blood_Pressure = (Measurement("Diastolic Blood Pressure") > 70)

In [None]:
# Test condition for the drug Lisinopril, using the pattern "%Lisinopril%".
condition_lisinopril = Drug("%Lisinopril%")

In [None]:
# Display the test case cohort for visual inspection.
Test_Case_filtered_15_540

In [None]:
# Sanity check of the cutoff age used by get_has_certain_condition
# (HT_Onset minus a 180-day gap), shown for every test patient. This avoids
# hard-coding a single medical record number, which is a patient identifier
# and raises a KeyError whenever that patient is not among the test rows.
Test_Case_filtered_15_540["HT_Onset"] - 180

In [None]:
# Test on the diabetes condition with the WINDOW feature on the small test
# cohort. The condition is named conditions_diabetes_type_2 (with an "s");
# the previous spelling condition_diabetes_type_2 is not defined anywhere in
# this notebook and raised a NameError.
df_final = get_has_certain_condition(
    conditions_diabetes_type_2,
    Test_Case_filtered_15_540,
    "Test_Diabetes_Case_filtered_15_540",
    "Diabetes_Type_2",
    180,
    "Test_Diabetes_Case_filtered_15_540.parquet",
    "WINDOW",
)


In [None]:
# Display the frame returned by the preceding get_has_certain_condition call.
df_final

In [None]:
# Display the test case cohort again, for comparison with df_final.
Test_Case_filtered_15_540

In [None]:
 
# Compute the EVER diabetes feature for the full case cohort. The condition is
# named conditions_diabetes_type_2 (with an "s"); the previous spelling
# condition_diabetes_type_2 is not defined and raised a NameError.
# NOTE(review): this full-cohort run writes to the same "Test_..." output
# names as the test run above and therefore overwrites its files -- confirm
# that this is intended.
df_final = get_has_certain_condition(
    conditions_diabetes_type_2,
    Case_filtered_15_540,
    "Test_Diabetes_Case_filtered_15_540",
    "Diabetes_Type_2",
    180,
    "Test_Diabetes_Case_filtered_15_540.parquet",
    "EVER",
)
