In [1]:
import numpy as np
import pandas as pd
# Turn off pandas' chained-assignment check (SettingWithCopyWarning) for the
# whole notebook. NOTE(review): this is a global switch, so it also hides the
# warning in any other cell that assigns into a DataFrame slice.
pd.options.mode.chained_assignment = None 
# Optional display settings, left disabled: uncomment to make pandas print
# every column / every row of a DataFrame instead of a truncated view.
#pd.set_option('display.max_columns', None)
#pd.set_option('display.max_rows', None)

In [2]:
def _compute_meds_features(cohort, table, kind):
    """Build the per-identifier medication feature frame (no I/O, no printing).

    Returns a DataFrame with one row per non-null ``cohort['identifier']``
    value, sorted by identifier, whose columns are ``identifier`` followed by
    the eight feature columns named after ``kind``.
    """
    h_str = 'hours_from_charttime_to_targettime'

    # Rename on a copy so the caller's DataFrame is left untouched.
    table = table.rename(columns={'hours_from_starttime_to_targettime': h_str,
                                  'start_date': 'charttime'})

    features = ["Got any {} during hospital stay".format(kind), #0
                "Number of different {} the patient got".format(kind), #1
                "Doses of any {} the patient got".format(kind), #2
                "Doses of any {} in the last day before target time".format(kind), #3
                "Got any {} in the last day before target time".format(kind), #4
                "Number of different {} in the last day before target time".format(kind), #5
                "Hours from first {} dose to target time".format(kind), #6
                "Hours from last {} dose to target time".format(kind) #7
               ]

    # One output row per cohort row, ordered by identifier. A missing
    # identifier can never match a medication row, so such rows are left out.
    idents = cohort.sort_values(by=['identifier'])['identifier'].dropna()

    # --- statistics over every dose of each identifier present in `table` ---
    per_ident = table.groupby('identifier')
    dose_counts = per_ident.size()
    stats = pd.DataFrame(index=dose_counts.index)
    stats[features[0]] = 1
    # dropna=False: a missing order_name counts as its own distinct value.
    stats[features[1]] = per_ident['order_name'].nunique(dropna=False)
    stats[features[2]] = dose_counts

    # --- statistics restricted to the 24 hours before target time ---
    in_last_day = (table[h_str] <= 24.0) & (table[h_str] >= 0.0)
    last_day_per_ident = table.loc[in_last_day].groupby('identifier')
    last_day_counts = last_day_per_ident.size().reindex(stats.index, fill_value=0)
    stats[features[3]] = last_day_counts
    stats[features[4]] = (last_day_counts > 0).astype(int)
    stats[features[5]] = (last_day_per_ident['order_name']
                          .nunique(dropna=False)
                          .reindex(stats.index, fill_value=0))

    # "First"/"last" dose = first/last row of the identifier in table order.
    # NOTE(review): this assumes `table` is ordered chronologically within each
    # identifier -- confirm against the query that produces it.
    first_rows = table.drop_duplicates(subset='identifier', keep='first').set_index('identifier')
    last_rows = table.drop_duplicates(subset='identifier', keep='last').set_index('identifier')
    stats[features[6]] = first_rows[h_str]
    stats[features[7]] = last_rows[h_str]

    # Spread the statistics over the cohort. `stats.index` is unique, so the
    # target may repeat an identifier; identifiers absent from `table` get 0
    # in every feature column (including the two "hours" columns).
    return (stats.reindex(idents.to_numpy(), fill_value=0)
                 .rename_axis('identifier')
                 .reset_index())


def create_meds_features(cohort, table, output_file_name, table_name, kind):
    """Create medication features for every cohort identifier and save them as CSV.

    Parameters
    ----------
    cohort : pandas.DataFrame
        Must contain an ``identifier`` column; one output row is produced per
        row with a non-null identifier, sorted by identifier.
    table : pandas.DataFrame
        Medication rows with ``identifier``, ``order_name`` and either
        ``hours_from_charttime_to_targettime`` or
        ``hours_from_starttime_to_targettime`` (the latter is renamed to the
        former on an internal copy; ``start_date`` is likewise renamed to
        ``charttime``). The input frame itself is not modified.
    output_file_name : str or path-like
        Destination of the CSV (UTF-8, no index column).
    table_name : str
        Label used only in the progress messages.
    kind : str
        Medication family name interpolated into the feature column names.

    Returns
    -------
    None
        The result is written to ``output_file_name``.
    """
    print("started creating features from ",table_name)

    df_final = _compute_meds_features(cohort, table, kind)

    print("done creating features from ",table_name) 
    print("shape: ",df_final.shape)

    df_final.to_csv(output_file_name, encoding='utf-8', index=False)

    print("done creating {}_for_modeling.csv\n".format(table_name))
    
    
    