In [1]:
import numpy as np
import pandas as pd
# Turn off pandas' chained-assignment (SettingWithCopy) check for the whole notebook.
# NOTE(review): this silences warnings about writes to DataFrame slices everywhere
# in the kernel, not just in one cell — confirm that is intended.
pd.options.mode.chained_assignment = None

In [None]:
def calculate_bmi(cohort, table, output_file_name, table_name):
    """Impute missing height/weight, add a BMI column and write the table to CSV.

    Missing values in ``admissionheight`` and ``admissionweight`` are replaced
    by the mean of the respective column, then ``BMI on admission`` is computed
    as ``weight / (height / 100) ** 2``.
    NOTE(review): the division by 100 implies heights in centimetres and
    weights in kilograms — confirm against the data source.

    Parameters
    ----------
    cohort : unused; kept so the signature matches the other feature builders.
    table : pandas.DataFrame
        Must contain ``admissionheight`` and ``admissionweight``. It is
        modified in place (nulls filled, ``BMI on admission`` added).
    output_file_name : str or path-like
        Destination of the CSV (UTF-8, written without the index).
    table_name : str
        Only used in the progress messages.

    Returns
    -------
    None
    """
    print("started creating features from ",table_name)

    # Assign the filled column back instead of calling fillna(inplace=True) on
    # the selected column: the chained in-place form operates on a temporary
    # under pandas Copy-on-Write and would leave `table` unchanged.
    for column in ('admissionheight', 'admissionweight'):
        table[column] = table[column].fillna(table[column].mean())

    height_m = table['admissionheight'] / 100.0
    table['BMI on admission'] = table['admissionweight'] / (height_m * height_m)

    print("done creating features from ",table_name) 
    print("shape: ",table.shape)
    table.to_csv(output_file_name, encoding='utf-8', index=False)
    print("done creating {}_for_modeling.csv".format(table_name))

In [2]:
def create_daily_weight(cohort, table, output_file_name, table_name):
    """Build per-identifier weight statistics for the cohort and write them to CSV.

    For every row of ``cohort`` (ordered by ``identifier``) the output holds
    the maximum and minimum ``kg`` value found in ``table`` for that
    identifier, the number of non-null measurements (0 when there are none)
    and the difference between maximum and minimum. Identifiers without any
    measurement keep NaN for max, min and delta.

    Parameters
    ----------
    cohort : pandas.DataFrame
        Must contain an ``identifier`` column. Each cohort row yields exactly
        one output row, even when identifiers repeat or the cohort is empty.
    table : pandas.DataFrame
        Must contain ``identifier`` and ``kg``. Rows with a null ``kg`` are
        dropped from this frame in place (existing behaviour, kept so callers
        that reuse the frame see the same data as before).
    output_file_name : str or path-like
        Destination of the CSV (UTF-8, written without the index).
    table_name : str
        Only used in the progress messages.

    Returns
    -------
    None
    """
    print("started creating features from ",table_name)

    label = 'daily weight'
    max_col = label + ' max'
    min_col = label + ' min'
    count_col = label + ' num of measurements'
    delta_col = label + ' delta bw max and min'

    table.dropna(subset=['kg'], inplace=True) # remove null values

    # One output row per cohort row, ordered by identifier.
    df_final = (
        cohort[['identifier']]
        .sort_values(by=['identifier'])
        .reset_index(drop=True)
    )

    # Aggregate once per identifier instead of re-filtering both frames and
    # concatenating inside a Python loop (quadratic, and it multiplied rows
    # whenever the cohort contained a repeated identifier).
    per_identifier = table.groupby('identifier')['kg'].agg(['max', 'min', 'count'])

    # Series.map leaves NaN for identifiers that have no measurements; the
    # float cast keeps the columns numeric-float regardless of the kg dtype.
    for stat, column in (('max', max_col), ('min', min_col), ('count', count_col)):
        df_final[column] = df_final['identifier'].map(per_identifier[stat]).astype(float)

    # Assign back rather than fillna(inplace=True) on a selected column, which
    # does not update the frame under pandas Copy-on-Write.
    df_final[count_col] = df_final[count_col].fillna(0)
    df_final[delta_col] = df_final[max_col] - df_final[min_col]

    print("done creating features from ",table_name) 
    print("shape: ",df_final.shape)
    df_final.to_csv(output_file_name, encoding='utf-8', index=False)
    print("done creating {}_for_modeling.csv\n".format(table_name))

    