In [1]:
import pandas as pd
import numpy as np
import json

In [2]:
# Read the subgroup-1 predictions CSV; its first column becomes the row index.
df = pd.read_csv("processed_data/subgroup_1_predictions.csv", index_col=0)
# Preview the first rows of the loaded frame.
df.head()

Unnamed: 0,subject_id,hadm_id,subgroup_x,rounded_age,Combined_Blood_Pressure,charttime,Final_Temperature_F,heart_rate,RespiratoryRate_combined,SpO2,...,coagulopathy,obesity,weight_loss,fluid_electrolyte,blood_loss_anemia,deficiency_anemias,alcohol_abuse,drug_abuse,psychoses,depression
0,59,104130,1,87,103,11/14/97 23:00,98.6,62.0,14.0,99,...,0,0,0,0,0,0,0,0,0,1
1,59,104130,1,87,105,11/15/97 3:00,98.5,62.0,9.0,95,...,0,0,0,0,0,0,0,0,0,1
2,59,104130,1,87,107,11/15/97 7:00,97.800079,63.0,17.0,97,...,0,0,0,0,0,0,0,0,0,1
3,59,104130,1,87,117,11/15/97 11:00,97.500021,62.0,20.0,98,...,0,0,0,0,0,0,0,0,0,1
4,59,104130,1,87,124,11/15/97 16:00,99.199997,70.0,21.0,96,...,0,0,0,0,0,0,0,0,0,1


In [3]:
def convert_int64(dataframe):
    """Cast every ``np.int64`` column of ``dataframe`` with ``astype(int)``.

    The cast is applied in place, column by column, and the very same frame
    object is returned so the call can be written ``df = convert_int64(df)``.

    Note: pandas keeps column values as NumPy scalars, so this does not turn
    them into Python ``int`` objects; where NumPy's default integer is 64-bit
    the column dtype stays ``int64``.

    Parameters
    ----------
    dataframe : pd.DataFrame
        Frame whose ``int64`` columns should be cast. Modified in place.

    Returns
    -------
    pd.DataFrame
        The ``dataframe`` argument itself.
    """
    # Collect the targets first so the frame is not modified while being scanned.
    int64_columns = [
        col for col in dataframe.columns if dataframe[col].dtype == np.int64
    ]
    for col in int64_columns:
        dataframe[col] = dataframe[col].astype(int)
    # Return the argument, not a module-level variable, so the function works
    # for any frame passed in.
    return dataframe
# Run the int64 cast over the loaded frame and rebind `df` to the returned frame.
df = convert_int64(df)

In [4]:
# List the dtype of every column in `df`.
df.dtypes

subject_id                    int64
hadm_id                       int64
subgroup_x                    int64
rounded_age                   int64
Combined_Blood_Pressure       int64
charttime                    object
Final_Temperature_F         float64
heart_rate                  float64
RespiratoryRate_combined    float64
SpO2                          int64
date                         object
hadm_number                   int64
hours_in_hospital             int64
sofa                          int64
group                         int64
predictions                   int64
congestive_heart_failure      int64
cardiac_arrhythmias           int64
valvular_disease              int64
pulmonary_circulation         int64
peripheral_vascular           int64
hypertension                  int64
paralysis                     int64
other_neurological            int64
chronic_pulmonary             int64
diabetes_uncomplicated        int64
diabetes_complicated          int64
hypothyroidism              

In [5]:
def get_latest_vitals(dataframe):
    """Return one row per ``subject_id`` with that subject's latest values.

    ``charttime`` is parsed to datetimes, rows are ordered by
    ``(subject_id, charttime)``, and the last entry of each group is kept.
    The input frame is not modified; the parsed ``charttime`` column exists
    only on the returned frame.

    Parameters
    ----------
    dataframe : pd.DataFrame
        Must contain ``subject_id`` and a ``charttime`` column that
        ``pd.to_datetime`` can parse.

    Returns
    -------
    pd.DataFrame
        One row per ``subject_id`` (as a regular column), ordered by
        ``subject_id``.
    """
    # assign() builds a new frame, so the caller's charttime column keeps its
    # original values and re-running the cell is idempotent.
    parsed = dataframe.assign(charttime=pd.to_datetime(dataframe['charttime']))
    ordered = parsed.sort_values(by=['subject_id', 'charttime'])
    # NOTE: GroupBy.last() takes the last *non-null* value of each column, so
    # if the most recent row has missing values they are filled from earlier
    # rows of the same subject rather than reported as NaN.
    latest_vitals = ordered.groupby('subject_id').last().reset_index()
    return latest_vitals

# Collapse `df` to one row per subject_id using get_latest_vitals.
latest_vitals_df = get_latest_vitals(df)

# Preview the first rows of the per-subject frame.
latest_vitals_df.head()

Unnamed: 0,subject_id,hadm_id,subgroup_x,rounded_age,Combined_Blood_Pressure,charttime,Final_Temperature_F,heart_rate,RespiratoryRate_combined,SpO2,...,coagulopathy,obesity,weight_loss,fluid_electrolyte,blood_loss_anemia,deficiency_anemias,alcohol_abuse,drug_abuse,psychoses,depression
0,59,104130,1,87,163,1997-11-16 19:00:00,97.599998,74.0,16.0,97,...,0,0,0,0,0,0,0,0,0,1
1,109,170149,1,24,145,2041-05-25 19:00:00,97.400002,97.0,26.0,100,...,1,0,0,1,0,0,0,0,0,0
2,130,198214,1,61,119,2019-11-03 04:00:00,98.6,93.0,26.0,97,...,0,0,0,0,0,0,0,0,0,0
3,234,134944,1,54,109,2006-04-13 05:00:00,96.699997,86.0,31.0,100,...,0,0,0,0,0,0,0,0,0,0
4,263,120845,1,56,146,2060-01-11 16:00:00,98.300003,60.0,22.0,98,...,0,0,0,0,0,0,0,0,0,0


In [6]:
# Show the per-subject rows whose `predictions` value equals 2.
latest_vitals_df[latest_vitals_df['predictions'] == 2]

Unnamed: 0,subject_id,hadm_id,subgroup_x,rounded_age,Combined_Blood_Pressure,charttime,Final_Temperature_F,heart_rate,RespiratoryRate_combined,SpO2,...,coagulopathy,obesity,weight_loss,fluid_electrolyte,blood_loss_anemia,deficiency_anemias,alcohol_abuse,drug_abuse,psychoses,depression
7,353,112976,1,62,124,2051-06-27 05:00:00,100.000043,85.0,20.0,90,...,1,0,0,1,0,0,0,0,0,0
13,453,187961,1,80,107,2056-04-26 15:00:00,99.600079,95.0,19.0,95,...,0,0,0,0,0,0,0,0,0,0
15,527,165891,1,72,118,1975-08-18 07:00:00,100.000043,77.0,20.0,95,...,0,0,0,0,0,0,0,0,0,0
16,584,167135,1,90,83,2014-10-24 03:00:00,97.400002,72.0,21.0,96,...,0,0,0,0,0,0,0,0,0,0
17,588,170452,1,69,115,2000-01-15 04:00:00,97.500021,70.0,16.0,99,...,0,0,1,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1068,32247,131903,1,63,147,2022-01-25 12:00:00,97.800079,99.0,18.0,99,...,0,0,0,1,0,0,0,0,0,1
1069,32296,143945,1,71,137,2002-04-23 15:00:00,95.500043,71.0,20.0,98,...,0,0,0,0,0,0,0,0,0,0
1070,32315,101791,1,81,123,2037-09-07 11:00:00,96.800003,71.0,15.0,100,...,1,0,0,1,1,0,0,0,0,0
1077,32598,135125,1,67,141,2010-11-21 21:00:00,99.300021,81.0,19.0,94,...,0,0,0,0,0,0,0,0,0,0


In [7]:
def json_converter(dataframe, patient_id, name="John Doe",
                   output_dir="processed_data", n_elixhauser_cols=30):
    """Write one patient's record to ``<output_dir>/id<patient_id>.json``.

    The record is taken from the first row of ``dataframe`` whose
    ``subject_id`` equals ``patient_id``. The ``elixhauser`` entry lists, in
    title case with underscores replaced by spaces, every one of the trailing
    ``n_elixhauser_cols`` columns that contains a 1 in the matching row(s).

    Parameters
    ----------
    dataframe : pd.DataFrame
        Must contain ``subject_id``, ``subgroup_x``,
        ``Combined_Blood_Pressure``, ``charttime``, ``Final_Temperature_F``,
        ``heart_rate``, ``RespiratoryRate_combined``, ``rounded_age``,
        ``SpO2`` and ``sofa``.
    patient_id
        Value matched against ``subject_id``.
    name : str, optional
        Value stored under ``"name"``; defaults to the placeholder
        ``"John Doe"``.
    output_dir : str, optional
        Existing directory the JSON file is written into.
    n_elixhauser_cols : int, optional
        How many trailing columns are treated as one-hot comorbidity flags.
        Values ``<= 0`` produce an empty list.

    Returns
    -------
    None

    Raises
    ------
    IndexError
        If no row has ``subject_id == patient_id``. Raised before any file is
        created.
    KeyError
        If a required column is missing.
    """
    row = dataframe[dataframe['subject_id'] == patient_id]
    if row.empty:
        # Fail early with a readable message instead of the bare
        # "single positional indexer is out-of-bounds" from .iloc[0].
        raise IndexError(f"no row with subject_id == {patient_id!r}")

    if n_elixhauser_cols > 0:
        elix = row.iloc[:, -n_elixhauser_cols:]
        flagged = elix.columns[elix.eq(1).any()].tolist()
    else:
        # A slice of [-0:] would select every column, so handle it explicitly.
        flagged = []
    one_hot_columns = [
        ' '.join(word.capitalize() for word in item.replace('_', ' ').split())
        for item in flagged
    ]

    def first(column):
        """Return the value of ``column`` in the first matching row."""
        return row[column].iloc[0]

    def custom_serializer(obj):
        """Convert NumPy / pandas scalars that ``json`` cannot encode itself."""
        if isinstance(obj, np.bool_):
            return bool(obj)
        if isinstance(obj, np.integer):
            return int(obj)  # Convert NumPy integers to Python int
        if isinstance(obj, np.floating):
            return float(obj)  # e.g. float32, which is not a Python float
        if isinstance(obj, pd.Timestamp):
            return obj.isoformat()  # Convert Timestamp to ISO 8601 string
        raise TypeError(f"Object of type {obj.__class__.__name__} is not JSON serializable")

    # Key order is kept stable so the written files stay diff-friendly.
    data = {
        "name": name,
        "subject_id": first("subject_id"),
        "subgroup": first("subgroup_x"),
        "latest_blood_pressure": first("Combined_Blood_Pressure"),
        "latest_chart_time": first("charttime"),
        "latest_temp_F": first("Final_Temperature_F"),
        "latest_heart_rate": first("heart_rate"),
        "latest_respiratory_rate": first("RespiratoryRate_combined"),
        "elixhauser": one_hot_columns,
        "age": first("rounded_age"),
        "spo2": first("SpO2"),
        "sofa": first("sofa"),
    }

    file_name = f"{output_dir}/id{patient_id}.json"

    with open(file_name, 'w', encoding='utf-8') as json_file:
        json.dump(data, json_file, indent=4, default=custom_serializer)
        
# Export the record for subject_id 453 from the per-subject frame to JSON.
json_converter(latest_vitals_df, 453)

In [8]:
# Look up the charttime value stored for subject_id 59 in the per-subject frame.
latest_vitals_df[latest_vitals_df['subject_id']==59]['charttime'].iloc[0]

Timestamp('1997-11-16 19:00:00')