In [51]:
import numpy as np
import pandas as pd
import json
from sklearn.preprocessing import MinMaxScaler

In [64]:
# Bacteria whose per-participant flags are collected into "Top_6_Bacteria_List".
# "Tannerella sp. 6_1_58FAA_CT1" (missing from data)
_TOP_6_BACTERIA = (
    "Alistipes onderdonkii",
    "Clostridiales bacterium VE202-18",
    "Filifactor alocis ATCC 35896",
    "Lachnospiraceae bacterium 3-1",
    "Bifidobacterium adolescentis strain BBMN23",
    "Coprococcus sp. HPP0048",
)


def _min_max_scale_field(records, key):
    """
    _min_max_scale_field rescale one field with a MinMaxScaler across all records, in place

    Records that do not contain `key` are left untouched and take no part in
    fitting the scaler. Values are handed to the scaler as they are, so any
    coercion to float is left to scikit-learn's input validation.

    Args:
        records (dict): mapping of user id -> per-user data dictionary.
        key (str): name of the field to rescale.
    """
    holders = [data for data in records.values() if key in data]
    if not holders:
        # Fitting a scaler on zero samples raises; with no values there is
        # nothing to rescale, so skip the field instead of aborting the load.
        return

    # The scaler expects a 2D (n_samples, n_features) input, hence one
    # single-element row per record.
    column = [[data[key]] for data in holders]
    scaled = MinMaxScaler().fit_transform(column)

    for data, row in zip(holders, scaled):
        # Store a plain Python float rather than a NumPy scalar.
        data[key] = float(row[0])


def load_demo_viome_from_json(
    demo_viome_path="demographics-microbiome-data.json"
):
    """
    load_demo_viome_from_json loading demographics and microbiome data & process as needed

    Processing applied to the loaded dictionary:
        1. "Age" is min-max scaled across participants.
        2. "Gender" is recoded: "M" -> 0, "F" -> 1 (any other value is kept as is).
        3. "BMI" is computed as 703 * "Body weight" / "Height" ** 2 for records that
           have both fields, then min-max scaled.
        4. "A1c PDL (Lab)", "Fasting GLU - PDL (Lab)" and "Insulin" are min-max scaled.
        5. "Top_6_Bacteria_List" is added to every record: the values stored under
           the six selected bacteria names, in a fixed order. A bacterium that is
           absent from a record contributes 0.

    Every field is optional per record; a field that no record contains is skipped.

    Args:
        demo_viome_path (str, optional): path of demographics and viome data. Defaults to "demographics-microbiome-data.json".

    Returns:
        demo_viome (dict): demographics and viome readings in dictionary format

    Raises:
        OSError: if the file cannot be opened.
        json.JSONDecodeError: if the file is not valid JSON.
        ZeroDivisionError: if a record has a "Height" of 0.
    """
    with open(demo_viome_path, encoding="utf-8") as json_file:
        demo_viome = json.load(json_file)

    # 1. Age
    _min_max_scale_field(demo_viome, "Age")

    # 2. Gender (str to binary): 0 for "M" and 1 for "F"
    for data in demo_viome.values():
        if "Gender" in data:
            gender = data["Gender"]
            if gender == "M":
                data["Gender"] = 0
            elif gender == "F":
                data["Gender"] = 1

    # 3. BMI = 703 * weight(lb) / height^2(in), computed from the raw
    #    (unscaled) weight and height, then scaled like the other features.
    for data in demo_viome.values():
        if "Body weight" in data and "Height" in data:
            data["BMI"] = 703 * data["Body weight"] / (data["Height"] ** 2)
    _min_max_scale_field(demo_viome, "BMI")

    # 4.-6. Lab readings
    _min_max_scale_field(demo_viome, "A1c PDL (Lab)")
    _min_max_scale_field(demo_viome, "Fasting GLU - PDL (Lab)")
    _min_max_scale_field(demo_viome, "Insulin")

    # 7. Top 6 Bacteria (binary to list in binary)
    for data in demo_viome.values():
        # Default to 0 so a single missing column does not abort the whole load
        # with a KeyError; the list always has one entry per selected bacterium.
        data['Top_6_Bacteria_List'] = [data.get(name, 0) for name in _TOP_6_BACTERIA]

    return demo_viome


In [65]:
# Read the demographics / viome JSON and run the preprocessing defined above.
demo = load_demo_viome_from_json(demo_viome_path="/Users/sua/Desktop/stmi/multimodal_macronutrient/demographics-microbiome-data.json")

# Processed fields to show for each participant, in display order.
keys_to_print = ["Age", "Gender", "BMI", "A1c PDL (Lab)", "Fasting GLU - PDL (Lab)", "Insulin", 'Top_6_Bacteria_List']

# Emit one report per participant: a header line, one line per available
# field, then blank lines separating it from the next participant.
for user_id, user_data in demo.items():
    lines = [f"User ID: {user_id}"]
    for key in keys_to_print:
        if key not in user_data:
            continue
        lines.append(f"{key}: {user_data[key]}")
    # The trailing "\n" item plus print's own line ending reproduce the
    # blank-line separator between participants.
    print(*lines, "\n", sep="\n")

User ID: 1001
Age: 0.17647058823529416
Gender: 0
BMI: 0.06239911835971723
A1c PDL (Lab): 0.29166666666666674
Fasting GLU - PDL (Lab): 0.022222222222222143
Insulin: 0.0
Top_6_Bacteria_List: [0, 0, 0, 0, 1, 1]


User ID: 1002
Age: 0.607843137254902
Gender: 1
BMI: 0.40628680240309256
A1c PDL (Lab): 0.33333333333333326
Fasting GLU - PDL (Lab): 0.04444444444444451
Insulin: 0.5234042553191489
Top_6_Bacteria_List: [0, 0, 0, 0, 0, 1]


User ID: 1003
Age: 0.8039215686274509
Gender: 1
BMI: 0.24791782174759414
A1c PDL (Lab): 0.7500000000000002
Fasting GLU - PDL (Lab): 0.3222222222222222
Insulin: 0.6340425531914893
Top_6_Bacteria_List: [0, 0, 0, 0, 0, 1]


User ID: 1004
Age: 0.2941176470588236
Gender: 1
BMI: 0.8593452348160411
A1c PDL (Lab): 0.33333333333333326
Fasting GLU - PDL (Lab): 0.1777777777777778
Insulin: 0.7191489361702127
Top_6_Bacteria_List: [0, 0, 0, 0, 0, 0]


User ID: 1005
Age: 0.6470588235294118
Gender: 1
BMI: 0.4067143132678237
A1c PDL (Lab): 0.7916666666666667
Fasting GLU - PDL (L