In [1]:
import sys
from pathlib import Path

import dash_bootstrap_components as dbc
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from dash import Dash, Input, Output, ctx, dcc, html

sys.path.append("../../")
from lib.general import get_stage_list
from lib.stats import demographic_characteristics
from lib.plotly import standard_layout, add_box, add_pairwise_comparison
from lib.r_interface import tukey_multiple_dvs

Error importing in API mode: ImportError('On Windows, cffi mode "ANY" is only "ABI".')
Trying to import in ABI mode.
Trying to import in ABI mode.


### Input

In [2]:
# I/O locations. Each relative path is resolved to an absolute one against the
# current working directory.
# Input: demographics / biomarkers CSV.
path_demographics: Path = Path("../../../data/processed/adni/demographics_biomarkers.csv").resolve()
# Lipidomics inputs are currently disabled:
# path_lipidomics: Path = Path("../../../data/processed/adni/lipidomics_total.csv").resolve()
# path_lipidomics_dict: Path = Path("../../../data/processed/adni/lipidomics_dict.csv").resolve()
# Output: CSV file that receives the summary statistics table.
path_output_table: Path = Path("../../../assets/tables/adni/demographic_characteristics.csv").resolve()
# Output: directory that receives the figures exported from the dashboard.
path_output_figure: Path = Path("../../../assets/figures/adni/").resolve()

In [3]:
# Load the demographics table, discarding rows with any missing value and
# rows that are exact duplicates of an earlier row.
demographics: pd.DataFrame = (
    pd.read_csv(path_demographics)
    .dropna()
    .drop_duplicates()
)
# lipidomics: pd.DataFrame = pd.read_csv(path_lipidomics).dropna().drop_duplicates()
# lipidomics_dict: pd.DataFrame = pd.read_csv(path_lipidomics_dict).dropna().drop_duplicates().convert_dtypes()

In [4]:
# Join dataframes
# Only the demographics table is used at present: the inner join with the
# lipidomics table on "RID" is disabled and kept in the trailing comment.
# A copy is taken so that columns added to `df` later do not alter `demographics`.
df: pd.DataFrame = (
    demographics.copy()
)  # join(lipidomics.set_index("RID"), on="RID", how="inner")

In [5]:
# Get the list of stages
# `stage_list` fixes the order of the stages on the x axis and in the stats table.
# NOTE(review): the meaning of the argument `2` is defined in lib.general.get_stage_list — confirm.
stage_list: list[str] = get_stage_list(2)
# Get the list of plasma lipids (disabled together with the lipidomics inputs)
# lipid_list: list[str] = lipidomics_dict["lipid_class"].unique().tolist()

In [6]:
# Normalise the column dtypes one family at a time:
# bool/int columns -> int, float columns -> float, object columns -> str.
for source_kinds, target_type in (([bool, int], int), ([float], float), ([object], str)):
    matching_cols = df.select_dtypes(include=source_kinds).columns
    df[matching_cols] = df[matching_cols].astype(target_type)

In [7]:
# Compute the summary statistics table
df_stats: pd.DataFrame = demographic_characteristics(df, stage_list)
# Make sure the destination folder exists: DataFrame.to_csv raises OSError
# when asked to write into a directory that has not been created yet.
path_output_table.parent.mkdir(parents=True, exist_ok=True)
# The index is written too because it carries the row labels of the table.
df_stats.to_csv(path_output_table, index=True)

### Dashboard

In [8]:
# Define columns for better legend labeling.
# Each label column must be named exactly like the corresponding key of
# `comparison_dict`, because the dashboard callback passes the selected key to
# px.strip(color=...). The APOE column was previously named
# "APOE4 Carrier Status", which did not match the "APOE e4 Carrier Status" key
# and made that comparison fail with a missing-column error.
df["ptau_log10"] = np.log10(df["ptau"])
df["Sex"] = df["sex"].map({0: "Female", 1: "Male"})
df["Cognitive Status"] = df["cog"].map({0: "CN", 1: "MCI"})
df["APOE e4 Carrier Status"] = df["apoe4"].map({0: "Non-carrier", 1: "Carrier"})

In [9]:
# Comparison variables offered in the dashboard.
# key   -> display name; the callback also uses it as the `color` column of df
# value -> (coded column in df, display labels in legend order)
comparison_dict: dict[str, tuple[str, list[str]]] = dict(
    [
        ("Sex", ("sex", ["Female", "Male"])),
        ("Cognitive Status", ("cog", ["CN", "MCI"])),
        ("APOE e4 Carrier Status", ("apoe4", ["Non-carrier", "Carrier"])),
    ]
)

In [10]:
# Radio-button choices: "none" (no grouping) followed by every comparison name
comparison_options: list[str] = ["none", *comparison_dict]

In [11]:
# Build the x-axis labels: the stage name, a line break, then the value of the
# "N" row of the stats table for that stage in parentheses.
stage_to_label: dict[str, str] = {
    stage: f"{stage}<br>({df_stats.loc['N', stage]})" for stage in stage_list
}
# Labels in the same order as `stage_list`
stage_label_list: list[str] = [stage_to_label[stage] for stage in stage_list]
# Attach the label of its stage to every row
df["stage_label"] = df["stage"].map(stage_to_label)

In [12]:
# Precompute Tukey's post-hoc for all dependent variables
# The comparisons are computed once here so that the dashboard callback can reuse
# `df_tukey` instead of recomputing it on every update.
# `dvs` also populates the variable dropdown of the dashboard.
# NOTE(review): "av45" and "strem2_log10" are not created in this notebook and
# are presumably columns of the input CSV — confirm.
dvs: list[str] = ["av45", "strem2_log10", "ptau_log10"]  # + lipid_list
df_tukey: pd.DataFrame = tukey_multiple_dvs(df, dvs, stage_list)

R callback write-console: Loading required package: MASS
  
R callback write-console: 
Attaching package: 'TH.data'

  
R callback write-console: The following object is masked from 'package:MASS':

    geyser

  
R callback write-console: 
Attaching package: 'TH.data'

  
R callback write-console: The following object is masked from 'package:MASS':

    geyser

  


In [13]:
# fmt: off
# Dashboard layout: a row of three control columns above the figure.
# The component ids defined here are the ones the plotting callback listens to.
app: Dash = Dash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP])

app.layout = html.Div([
    html.Hr(),
    dbc.Row([
        # Column 1: what to plot and how to annotate it
        dbc.Col([
            dbc.Label(children="Select a variable to plot:"),
            dcc.Dropdown(options=dvs, value=dvs[0], id="var"),
            html.Hr(),
            dbc.Label(children="Select a condition to compare:"),
            dbc.RadioItems(options=comparison_options, value="none", id="comparison", inline=True),
            html.Hr(),
            dbc.Label(children="Add connecting lines:"),
            dbc.RadioItems(options=["none", "mean", "median"], value="none", id="connecting_line", inline=True),
            html.Hr(),
            dbc.Switch(label="Toggle symbolic mode", id="symbolic_mode", value=True),
        ], width={"size": 3, "offset": 1}),
        # Column 2: figure geometry (all values in pixels)
        dbc.Col([
            dbc.Label(children="Adjust height"),
            dcc.Slider(id="height", min=200, max=1000, step=25, value=550, marks={x: str(x) for x in range(200, 1000 + 200, 200)}),
            dbc.Label(children="Adjust width"),
            dcc.Slider(id="width", min=400, max=1400, step=25, value=800, marks={x: str(x) for x in range(400, 1400 + 200, 200)}),
            dbc.Label(children="Top margin"),
            dcc.Slider(id="top_margin", min=100, max=500, step=25, value=150, marks={x: str(x) for x in range(100, 500 + 100, 100)}),
            dbc.Label(children="Annotation: interline"),
            # Marks stay within [min, max]; the previous range produced a mark at 110, above max=100
            dcc.Slider(id="interline", min=10, max=100, step=5, value=65, marks={x: str(x) for x in range(10, 100 + 10, 10)}),
        ], width=4),
        # Column 3: line colour and export button
        dbc.Col([
            dbc.Label("Line color"),
            dbc.Input(type="color", id="color_line", value="#000000", style={"width": 75, "height": 50},),
            html.Hr(),
            dbc.Button(children="Download as PDF+PNG", id="download", n_clicks=0),
        ], width=3),
    ]),
    html.Hr(),
    dbc.Col(dcc.Graph(id="graph"), width={"size": 6, "offset": 2}),
], style={"backgroundColor": "white"})

In [14]:
# fmt: off
@app.callback(
    Output("graph", "figure"),
    Input("var", "value"),
    Input("comparison", "value"),
    Input("connecting_line", "value"),
    Input("symbolic_mode", "value"),
    Input("height", "value"),
    Input("width", "value"),
    Input("top_margin", "value"),
    Input("interline", "value"),
    Input("color_line", "value"),
    Input("download", "n_clicks"),)
def update_plot(var: str, comparison: str, connecting_line: str, symbolic_mode: bool, height: int,
                width: int, top_margin: int, interline: int, color_line: str, download: int) -> go.Figure:
    """Rebuild the strip/box figure whenever a dashboard control changes.

    Args:
        var: Column of ``df`` plotted on the y axis.
        comparison: ``"none"`` or a key of ``comparison_dict`` used to split each stage.
        connecting_line: ``"none"``, ``"mean"`` or ``"median"``; only used when
            ``comparison`` is ``"none"``.
        symbolic_mode: Forwarded to ``add_pairwise_comparison``.
        height: Figure height in pixels.
        width: Figure width in pixels.
        top_margin: Top margin of the figure in pixels.
        interline: Forwarded to ``add_pairwise_comparison``.
        color_line: Colour of the connecting line.
        download: Click count of the download button. Its value is not read; the
            button only needs to be an input so that a click triggers this callback.

    Returns:
        The updated figure. When the callback was triggered by the download
        button, the figure is also written to ``path_output_figure`` as PDF and PNG.
    """
    y_var: str = var
    # Axis title per variable, and whether the plotted values are log10-transformed.
    known_titles: dict[str, tuple[str, bool]] = {
        "strem2_log10": ("CSF sTREM2 (pg/mL)", True),
        "ptau_log10": ("CSF pTau (ng/mL)", True),
        # NOTE(review): av45 is assumed to be an untransformed amyloid-PET SUVR;
        # it used to fall through to the plasma-lipid label below — confirm wording/units.
        "av45": ("Amyloid PET AV45 (SUVR)", False),
    }
    # Any other variable is treated as a plasma lipid, as before.
    title_str, is_log10 = known_titles.get(var, (f"Plasma {var} (nM)", True))
    if is_log10:
        title_str += "<br>(log10 transformed)"

    fig: go.Figure
    if comparison == "none":
        # One strip per stage, coloured by stage
        fig = px.strip(df, x="stage_label", y=y_var, category_orders={"stage_label": stage_label_list},
                       stripmode="overlay", color="stage_label", color_discrete_sequence=px.colors.qualitative.G10)
        if connecting_line in ("mean", "median"):
            per_stage = [df.loc[df["stage_label"] == stage_label, y_var] for stage_label in stage_label_list]
            y_values: list[float] = [
                values.mean() if connecting_line == "mean" else values.median() for values in per_stage
            ]
            fig.add_trace(go.Scatter(x=stage_label_list, y=y_values, mode="lines", showlegend=False, line=dict(color=color_line)))
        # Between-stage annotations reuse the precomputed Tukey table
        fig = add_pairwise_comparison(fig, df, stage_list, y_var, comparison, "Tukey p-value",
                                      height, width, interline, symbolic_mode, df_tukey)
        fig = add_box(fig, df, stage_list, y_var, comparison, width)
    else:
        # Grouped strips: within each stage, one strip per level of the comparison variable.
        # `comparison` is the label column used for colouring, `comparison_var` the coded column.
        comparison_var, comparison_values = comparison_dict[comparison]
        fig = px.strip(df, x="stage_label", y=y_var, color=comparison, stripmode="group",
                       category_orders={"stage_label": stage_label_list, comparison: comparison_values})
        fig = add_pairwise_comparison(fig, df, stage_list, y_var, comparison_var, "t-test p-value",
                                      height, width, interline, symbolic_mode)
        fig = add_box(fig, df, stage_list, y_var, comparison_var, width)
    fig.update_traces(marker=dict(size=5), selector=dict(mode="markers"))
    fig = standard_layout(fig, False)
    fig.update_layout(
        yaxis=dict(title=title_str),
        height=height,
        width=width,
        margin=dict(t=top_margin, b=100),
        showlegend=(comparison != "none"),
    )
    # Download as PDF+PNG.
    # Export only when the button itself fired the callback: n_clicks stays > 0
    # after the first click, so testing its value would rewrite both files on
    # every later slider or dropdown change.
    if ctx.triggered_id == "download":
        path_output_figure.mkdir(parents=True, exist_ok=True)
        fig.write_image(path_output_figure / f"boxplot_{var}_{comparison}.pdf")
        fig.write_image(path_output_figure / f"boxplot_{var}_{comparison}.png", scale=2)
    return fig

In [15]:
# Start the Dash development server on port 7530 with debug tools enabled.
# `jupyter_height` sets the height (in pixels) of the app when it is displayed
# inside the notebook; the auto-reloader is switched off.
app.run(debug=True, jupyter_height=1100, port=7530, use_reloader=False)