In [1]:
import re
import sys
from pathlib import Path

import dash_bootstrap_components as dbc
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from dash import Dash, Input, Output, ctx, dcc, html
from dash.exceptions import PreventUpdate

# Make the repository-level `lib` package importable from this notebook
sys.path.append("../../")
from lib.plotly import standard_layout, add_pairwise_comparison, add_box

In [2]:
# I/O locations. The relative paths are resolved against the current working
# directory, so the notebook must be run from its own folder.
_project_root: Path = Path("../../../")
_adni_processed: Path = _project_root / "data" / "processed" / "adni"

path_input_demographics: Path = (_adni_processed / "demographics_tau.csv").resolve()
path_input_dict: Path = (_adni_processed / "somascan_dict.csv").resolve()
path_input_proteomics: Path = (_adni_processed / "somascan.csv").resolve()
path_output_figure: Path = (_project_root / "assets" / "figures" / "adni").resolve()

In [3]:
# Read the three input CSVs (same order as before) and let pandas pick the
# best-fitting nullable dtypes for every column via convert_dtypes()
df_demographics, df_dict, df_proteomics = (
    pd.read_csv(csv_path).convert_dtypes()
    for csv_path in (path_input_demographics, path_input_dict, path_input_proteomics)
)

In [4]:
# Attach the proteomics measurements to the demographic/cognitive-status rows.
# Inner join on "RID": only RIDs present in both tables are kept.
proteomics_by_rid: pd.DataFrame = df_proteomics.set_index("RID")
df: pd.DataFrame = df_demographics.join(proteomics_by_rid, on="RID", how="inner")

In [5]:
# Translate the numeric "cog" codes into readable stage labels.
# The dict's insertion order doubles as the plotting order of the stages.
stage_by_code: dict[int, str] = {0: "CN or SMC", 1: "EMCI or LMCI", 2: "AD"}
stage_list: list[str] = list(stage_by_code.values())
df["stage"] = df["cog"].map(stage_by_code)

In [6]:
# fmt: off
def _slider_marks(low: int, high: int, spacing: int) -> dict[int, str]:
    """Build slider tick labels every ``spacing`` units, starting at ``low`` and never exceeding ``high``."""
    return {tick: str(tick) for tick in range(low, high + 1, spacing)}


app: Dash = Dash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP])

# Column 1: which protein to plot and how to annotate it
controls_data = dbc.Col([
    dbc.Label(children="select a protein"),
    dcc.Dropdown(options=df_dict["target_full_name"].tolist(), value="Apolipoprotein E (isoform E4)", id="y_var"),
    html.Hr(),
    dbc.Label(children="add connecting lines"),
    dbc.RadioItems(options=["no", "mean", "median"], value="no", id="add_connecting_lines", inline=True),
    html.Hr(),
    dbc.Switch(id="switch_symbolic_mode", value=True, label="symbolic mode"),
], width={"size": 3, "offset": 1})

# Column 2: figure geometry. Marks are generated from each slider's own
# min/max so that no tick can fall outside the slider range (the "interline"
# slider previously carried a mark at 110 although its max is 100).
controls_geometry = dbc.Col([
    dbc.Label(children="adjust height"),
    dcc.Slider(id="height_slider", min=200, max=1000, step=25, value=550, marks=_slider_marks(200, 1000, 200)),
    dbc.Label(children="adjust width"),
    dcc.Slider(id="width_slider", min=400, max=1400, step=25, value=800, marks=_slider_marks(400, 1400, 200)),
    dbc.Label(children="top margin"),
    dcc.Slider(id="top_margin", min=100, max=500, step=25, value=150, marks=_slider_marks(100, 500, 100)),
    dbc.Label(children="annotation: interline"),
    dcc.Slider(id="interline", min=10, max=100, step=5, value=65, marks=_slider_marks(10, 100, 20)),
], width=4)

# Column 3: line colour picker and export button
controls_export = dbc.Col([
    dbc.Label("line color"),
    dbc.Input(type="color", id="color_line", value="#000000", style={"width": 75, "height": 50}),
    html.Hr(),
    dbc.Button(children="download figure PDF+PNG", id="download", n_clicks=0),
], width=3)

# Page: one row of controls, with the graph underneath
app.layout = html.Div(
    [
        dbc.Row([controls_data, controls_geometry, controls_export], align="center"),
        dbc.Col(dcc.Graph(id="graph"), width={"size": 6, "offset": 1}),
    ],
    style={"backgroundColor": "white"},
)

In [7]:
# fmt: off
def _safe_filename(name: str) -> str:
    """Replace characters that are reserved in file names (path separators, ``:*?"<>|``) with ``_``."""
    return re.sub(r'[\\/:*?"<>|]', "_", name)


@app.callback(
    Output("graph", "figure"),
    Input("y_var", "value"),
    Input("height_slider", "value"),
    Input("width_slider", "value"),
    Input("top_margin", "value"),
    Input("interline", "value"),
    Input("add_connecting_lines", "value"),
    Input("color_line", "value"),
    Input("switch_symbolic_mode", "value"),
    Input("download", "n_clicks"),
)
def resize_figure(var: str, height: int, width: int, top_margin: int, interline: int,
                  add_connecting_lines: str, line_color: str, symbolic_mode: bool, download: int) -> go.Figure:
    """Rebuild the per-stage strip plot for the selected protein.

    Args:
        var: Protein full name chosen in the dropdown (``None`` when the
            dropdown has been cleared).
        height: Figure height in pixels.
        width: Figure width in pixels.
        top_margin: Top margin of the figure in pixels.
        interline: Spacing value forwarded to ``add_pairwise_comparison``.
        add_connecting_lines: ``"no"``, ``"mean"`` or ``"median"``; the latter
            two draw a line through the per-stage mean/median.
        line_color: Colour of the connecting line.
        symbolic_mode: Flag forwarded to ``add_pairwise_comparison``.
        download: Click count of the download button.

    Returns:
        The figure to display in the ``graph`` component.

    Raises:
        PreventUpdate: If no protein is selected, so the current figure is kept.
    """
    if var is None:
        # Dropdown was cleared: there is no column to look up, keep the current figure.
        raise PreventUpdate

    # Translate the human-readable protein name into its column label in `df`.
    # NOTE(review): assumes "target_full_name" is unique in df_dict; confirm.
    y_var: str = df_dict.set_index("target_full_name").at[var, "label"]

    fig: go.Figure = px.strip(df, x="stage", y=y_var, category_orders={"stage": stage_list},
                              stripmode="overlay", color="stage", color_discrete_sequence=px.colors.qualitative.G10)

    if add_connecting_lines in ("mean", "median"):
        # One summary value per stage, in plotting order; the radio value is the aggregation name.
        stage_summary = [df.loc[df["stage"] == stage, y_var].agg(add_connecting_lines) for stage in stage_list]
        fig.add_trace(go.Scatter(x=stage_list, y=stage_summary, mode="lines", showlegend=False, line=dict(color=line_color)))

    # Project helpers from lib.plotly (presumably add the pairwise t-test
    # annotations, the box overlay and the shared layout; see that module).
    fig = add_pairwise_comparison(fig, df, stage_list, y_var, "none", "t-test p-value", height, width, interline, symbolic_mode)
    fig = add_box(fig, df, stage_list, y_var, "none", width)
    fig.update_traces(marker=dict(size=5), selector=dict(mode="markers"))
    fig = standard_layout(fig, False)
    fig.update_layout(
        yaxis=dict(title=var),
        height=height,
        width=width,
        margin=dict(t=top_margin, b=100),
        showlegend=True,
    )

    # Download as PDF+PNG, but only when the button itself fired this callback.
    # n_clicks stays > 0 after the first click, so testing it alone would
    # re-export the figure on every later slider/dropdown change.
    if ctx.triggered_id == "download" and download:
        path_output_figure.mkdir(parents=True, exist_ok=True)
        file_stem: str = f"boxplot_{_safe_filename(var)}"
        fig.write_image(path_output_figure / f"{file_stem}.pdf")
        fig.write_image(path_output_figure / f"{file_stem}.png", scale=2)
    return fig

In [9]:
# Start the Dash server for this notebook session
app.run(
    port=7521,
    debug=True,
    use_reloader=False,
    jupyter_height=1000,
)