In [3]:
import pyreadr
import polars as pl
import polars.selectors as cs

diffexp_x = pl.read_csv(snakemake.input[0], separator="\t").lazy()
diffexp_y = pl.read_csv(snakemake.input[1], separator="\t").lazy()
label_x = snakemake.params.labels[0]
label_y = snakemake.params.labels[1]

# diffexp_x = pl.read_csv("/projects/koesterlab/moeller-th-liver-diffexp/moeller-rna-liver-2024/results/tables/pathways/etoh_mgl_in_wt_vs_etho_in_wt_liver.pathways.tsv", separator="\t").lazy()
# diffexp_y = pl.read_csv("/projects/koesterlab/moeller-th-liver-diffexp/moeller-rna-liver-2024/results/tables/pathways/etoh_t3_in_wt_vs_etoh_in_wt_liver.pathways.tsv", separator="\t").lazy()
# label_x = "mgl"
# label_y = "t3"

effect_x = f"effect {label_x}"
effect_y = f"effect {label_y}"


In [4]:
def prepare(df):
    # Select necessary columns and filter
    df = df.select([
               cs.by_name("Name", "Combined p-value", "Combined FDR", "total perturbation accumulation", "pathway id")
           ])
    return df

In [5]:
prepared_diffexp_x = prepare(diffexp_x)
prepared_diffexp_y = prepare(diffexp_y)
combined = prepared_diffexp_x.join(
    prepared_diffexp_y, on=["Name"], suffix="_y"
).with_columns(
    pl.min_horizontal("Combined FDR", "Combined FDR_y").alias("fdr_min")
).filter(
    pl.col("fdr_min") <= 0.05
).rename(
    {
        "total perturbation accumulation": effect_x,
        "total perturbation accumulation_y": effect_y,
        "fdr_min": "min fdr",
    }
).collect()

In [6]:
combined

Name,Combined p-value,Combined FDR,effect mgl,pathway id,Combined p-value_y,Combined FDR_y,effect t3,pathway id_y,min fdr
str,f64,f64,f64,str,f64,f64,f64,str,f64
"""Metabolism of RNA""",3.6577e-104,2.7835e-101,-153.047343,"""R-MMU-8953854""",0.000008,0.000236,-47.991535,"""R-MMU-8953854""",2.7835e-101
"""Translation""",3.7171e-69,1.4143e-66,-25.400783,"""R-MMU-72766""",0.025606,0.102425,-7.763833,"""R-MMU-72766""",1.4143e-66
"""Amino acid and derivative meta…",3.1699e-51,7.4966e-49,8.696646,"""R-MMU-71291""",1.1662e-24,3.5452e-22,-1.63,"""R-MMU-71291""",7.4966e-49
"""rRNA processing""",3.9404e-51,7.4966e-49,-140.95763,"""R-MMU-72312""",0.000175,0.003048,-37.557806,"""R-MMU-72312""",7.4966e-49
"""rRNA processing in the nucleus…",5.8909e-51,8.9659e-49,-139.377208,"""R-MMU-8868773""",0.000188,0.003176,-37.46126,"""R-MMU-8868773""",8.9659e-49
…,…,…,…,…,…,…,…,…,…
"""FCERI mediated Ca+2 mobilizati…",0.21634,0.27763,1.655,"""R-MMU-2871809""",0.002636,0.026708,7.439,"""R-MMU-2871809""",0.026708
"""DAP12 interactions""",0.227572,0.288637,3.038273,"""R-MMU-2172127""",0.005966,0.046887,5.972909,"""R-MMU-2172127""",0.046887
"""Interleukin-3, Interleukin-5 a…",0.344125,0.40087,0.0,"""R-MMU-512988""",0.003173,0.02905,1.02,"""R-MMU-512988""",0.02905
"""Collagen formation""",0.419314,0.476266,1.26257,"""R-MMU-1474290""",0.004462,0.03743,-15.229161,"""R-MMU-1474290""",0.03743


In [7]:
effects = combined.select(pl.col(effect_x, effect_y))
min_value = effects.min().min_horizontal()[0]
max_value = effects.max().max_horizontal()[0]
combined = combined.with_columns(
    abs(pl.col(effect_x) - pl.col(effect_y)).alias("difference")
)
combined_sorted = combined.sort("difference", descending=True)
combined_pd = combined_sorted.select(
    pl.col("Name", "min fdr", effect_x, effect_y, "difference", "pathway id")
).to_pandas()
combined_pd.to_csv(snakemake.output[0], sep="\t", index=False)
# combined_pd.to_csv("/projects/koesterlab/moeller-th-liver-diffexp/analysis_2/results/tables/pathways/meta_compare_etoh_wt_mgl_vs_t3.tsv", sep="\t", index=False)
 

ModuleNotFoundError: pa.Table requires 'pyarrow' module to be installed

In [None]:
import altair as alt
import sys
from IPython.display import display

# we cannot use vegafusion here because it makes the point selection impossible since
# it prunes the required ext_gene column
#alt.data_transformers.enable("vegafusion")
alt.data_transformers.disable_max_rows()


# Data transformation
alt.data_transformers.disable_max_rows()

# Selektor für Punkte
point_selector = alt.selection_point(fields=["Name"], empty=False)

# Punkte
points = alt.Chart(combined_pd).mark_circle(size=15, tooltip={"content": "data"}).encode(
    alt.X(effect_x, scale=alt.Scale(type='symlog', nice=False), axis=alt.Axis(grid=False)),
    alt.Y(effect_y, scale=alt.Scale(type='symlog', nice=False), axis=alt.Axis(grid=False)),
    alt.Color("min fdr", scale=alt.Scale(scheme="viridis")),
    opacity=alt.value(0.5),
)

# Gestrichelte Linie
line = alt.Chart(
    pl.DataFrame({effect_x: [min_value, max_value], effect_y: [min_value, max_value]})
).mark_line(color="lightgrey").encode(
    x=effect_x,
    y=effect_y,
    strokeDash=alt.value([5, 5]),
)

# Gestrichelte Linie
x_axis = alt.Chart(
    pl.DataFrame({effect_x: [0, 0], effect_y: [min_value, max_value]})
).mark_line(color="lightgrey").encode(
    x=effect_x,
    y=effect_y,
    strokeDash=alt.value([5, 5]),
)

y_axis = alt.Chart(
    pl.DataFrame({effect_x: [min_value, max_value], effect_y: [0, 0]})
).mark_line(color="lightgrey").encode(
    x=effect_x,
    y=effect_y,
    strokeDash=alt.value([5, 5]),
)

text_background = alt.Chart(combined_pd).mark_text(
        align="left",
        baseline="middle",
        dx=5,
        dy=-5,
        fill='white',
        stroke='white',
        strokeWidth=5,
    ).encode(
        x=effect_x,
        y=effect_y,
        text=alt.condition(point_selector, "Name", alt.value("")),
    )


# Text
text = alt.Chart(combined_pd).mark_text(
    align="left",
    baseline="middle",
    dx=5,
    dy=-5,
).encode(
    x=effect_x,
    y=effect_y,
    text=alt.condition(point_selector, "Name", alt.value("")),
)

# Null-Linien für x und y
zero_lines = alt.Chart(pl.DataFrame({"zero": [0]})).mark_rule(color="black").encode(
    x=alt.X("zero", axis=alt.Axis(title="")),  # keine Beschriftung der Achse
    y=alt.Y("zero", axis=alt.Axis(title="")),  # keine Beschriftung der Achse
)

# Gesamtes Diagramm mit den Ebenen
chart = alt.layer(x_axis, y_axis, line, points, text_background, text).add_params(
    point_selector
).interactive()

# display(chart)

chart.save(snakemake.output[1], inline=True)
# chart.save("/projects/koesterlab/moeller-th-liver-diffexp/analysis_2/results/meta_comparison/pathways/etoh_wt_mgl_vs_t3.json", inline=True)

