In [1]:
import pyreadr
import polars as pl
import polars.selectors as cs

diffexp_x = pl.from_pandas(pyreadr.read_r(snakemake.input[0])[None]).lazy()
diffexp_y = pl.from_pandas(pyreadr.read_r(snakemake.input[1])[None]).lazy()
label_x = snakemake.params.labels[0]
label_y = snakemake.params.labels[1]

# diffexp_x = pl.from_pandas(pyreadr.read_r("/projects/koesterlab/moeller-th-liver-diffexp/analysis_2/results/sleuth/diffexp/etoh_mgl_in_wt_vs_etho_in_wt_liver.genes-representative.diffexp.rds")[None]).lazy()
# diffexp_y = pl.from_pandas(pyreadr.read_r("/projects/koesterlab/moeller-th-liver-diffexp/analysis_2/results/sleuth/diffexp/etoh_t3_in_wt_vs_etoh_in_wt_liver.genes-representative.diffexp.rds")[None]).lazy()
# label_x = "mgl"
# label_y = "t3"


effect_x = f"effect {label_x} (beta score)"
effect_y = f"effect {label_y} (beta score)"

In [2]:
def prepare(df):
    return df.select(
        cs.by_name("target_id", "ext_gene", "pval", "qval"),
        cs.starts_with("b_").alias("beta")
    ).select(
        cs.by_index(range(0, 5)) # only keep first b_ column
    )

In [3]:
combined = prepare(diffexp_x).join(
    prepare(diffexp_y), on=["target_id", "ext_gene"], suffix="_y"
).with_columns(
    pl.min_horizontal("qval", "qval_y").alias("qval_min"),
).filter(
    pl.col("qval_min") <= 0.05
).rename(
    {
        "beta": effect_x,
        "beta_y": effect_y,
        "qval_min": "min q-value",
    }
).collect()


In [10]:
effects = combined.select(pl.col(effect_x, effect_y))
min_value = effects.min().min_horizontal()[0]
max_value = effects.max().max_horizontal()[0]
combined = combined.with_columns(
    abs(pl.col(effect_x) - pl.col(effect_y)).alias("difference")
)
combined_sorted = combined.sort("difference", descending=True)
combined_pd = combined_sorted.select(
    pl.col("ext_gene", "min q-value", effect_x, effect_y)
).to_pandas()
combined_pd.to_csv(snakemake.output[0], sep="\t", index=False)
# combined_pd.to_csv("/projects/koesterlab/moeller-th-liver-diffexp/analysis_2/test.txt", sep="\t", index=False)


In [11]:
combined_sorted


target_id,ext_gene,pval,qval,effect mgl (beta score),pval_y,qval_y,effect t3 (beta score),min q-value,difference
str,str,f64,f64,f64,f64,f64,f64,f64,f64
"""ENSMUST00000029670.7""","""Ptgfr""",,,,5.8900e-8,0.0000174,6.63,0.0000174,
"""ENSMUST00000062307.5""","""Phf11a""",,,,9.9300e-7,0.000106,4.59,0.000106,
"""ENSMUST00000150649.9""","""Ifi213""",,,,0.0000104,0.000586,4.28,0.000586,
"""ENSMUST00000033617.13""","""Btk""",,,,0.0000138,0.000733,3.97,0.000733,
"""ENSMUST00000005950.12""","""Mmp12""",,,,0.0000152,0.000793,4.55,0.000793,
…,…,…,…,…,…,…,…,…,…
"""ENSMUST00000034860.5""","""Cyp1a2""",3.0000e-11,9.2300e-9,-1.94,0.0000018,0.000171,-1.94,9.2300e-9,0.0
"""ENSMUST00000093485.3""","""Ddx60""",0.000008,0.000121,2.81,0.0000504,0.00196,2.81,0.000121,0.0
"""ENSMUST00000020301.14""","""Vsir""",0.000796,0.00445,1.3,0.00449,0.0473,1.3,0.00445,0.0
"""ENSMUST00000040336.12""","""Slc22a23""",0.014,0.0358,-0.465,0.0177,0.112,-0.465,0.0358,0.0


In [None]:
import altair as alt
import sys
# we cannot use vegafusion here because it makes the point selection impossible since
# it prunes the required ext_gene column
#alt.data_transformers.enable("vegafusion")
alt.data_transformers.disable_max_rows()

point_selector = alt.selection_point(fields=["ext_gene"], empty=False)

points = alt.Chart(combined_pd).mark_circle(size=15, tooltip={"content": "data"}).encode(
    alt.X(effect_x),
    alt.Y(effect_y),
    alt.Color("min q-value", scale=alt.Scale(scheme="viridis")),
    opacity=alt.value(0.5),
)

line = alt.Chart(
    pl.DataFrame({effect_x: [min_value, max_value], effect_y: [min_value, max_value]})
).mark_line(color="lightgrey").encode(
    x=effect_x,
    y=effect_y,
    strokeDash=alt.value([5, 5]),
)

text_background = alt.Chart(combined_pd).mark_text(
        align="left",
        baseline="middle",
        dx=5,
        dy=-5,
        fill='white',
        stroke='white',
        strokeWidth=5,
    ).encode(
        x=effect_x,
        y=effect_y,
        text=alt.condition(point_selector, "ext_gene", alt.value("")),
    )

text = alt.Chart(combined_pd).mark_text(
    align="left",
    baseline="middle",
    dx=5,
    dy=-5,
).encode(
    x=effect_x,
    y=effect_y,
    text=alt.condition(point_selector, "ext_gene", alt.value("")),
)


chart = alt.layer(line, points, text_background, text).add_params(
    point_selector
).interactive()

display(chart)

# chart.save("/projects/koesterlab/moeller-th-liver-diffexp/analysis_2/test.html", inline=True)

chart.save(snakemake.output[1], inline=True)