In [40]:
from itertools import product
import sys

import pandas as pd
import altair as alt
from altair import datum

# Send everything written to stderr (warnings, tracebacks) to the Snakemake
# log file. The handle is not closed here, so it stays usable for later cells.
sys.stderr = open(snakemake.log[0], "w")


report = pd.read_csv(snakemake.input[0], sep="\t")
chart = None  # placeholder; the actual chart is assigned in the plotting cell

# Swap the raw coverage keys for their display labels.
report["coverage"] = report["coverage"].apply(lambda cov: snakemake.params.cov_labels[cov])

# Long format: one row per (coverage, vartype, correct genotype, metric).
id_columns = ["coverage", "vartype", "correct genotype"]
report_final = (
    report[id_columns + ["precision", "recall"]]
    .set_index(id_columns)
    .stack()
    .reset_index()
)
report_final.columns = id_columns + ["metric", "value"]

coverages = report["coverage"].unique()
vartypes = report["vartype"].unique()

# Text label ("numerator/denominator") for each metric; None where no label applies.
report_final["numbers"] = None

# The counts behind the labels are read from the rows with correct genotype == "no".
counts = report.loc[report["correct genotype"] == "no"]

for cov, vartype in product(coverages, vartypes):
    matches = counts.loc[(counts["coverage"] == cov) & (counts["vartype"] == vartype)]
    if matches.empty:
        # This coverage/vartype combination is not present in the report:
        # leave its label as None instead of failing on .iloc[0].
        continue
    report_ = matches.iloc[0]

    # Boolean masks instead of partial MultiIndex keys: assigning through a mask
    # is a no-op when a metric row is absent, rather than an error.
    in_facet = (report_final["coverage"] == cov) & (report_final["vartype"] == vartype)
    report_final.loc[in_facet & (report_final["metric"] == "precision"), "numbers"] = f"{report_.at['tp_query']}/{report_.at['tp_query'] + report_.at['fp']}"
    report_final.loc[in_facet & (report_final["metric"] == "recall"), "numbers"] = f"{report_.at['tp_truth']}/{report_.at['tp_truth'] + report_.at['fn']}"

# Fixed column order: identifiers first, then the plotted value and its label.
report_final = report_final[["coverage", "vartype", "metric", "correct genotype", "value", "numbers"]]


In [41]:
# Coverage display labels in the fixed order high, medium, low; used as the
# sort order of the "coverage" axis in the plotting helpers.
cov_order = ("high", "medium", "low")
cov_sort = [snakemake.params.cov_labels[key] for key in cov_order]

def plot(correct_genotype):
    """Build the bar layer for one value of the "correct genotype" column.

    The chart is created without data (``alt.Chart()``), so the caller has to
    supply the data when combining layers.

    Parameters
    ----------
    correct_genotype
        Value compared against ``datum["correct genotype"]``; only rows that
        match are kept by the filter transform.

    Returns
    -------
    A bar chart with "value" on x, "coverage" on y (ordered by ``cov_sort``)
    and colour encoded from "correct genotype" (ordinal, descending sort).
    """
    genotype_matches = datum["correct genotype"] == correct_genotype

    bars = alt.Chart().mark_bar()
    encoded = bars.encode(
        alt.X("value", title=None),
        alt.Y("coverage", sort=cov_sort),
        color=alt.Color("correct genotype", type="ordinal", sort="descending"),
    )
    return encoded.transform_filter(genotype_matches)

def plot_numbers():
    """Build the text layer that prints the "numbers" column.

    The text is positioned at x="value" / y="coverage" (same axes and sort
    order as the bar layers), left-aligned and offset by ``dx=4``. Only rows
    whose "correct genotype" equals "no" are kept by the filter transform.
    Like the bar layers, the chart is created without data.
    """
    label_mark = alt.Chart().mark_text(
        color="black",
        align="left",
        baseline="middle",
        dx=4,
    )
    labelled = label_mark.encode(
        alt.X("value", title=None),
        alt.Y("coverage", sort=cov_sort),
        text="numbers",
    )
    return labelled.transform_filter(datum["correct genotype"] == "no")

# Overlay the two bar layers ("no" first, then "yes") and the text labels on
# the shared data frame.
layered = alt.layer(
    plot("no"),
    plot("yes"),
    plot_numbers(),
    data=report_final,
)

# One facet row per metric and one facet column per variant type,
# with "snvs" placed before "indels".
chart = layered.facet(
    row=alt.Row("metric", title=None),
    column=alt.Column("vartype", title=None, sort=["snvs", "indels"]),
)

In [43]:
# Write the faceted chart to the first output path declared by Snakemake.
output_path = snakemake.output[0]
chart.save(output_path)