In [1]:
from pathlib import Path
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns


In [2]:
# Colour Universal Design (CUD) palette, chosen to stay distinguishable for
# colourblind readers. References:
#   https://jfly.uni-koeln.de/color/
#   https://nanx.me/oneclust/reference/cud.html
# The pairs are listed in palette order; dicts preserve insertion order, so
# `cud_palette` below follows the same sequence.
named_colors = dict(
    [
        ("black", "#000000"),
        ("orange", "#e69f00"),
        ("skyblue", "#56b4e9"),
        ("bluish green", "#009e73"),
        ("yellow", "#f0e442"),
        ("blue", "#0072b2"),
        ("vermilion", "#d55e00"),
        ("reddish purple", "#cc79a7"),
    ]
)
# Hex codes only, in the order defined above.
cud_palette = [hex_code for hex_code in named_colors.values()]
# Apply seaborn's "whitegrid" style as the plotting theme for the notebook.
sns.set_theme(style="whitegrid")

def cud(n: int = len(cud_palette), start: int = 0) -> list[str]:
    """Return ``n`` colours from the CUD palette, rotated to begin at ``start``.

    The palette is treated as a ring: the colour at position ``start`` comes
    first, and the colours that precede it are moved to the end.

    Parameters
    ----------
    n
        Number of colours to return. Defaults to the full palette length.
        The result is a plain slice of the rotated palette, so asking for
        more colours than the palette holds returns the whole palette.
    start
        Index of the colour to put first. Wraps around the palette length,
        so ``start == len(cud_palette)`` is the same as ``start == 0`` and
        negative values count back from the end.

    Returns
    -------
    list[str]
        Hex colour strings in rotated order.
    """
    if not cud_palette:
        # Nothing to rotate; also avoids a modulo-by-zero below.
        return []
    # Wrap the offset so out-of-range starts still rotate instead of being
    # silently clamped by slicing to "no rotation".
    offset = start % len(cud_palette)
    rotated = cud_palette[offset:] + cud_palette[:offset]
    return rotated[:n]

In [3]:
def parse_run_metadata(p: Path) -> dict:
    """Derive the annotation columns for one precision-recall TSV from its path.

    Every file gets ``sample`` (name of the containing directory) and
    ``mask_repeats`` (whether the file name contains
    ``without_repetitive_regions``).

    If the path contains ``"illumina"`` anywhere, ``caller``, ``depth``,
    ``version`` and ``model`` are all set to the string ``"illumina"`` and
    ``mode`` to ``"simplex"``.

    Otherwise the remaining columns are read positionally from the path,
    counting back from the file name:
    ``<caller>/<depth>/<mode>/<version>/<model dir>/<sample>/<file>``.

    Parameters
    ----------
    p
        Path to a ``*.precision-recall.tsv`` file.

    Returns
    -------
    dict
        Column name -> scalar value, in the order the columns are added.

    Raises
    ------
    IndexError
        If a non-Illumina path has fewer than seven components.
    ValueError
        If the depth directory (minus its last character) is not an integer.
    """
    meta = {
        "sample": p.parent.name,
        "mask_repeats": "without_repetitive_regions" in p.name,
    }
    if "illumina" in str(p):
        meta.update(
            caller="illumina",
            depth="illumina",
            mode="simplex",
            version="illumina",
            model="illumina",
        )
    else:
        meta.update(
            caller=p.parts[-7],
            # Drop the trailing character before converting; presumably a
            # unit suffix such as the "x" in "100x" - TODO confirm.
            depth=int(p.parts[-6][:-1]),
            mode=p.parts[-5],
            version=p.parts[-4],
            # Keep the last "_"-separated token of the model directory and
            # discard anything from "@" onwards.
            model=p.parts[-3].split("_")[-1].split("@")[0],
        )
    return meta


frames = []
# rglob yields files in filesystem order; sort so the row order of the
# combined table is the same on every run and machine.
for p in sorted(Path("../results/assess/mutref").rglob("*.precision-recall.tsv")):
    df = pd.read_csv(p, sep="\t").assign(**parse_run_metadata(p))
    frames.append(df)
    if "illumina" in str(p):
        # Illumina rows are added a second time labelled as duplex, so the
        # same data is present under both mode values.
        frames.append(df.assign(mode="duplex"))

In [4]:
# Stack the per-file frames into one table. reset_index() (without drop)
# replaces the repeated per-file row labels with a fresh 0..N-1 index and
# keeps the old labels as a regular column.
pr_df = pd.concat(frames).reset_index()
pr_df.head()

: 