In [1]:
import matplotlib.pyplot as plt
from sklearn.metrics import average_precision_score
import polars as pl

In [2]:
# Build a table with one AUPRC value per (dataset, feature) pair.
# NOTE(review): an earlier TODO here read "add signs"; `feature_signs` below
# appears to address it — confirm and then drop this note.

# Feature columns to score, keyed by dataset directory name.
matched_features = {
    "mendelian_traits_matched_9": ["tss_dist"],
    "complex_traits_matched_9": ["tss_dist", "maf", "ld_score"],
}
# Multiplier applied to each raw feature column before it is handed to
# average_precision_score as the score.
feature_signs = {"tss_dist": -1, "maf": 1, "ld_score": -1}
# Display names used in the resulting table; unknown keys fall back to the raw name.
dataset_renaming = {
    "mendelian_traits_matched_9": "Mendelian traits",
    "complex_traits_matched_9": "Complex traits",
}
feature_renaming = {"tss_dist": "TSS distance", "maf": "MAF", "ld_score": "LD score"}

rows = []
for dataset, features in matched_features.items():
    # Each dataset's test split is read once and reused for all of its features.
    V = pl.read_parquet(f"../../results/dataset/{dataset}/test.parquet")
    dataset_label = dataset_renaming.get(dataset, dataset)
    for feature in features:
        signed_score = V[feature] * feature_signs[feature]
        auprc = average_precision_score(V["label"], signed_score)
        rows.append(
            dict(
                Dataset=dataset_label,
                Feature=feature_renaming.get(feature, feature),
                AUPRC=auprc,
            )
        )

df = pl.DataFrame(rows)
df

Dataset,Feature,AUPRC
str,str,f64
"""Mendelian traits""","""TSS distance""",0.114833
"""Complex traits""","""TSS distance""",0.10406
"""Complex traits""","""MAF""",0.10133
"""Complex traits""","""LD score""",0.104152


In [6]:
# Render the AUPRC table as LaTeX source (floats formatted to three decimals,
# no index column) and print it so it can be copied out of the notebook.
latex_table = df.to_pandas().to_latex(
    index=False,
    multicolumn_format="c",
    escape=False,
    float_format="%.3f",
)
print(latex_table)

\begin{tabular}{llr}
\toprule
Dataset & Feature & AUPRC \\
\midrule
Mendelian traits & TSS distance & 0.115 \\
Complex traits & TSS distance & 0.104 \\
Complex traits & MAF & 0.101 \\
Complex traits & LD score & 0.104 \\
\bottomrule
\end{tabular}

