In [3]:
library(tidyverse)
library(arrow)


Attaching package: ‘arrow’


The following object is masked from ‘package:lubridate’:

    duration


The following object is masked from ‘package:utils’:

    timestamp




In [4]:
# Load the combined summary table, then derive the simulation parameters that
# are encoded in each row's `path` and `method` strings.
full_df <- read_tsv_arrow("data/full_summary.tsv.zst") %>%
    mutate(
        # Simulation index and the h / s / p settings are pulled from the path
        # (e.g. ".../sim_29_h_0.1_s_0.5_p_0.7/...") and stored as doubles.
        i = as.double(str_extract(path, "(?<=sim_)[0-9]+")),
        h = as.double(str_extract(path, "(?<=h_)[\\.0-9]+")),
        s = as.double(str_extract(path, "(?<=s_)[\\.0-9]+")),
        p = as.double(str_extract(path, "(?<=p_)[\\.0-9]+"))
    ) %>%
    distinct() %>%
    mutate(
        # Methods carrying an "m_<number>" tag report that number as their
        # missingness; every other method gets 0.
        missingness = if_else(
            condition = str_detect(method, "m_[0-9]+"),
            true = as.double(str_extract(method, "(?<=m_)([0-9\\.]+)")),
            false = 0
        ),
        # Same scheme for the "v_<number>" tag.
        variance = if_else(
            condition = str_detect(method, "v_[0-9]+"),
            true = as.double(str_extract(method, "(?<=v_)([0-9\\.]+)")),
            false = 0
        ),
        # F1 score computed directly from the confusion-matrix counts.
        F1 = 2 * TP / (2 * TP + FP + FN)
    )

dim(full_df)

head(full_df, 2)

phenotype,method,P,N,TP,TN,FP,FN,total,bias,⋯,sensitivity,specificity,path,i,h,s,p,missingness,variance,F1
<chr>,<chr>,<int>,<int>,<int>,<int>,<int>,<int>,<int>,<dbl>,⋯,<dbl>,<dbl>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
Trait_14,liability,593,1491,106,1423,68,487,2084,0.0007791386,⋯,0.1787521,0.954393,data/simulations/sim_29_h_0.1_s_0.5_p_0.7/summary/liability.tsv,29,0.1,0.5,0.7,0,0,0.2764016
Trait_44,liability,992,1516,410,1456,60,582,2508,-0.000449234,⋯,0.4133065,0.9604222,data/simulations/sim_29_h_0.1_s_0.5_p_0.7/summary/liability.tsv,29,0.1,0.5,0.7,0,0,0.5608755


In [23]:
# Per-method summary table for a single simulation setting
# (h = 0.5, s = 0.9, p = 0).
summary_df <- full_df %>%
    mutate(
        # Map raw method identifiers to display labels. recode_factor() orders
        # the factor levels by the order of the replacements listed here, and
        # the final arrange(desc(method)) sorts by that level order.
        method = recode_factor(
            method,
            liability = "Liability",
            `binary_m_0.0` = "Binary",
            `binary_m_0.25` = "Naive",
            `maxgcp_m_0.25_v_0.05` = "MaxGCP (noisy)",
            `maxgcp_m_0.25_v_0.0` = "MaxGCP (optimal)",
            optimal = "Optimal linear",
            g = "Genetic component"
        )
    ) %>%
    # None of the display labels contains an underscore, so this drops any
    # remaining raw identifiers that do (methods not relabelled above).
    filter(!str_detect(method, "_")) %>%
    # h, s and p are doubles; compare with a tolerance instead of relying on
    # exact floating-point equality.
    filter(near(h, 0.5), near(s, 0.9), near(p, 0)) %>%
    distinct() %>%
    group_by(method) %>%
    summarize(
        # Confusion-matrix counts are pooled (summed) within each method ...
        across(c(P, N, TP, TN, FP, FN, total), sum),
        # ... while these metrics are the median of the per-row values.
        across(c(AUROC, sensitivity, specificity, F1), median)
    ) %>%
    # NOTE: precision is derived from the pooled counts, so unlike the
    # median-based sensitivity / specificity / F1 / AUROC columns it is a
    # pooled figure rather than a per-row median.
    mutate(precision = TP / (TP + FP)) %>%
    select(method, TP, FP, TN, FN, sensitivity, specificity, precision, F1, AUROC) %>%
    arrange(desc(method))

summary_df

method,TP,FP,TN,FN,sensitivity,specificity,precision,F1,AUROC
<fct>,<int>,<int>,<int>,<int>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
Genetic component,4726856,0,7461201,0,1.0,1.0,1.0,1.0,1.0
Optimal linear,3299208,281133,7180068,1427648,0.7171336,0.9648308,0.9214787,0.8119919,0.9298787
MaxGCP (optimal),3139413,286375,7174825,1587443,0.6858882,0.9637272,0.9164061,0.7899972,0.9114712
MaxGCP (noisy),2205775,419023,7042178,2521078,0.4695948,0.9441454,0.8403599,0.6045488,0.8158587
Naive,1361350,335394,7125807,3365505,0.2781072,0.9551935,0.8023308,0.4120478,0.7224196
Binary,1815400,315041,7146160,2911455,0.3728173,0.957909,0.852124,0.5175898,0.7896107
Liability,2697855,253655,7207545,2028999,0.5511655,0.9640523,0.9140592,0.6840696,0.8947844


In [25]:
# Emit the summary table as LaTeX markup, rounding numeric columns to 2 digits.
summary_df %>%
    knitr::kable(format = "latex", digits = 2)


\begin{tabular}{l|r|r|r|r|r|r|r|r|r}
\hline
method & TP & FP & TN & FN & sensitivity & specificity & precision & F1 & AUROC\\
\hline
Genetic component & 4726856 & 0 & 7461201 & 0 & 1.00 & 1.00 & 1.00 & 1.00 & 1.00\\
\hline
Optimal linear & 3299208 & 281133 & 7180068 & 1427648 & 0.72 & 0.96 & 0.92 & 0.81 & 0.93\\
\hline
MaxGCP (optimal) & 3139413 & 286375 & 7174825 & 1587443 & 0.69 & 0.96 & 0.92 & 0.79 & 0.91\\
\hline
MaxGCP (noisy) & 2205775 & 419023 & 7042178 & 2521078 & 0.47 & 0.94 & 0.84 & 0.60 & 0.82\\
\hline
Naive & 1361350 & 335394 & 7125807 & 3365505 & 0.28 & 0.96 & 0.80 & 0.41 & 0.72\\
\hline
Binary & 1815400 & 315041 & 7146160 & 2911455 & 0.37 & 0.96 & 0.85 & 0.52 & 0.79\\
\hline
Liability & 2697855 & 253655 & 7207545 & 2028999 & 0.55 & 0.96 & 0.91 & 0.68 & 0.89\\
\hline
\end{tabular}