# Visualize metapath performance against permutation

In [1]:
library(magrittr)

In [2]:
# Metaedge membership table with columns metapath, metaedge, metaedge_abbrev,
# position and length. position is dropped; length is stored as character so
# that it behaves as a categorical variable when plotted.
metaedge_df = readr::read_tsv('data/metaedge-in-metapath.tsv') %>%
  dplyr::select(-position) %>%
  dplyr::mutate(length = as.character(length))

all_abbrevs = unique(metaedge_df$metaedge_abbrev)

# Read feature-specific performance for DWPC features and correct for multiple testing
# fwer_* is the Bonferroni adjustment; fdr_* uses method 'fdr', which
# stats::p.adjust treats as an alias of Benjamini-Hochberg ('BH').
metapath_df = readr::read_tsv('data/feature-performance/auroc.tsv') %>%
  dplyr::mutate(
    fwer_delta_auroc = p.adjust(p = pval_delta_auroc, method = 'bonferroni'),
    fdr_delta_auroc = p.adjust(p = pval_delta_auroc, method = 'fdr')
  ) 

# Attach metaedge membership to every metapath's performance row.
# The key is spelled out so an accidentally shared column name cannot
# silently change the join.
fsp_df = metapath_df %>%
  dplyr::inner_join(metaedge_df, by = 'metapath')

# Overwrite metaedge_df with one row per metaedge, holding the best value of
# each metric over the metapaths containing it. Rows are sorted ascending by
# max_delta_auroc, with ties broken by descending abbreviation.
metaedge_df = fsp_df %>%
  dplyr::group_by(metaedge_abbrev, metaedge) %>%
  dplyr::summarize(
    max_dwpc_auroc = max(dwpc_auroc),
    max_delta_auroc = max(delta_auroc),
    max_rdwpc_auroc = max(rdwpc_auroc)
  ) %>%
  dplyr::ungroup() %>%
  dplyr::arrange(max_delta_auroc, dplyr::desc(metaedge_abbrev))

Parsed with column specification:
cols(
  metapath = col_character(),
  metaedge = col_character(),
  metaedge_abbrev = col_character(),
  position = col_integer(),
  length = col_integer()
)
Parsed with column specification:
cols(
  metapath = col_character(),
  dwpc_auroc = col_double(),
  pdwpc_auroc = col_double(),
  rdwpc_auroc = col_double(),
  nonzero = col_double(),
  pdwpc_primary_auroc = col_double(),
  delta_auroc = col_double(),
  pval_delta_auroc = col_double()
)
Joining, by = "metapath"


In [3]:
# Peek at the last two rows of the joined feature/metaedge table
tail(fsp_df, n = 2)

metapath,dwpc_auroc,pdwpc_auroc,rdwpc_auroc,nonzero,pdwpc_primary_auroc,delta_auroc,pval_delta_auroc,fwer_delta_auroc,fdr_delta_auroc,metaedge,metaedge_abbrev,length
CsdiCsdiCsdiCduftD,0.5347,0.53066,0.50326,0.035972,0.52043,0.014268,0.00017719,0.0124033,0.0002818932,Compound - significant-drug-interaction - Compound,CsdiC,4
CsdiCsdiCsdiCduftD,0.5347,0.53066,0.50326,0.035972,0.52043,0.014268,0.00017719,0.0124033,0.0002818932,Disease - drug-used-for-treatment - Compound,DduftC,4


In [4]:
#abbrev_to_metaedge = setNames(metaedge_df$metaedge_abbrev, metaedge_df$metaedge)

In [4]:
# Fill colour for each metapath length; names are the lengths as character strings
length_to_color = setNames(
  c('#b2df8a', '#1f78b4', '#33a02c', '#a6cee3'),
  c('1', '2', '3', '4')
)

In [5]:
# Plot size in inches, used for both the notebook display and the saved PNG
w = 7; h = 5.5
options(repr.plot.width=w, repr.plot.height=h)
# Fixed seed so the random vertical jitter is reproducible
set.seed(0)
# Jittered strip plot of delta AUROC per metapath, one row of points per metaedge.
# Requires the hetior package for theme_dhimmel().
gg_fsp = fsp_df %>%
  # Keep one row per (metapath, metaedge) pair so a metaedge that occurs several
  # times in a metapath is drawn once. .keep_all = TRUE retains delta_auroc and
  # length, which the aesthetics below need; without it distinct() returns only
  # the two key columns.
  dplyr::distinct(metapath, metaedge, .keep_all = TRUE) %>%
  # Constant column used only to render a title strip through facet_grid
  dplyr::mutate(title = 'Feature Performance by Metaedge') %>%
  ggplot2::ggplot(ggplot2::aes(x = delta_auroc, y = metaedge)) +
  ggplot2::geom_vline(xintercept = 0, linetype='dashed', color='gray') +
  ggplot2::geom_jitter(ggplot2::aes(fill = length), height=0.4, alpha=1, shape = 21, size=2, color= '#6C6C6C', stroke=0.7) +
  ggplot2::facet_grid(. ~ title) +
  ggplot2::scale_fill_manual(name = 'Length', values=length_to_color) +
  ggplot2::scale_x_continuous(labels=scales::percent) +
  # Order the y axis by the row order of the summarised metaedge_df
  ggplot2::scale_y_discrete(limits = metaedge_df$metaedge) +
  ggplot2::xlab('Δ AUROC of DWPCs Due to Permutation') + ggplot2::ylab(NULL) +
  hetior::theme_dhimmel() +
  ggplot2::theme(
    legend.justification=c(1, 0), legend.position=c(1, 0))

# Pass the plot explicitly instead of relying on ggplot2::last_plot()
ggplot2::ggsave('./data/feature-performance/delta-auroc.png', plot = gg_fsp, dpi=300, width = w, height = h)

ERROR: Error in loadNamespace(name): there is no package called ‘hetior’


## Percent of DWPC features passing Bonferroni, FDR, and unadjusted significance thresholds

In [6]:
# Significance threshold applied to the raw and adjusted p-values
alpha = 0.05

# Fraction of DWPC features with a delta AUROC p-value below alpha after
# Bonferroni adjustment, after FDR adjustment, and without adjustment.
# %$% exposes the columns of metapath_df, so they are referenced by bare name.
metapath_df %$% mean(fwer_delta_auroc < alpha)
metapath_df %$% mean(fdr_delta_auroc < alpha)
metapath_df %$% mean(pval_delta_auroc < alpha)

In [7]:
# Number of DWPC features (rows of metapath_df) that were tested
dim(metapath_df)[1L]

## Metaedges with a significant metapath

In [8]:
# For each metaedge, summarise its presence in metapaths whose delta AUROC is
# significant at FDR < alpha, sorted by the number of distinct metapaths.
# dplyr helpers are namespace-qualified because dplyr is never attached in
# this notebook (only magrittr is loaded with library()).
fsp_df %>%
  dplyr::filter(fdr_delta_auroc < alpha) %>%
  dplyr::group_by(metaedge, metaedge_abbrev) %>%
  dplyr::summarize(
    # Row count: a metaedge repeated within one metapath is counted each time
    n_occurence = dplyr::n(),
    # Each metapath is counted once, however often the metaedge occurs in it
    n_unique_metapaths = dplyr::n_distinct(metapath)
  ) %>%
  dplyr::ungroup() %>%
  dplyr::arrange(dplyr::desc(n_unique_metapaths))

metaedge,metaedge_abbrev,n_occurence,n_unique_metapaths
Disease - drug-used-for-treatment - Compound,DduftC,78,58
Disease > subclass-of > Disease,DsoD,57,34
Compound - significant-drug-interaction - Compound,CsdiC,36,25
Compound > has-part > Compound,ChpC,24,21
Protein - physically-interacts-with - Compound,PpiwC,12,7
Gene - genetic-association - Disease,GgaD,6,4
Gene - encodes - Protein,GeP,2,2
