Merge pull request #11 from noamteyssier/10-perform-inc-always-with-g…

…ene-product 10 perform inc always with gene product
noamteyssier · May 9, 2023 · 63c6d1b · 63c6d1b
2 parents 47b7268 + b227e65
commit 63c6d1b
Show file tree

Hide file tree

Showing 7 changed files with 232 additions and 60 deletions.
diff --git a/Cargo.toml b/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "intc"
-version = "0.1.4"
+version = "0.2.0"
 edition = "2021"
 description = "An implementation of the *-INC method to calculate an empirical FDR for non-targeting controls in CRISPR screens "
 license = "MIT"

diff --git a/src/fdr.rs b/src/fdr.rs
@@ -1,6 +1,12 @@
-use crate::utils::{argsort, argsort_vec};
+use crate::utils::{argsort, argsort_vec, diagonal_product};
 use ndarray::{Array1, Axis};
 
+#[derive(Clone, Copy, Debug)]
+pub enum Direction {
+    Less,
+    Greater,
+}
+
 #[derive(Debug)]
 /// Result struct for False Discovery Rate and p-value threshold
 pub struct FdrResult {
@@ -35,29 +41,48 @@ impl FdrResult {
 /// False Discovery Rate
 pub struct Fdr<'a> {
     pvalues: &'a Array1<f64>,
+    product: Array1<f64>,
     ntc_indices: &'a [usize],
     alpha: f64,
+    use_product: Option<Direction>,
 }
 
 impl<'a> Fdr<'a> {
     /// Create a new FDR struct
-    pub fn new(pvalues: &'a Array1<f64>, ntc_indices: &'a [usize], alpha: f64) -> Self {
+    pub fn new(
+        pvalues: &'a Array1<f64>,
+        logfc: &'a Array1<f64>,
+        ntc_indices: &'a [usize],
+        alpha: f64,
+        use_product: Option<Direction>,
+    ) -> Self {
         Self {
             pvalues,
             ntc_indices,
             alpha,
+            use_product,
+            product: diagonal_product(logfc, pvalues),
         }
     }
 
     /// Fit the FDR
     pub fn fit(&self) -> FdrResult {
-        let order = argsort(self.pvalues);
+        let values = match self.use_product {
+            Some(Direction::Less) => &self.product,
+            Some(Direction::Greater) => &self.product,
+            None => &self.pvalues,
+        };
+        let order = match self.use_product {
+            Some(Direction::Less) => argsort(&self.product, true),
+            Some(Direction::Greater) => argsort(&self.product, false),
+            None => argsort(&self.pvalues, true),
+        };
         let reorder = argsort_vec(&order);
         let is_ntc = Self::ntc_mask(self.ntc_indices, self.pvalues.len());
-        let sorted_pvalues = self.pvalues.select(Axis(0), &order);
+        let sorted_values = values.select(Axis(0), &order);
         let sorted_ntc = is_ntc.select(Axis(0), &order);
         let sorted_fdr = Self::empirical_fdr(&sorted_ntc);
-        let threshold = Self::threshold(&sorted_pvalues, &sorted_fdr, self.alpha);
+        let threshold = Self::threshold(&sorted_values, &sorted_fdr, self.alpha);
         let unsorted_fdr = sorted_fdr.select(Axis(0), &reorder);
         FdrResult::new(unsorted_fdr, threshold)
     }
@@ -87,11 +112,11 @@ impl<'a> Fdr<'a> {
     }
 
     /// Calculate the p-value threshold
-    fn threshold(pvalues: &Array1<f64>, fdr: &Array1<f64>, alpha: f64) -> f64 {
+    fn threshold(values: &Array1<f64>, fdr: &Array1<f64>, alpha: f64) -> f64 {
         let fdr_pval = fdr
             .iter()
-            .zip(pvalues.iter())
-            .take_while(|(fdr, _pvalue)| *fdr <= &alpha)
+            .zip(values.iter())
+            .take_while(|(fdr, _value)| *fdr <= &alpha)
             .reduce(|_x, y| y);
         if let Some(fp) = fdr_pval {
             *fp.1
@@ -109,27 +134,30 @@ mod testing {
     #[test]
     fn test_fdr() {
         let pvalues = array![0.1, 0.2, 0.3];
+        let logfc = array![0.1, 0.2, 0.3];
         let ntc_indices = vec![1];
         let alpha = 0.1;
-        let fdr = Fdr::new(&pvalues, &ntc_indices, alpha).fit();
+        let fdr = Fdr::new(&pvalues, &logfc, &ntc_indices, alpha, None).fit();
         assert_eq!(fdr.fdr(), array![0.0, 0.5, 1. / 3.]);
     }
 
     #[test]
     fn test_fdr_unsorted() {
         let pvalues = array![0.2, 0.1, 0.3];
+        let logfc = array![0.1, 0.2, 0.3];
         let ntc_indices = vec![1];
         let alpha = 0.1;
-        let fdr = Fdr::new(&pvalues, &ntc_indices, alpha).fit();
+        let fdr = Fdr::new(&pvalues, &logfc, &ntc_indices, alpha, None).fit();
         assert_eq!(fdr.fdr(), array![0.5, 1.0, 1. / 3.]);
     }
 
     #[test]
     fn test_fdr_unsorted_larger() {
         let pvalues = array![0.5, 0.1, 0.3, 0.4, 0.2, 0.6];
+        let logfc = array![0.5, 0.1, 0.3, 0.4, 0.2, 0.6];
         let ntc_indices = vec![3, 5];
         let alpha = 0.1;
-        let fdr = Fdr::new(&pvalues, &ntc_indices, alpha).fit();
+        let fdr = Fdr::new(&pvalues, &logfc, &ntc_indices, alpha, None).fit();
         assert_eq!(fdr.fdr(), array![0.2, 0.0, 0.0, 0.25, 0.0, 1. / 3.]);
     }
 
@@ -144,9 +172,10 @@ mod testing {
     fn test_fdr_threshold() {
         let m = 10;
         let pvalues = Array1::linspace(0.0, 1.0, m);
+        let logfc = Array1::linspace(0.0, 1.0, m);
         let ntc_indices = vec![4, 6, 9];
         let alpha = 0.1;
-        let fdr = Fdr::new(&pvalues, &ntc_indices, alpha).fit();
+        let fdr = Fdr::new(&pvalues, &logfc, &ntc_indices, alpha, None).fit();
         assert_eq!(fdr.threshold(), 1. / 3.);
     }
 }
diff --git a/src/inc.rs b/src/inc.rs
@@ -1,78 +1,114 @@
 use crate::{
     encode::EncodeIndex,
+    fdr::Direction,
+    mwu::Alternative,
     rank_test::{pseudo_rank_test, rank_test},
     result::IncResult,
-    utils::{build_pseudo_names, reconstruct_names, select_ranks, validate_token}, mwu::Alternative,
+    utils::{
+        aggregate_fold_changes, build_pseudo_names, reconstruct_names, select_values,
+        validate_token,
+    },
 };
 use anyhow::Result;
 use ndarray::Array1;
 
 #[derive(Debug)]
 pub struct Inc<'a> {
     pvalues: &'a Array1<f64>,
+    logfc: &'a Array1<f64>,
     genes: &'a [String],
     token: &'a str,
     n_pseudo: usize,
     s_pseudo: usize,
     alpha: f64,
     alternative: Alternative,
     continuity: bool,
+    use_product: Option<Direction>,
+    seed: u64,
 }
 
 impl<'a> Inc<'a> {
     pub fn new(
         pvalues: &'a Array1<f64>,
+        logfc: &'a Array1<f64>,
         genes: &'a [String],
         token: &'a str,
         n_pseudo: usize,
         s_pseudo: usize,
         alpha: f64,
         alternative: Alternative,
         continuity: bool,
+        use_product: Option<Direction>,
+        seed: Option<u64>,
     ) -> Inc<'a> {
         Inc {
             pvalues,
+            logfc,
             genes,
             token,
             n_pseudo,
             s_pseudo,
             alpha,
             alternative,
             continuity,
+            use_product,
+            seed: seed.unwrap_or(0),
         }
     }
 
     pub fn fit(&self) -> Result<IncResult> {
         let encoding = EncodeIndex::new(self.genes);
         let ntc_index = validate_token(&encoding.map, self.token)?;
-        let ntc_values = select_ranks(ntc_index, encoding.encoding(), self.pvalues);
+        let ntc_pvalues = select_values(ntc_index, encoding.encoding(), self.pvalues);
+        let ntc_logfcs = select_values(ntc_index, encoding.encoding(), self.logfc);
         let n_genes = encoding.map.len() - 1;
 
+        let gene_fc_map = aggregate_fold_changes(self.genes, self.logfc);
+
         // run the rank test on all genes
         let (mwu_scores, mwu_pvalues) = rank_test(
             n_genes,
             ntc_index,
             encoding.encoding(),
             self.pvalues,
-            &ntc_values,
+            &ntc_pvalues,
+            Alternative::Less,
+            self.continuity,
+        );
+
+        // run the rank test on pseudo genes
+        let (pseudo_scores, pseudo_pvalues, pseudo_logfc) = pseudo_rank_test(
+            self.n_pseudo,
+            self.s_pseudo,
+            &ntc_pvalues,
+            &ntc_logfcs,
             self.alternative,
             self.continuity,
+            self.seed,
         );
-        let (pseudo_scores, pseudo_pvalues) =
-            pseudo_rank_test(self.n_pseudo, self.s_pseudo, &ntc_values, self.alternative, self.continuity);
 
         // reconstruct the gene names
         let gene_names = reconstruct_names(encoding.map(), ntc_index);
         let pseudo_names = build_pseudo_names(self.n_pseudo);
 
+        // collect the gene fold changes
+        let gene_logfc = gene_names
+            .iter()
+            .map(|gene| gene_fc_map.get(gene).unwrap())
+            .copied()
+            .collect::<Vec<f64>>();
+
         Ok(IncResult::new(
             gene_names,
             pseudo_names,
             mwu_scores,
             mwu_pvalues,
+            gene_logfc,
             pseudo_scores,
             pseudo_pvalues,
+            pseudo_logfc,
             self.alpha,
+            self.use_product,
         ))
     }
 }
diff --git a/src/mwu.rs b/src/mwu.rs
@@ -5,10 +5,9 @@ use statrs::{
 };
 use std::ops::Div;
 
-/// Defines the alternative hypothesis 
+/// Defines the alternative hypothesis
 #[derive(Clone, Copy, Default, Debug)]
 pub enum Alternative {
-
     /// The alternative hypothesis is that the first array is greater than the second array
     Greater,
 
@@ -150,11 +149,11 @@ fn p_value(z_u: f64, alternative: Alternative) -> f64 {
 /// * `alternative` - The alternative hypothesis to test against
 /// * `use_continuity` - Whether to use continuity correction
 pub fn mann_whitney_u(
-        x: &Array1<f64>, 
-        y: &Array1<f64>, 
-        alternative: Alternative,
-        use_continuity: bool,
-    ) -> (f64, f64) {
+    x: &Array1<f64>,
+    y: &Array1<f64>,
+    alternative: Alternative,
+    use_continuity: bool,
+) -> (f64, f64) {
     let (ranks_x, ranks_y) = merged_ranks(x, y);
 
     let nx = x.len() as f64;
@@ -169,8 +168,8 @@ pub fn mann_whitney_u(
 
 #[cfg(test)]
 mod testing {
-    use crate::mwu::mann_whitney_u;
     use super::merged_ranks;
+    use crate::mwu::mann_whitney_u;
     use ndarray::{array, Array};
 
     const EPSILON: f64 = 1e-10;
@@ -236,14 +235,20 @@ mod testing {
     fn test_alt_u_statistic_greater() {
         let x = array![1., 2., 4.];
         let y = array![3., 5., 6.];
-        assert_eq!(super::alt_u_statistic(&x, &y, super::Alternative::Greater), 8.);
+        assert_eq!(
+            super::alt_u_statistic(&x, &y, super::Alternative::Greater),
+            8.
+        );
     }
 
     #[test]
     fn test_alt_u_statistic_two_sided() {
         let x = array![1., 2., 4.];
         let y = array![3., 5., 6.];
-        assert_eq!(super::alt_u_statistic(&x, &y, super::Alternative::TwoSided), 1.);
+        assert_eq!(
+            super::alt_u_statistic(&x, &y, super::Alternative::TwoSided),
+            1.
+        );
     }
 
     #[test]