Skip to content

Commit

Permalink
Merge pull request #11 from noamteyssier/10-perform-inc-always-with-g…
Browse files Browse the repository at this point in the history
…ene-product

10 perform inc always with gene product
  • Loading branch information
noamteyssier committed May 9, 2023
2 parents 47b7268 + b227e65 commit 63c6d1b
Show file tree
Hide file tree
Showing 7 changed files with 232 additions and 60 deletions.
2 changes: 1 addition & 1 deletion Cargo.toml
@@ -1,6 +1,6 @@
[package]
name = "intc"
version = "0.1.4"
version = "0.2.0"
edition = "2021"
description = "An implementation of the *-INC method to calculate an empirical FDR for non-targeting controls in CRISPR screens "
license = "MIT"
Expand Down
53 changes: 41 additions & 12 deletions src/fdr.rs
@@ -1,6 +1,12 @@
use crate::utils::{argsort, argsort_vec};
use crate::utils::{argsort, argsort_vec, diagonal_product};
use ndarray::{Array1, Axis};

#[derive(Clone, Copy, Debug)]
pub enum Direction {
Less,
Greater,
}

#[derive(Debug)]
/// Result struct for False Discovery Rate and p-value threshold
pub struct FdrResult {
Expand Down Expand Up @@ -35,29 +41,48 @@ impl FdrResult {
/// False Discovery Rate
pub struct Fdr<'a> {
pvalues: &'a Array1<f64>,
product: Array1<f64>,
ntc_indices: &'a [usize],
alpha: f64,
use_product: Option<Direction>,
}

impl<'a> Fdr<'a> {
/// Create a new FDR struct
pub fn new(pvalues: &'a Array1<f64>, ntc_indices: &'a [usize], alpha: f64) -> Self {
pub fn new(
pvalues: &'a Array1<f64>,
logfc: &'a Array1<f64>,
ntc_indices: &'a [usize],
alpha: f64,
use_product: Option<Direction>,
) -> Self {
Self {
pvalues,
ntc_indices,
alpha,
use_product,
product: diagonal_product(logfc, pvalues),
}
}

/// Fit the FDR
pub fn fit(&self) -> FdrResult {
let order = argsort(self.pvalues);
let values = match self.use_product {
Some(Direction::Less) => &self.product,
Some(Direction::Greater) => &self.product,
None => &self.pvalues,
};
let order = match self.use_product {
Some(Direction::Less) => argsort(&self.product, true),
Some(Direction::Greater) => argsort(&self.product, false),
None => argsort(&self.pvalues, true),
};
let reorder = argsort_vec(&order);
let is_ntc = Self::ntc_mask(self.ntc_indices, self.pvalues.len());
let sorted_pvalues = self.pvalues.select(Axis(0), &order);
let sorted_values = values.select(Axis(0), &order);
let sorted_ntc = is_ntc.select(Axis(0), &order);
let sorted_fdr = Self::empirical_fdr(&sorted_ntc);
let threshold = Self::threshold(&sorted_pvalues, &sorted_fdr, self.alpha);
let threshold = Self::threshold(&sorted_values, &sorted_fdr, self.alpha);
let unsorted_fdr = sorted_fdr.select(Axis(0), &reorder);
FdrResult::new(unsorted_fdr, threshold)
}
Expand Down Expand Up @@ -87,11 +112,11 @@ impl<'a> Fdr<'a> {
}

/// Calculate the p-value threshold
fn threshold(pvalues: &Array1<f64>, fdr: &Array1<f64>, alpha: f64) -> f64 {
fn threshold(values: &Array1<f64>, fdr: &Array1<f64>, alpha: f64) -> f64 {
let fdr_pval = fdr
.iter()
.zip(pvalues.iter())
.take_while(|(fdr, _pvalue)| *fdr <= &alpha)
.zip(values.iter())
.take_while(|(fdr, _value)| *fdr <= &alpha)
.reduce(|_x, y| y);
if let Some(fp) = fdr_pval {
*fp.1
Expand All @@ -109,27 +134,30 @@ mod testing {
#[test]
fn test_fdr() {
let pvalues = array![0.1, 0.2, 0.3];
let logfc = array![0.1, 0.2, 0.3];
let ntc_indices = vec![1];
let alpha = 0.1;
let fdr = Fdr::new(&pvalues, &ntc_indices, alpha).fit();
let fdr = Fdr::new(&pvalues, &logfc, &ntc_indices, alpha, None).fit();
assert_eq!(fdr.fdr(), array![0.0, 0.5, 1. / 3.]);
}

#[test]
fn test_fdr_unsorted() {
let pvalues = array![0.2, 0.1, 0.3];
let logfc = array![0.1, 0.2, 0.3];
let ntc_indices = vec![1];
let alpha = 0.1;
let fdr = Fdr::new(&pvalues, &ntc_indices, alpha).fit();
let fdr = Fdr::new(&pvalues, &logfc, &ntc_indices, alpha, None).fit();
assert_eq!(fdr.fdr(), array![0.5, 1.0, 1. / 3.]);
}

#[test]
fn test_fdr_unsorted_larger() {
let pvalues = array![0.5, 0.1, 0.3, 0.4, 0.2, 0.6];
let logfc = array![0.5, 0.1, 0.3, 0.4, 0.2, 0.6];
let ntc_indices = vec![3, 5];
let alpha = 0.1;
let fdr = Fdr::new(&pvalues, &ntc_indices, alpha).fit();
let fdr = Fdr::new(&pvalues, &logfc, &ntc_indices, alpha, None).fit();
assert_eq!(fdr.fdr(), array![0.2, 0.0, 0.0, 0.25, 0.0, 1. / 3.]);
}

Expand All @@ -144,9 +172,10 @@ mod testing {
fn test_fdr_threshold() {
let m = 10;
let pvalues = Array1::linspace(0.0, 1.0, m);
let logfc = Array1::linspace(0.0, 1.0, m);
let ntc_indices = vec![4, 6, 9];
let alpha = 0.1;
let fdr = Fdr::new(&pvalues, &ntc_indices, alpha).fit();
let fdr = Fdr::new(&pvalues, &logfc, &ntc_indices, alpha, None).fit();
assert_eq!(fdr.threshold(), 1. / 3.);
}
}
46 changes: 41 additions & 5 deletions src/inc.rs
@@ -1,78 +1,114 @@
use crate::{
encode::EncodeIndex,
fdr::Direction,
mwu::Alternative,
rank_test::{pseudo_rank_test, rank_test},
result::IncResult,
utils::{build_pseudo_names, reconstruct_names, select_ranks, validate_token}, mwu::Alternative,
utils::{
aggregate_fold_changes, build_pseudo_names, reconstruct_names, select_values,
validate_token,
},
};
use anyhow::Result;
use ndarray::Array1;

#[derive(Debug)]
pub struct Inc<'a> {
pvalues: &'a Array1<f64>,
logfc: &'a Array1<f64>,
genes: &'a [String],
token: &'a str,
n_pseudo: usize,
s_pseudo: usize,
alpha: f64,
alternative: Alternative,
continuity: bool,
use_product: Option<Direction>,
seed: u64,
}

impl<'a> Inc<'a> {
pub fn new(
pvalues: &'a Array1<f64>,
logfc: &'a Array1<f64>,
genes: &'a [String],
token: &'a str,
n_pseudo: usize,
s_pseudo: usize,
alpha: f64,
alternative: Alternative,
continuity: bool,
use_product: Option<Direction>,
seed: Option<u64>,
) -> Inc<'a> {
Inc {
pvalues,
logfc,
genes,
token,
n_pseudo,
s_pseudo,
alpha,
alternative,
continuity,
use_product,
seed: seed.unwrap_or(0),
}
}

pub fn fit(&self) -> Result<IncResult> {
let encoding = EncodeIndex::new(self.genes);
let ntc_index = validate_token(&encoding.map, self.token)?;
let ntc_values = select_ranks(ntc_index, encoding.encoding(), self.pvalues);
let ntc_pvalues = select_values(ntc_index, encoding.encoding(), self.pvalues);
let ntc_logfcs = select_values(ntc_index, encoding.encoding(), self.logfc);
let n_genes = encoding.map.len() - 1;

let gene_fc_map = aggregate_fold_changes(self.genes, self.logfc);

// run the rank test on all genes
let (mwu_scores, mwu_pvalues) = rank_test(
n_genes,
ntc_index,
encoding.encoding(),
self.pvalues,
&ntc_values,
&ntc_pvalues,
Alternative::Less,
self.continuity,
);

// run the rank test on pseudo genes
let (pseudo_scores, pseudo_pvalues, pseudo_logfc) = pseudo_rank_test(
self.n_pseudo,
self.s_pseudo,
&ntc_pvalues,
&ntc_logfcs,
self.alternative,
self.continuity,
self.seed,
);
let (pseudo_scores, pseudo_pvalues) =
pseudo_rank_test(self.n_pseudo, self.s_pseudo, &ntc_values, self.alternative, self.continuity);

// reconstruct the gene names
let gene_names = reconstruct_names(encoding.map(), ntc_index);
let pseudo_names = build_pseudo_names(self.n_pseudo);

// collect the gene fold changes
let gene_logfc = gene_names
.iter()
.map(|gene| gene_fc_map.get(gene).unwrap())
.copied()
.collect::<Vec<f64>>();

Ok(IncResult::new(
gene_names,
pseudo_names,
mwu_scores,
mwu_pvalues,
gene_logfc,
pseudo_scores,
pseudo_pvalues,
pseudo_logfc,
self.alpha,
self.use_product,
))
}
}
25 changes: 15 additions & 10 deletions src/mwu.rs
Expand Up @@ -5,10 +5,9 @@ use statrs::{
};
use std::ops::Div;

/// Defines the alternative hypothesis
/// Defines the alternative hypothesis
#[derive(Clone, Copy, Default, Debug)]
pub enum Alternative {

/// The alternative hypothesis is that the first array is greater than the second array
Greater,

Expand Down Expand Up @@ -150,11 +149,11 @@ fn p_value(z_u: f64, alternative: Alternative) -> f64 {
/// * `alternative` - The alternative hypothesis to test against
/// * `use_continuity` - Whether to use continuity correction
pub fn mann_whitney_u(
x: &Array1<f64>,
y: &Array1<f64>,
alternative: Alternative,
use_continuity: bool,
) -> (f64, f64) {
x: &Array1<f64>,
y: &Array1<f64>,
alternative: Alternative,
use_continuity: bool,
) -> (f64, f64) {
let (ranks_x, ranks_y) = merged_ranks(x, y);

let nx = x.len() as f64;
Expand All @@ -169,8 +168,8 @@ pub fn mann_whitney_u(

#[cfg(test)]
mod testing {
use crate::mwu::mann_whitney_u;
use super::merged_ranks;
use crate::mwu::mann_whitney_u;
use ndarray::{array, Array};

const EPSILON: f64 = 1e-10;
Expand Down Expand Up @@ -236,14 +235,20 @@ mod testing {
fn test_alt_u_statistic_greater() {
let x = array![1., 2., 4.];
let y = array![3., 5., 6.];
assert_eq!(super::alt_u_statistic(&x, &y, super::Alternative::Greater), 8.);
assert_eq!(
super::alt_u_statistic(&x, &y, super::Alternative::Greater),
8.
);
}

#[test]
fn test_alt_u_statistic_two_sided() {
let x = array![1., 2., 4.];
let y = array![3., 5., 6.];
assert_eq!(super::alt_u_statistic(&x, &y, super::Alternative::TwoSided), 1.);
assert_eq!(
super::alt_u_statistic(&x, &y, super::Alternative::TwoSided),
1.
);
}

#[test]
Expand Down

0 comments on commit 63c6d1b

Please sign in to comment.