Skip to content

Commit

Permalink
Merge pull request #9 from noamteyssier/7-specify-alternative-in-mann…
Browse files Browse the repository at this point in the history
…-whitney-u-test

7 specify alternative in mann whitney u test
  • Loading branch information
noamteyssier committed Feb 2, 2023
2 parents 132621b + 6844880 commit 47b7268
Show file tree
Hide file tree
Showing 4 changed files with 206 additions and 23 deletions.
2 changes: 1 addition & 1 deletion Cargo.toml
@@ -1,6 +1,6 @@
[package]
name = "intc"
version = "0.1.3"
version = "0.1.4"
edition = "2021"
description = "An implementation of the *-INC method to calculate an empirical FDR for non-targeting controls in CRISPR screens "
license = "MIT"
Expand Down
8 changes: 6 additions & 2 deletions src/inc.rs
Expand Up @@ -2,7 +2,7 @@ use crate::{
encode::EncodeIndex,
rank_test::{pseudo_rank_test, rank_test},
result::IncResult,
utils::{build_pseudo_names, reconstruct_names, select_ranks, validate_token},
utils::{build_pseudo_names, reconstruct_names, select_ranks, validate_token}, mwu::Alternative,
};
use anyhow::Result;
use ndarray::Array1;
Expand All @@ -15,6 +15,7 @@ pub struct Inc<'a> {
n_pseudo: usize,
s_pseudo: usize,
alpha: f64,
alternative: Alternative,
continuity: bool,
}

Expand All @@ -26,6 +27,7 @@ impl<'a> Inc<'a> {
n_pseudo: usize,
s_pseudo: usize,
alpha: f64,
alternative: Alternative,
continuity: bool,
) -> Inc<'a> {
Inc {
Expand All @@ -35,6 +37,7 @@ impl<'a> Inc<'a> {
n_pseudo,
s_pseudo,
alpha,
alternative,
continuity,
}
}
Expand All @@ -52,10 +55,11 @@ impl<'a> Inc<'a> {
encoding.encoding(),
self.pvalues,
&ntc_values,
self.alternative,
self.continuity,
);
let (pseudo_scores, pseudo_pvalues) =
pseudo_rank_test(self.n_pseudo, self.s_pseudo, &ntc_values, self.continuity);
pseudo_rank_test(self.n_pseudo, self.s_pseudo, &ntc_values, self.alternative, self.continuity);

// reconstruct the gene names
let gene_names = reconstruct_names(encoding.map(), ntc_index);
Expand Down
200 changes: 187 additions & 13 deletions src/mwu.rs
Expand Up @@ -5,7 +5,29 @@ use statrs::{
};
use std::ops::Div;

/// Defines the alternative hypothesis
#[derive(Clone, Copy, Default, Debug)]
pub enum Alternative {

/// The alternative hypothesis is that the first array is greater than the second array
Greater,

/// The alternative hypothesis is that the first array is less than the second array
Less,

/// The alternative hypothesis is that the first array is not equal to the second array
#[default]
TwoSided,
}

/// Concatenate two arrays, rank them, and return the rankings for each of the groups.
///
/// # Arguments
/// * `x` - The first array
/// * `y` - The second array
///
/// # Returns
/// * `ranks` - A tuple containing the rankings for each of the groups
fn merged_ranks(x: &Array1<f64>, y: &Array1<f64>) -> (Array1<f64>, Array1<f64>) {
let midpoint = x.len();
let joined = concatenate(Axis(0), &[x.view(), y.view()])
Expand All @@ -19,18 +41,59 @@ fn merged_ranks(x: &Array1<f64>, y: &Array1<f64>) -> (Array1<f64>, Array1<f64>)
}

/// Calculates the U-Statistic given an array of ranks
///
/// # Arguments
/// * `array` - The array of ranks
///
/// # Returns
/// * `u` - The U-Statistic
fn u_statistic(array: &Array1<f64>) -> f64 {
let s = array.sum();
let n = array.len() as f64;
s - ((n * (n + 1.)) / 2.)
}

/// Calculates the Mann-Whitney U-Statistic
///
/// # Arguments
/// * `x` - The first group of ranks
/// * `y` - The second group of ranks
/// * `alternative` - The alternative hypothesis
///
/// # Returns
/// * `u` - The U-Statistic
fn alt_u_statistic(ranks_x: &Array1<f64>, ranks_y: &Array1<f64>, alternative: Alternative) -> f64 {
match alternative {
Alternative::Less => u_statistic(ranks_x),
Alternative::Greater => u_statistic(ranks_y),
Alternative::TwoSided => {
let u_x = u_statistic(ranks_x);
let u_y = u_statistic(ranks_y);
u_x.min(u_y)
}
}
}

/// Calculates the U-Distribution Mean
///
/// # Arguments
/// * `nx` - The number of elements in the first group
/// * `ny` - The number of elements in the second group
///
/// # Returns
/// * `m_u` - The mean of the U-Distribution
fn u_mean(nx: f64, ny: f64) -> f64 {
(nx * ny) / 2.
}

/// Calculates the U-Distribution Standard Deviation
///
/// # Arguments
/// * `nx` - The number of elements in the first group
/// * `ny` - The number of elements in the second group
///
/// # Returns
/// * `s_u` - The standard deviation of the U-Distribution
fn u_std(nx: f64, ny: f64) -> f64 {
(nx * ny * (nx + ny + 1.)).div(12.).sqrt()
}
Expand All @@ -41,6 +104,15 @@ fn u_std(nx: f64, ny: f64) -> f64 {
// Because SF is always used to calculate the p-value, we can always
// _subtract_ 0.5 for the continuity correction. This always increases the
// p-value to account for the rest of the probability mass _at_ q = U.
//
// # Arguments
// * `u` - The U-Statistic
// * `nx` - The number of elements in the first group
// * `ny` - The number of elements in the second group
// * `use_continuity` - Whether to use continuity correction
//
// # Returns
// * `z_u` - The z-score of the U-Statistic
fn z_score(u: f64, nx: f64, ny: f64, use_continuity: bool) -> f64 {
let m_u = u_mean(nx, ny);
let s_u = u_std(nx, ny);
Expand All @@ -50,29 +122,62 @@ fn z_score(u: f64, nx: f64, ny: f64, use_continuity: bool) -> f64 {
} else {
(u - m_u) / s_u
}
}

/// Calculates the pvalue of the z-score and adjusts for the alternative hypothesis
/// if necessary.
///
/// # Arguments
/// * `z_u` - The z-score of the U-Statistic
/// * `alternative` - The alternative hypothesis to test against
///
/// # Returns
/// * `pvalue` - The p-value of the test
fn p_value(z_u: f64, alternative: Alternative) -> f64 {
let pvalue = Normal::new(0., 1.).unwrap().cdf(z_u);
match alternative {
Alternative::TwoSided => 2. * pvalue,
_ => pvalue,
}
}

/// Performs the Mann-Whitney U Test otherwise known as the Rank-Sum Test to measure the
/// statistical difference between two values through their ranks.
pub fn mann_whitney_u(x: &Array1<f64>, y: &Array1<f64>, use_continuity: bool) -> (f64, f64) {
let (ranks_x, _ranks_y) = merged_ranks(x, y);
/// Performs the Mann-Whitney U Test to measure the statistical difference between
/// two groups through their rank values.
///
/// # Arguments
/// * `x` - Array of values from the first group
/// * `y` - Array of values from the second group
/// * `alternative` - The alternative hypothesis to test against
/// * `use_continuity` - Whether to use continuity correction
pub fn mann_whitney_u(
x: &Array1<f64>,
y: &Array1<f64>,
alternative: Alternative,
use_continuity: bool,
) -> (f64, f64) {
let (ranks_x, ranks_y) = merged_ranks(x, y);

let nx = x.len() as f64;
let ny = y.len() as f64;

let u_t = u_statistic(&ranks_x);
let u_t = alt_u_statistic(&ranks_x, &ranks_y, alternative);
let z_u = z_score(u_t, nx, ny, use_continuity);
let p_v = p_value(z_u, alternative);

(u_t, Normal::new(0., 1.).unwrap().cdf(z_u))
(u_t, p_v)
}

#[cfg(test)]
mod testing {
use crate::mwu::mann_whitney_u;

use super::merged_ranks;
use ndarray::{array, Array1};
use ndarray::{array, Array};

const EPSILON: f64 = 1e-10;

fn test_close(a: f64, b: f64) {
assert!((a - b).abs() < EPSILON);
}

#[test]
fn test_merged_ranks() {
Expand All @@ -95,7 +200,9 @@ mod testing {
#[test]
fn test_u_statistic() {
let x = array![1., 2., 4.];
let y = array![3., 5., 6.];
assert_eq!(super::u_statistic(&x), 1.);
assert_eq!(super::u_statistic(&y), 8.);
}

#[test]
Expand All @@ -119,10 +226,77 @@ mod testing {
}

#[test]
fn test_mann_whitney_u() {
let x = Array1::range(1., 100., 1.);
let y = Array1::range(100., 200., 1.);
let (_, pv) = mann_whitney_u(&x, &y, false);
assert!(pv - 1.87e-34 < 1e-30);
fn test_alt_u_statistic_less() {
let x = array![1., 2., 4.];
let y = array![3., 5., 6.];
assert_eq!(super::alt_u_statistic(&x, &y, super::Alternative::Less), 1.);
}

#[test]
fn test_alt_u_statistic_greater() {
let x = array![1., 2., 4.];
let y = array![3., 5., 6.];
assert_eq!(super::alt_u_statistic(&x, &y, super::Alternative::Greater), 8.);
}

#[test]
fn test_alt_u_statistic_two_sided() {
let x = array![1., 2., 4.];
let y = array![3., 5., 6.];
assert_eq!(super::alt_u_statistic(&x, &y, super::Alternative::TwoSided), 1.);
}

#[test]
fn test_mann_whitney_u_twosided_continuity() {
let x = Array::range(0.0, 10.0, 1.0);
let y = Array::range(10.0, 20.0, 1.0);
let (u, p) = mann_whitney_u(&x, &y, super::Alternative::TwoSided, true);
assert_eq!(u, 0.);
test_close(p, 0.0001826717911243235);
}

#[test]
fn test_mann_whitney_u_twosided_no_continuity() {
let x = Array::range(0.0, 10.0, 1.0);
let y = Array::range(10.0, 20.0, 1.0);
let (u, p) = mann_whitney_u(&x, &y, super::Alternative::TwoSided, false);
assert_eq!(u, 0.);
test_close(p, 0.00015705228423075119);
}

#[test]
fn test_mann_whitney_u_less_continuity() {
let x = Array::range(0.0, 10.0, 1.0);
let y = Array::range(10.0, 20.0, 1.0);
let (u, p) = mann_whitney_u(&x, &y, super::Alternative::Less, true);
assert_eq!(u, 0.);
test_close(p, 9.133589556216175e-5);
}

#[test]
fn test_mann_whitney_u_less_no_continuity() {
let x = Array::range(0.0, 10.0, 1.0);
let y = Array::range(10.0, 20.0, 1.0);
let (u, p) = mann_whitney_u(&x, &y, super::Alternative::Less, false);
assert_eq!(u, 0.);
test_close(p, 7.852614211537559e-05);
}

#[test]
fn test_mann_whitney_u_greater_continuity() {
let x = Array::range(0.0, 10.0, 1.0);
let y = Array::range(10.0, 20.0, 1.0);
let (u, p) = mann_whitney_u(&x, &y, super::Alternative::Greater, true);
assert_eq!(u, 100.);
test_close(p, 0.9999325785388173);
}

#[test]
fn test_mann_whitney_u_greater_no_continuity() {
let x = Array::range(0.0, 10.0, 1.0);
let y = Array::range(10.0, 20.0, 1.0);
let (u, p) = mann_whitney_u(&x, &y, super::Alternative::Greater, false);
assert_eq!(u, 100.);
test_close(p, 0.9999214738578847);
}
}
19 changes: 12 additions & 7 deletions src/rank_test.rs
@@ -1,4 +1,4 @@
use crate::{mwu::mann_whitney_u, utils::select_ranks};
use crate::{mwu::{mann_whitney_u, Alternative}, utils::select_ranks};
use ndarray::{Array1, Axis};
use ndarray_rand::{rand_distr::Uniform, RandomExt};

Expand All @@ -10,12 +10,13 @@ pub fn rank_test(
encoding: &[usize],
pvalues: &Array1<f64>,
ntc_values: &Array1<f64>,
alternative: Alternative,
continuity: bool,
) -> (Vec<f64>, Vec<f64>) {
(0..=n_genes)
.filter(|x| *x != ntc_index)
.map(|x| select_ranks(x, encoding, pvalues))
.map(|x| mann_whitney_u(&x, ntc_values, continuity))
.map(|x| mann_whitney_u(&x, ntc_values, alternative, continuity))
.unzip()
}

Expand All @@ -25,6 +26,7 @@ pub fn pseudo_rank_test(
n_pseudo: usize,
s_pseudo: usize,
ntc_values: &Array1<f64>,
alternative: Alternative,
continuity: bool,
) -> (Vec<f64>, Vec<f64>) {
(0..n_pseudo)
Expand All @@ -38,14 +40,15 @@ pub fn pseudo_rank_test(
let out_group = ntc_values.select(Axis(0), &out_mask);
(in_group, out_group)
})
.map(|(in_group, out_group)| mann_whitney_u(&in_group, &out_group, continuity))
.map(|(in_group, out_group)| mann_whitney_u(&in_group, &out_group, alternative, continuity))
.unzip()
}

#[cfg(test)]
mod testing {
use super::rank_test;
use ndarray::array;
use crate::mwu::Alternative;

#[test]
fn test_rank_test() {
Expand All @@ -54,17 +57,19 @@ mod testing {
let encoding = vec![0, 1, 2, 0, 1, 2, 0, 1, 2, 0];
let pvalues = array![0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.0];
let ntc_values = array![0.7, 0.8, 0.9, 0.7, 0.8];
let (u, p) = rank_test(n_genes, ntc_index, &encoding, &pvalues, &ntc_values, false);
let alternative = Alternative::TwoSided;
let (u, p) = rank_test(n_genes, ntc_index, &encoding, &pvalues, &ntc_values, alternative, false);
assert_eq!(u, vec![3.0, 4.5]);
assert_eq!(p, vec![0.08985624744357722, 0.18554668477815348]);
assert_eq!(p, vec![0.17971249488715443, 0.37109336955630695]);
}

#[test]
fn test_pseudo_rank_test() {
let n_pseudo = 2;
let s_pseudo = 3;
let ntc_values = array![0.7, 0.7, 0.7, 0.7, 0.7, 0.7];
let (_u, p) = super::pseudo_rank_test(n_pseudo, s_pseudo, &ntc_values, false);
assert_eq!(p, vec![0.5, 0.5]);
let alternative = Alternative::TwoSided;
let (_u, p) = super::pseudo_rank_test(n_pseudo, s_pseudo, &ntc_values, alternative, false);
assert_eq!(p, vec![1.0, 1.0]);
}
}

0 comments on commit 47b7268

Please sign in to comment.