Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix numerical issues inside p-value computation #9

Merged
merged 2 commits into from
Nov 7, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions indirect_gwas/src/utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,10 @@

#include <boost/math/distributions/students_t.hpp>

float compute_log_p_value(float t_statistic, unsigned int degrees_of_freedom)
double compute_log_p_value(double t_statistic, unsigned int degrees_of_freedom)
{
boost::math::students_t dist(degrees_of_freedom);
float p_value = boost::math::cdf(dist, -std::abs(t_statistic));
double p_value = boost::math::cdf(dist, -std::abs(t_statistic));
return -1. * (std::log(p_value) + std::log(2)) / std::log(10);
}

Expand Down
2 changes: 1 addition & 1 deletion indirect_gwas/src/utils.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,6 @@ struct ResultsChunk
Eigen::VectorXf sample_size;
};

float compute_log_p_value(float t_statistic, unsigned int degrees_of_freedom);
double compute_log_p_value(double t_statistic, unsigned int degrees_of_freedom);

std::string get_formatted_time();
3 changes: 2 additions & 1 deletion indirect_gwas/src/wrapper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,7 @@ PYBIND11_MODULE(_igwas, m)
py::arg("n_covariates"),
py::arg("chunksize"),
py::arg("single_file_output"));
m.def("compute_pvalue", &compute_log_p_value, "Compute negative log10 p-value");
m.def("compute_pvalue", &compute_log_p_value, "Compute negative log10 p-value",
py::arg("t_statistic"), py::arg("degrees_of_freedom"));
m.def("head", &head, "Print the first n lines of a file", py::arg("filename"), py::arg("n_lines"));
}
31 changes: 31 additions & 0 deletions indirect_gwas/test/test_pvalues.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
import numpy as np
import pytest
import scipy.stats

from indirect_gwas._igwas import compute_pvalue


@pytest.mark.parametrize(
"t_stat, df",
[
(0.0, 1),
(1.0, 1),
(1.0, 2),
(1e-4, 100_000),
(1e-8, 100_000),
(10, 100_000),
(-10, 100_000),
(20, 100_000),
(50, 100_000),
(100, 100_000),
(500, 100_000),
(1000, 100_000),
],
)
def test_compute_pvalue(t_stat, df):
"""
Test that the p-value is computed correctly. All p-values are negative log10.
"""
cpp_pvalue = compute_pvalue(t_stat, df)
py_pvalue = -(scipy.stats.t(df=df).logsf(abs(t_stat)) + np.log(2)) / np.log(10)
assert cpp_pvalue == pytest.approx(py_pvalue, rel=1e-6)