diff --git a/scipy/stats/__init__.py b/scipy/stats/__init__.py
index 7c8e785b1e52..08eee2c602df 100644
--- a/scipy/stats/__init__.py
+++ b/scipy/stats/__init__.py
@@ -306,6 +306,7 @@
 
    ttest_1samp
    binomtest
+   quantile_test
    skewtest
    kurtosistest
    normaltest
diff --git a/scipy/stats/_stats_py.py b/scipy/stats/_stats_py.py
index 7ccf697df4b1..f36c48bb37f9 100644
--- a/scipy/stats/_stats_py.py
+++ b/scipy/stats/_stats_py.py
@@ -49,7 +49,7 @@
                                    siegelslopes)
 from ._stats import (_kendall_dis, _toint64, _weightedrankedtau,
                      _local_correlations)
-from dataclasses import dataclass
+from dataclasses import dataclass, field
 from ._hypotests import _all_partitions
 from ._stats_pythran import _compute_outer_prob_inside_method
 from ._resampling import (MonteCarloMethod, PermutationMethod, BootstrapMethod,
@@ -82,10 +82,10 @@
            'kstest', 'ks_1samp', 'ks_2samp',
            'chisquare', 'power_divergence',
            'tiecorrect', 'ranksums', 'kruskal', 'friedmanchisquare',
-           'rankdata',
-           'combine_pvalues', 'wasserstein_distance', 'energy_distance',
+           'rankdata', 'combine_pvalues', 'quantile_test',
+           'wasserstein_distance', 'energy_distance',
            'brunnermunzel', 'alexandergovern',
-           'expectile', ]
+           'expectile']
 
 
 def _chk_asarray(a, axis):
@@ -1646,7 +1646,6 @@ def kurtosistest(a, axis=0, nan_policy='propagate', alternative='two-sided'):
         * 'propagate': returns nan
         * 'raise': throws an error
         * 'omit': performs the calculations ignoring nan values
-
     alternative : {'two-sided', 'less', 'greater'}, optional
         Defines the alternative hypothesis.
         The following options are available (default is 'two-sided'):
@@ -9718,6 +9717,465 @@ def combine_pvalues(pvalues, method='fisher', weights=None):
     return SignificanceResult(statistic, pval)
 
 
+@dataclass
+class QuantileTestResult:
+    r"""
+    Result of `scipy.stats.quantile_test`.
+
+    Attributes
+    ----------
+    statistic: float
+        The statistic used to calculate the p-value; either ``T1``, the
+        number of observations less than or equal to the hypothesized quantile,
+        or ``T2``, the number of observations strictly less than the
+        hypothesized quantile. Two test statistics are required to handle the
+        possibility the data was generated from a discrete or mixed
+        distribution.
+
+    statistic_type : int
+        ``1`` or ``2`` depending on which of ``T1`` or ``T2`` was used to
+        calculate the p-value respectively. ``T1`` corresponds to the
+        ``"greater"`` alternative hypothesis and ``T2`` to the ``"less"``.  For
+        the ``"two-sided"`` case, the statistic type that leads to smallest
+        p-value is used.  For significant tests, ``statistic_type = 1`` means
+        there is evidence that the population quantile is significantly greater
+        than the hypothesized value and ``statistic_type = 2`` means there is
+        evidence that it is significantly less than the hypothesized value.
+
+    pvalue : float
+        The p-value of the hypothesis test.
+    """
+    statistic: float
+    statistic_type: int
+    pvalue: float
+    _alternative: list[str] = field(repr=False)
+    _x : np.ndarray = field(repr=False)
+    _p : float = field(repr=False)
+
+    def confidence_interval(self, confidence_level=0.95):
+        """
+        Compute the confidence interval of the quantile.
+
+        Parameters
+        ----------
+        confidence_level : float, default: 0.95
+            Confidence level for the computed confidence interval
+            of the quantile. Default is 0.95.
+
+        Returns
+        -------
+        ci : ``ConfidenceInterval`` object
+            The object has attributes ``low`` and ``high`` that hold the
+            lower and upper bounds of the confidence interval.
+
+        Examples
+        --------
+        >>> import numpy as np
+        >>> import scipy.stats as stats
+        >>> p = 0.75  # quantile of interest
+        >>> q = 0  # hypothesized value of the quantile
+        >>> x = np.exp(np.arange(0, 1.01, 0.01))
+        >>> res = stats.quantile_test(x, q=q, p=p, alternative='less')
+        >>> lb, ub = res.confidence_interval()
+        >>> lb, ub
+        (-inf, 2.293318740264183)
+        >>> res = stats.quantile_test(x, q=q, p=p, alternative='two-sided')
+        >>> lb, ub = res.confidence_interval(0.9)
+        >>> lb, ub
+        (1.9542373206359396, 2.293318740264183)
+        """
+
+        alternative = self._alternative
+        p = self._p
+        x = np.sort(self._x)
+        n = len(x)
+        bd = stats.binom(n, p)
+
+        if confidence_level <= 0 or confidence_level >= 1:
+            message = "`confidence_level` must be a number between 0 and 1."
+            raise ValueError(message)
+
+        low_index = np.nan
+        high_index = np.nan
+
+        if alternative == 'less':
+            p = 1 - confidence_level
+            low = -np.inf
+            high_index = int(bd.isf(p))
+            high = x[high_index] if high_index < n else np.nan
+        elif alternative == 'greater':
+            p = 1 - confidence_level
+            low_index = int(bd.ppf(p)) - 1
+            low = x[low_index] if low_index >= 0 else np.nan
+            high = np.inf
+        elif alternative == 'two-sided':
+            p = (1 - confidence_level) / 2
+            low_index = int(bd.ppf(p)) - 1
+            low = x[low_index] if low_index >= 0 else np.nan
+            high_index = int(bd.isf(p))
+            high = x[high_index] if high_index < n else np.nan
+
+        return ConfidenceInterval(low, high)
+
+
+def quantile_test_iv(x, q, p, alternative):
+
+    x = np.atleast_1d(x)
+    message = '`x` must be a one-dimensional array of numbers.'
+    if x.ndim != 1 or not np.issubdtype(x.dtype, np.number):
+        raise ValueError(message)
+
+    q = np.array(q)[()]
+    message = "`q` must be a scalar."
+    if q.ndim != 0 or not np.issubdtype(q.dtype, np.number):
+        raise ValueError(message)
+
+    p = np.array(p)[()]
+    message = "`p` must be a float strictly between 0 and 1."
+    if p.ndim != 0 or p >= 1 or p <= 0:
+        raise ValueError(message)
+
+    alternatives = {'two-sided', 'less', 'greater'}
+    message = f"`alternative` must be one of {alternatives}"
+    if alternative not in alternatives:
+        raise ValueError(message)
+
+    return x, q, p, alternative
+
+
+def quantile_test(x, *, q=0, p=0.5, alternative='two-sided'):
+    r"""
+    Perform a quantile test and compute a confidence interval of the quantile.
+
+    This function tests the null hypothesis that `q` is the value of the
+    quantile associated with probability `p` of the population underlying
+    sample `x`. For example, with default parameters, it tests that the
+    median of the population underlying `x` is zero. The function returns an
+    object including the test statistic, a p-value, and a method for computing
+    the confidence interval around the quantile.
+
+    Parameters
+    ----------
+    x : array_like
+        A one-dimensional sample.
+    q : float, default: 0
+        The hypothesized value of the quantile.
+    p : float, default: 0.5
+        The probability associated with the quantile; i.e. the proportion of
+        the population less than `q` is `p`. Must be strictly between 0 and
+        1.
+    alternative : {'two-sided', 'less', 'greater'}, optional
+        Defines the alternative hypothesis.
+        The following options are available (default is 'two-sided'):
+
+        * 'two-sided': the quantile associated with the probability `p` 
+          is not `q`.
+        * 'less': the quantile associated with the probability `p` is less
+          than `q`.
+        * 'greater': the quantile associated with the probability `p` is
+          greater than `q`.
+
+    Returns
+    -------
+    result : QuantileTestResult
+        An object with the following attributes:
+
+        statistic : float
+            One of two test statistics that may be used in the quantile test.
+            The first test statistic, ``T1``, is the proportion of samples in
+            `x` that are less than or equal to the hypothesized quantile
+            `q`. The second test statistic, ``T2``, is the proportion of
+            samples in `x` that are strictly less than the hypothesized
+            quantile `q`.
+
+            When ``alternative = 'greater'``, ``T1`` is used to calculate the
+            p-value and ``statistic`` is set to ``T1``.
+
+            When ``alternative = 'less'``, ``T2`` is used to calculate the
+            p-value and ``statistic`` is set to ``T2``.
+
+            When ``alternative = 'two-sided'``, both ``T1`` and ``T2`` are
+            considered, and the one that leads to the smallest p-value is used.
+
+        statistic_type : int
+            Either `1` or `2` depending on which of ``T1`` or ``T2`` was
+            used to calculate the p-value.
+
+        pvalue : float
+            The p-value associated with the given alternative.
+
+        The object also has the following method:
+
+        confidence_interval(confidence_level=0.95)
+            Computes a confidence interval around the the
+            population quantile associated with the probability `p`. The
+            confidence interval is returned in a ``namedtuple`` with
+            fields `low` and `high`.  Values are `nan` when there are
+            not enough observations to compute the confidence interval at
+            the desired confidence.
+
+    Notes
+    -----
+    This test and its method for computing confidence intervals are
+    non-parametric. They are valid if and only if the observations are i.i.d.
+
+    The implementation of the test follows Conover [1]_. Two test statistics
+    are considered.
+
+    ``T1``: The number of observations in `x` less than or equal to `q`.
+
+        ``T1 = (x <= q).sum()``
+
+    ``T2``: The number of observations in `x` strictly less than `q`.
+
+        ``T2 = (x < q).sum()``
+
+    The use of two test statistics is necessary to handle the possibility that
+    `x` was generated from a discrete or mixed distribution.
+
+    The null hypothesis for the test is:
+
+        H0: The :math:`p^{\mathrm{th}}` population quantile is `q`.
+
+    and the null distribution for each test statistic is
+    :math:`\mathrm{binom}\left(n, p\right)`. When ``alternative='less'``,
+    the alternative hypothesis is:
+
+        H1: The :math:`p^{\mathrm{th}}` population quantile is less than `q`.
+
+    and the p-value is the probability that the binomial random variable
+
+    .. math::
+        Y \sim \mathrm{binom}\left(n, p\right)
+
+    is greater than or equal to the observed value ``T2``.
+
+    When ``alternative='greater'``, the alternative hypothesis is:
+
+        H1: The :math:`p^{\mathrm{th}}` population quantile is greater than `q`
+
+    and the p-value is the probability that the binomial random variable Y
+    is less than or equal to the observed value ``T1``.
+
+    When ``alternative='two-sided'``, the alternative hypothesis is
+
+        H1: `q` is not the :math:`p^{\mathrm{th}}` population quantile.
+
+    and the p-value is twice the smaller of the p-values for the ``'less'``
+    and ``'greater'`` cases. Both of these p-values can exceed 0.5 for the same
+    data, so the value is clipped into the interval :math:`[0, 1]`.
+
+    The approach for confidence intervals is attributed to Thompson [2]_ and
+    later proven to be applicable to any set of i.i.d. samples [3]_. The
+    computation is based on the observation that the probability of a quantile
+    :math:`q` to be larger than any observations :math:`x_m (1\leq m \leq N)`
+    can be computed as
+
+    .. math::
+
+        \mathbb{P}(x_m \leq q) = 1 - \sum_{k=0}^{m-1} \binom{N}{k}
+        q^k(1-q)^{N-k}
+
+    By default, confidence intervals are computed for a 95% confidence level.
+    A common interpretation of a 95% confidence intervals is that if i.i.d.
+    samples are drawn repeatedly from the same population and confidence
+    intervals are formed each time, the confidence interval will contain the
+    true value of the specified quantile in approximately 95% of trials.
+
+    A similar function is available in the QuantileNPCI R package [4]_. The
+    foundation is the same, but it computes the confidence interval bounds by
+    doing interpolations between the sample values, whereas this function uses
+    only sample values as bounds. Thus, ``quantile_test.confidence_interval``
+    returns more conservative intervals (i.e., larger).
+
+    The same computation of confidence intervals for quantiles is included in
+    the confintr package [5]_.
+
+    Two-sided confidence intervals are not guaranteed to be optimal; i.e.,
+    there may exist a tighter interval that may contain the quantile of
+    interest with probability larger than the confidence level.
+    Without further assumption on the samples (e.g., the nature of the
+    underlying distribution), the one-sided intervals are optimally tight.
+
+    References
+    ----------
+    .. [1] W. J. Conover. Practical Nonparametric Statistics, 3rd Ed. 1999.
+    .. [2] W. R. Thompson, "On Confidence Ranges for the Median and Other
+       Expectation Distributions for Populations of Unknown Distribution
+       Form," The Annals of Mathematical Statistics, vol. 7, no. 3,
+       pp. 122-128, 1936, Accessed: Sep. 18, 2019. [Online]. Available:
+       https://www.jstor.org/stable/2957563.
+    .. [3] H. A. David and H. N. Nagaraja, "Order Statistics in Nonparametric
+       Inference" in Order Statistics, John Wiley & Sons, Ltd, 2005, pp.
+       159-170. Available:
+       https://onlinelibrary.wiley.com/doi/10.1002/0471722162.ch7.
+    .. [4] N. Hutson, A. Hutson, L. Yan, "QuantileNPCI: Nonparametric
+       Confidence Intervals for Quantiles," R package,
+       https://cran.r-project.org/package=QuantileNPCI
+    .. [5] M. Mayer, "confintr: Confidence Intervals," R package,
+       https://cran.r-project.org/package=confintr
+
+
+    Examples
+    --------
+
+    Suppose we wish to test the null hypothesis that the median of a population
+    is equal to 0.5. We choose a confidence level of 99%; that is, we will
+    reject the null hypothesis in favor of the alternative if the p-value is
+    less than 0.01.
+
+    When testing random variates from the standard uniform distribution, which
+    has a median of 0.5, we expect the data to be consistent with the null
+    hypothesis most of the time.
+
+    >>> import numpy as np
+    >>> from scipy import stats
+    >>> rng = np.random.default_rng(6981396440634228121)
+    >>> rvs = stats.uniform.rvs(size=100, random_state=rng)
+    >>> stats.quantile_test(rvs, q=0.5, p=0.5)
+    QuantileTestResult(statistic=45, statistic_type=1, pvalue=0.36820161732669576)
+
+    As expected, the p-value is not below our threshold of 0.01, so
+    we cannot reject the null hypothesis.
+
+    When testing data from the standard *normal* distribution, which has a 
+    median of 0, we would expect the null hypothesis to be rejected.
+
+    >>> rvs = stats.norm.rvs(size=100, random_state=rng)
+    >>> stats.quantile_test(rvs, q=0.5, p=0.5)
+    QuantileTestResult(statistic=67, statistic_type=2, pvalue=0.0008737198369123724)
+
+    Indeed, the p-value is lower than our threshold of 0.01, so we reject the
+    null hypothesis in favor of the default "two-sided" alternative: the median
+    of the population is *not* equal to 0.5.
+
+    However, suppose we were to test the null hypothesis against the
+    one-sided alternative that the median of the population is *greater* than
+    0.5. Since the median of the standard normal is less than 0.5, we would not
+    expect the null hypothesis to be rejected.
+
+    >>> stats.quantile_test(rvs, q=0.5, p=0.5, alternative='greater')
+    QuantileTestResult(statistic=67, statistic_type=1, pvalue=0.9997956114162866)
+
+    Unsurprisingly, with a p-value greater than our threshold, we would not
+    reject the null hypothesis in favor of the chosen alternative.
+
+    The quantile test can be used for any quantile, not only the median. For
+    example, we can test whether the third quartile of the distribution
+    underlying the sample is greater than 0.6.
+
+    >>> rvs = stats.uniform.rvs(size=100, random_state=rng)
+    >>> stats.quantile_test(rvs, q=0.6, p=0.75, alternative='greater')
+    QuantileTestResult(statistic=64, statistic_type=1, pvalue=0.00940696592998271)
+
+    The p-value is lower than the threshold. We reject the null hypothesis in
+    favor of the alternative: the third quartile of the distribution underlying
+    our sample is greater than 0.6.
+
+    `quantile_test` can also compute confidence intervals for any quantile.
+
+    >>> rvs = stats.norm.rvs(size=100, random_state=rng)
+    >>> res = stats.quantile_test(rvs, q=0.6, p=0.75)
+    >>> ci = res.confidence_interval(confidence_level=0.95)
+    >>> ci
+    ConfidenceInterval(low=0.284491604437432, high=0.8912531024914844)
+
+    When testing a one-sided alternative, the confidence interval contains
+    all observations such that if passed as `q`, the p-value of the
+    test would be greater than 0.05, and therefore the null hypothesis
+    would not be rejected. For example:
+
+    >>> rvs.sort()
+    >>> q, p, alpha = 0.6, 0.75, 0.95
+    >>> res = stats.quantile_test(rvs, q=q, p=p, alternative='less')
+    >>> ci = res.confidence_interval(confidence_level=alpha)
+    >>> for x in rvs[rvs <= ci.high]:
+    ...     res = stats.quantile_test(rvs, q=x, p=p, alternative='less')
+    ...     assert res.pvalue > 1-alpha
+    >>> for x in rvs[rvs > ci.high]:
+    ...     res = stats.quantile_test(rvs, q=x, p=p, alternative='less')
+    ...     assert res.pvalue < 1-alpha
+
+    Also, if a 95% confidence interval is repeatedly generated for random
+    samples, the confidence interval will contain the true quantile value in
+    approximately 95% of replications.
+
+    >>> dist = stats.rayleigh() # our "unknown" distribution
+    >>> p = 0.2
+    >>> true_stat = dist.ppf(p) # the true value of the statistic
+    >>> n_trials = 1000
+    >>> quantile_ci_contains_true_stat = 0
+    >>> for i in range(n_trials):
+    ...     data = dist.rvs(size=100, random_state=rng)
+    ...     res = stats.quantile_test(data, p=p)
+    ...     ci = res.confidence_interval(0.95)
+    ...     if ci[0] < true_stat < ci[1]:
+    ...         quantile_ci_contains_true_stat += 1
+    >>> quantile_ci_contains_true_stat >= 950
+    True
+
+    This works with any distribution and any quantile, as long as the samples
+    are i.i.d.
+    """
+    # Implementation carefully follows [1] 3.2
+    # "H0: the p*th quantile of X is x*"
+    # To facilitate comparison with [1], we'll use variable names that
+    # best match Conover's notation
+    X, x_star, p_star, H1 = quantile_test_iv(x, q, p, alternative)
+
+    # "We will use two test statistics in this test. Let T1 equal "
+    # "the number of observations less than or equal to x*, and "
+    # "let T2 equal the number of observations less than x*."
+    T1 = (X <= x_star).sum()
+    T2 = (X < x_star).sum()
+
+    # "The null distribution of the test statistics T1 and T2 is "
+    # "the binomial distribution, with parameters n = sample size, and "
+    # "p = p* as given in the null hypothesis.... Y has the binomial "
+    # "distribution with parameters n and p*."
+    n = len(X)
+    Y = stats.binom(n=n, p=p_star)
+
+    # "H1: the p* population quantile is less than x*"
+    if H1 == 'less':
+        # "The p-value is the probability that a binomial random variable Y "
+        # "is greater than *or equal to* the observed value of T2...using p=p*"
+        pvalue = Y.sf(T2-1)  # Y.pmf(T2) + Y.sf(T2)
+        statistic = T2
+        statistic_type = 2
+    # "H1: the p* population quantile is greater than x*"
+    elif H1 == 'greater':
+        # "The p-value is the probability that a binomial random variable Y "
+        # "is less than or equal to the observed value of T1... using p = p*"
+        pvalue = Y.cdf(T1)
+        statistic = T1
+        statistic_type = 1
+    # "H1: x* is not the p*th population quantile"
+    elif H1 == 'two-sided':
+        # "The p-value is twice the smaller of the probabilities that a
+        # binomial random variable Y is less than or equal to the observed
+        # value of T1 or greater than or equal to the observed value of T2
+        # using p=p*."
+        # Note: both one-sided p-values can exceed 0.5 for the same data, so
+        # `clip`
+        pvalues = [Y.cdf(T1), Y.sf(T2 - 1)]  # [greater, less]
+        sorted_idx = np.argsort(pvalues)
+        pvalue = np.clip(2*pvalues[sorted_idx[0]], 0, 1)
+        if sorted_idx[0]:
+            statistic, statistic_type = T2, 2
+        else:
+            statistic, statistic_type = T1, 1
+
+    return QuantileTestResult(
+        statistic=statistic,
+        statistic_type=statistic_type,
+        pvalue=pvalue,
+        _alternative=H1,
+        _x=X,
+        _p=p_star
+    )
+
+
 #####################################
 #       STATISTICAL DISTANCES       #
 #####################################
diff --git a/scipy/stats/tests/test_stats.py b/scipy/stats/tests/test_stats.py
index 9dbebf63a64e..e8593dcc0450 100644
--- a/scipy/stats/tests/test_stats.py
+++ b/scipy/stats/tests/test_stats.py
@@ -7859,6 +7859,145 @@ def test_alias(self):
         assert_equal(res.stat, res.statistic)
 
 
+class TestQuantileTest():
+    r""" Test the non-parametric quantile test,
+    including the computation of confidence intervals
+    """
+
+    def test_quantile_test_iv(self):
+        x = [1, 2, 3]
+
+        message = "`x` must be a one-dimensional array of numbers."
+        with pytest.raises(ValueError, match=message):
+            stats.quantile_test([x])
+
+        message = "`q` must be a scalar."
+        with pytest.raises(ValueError, match=message):
+            stats.quantile_test(x, q=[1, 2])
+
+        message = "`p` must be a float strictly between 0 and 1."
+        with pytest.raises(ValueError, match=message):
+            stats.quantile_test(x, p=[0.5, 0.75])
+        with pytest.raises(ValueError, match=message):
+            stats.quantile_test(x, p=2)
+        with pytest.raises(ValueError, match=message):
+            stats.quantile_test(x, p=-0.5)
+
+        message = "`alternative` must be one of..."
+        with pytest.raises(ValueError, match=message):
+            stats.quantile_test(x, alternative='one-sided')
+
+        message = "`confidence_level` must be a number between 0 and 1."
+        with pytest.raises(ValueError, match=message):
+            stats.quantile_test(x).confidence_interval(1)
+
+    @pytest.mark.parametrize(
+        'p, alpha, lb, ub, alternative',
+        [[0.3, 0.95, 1.221402758160170, 1.476980793882643, 'two-sided'],
+         [0.5, 0.9, 1.506817785112854, 1.803988415397857, 'two-sided'],
+         [0.25, 0.95, -np.inf, 1.39096812846378, 'less'],
+         [0.8, 0.9, 2.117000016612675, np.inf, 'greater']]
+    )
+    def test_R_ci_quantile(self, p, alpha, lb, ub, alternative):
+        # Test against R library `confintr` function `ci_quantile`, e.g.
+        # library(confintr)
+        # options(digits=16)
+        # x <- exp(seq(0, 1, by = 0.01))
+        # ci_quantile(x, q = 0.3)$interval
+        # ci_quantile(x, q = 0.5, probs = c(0.05, 0.95))$interval
+        # ci_quantile(x, q = 0.25, probs = c(0, 0.95))$interval
+        # ci_quantile(x, q = 0.8, probs = c(0.1, 1))$interval
+        x = np.exp(np.arange(0, 1.01, 0.01))
+        res = stats.quantile_test(x, p=p, alternative=alternative)
+        assert_allclose(res.confidence_interval(alpha), [lb, ub], rtol=1e-15)
+
+    @pytest.mark.parametrize(
+        'q, p, alternative, ref',
+        [[1.2, 0.3, 'two-sided', 0.01515567517648],
+         [1.8, 0.5, 'two-sided', 0.1109183496606]]
+    )
+    def test_R_pvalue(self, q, p, alternative, ref):
+        # Test against R library `snpar` function `quant.test`, e.g.
+        # library(snpar)
+        # options(digits=16)
+        # x < - exp(seq(0, 1, by=0.01))
+        # quant.test(x, q=1.2, p=0.3, exact=TRUE, alternative='t')
+        x = np.exp(np.arange(0, 1.01, 0.01))
+        res = stats.quantile_test(x, q=q, p=p, alternative=alternative)
+        assert_allclose(res.pvalue, ref, rtol=1e-12)
+
+    @pytest.mark.parametrize('case', ['continuous', 'discrete'])
+    @pytest.mark.parametrize('alternative', ['less', 'greater'])
+    @pytest.mark.parametrize('alpha', [0.9, 0.95])
+    def test_pval_ci_match(self, case, alternative, alpha):
+        # Verify that the following statement holds:
+
+        # The 95% confidence interval corresponding with alternative='less'
+        # has -inf as its lower bound, and the upper bound `xu` is the greatest
+        # element from the sample `x` such that:
+        # `stats.quantile_test(x, q=xu, p=p, alternative='less').pvalue``
+        # will be greater than 5%.
+
+        # And the corresponding statement for the alternative='greater' case.
+
+        seed = int((7**len(case) + len(alternative))*alpha)
+        rng = np.random.default_rng(seed)
+        if case == 'continuous':
+            p, q = rng.random(size=2)
+            rvs = rng.random(size=100)
+        else:
+            rvs = rng.integers(1, 11, size=100)
+            p = rng.random()
+            q = rng.integers(1, 11)
+
+        res = stats.quantile_test(rvs, q=q, p=p, alternative=alternative)
+        ci = res.confidence_interval(confidence_level=alpha)
+
+        # select elements inside the confidence interval based on alternative
+        if alternative == 'less':
+            i_inside = rvs <= ci.high
+        else:
+            i_inside = rvs >= ci.low
+
+        for x in rvs[i_inside]:
+            res = stats.quantile_test(rvs, q=x, p=p, alternative=alternative)
+            assert res.pvalue > 1 - alpha
+
+        for x in rvs[~i_inside]:
+            res = stats.quantile_test(rvs, q=x, p=p, alternative=alternative)
+            assert res.pvalue < 1 - alpha
+
+    def test_match_conover_examples(self):
+        # Test against the examples in [1] (Conover Practical Nonparametric
+        # Statistics Third Edition) pg 139
+
+        # Example 1
+        # Data is [189, 233, 195, 160, 212, 176, 231, 185, 199, 213, 202, 193,
+        # 174, 166, 248]
+        # Two-sided test of whether the upper quartile (p=0.75) equals 193
+        # (q=193). Conover shows that 7 of the observations are less than or
+        # equal to 193, and "for the binomial random variable Y, P(Y<=7) =
+        # 0.0173", so the two-sided p-value is twice that, 0.0346.
+        x = [189, 233, 195, 160, 212, 176, 231, 185, 199, 213, 202, 193,
+             174, 166, 248]
+        pvalue_expected = 0.0346
+        res = stats.quantile_test(x, q=193, p=0.75, alternative='two-sided')
+        assert_allclose(res.pvalue, pvalue_expected, rtol=1e-5)
+
+        # Example 2
+        # Conover doesn't give explicit data, just that 8 out of 112
+        # observations are 60 or less. The test is whether the median time is
+        # equal to 60 against the alternative that the median is greater than
+        # 60. The p-value is calculated as P(Y<=8), where Y is again a binomial
+        # distributed random variable, now with p=0.5 and n=112. Conover uses a
+        # normal approximation, but we can easily calculate the CDF of the
+        # binomial distribution.
+        x = [59]*8 + [61]*(112-8)
+        pvalue_expected = stats.binom(p=0.5, n=112).pmf(k=8)
+        res = stats.quantile_test(x, q=60, p=0.5, alternative='greater')
+        assert_allclose(res.pvalue, pvalue_expected, atol=1e-10)
+
+
 class TestPageTrendTest:
     # expected statistic and p-values generated using R at
     # https://rdrr.io/cran/cultevo/, e.g.