In [68]:
from hypothesis.extra.pandas import indexes, series
import hypothesis.strategies as st
from hypothesis import given, assume

In [27]:
series(dtype=float).example()

Series([], dtype: float64)

In [202]:
from typing import Dict, List, Tuple
import numpy as np
import pandas as pd
from scipy.stats import mannwhitneyu, norm


def AUC(labels: pd.Series, values: pd.Series) -> Dict[str, float]:
    labels = labels.fillna(value=0)
    values = values.fillna(value=0)
    posii = labels[labels > 0]
    negii = labels[labels <= 0]
    posn = len(posii)
    negn = len(negii)
    posval = values[posii.index]
    negval = values[negii.index]
    if (posn > 0 and negn > 0):
        statistic, pvalue = mannwhitneyu(posval, negval, alternative = "greater")
        conf_int_low, conf_int_high = mannwhitneyu_conf_int(posval, negval)
        res = {
            "low": conf_int_low,
            "high": conf_int_high,
            "auc": (statistic / (posn * negn)),
            "pval": pvalue
        }
    else:
        res = {
        "auc": 0.5,
        "pval": np.nan
        }

    return res


def mannwhitneyu_conf_int(
    x: np.array,
    y: np.array,
    alpha: float = 0.05
) -> Tuple[float, float]:
    n = len(x)
    m = len(y)

    N = norm.ppf(1 - alpha / 2)

    diffs = sorted([i - j for i in x for j in y])

    # For an approximate 100(1-a)% confidence interval first calculate K:
    nm = n*m
    top = nm*(n+m+1)
    right = N*np.sqrt(top/12)
    left = (n*m)/2
    K = left - right

    # The Kth smallest to the Kth largest of the n x m differences
    # lx and ly should be > ~20
    return (diffs[round(K)], diffs[len(diffs)-round(K)])

In [207]:
import hypothesis.strategies as st
from hypothesis import given, assume

In [205]:
@given(
    x=st.lists(elements=st.integers(), min_size=20),
    y=st.lists(elements=st.integers(), min_size=20)
)
def testconfint(x, y):
    assume(len(x) > 5)
    assume(len(x) > 5)
    mannwhitneyu_conf_int(x, y)

In [206]:
testconfint()

In [208]:
from hypothesis.extra.pandas import series, indexes

In [220]:
@given(x = series(dtype=float),y = series(dtype=float))
def testAUC(x,y):
    assume(len(x) == len(y))
    assume(len(x) > 5)
    assume(len(y) > 5)
    assume(x.index.isin(y.index).all())
    assume(y.index.isin(x.index).all())
    res = AUC(x, y)
    assert(type(res) == "dict")

In [223]:
testAUC()

Falsifying example: testAUC(
    x=0    0.0
    1    1.0
    2    1.0
    3    1.0
    4    1.0
    5    1.0
    dtype: float64, y=0    0.0
    1    0.0
    2    0.0
    3    0.0
    4    0.0
    5    0.0
    dtype: float64,
)
Traceback (most recent call last):
  File "/tmp/ipykernel_21597/377233995.py", line 8, in testAUC
    res = AUC(x, y)
  File "/tmp/ipykernel_21597/2590556554.py", line 18, in AUC
    conf_int_low, conf_int_high = mannwhitneyu_conf_int(posval, negval)
  File "/tmp/ipykernel_21597/2590556554.py", line 55, in mannwhitneyu_conf_int
    return (diffs[round(K)], diffs[len(diffs)-round(K)])
IndexError: list index out of range

Falsifying example: testAUC(
    x=0    0.0
    1    0.0
    2    0.0
    3    0.0
    4    0.0
    5    0.0
    dtype: float64, y=0    0.0
    1    0.0
    2    0.0
    3    0.0
    4    0.0
    5    0.0
    dtype: float64,
)
Traceback (most recent call last):
  File "/tmp/ipykernel_21597/377233995.py", line 9, in testAUC
    assert(type(res) == 

MultipleFailures: Hypothesis found 2 distinct failures.