In [1]:
import numpy as np
from numpy.random import Generator, PCG64
from hyppo.independence import FriedmanRafsky

# Seed handed to PCG64 whenever a random generator is built, so the sampled data is repeatable.
seed = 402

A helper function that formats the input for the Friedman-Rafsky test: it stacks the two samples into a single array and builds a matching vector of group labels.

In [2]:
def combine_then_label(x1, x2):
    """Stack two samples and build the matching group-label vector.

    Parameters
    ----------
    x1, x2 : numpy.ndarray
        The two samples. They are concatenated along the first axis, and
        the length of that axis is taken as each sample's size.

    Returns
    -------
    stacked : numpy.ndarray
        ``x1`` followed by ``x2``, joined along the first axis.
    labels : numpy.ndarray
        One integer per row of ``stacked``: ``1`` for rows that came from
        ``x1`` and ``2`` for rows that came from ``x2``.
    """
    stacked = np.concatenate((x1, x2))

    # One label per row: x1.shape[0] ones followed by x2.shape[0] twos.
    group_sizes = [x1.shape[0], x2.shape[0]]
    labels = np.repeat([1, 2], group_sizes)

    return stacked, labels


Create a pair of datasets, both drawn from the same uniform distribution.

In [3]:
# Build the generator from the notebook-wide seed so the draws are reproducible.
rng = Generator(PCG64(seed))

N_SAMPLES = 4  # rows (observations) in each sample
N_DIMS = 2  # columns (features) per observation
range_min, range_max = 0, 100  # bounds passed to rng.uniform

# Two successive draws from the same uniform distribution,
# each of shape (N_SAMPLES, N_DIMS).
uniform_pair_same_size = (
    rng.uniform(range_min, range_max, (N_SAMPLES, N_DIMS)),
    rng.uniform(range_min, range_max, (N_SAMPLES, N_DIMS)),
)

# x: both samples stacked row-wise; y: group label (1 or 2) for each row of x.
x, y = combine_then_label(*uniform_pair_same_size)

# Sanity-check the combined input before handing it to the test.
assert x.shape == (2 * N_SAMPLES, N_DIMS)
assert y.shape == (2 * N_SAMPLES,)


Run them through the Friedman-Rafsky (FR) test.

In [4]:
test_method = FriedmanRafsky()

# Result object returned by `test()`; its `pvalue` and `stat` attributes are printed below.
# NOTE(review): the p-value looks permutation-based; if `test()` accepts a random-state
# argument, passing the notebook seed would make it repeatable. Confirm against the hyppo docs.
rslt1 = test_method.test(x, y)

print(
    f"***According to `test()` method***\npvalue:\t{rslt1.pvalue}\nstat:\t{rslt1.stat}\n"
)

# Store the value returned by `statistic()` under its own name rather than
# overwriting `rslt1`, so `rslt1.pvalue` / `rslt1.stat` stay usable after this cell
# and the two reported statistics can be compared side by side.
stat1 = test_method.statistic(x, y)
print(
    f"***According to `statistic()` method***\nstat:\t{stat1}\n"
)


***According to `test()` method***
pvalue:	0.37462537462537465
stat:	0.745579861773754

***According to `statistic()` method***
stat:	6



Now let's try with datasets of different sizes.

In [5]:
# Re-create the generator from the same seed so this cell's draws do not depend
# on how many values earlier cells already consumed.
rng = Generator(PCG64(seed))

N_SAMPLES = 4  # rows in the first sample
N_EXTRA = 2  # additional rows in the second sample
N_DIMS = 2  # columns (features) per observation
range_min, range_max = 0, 100  # bounds passed to rng.uniform

# Two draws from the same uniform distribution, but with different row counts:
# (N_SAMPLES, N_DIMS) and (N_SAMPLES + N_EXTRA, N_DIMS).
uniform_pair_diff_size = (
    rng.uniform(range_min, range_max, (N_SAMPLES, N_DIMS)),
    rng.uniform(range_min, range_max, (N_SAMPLES + N_EXTRA, N_DIMS)),
)

# x: both samples stacked row-wise; y: group label (1 or 2) for each row of x.
x, y = combine_then_label(*uniform_pair_diff_size)

test_method = FriedmanRafsky()

# Result object returned by `test()`; its `pvalue` and `stat` attributes are printed below.
rslt2 = test_method.test(x, y)

print(
    f"***According to `test()` method***\npvalue:\t{rslt2.pvalue}\nstat:\t{rslt2.stat}\n"
)

# Store the value returned by `statistic()` under its own name rather than
# overwriting `rslt2`, so the `test()` result stays available after this cell.
stat2 = test_method.statistic(x, y)
print(
    f"***According to `statistic()` method***\nstat:\t{stat2}\n"
)


***According to `test()` method***
pvalue:	0.12487512487512488
stat:	1.5268127144616541

***According to `statistic()` method***
stat:	8

