In [30]:
import numpy as np
import math

from hyppo.independence import Dcorr
from sklearn.metrics import pairwise_distances


In [52]:
# Two small example data matrices, each with 4 rows and 3 columns.
x = np.array([
    [1, 2, 3],
    [3, 4, 1],
    [8, 4, 2],
    [8, 0, 9],
])

y = np.array([
    [0, 0, 4],
    [1, 5, 1],
    [9, 9, 0],
    [2, 2, 3],
])

# Inner products between every row of x and every row of y (4 x 4 result).
x @ y.T

array([[ 12,  14,  27,  15],
       [  4,  24,  63,  17],
       [  8,  30, 108,  30],
       [ 36,  17,  72,  43]])

# Dcorr
## step 1
Compute the pairwise Euclidean distance matrix for each data matrix.

In [17]:
# Row and column counts of both data matrices.
(x_samples, x_dims), (y_samples, y_dims) = x.shape, y.shape
# The row count of x is used as the sample size n in the cells below.
n = x_samples

$D^x$ and $D^y$

In [18]:
def distance_matrix(arr):
    """Return the pairwise Euclidean distance matrix between the rows of ``arr``.

    Parameters
    ----------
    arr : array_like, shape (n, dims)
        One observation per row. A 1-D input of length ``n`` is treated as
        ``n`` one-dimensional observations.

    Returns
    -------
    numpy.ndarray, shape (n, n), dtype float64
        Entry ``[i, j]`` is the Euclidean distance between row ``i`` and
        row ``j``.

    Raises
    ------
    ValueError
        If ``arr`` is neither 1-D nor 2-D.
    """
    # Work in float64 so that narrow or unsigned integer inputs cannot wrap
    # around when rows are subtracted or the differences are squared.
    points = np.asarray(arr, dtype=np.float64)
    if points.ndim == 1:
        points = points[:, np.newaxis]
    if points.ndim != 2:
        raise ValueError(
            "distance_matrix expects a 1-D or 2-D array, got %d dimensions" % points.ndim
        )

    # Broadcasting (n, 1, dims) against (1, n, dims) produces every row
    # difference at once; this needs O(n * n * dims) temporary memory.
    diffs = points[:, np.newaxis, :] - points[np.newaxis, :, :]
    return np.sqrt((diffs * diffs).sum(axis=-1))

# Distance matrices of x and y, computed in that order.
Dx, Dy = (distance_matrix(data) for data in (x, y))

In [47]:
# Display the n x n matrix I - (1/n) * ones, the same H used inside dist_to_cov.
np.eye(n) - np.ones((n, n)) / n

array([[ 0.75, -0.25, -0.25, -0.25],
       [-0.25,  0.75, -0.25, -0.25],
       [-0.25, -0.25,  0.75, -0.25],
       [-0.25, -0.25, -0.25,  0.75]])

In [34]:
# Show the hand-computed distance matrix of y, for comparison with scikit-learn below.
Dy

array([[ 0.        ,  5.91607978, 13.34166406,  3.        ],
       [ 5.91607978,  0.        ,  9.        ,  3.74165739],
       [13.34166406,  9.        ,  0.        , 10.34408043],
       [ 3.        ,  3.74165739, 10.34408043,  0.        ]])

In [35]:
# Cross-check the hand-rolled distance matrix against scikit-learn
# (pairwise_distances uses the Euclidean metric by default).
sk_Dy = pairwise_distances(y)
# Fail loudly on a mismatch instead of relying on comparing two printouts by eye.
np.testing.assert_allclose(Dy, sk_Dy)
sk_Dy

array([[ 0.        ,  5.91607978, 13.34166406,  3.        ],
       [ 5.91607978,  0.        ,  9.        ,  3.74165739],
       [13.34166406,  9.        ,  0.        , 10.34408043],
       [ 3.        ,  3.74165739, 10.34408043,  0.        ]])

$H = I_n - \frac{1}{n}\mathbf{1}\mathbf{1}^T$ (the $n \times n$ centering matrix)

# $Dcov^b_n(x,y) = \frac{1}{n^2}tr(D^xHD^yH)$

In [38]:
def dist_to_cov(distx,disty):
    """Biased sample distance covariance from two pairwise distance matrices.

    Evaluates ``tr(D^x H D^y H) / n**2`` where ``H = I - (1/n) * ones((n, n))``
    and ``n`` is the number of rows of ``distx``.

    Parameters
    ----------
    distx, disty : array_like, shape (n, n)
        Square distance matrices of identical shape.

    Returns
    -------
    float
        The value of ``tr(distx @ H @ disty @ H) / n**2``.

    Raises
    ------
    ValueError
        If the inputs are empty, not square, or differ in shape.
    """
    distx = np.asarray(distx, dtype=float)
    disty = np.asarray(disty, dtype=float)
    if distx.ndim != 2 or distx.shape[0] != distx.shape[1]:
        raise ValueError("distx must be a square 2-D matrix, got shape %s" % (distx.shape,))
    if disty.shape != distx.shape:
        raise ValueError(
            "distx and disty must have the same shape, got %s and %s"
            % (distx.shape, disty.shape)
        )

    # Take the sample size from the inputs rather than from a notebook-level
    # variable, so the function works on a fresh kernel and for any size.
    n_obs = distx.shape[0]
    if n_obs == 0:
        raise ValueError("distance matrices must contain at least one observation")

    H = np.identity(n_obs) - np.ones((n_obs, n_obs)) * (1 / n_obs)
    # Same association order as the formula: D^x (H (D^y H)).
    product = distx @ (H @ (disty @ H))
    return (1 / (n_obs * n_obs)) * np.trace(product)

In [39]:
# Distance covariance of (x, y), followed by the two self-covariances
# used to normalise it in the next cell.
Dcov_xy, Dcov_xx, Dcov_yy = (
    dist_to_cov(left, right)
    for left, right in ((Dx, Dy), (Dx, Dx), (Dy, Dy))
)

In [43]:
# Normalise the cross term by the geometric mean of the two self terms.
Dcov_xy / math.sqrt(Dcov_xx * Dcov_yy)

0.6588774749068576

In [22]:
# Reference value from hyppo with bias=True, to compare against the manual result above.
stat, pvalue = Dcorr(bias=True).test(x,y)

In [23]:
# Statistic returned by the bias=True run.
print(stat)

0.6588774749068574


In [26]:
# Same test with bias=False; note this rebinds `stat` and `pvalue` from the bias=True run.
stat, pvalue = Dcorr(bias=False).test(x,y)

In [27]:
# Statistic returned by the bias=False run.
print(stat)

0.15609089851929792


In [28]:
# NOTE(review): `Dcov` is not assigned in any visible cell (only Dcov_xy, Dcov_xx
# and Dcov_yy are). Unless it is defined elsewhere, this raises NameError on a
# fresh kernel — confirm which quantity was intended, or remove the cell.
1/Dcov

0.10609906664995314