In [1]:
import torch

In [182]:
A = torch.rand(100, 384)
B = torch.rand(100, 384)
# A = A.view(-1)

In [183]:
samples = A.shape[0]

In [184]:
def pdf_info(tensor, q1, q2):
    reshaped = tensor.view(-1)
    vals, _ = torch.sort(reshaped)
    lower_index = torch.tensor(len(vals)*(q1/100.), dtype=torch.long)
    upper_index = torch.tensor(len(vals)*(q2/100.), dtype=torch.long)
    iqr, r = vals[upper_index]-vals[lower_index], max(reshaped)-min(reshaped)
    return iqr, r

In [185]:
iqr, r = pdf_info(A, 25, 75)

In [328]:
if iqr > 1e-5:
    bin_width = 2*iqr/np.cbrt(samples)
    bins = int(torch.round(r/bin_width))
else:
    # MNIST (since it's really only supposed to be either 0 or 1 as output)
    # TODO: bin number
    bins = 2

# Bin data
x = []
for i in range(A.shape[1]):
    split_sizes = list(torch.histc(A[:, i].unsqueeze(0), bins=bins))
#     x.append(torch.histc(A[:, i].unsqueeze(0), bins=bins))
    
# x = torch.stack(x, dim=0).t()
# x[x == 0.] = .0001
# res = np.array(x).T
# res[res == 0] = .00001

In [356]:
torch.distributions.distribution.Distribution(A)

Distribution()

In [358]:
help(F.kl_div)

Help on function kl_div in module torch.nn.functional:

kl_div(input, target, size_average=None, reduce=None, reduction='elementwise_mean')
    The `Kullback-Leibler divergence`_ Loss.
    
    See :class:`~torch.nn.KLDivLoss` for details.
    
    Args:
        input: Tensor of arbitrary shape
        target: Tensor of the same shape as input
        size_average (bool, optional): Deprecated (see :attr:`reduction`). By default,
            the losses are averaged over each loss element in the batch. Note that for
            some losses, there multiple elements per sample. If the field :attr:`size_average`
            is set to ``False``, the losses are instead summed for each minibatch. Ignored
            when reduce is ``False``. Default: ``True``
        reduce (bool, optional): Deprecated (see :attr:`reduction`). By default, the
            losses are averaged or summed over observations for each minibatch depending
            on :attr:`size_average`. When :attr:`reduce` is ``Fa

In [360]:
F.kl_div(F.log_softmax(A, dim=0), F.softmax(B, dim=0))

tensor(0.0001)

In [212]:
from scipy.stats import entropy, ks_2samp, moment, wasserstein_distance, energy_distance

# Computing metrics for different archtypes
def compute_divergences(A, B):
    """ Compute divergence metrics (Jensen Shannon, Kullback-Liebler,
    Wasserstein Distance, Energy Distance) between predicted distribution A
    and true distribution B """

    # Get number of samples, IQR statistics, range
    samples = A.shape[0]
    iqr = np.percentile(A, 75)-np.percentile(A, 25)
    r = np.max(A) - np.min(A)

    # Get PDFs of predicted distribution A, true distribution B
    B = get_pdf(B, iqr, r, samples)
    A = get_pdf(A, iqr, r, samples)
    
    # Mean
    m = (np.array(A)+np.array(B))/2

    # Compute metrics
    kl = entropy(pk=A, qk=B).sum()/A.shape[1]
    js = .5*(entropy(pk=A, qk=m)+entropy(pk=B, qk=m)).sum()/A.shape[1]
    wd = sum([wasserstein_distance(A[:,i], B[:,i]) for i in range(A.shape[1])])
    ed = sum([energy_distance(A[:,i], B[:,i]) for i in range(A.shape[1])])

    divergences = {"KL-Divergence": kl,
                    "Jensen-Shannon": js,
                    "Wasserstein-Distance": wd,
                    "Energy-Distance": ed}

    return divergences


def get_pdf(data, iqr, r, samples):
    """ Compute optimally binned probability distribution function  """
    x = []

    if iqr > 1e-5:
        bin_width = 2*iqr/np.cbrt(samples)
        bins = int(round(r/bin_width, 0))
    else:
        # MNIST (since it's really only supposed to be either 0 or 1 as output)
        # TODO: bin number
        bins = 2

    # Bin data
    for i in range(data.shape[1]):
        x.append(list(np.histogram(data[:, i], bins=bins, density=True)[0]))
    
    res = np.array(x).T
    res[res == 0] = .00001
    return res

In [370]:
%%time
A = torch.rand(1024, 512)
B = torch.rand(1024, 512)
divs = compute_divergences(A.numpy(), B.numpy())

CPU times: user 212 ms, sys: 6.86 ms, total: 219 ms
Wall time: 244 ms


In [316]:
sorted_tensors = torch.sort(A, dim=1)[0]

In [318]:
torch.histc(sorted_tensors, bins=34)

tensor([15083., 15536., 15636., 15445., 15468., 15251., 15510., 15575., 15628.,
        15440., 15289., 15433., 15451., 15381., 15356., 15403., 15544., 15322.,
        15228., 15354., 15345., 15427., 15361., 15291., 15482., 15476., 15276.,
        15434., 15417., 15167., 15367., 15610., 15508., 15794.])

In [371]:
help(np.diff)

Help on function diff in module numpy.lib.function_base:

diff(a, n=1, axis=-1)
    Calculate the n-th discrete difference along the given axis.
    
    The first difference is given by ``out[n] = a[n+1] - a[n]`` along
    the given axis, higher differences are calculated by using `diff`
    recursively.
    
    Parameters
    ----------
    a : array_like
        Input array
    n : int, optional
        The number of times values are differenced. If zero, the input
        is returned as-is.
    axis : int, optional
        The axis along which the difference is taken, default is the
        last axis.
    
    Returns
    -------
    diff : ndarray
        The n-th differences. The shape of the output is the same as `a`
        except along `axis` where the dimension is smaller by `n`. The
        type of the output is the same as the type of the difference
        between any two elements of `a`. This is the same as the type of
        `a` in most cases. A notable exception is `d

In [298]:
maxes = torch.max(torch.sort(A, dim=1)[0], dim=1)[0]

In [231]:
import torch.nn.functional as F

In [369]:
F.kl_div(F.log_softmax(A, dim=1), F.softmax(B, dim=1), reduction='sum') / A.shape[0]

tensor(0.0817)

In [367]:
A.shape

torch.Size([1024, 512])

In [368]:
B.shape

torch.Size([1024, 512])

In [248]:
divs = compute_divergences(A.numpy(), B.numpy())

In [249]:
divs

{'KL-Divergence': 0.008894706257342695,
 'Jensen-Shannon': 0.0022205919229779,
 'Wasserstein-Distance': 18.00936629283771,
 'Energy-Distance': 51.749713484890535}

In [207]:
%%time
A = torch.rand(1024, 16)
B = torch.rand(1024, 16)
A, B  = compute_divergences(A.numpy(), B.numpy()) 

CPU times: user 8.8 ms, sys: 2.59 ms, total: 11.4 ms
Wall time: 8.93 ms


In [202]:
help(np.histogram)

Help on function histogram in module numpy.lib.function_base:

histogram(a, bins=10, range=None, normed=False, weights=None, density=None)
    Compute the histogram of a set of data.
    
    Parameters
    ----------
    a : array_like
        Input data. The histogram is computed over the flattened array.
    bins : int or sequence of scalars or str, optional
        If `bins` is an int, it defines the number of equal-width
        bins in the given range (10, by default). If `bins` is a
        sequence, it defines the bin edges, including the rightmost
        edge, allowing for non-uniform bin widths.
    
        .. versionadded:: 1.11.0
    
        If `bins` is a string from the list below, `histogram` will use
        the method chosen to calculate the optimal bin width and
        consequently the number of bins (see `Notes` for more detail on
        the estimators) from the data that falls within the requested
        range. While the bin width will be optimal for the actua