In [1]:
# add modules to Python's search path
import os
import sys
# Resolve the parent directory to an absolute path and register it only if
# it is absent, so re-running this cell never appends duplicates.
module_path = os.path.abspath(os.pardir)
if sys.path.count(module_path) == 0:
    sys.path.append(module_path)


import numpy as np
import matplotlib.pyplot as plt
from sklearn.neighbors import KernelDensity  # display as density curves

import  torch
from geomloss import SamplesLoss
import tensorflow as tf
from modules import tf_wasserstein as tfw



In [13]:
# Select the double-precision tensor type for the available hardware.
# N.B.: We use float64 numbers to get nice limits when blur -> +infinity
use_cuda = torch.cuda.is_available()
if use_cuda:
    dtype = torch.cuda.DoubleTensor
else:
    dtype = torch.DoubleTensor

# make a convenient wrapper for producing samples in form of a tensor
def torch_sampler(mean, cov, size, rng=None):
    """Draw multivariate-normal samples and return them as a torch tensor.

    Args:
        mean: 1-D array-like mean vector of the distribution.
        cov: 2-D array-like covariance matrix of the distribution.
        size: number of samples (or output shape) forwarded to NumPy.
        rng: optional ``np.random.Generator`` to draw from, which makes the
            samples reproducible. When ``None`` (the default) NumPy's global
            random state is used, exactly as before.

    Returns:
        A ``torch.Tensor`` created with ``torch.from_numpy`` from the sampled
        NumPy array (so it keeps NumPy's float64 dtype).
    """
    # np.random (module) and np.random.Generator expose the same
    # multivariate_normal(mean, cov, size) call signature.
    source = np.random if rng is None else rng
    samples = source.multivariate_normal(mean, cov, size)
    return torch.from_numpy(samples)

# Parameters of the two Gaussian test distributions: identical identity
# covariance, means offset by 0.1 in every coordinate.
dimension = 3
mean_1 = np.zeros(dimension)
mean_2 = mean_1 + 0.1
cov_1 = np.eye(dimension)
cov_2 = cov_1


# Samplers for the two test distributions. The parameters are looked up when
# the sampler is called, not when it is defined.
def sampler_1(size):
    return torch_sampler(mean_1, cov_1, size)


def sampler_2(size):
    return torch_sampler(mean_2, cov_2, size)


# Smoke test: show three draws from each sampler.
print("samples from distribution #1:\n{}".format(sampler_1(3)))
print("samples from distribution #2:\n{}".format(sampler_2(3)))

samples from distribution #1:
tensor([[ 0.8647,  0.4855,  0.8287],
        [ 1.1513,  0.9692, -0.1944],
        [ 0.9109, -0.7132,  0.3794]], dtype=torch.float64)
samples from distribution #2:
tensor([[ 0.5083, -0.9138,  0.3134],
        [ 2.7979,  0.3998,  1.3880],
        [ 1.2808, -0.9813,  0.0138]], dtype=torch.float64)


In [14]:
# Sinkhorn loss from geomloss with p=2, evaluated via the "online" backend.
# NOTE(review): geomloss documents the p=2 ground cost as |x - y|^2 / 2, so
# the square root printed below is presumably not W_2 itself -- confirm the
# intended normalisation.
loss = SamplesLoss("sinkhorn", p=2, blur=0.01, scaling=.99, backend="online")

# Equally sized point clouds, one from each test distribution.
num_samples_1 = num_samples_2 = 500
samples_1 = sampler_1(num_samples_1)
samples_2 = sampler_2(num_samples_2)

# Square root of the scalar loss value between the two sample sets.
print(np.sqrt(loss(samples_1, samples_2).item()))

0.3853263459986387


In [40]:
# make a convenient wrapper for producing samples in form of a tensor
def tf_sampler(mean, cov, size, rng=None):
    """Draw multivariate-normal samples and return them as a float32 TF tensor.

    Args:
        mean: 1-D array-like mean vector of the distribution.
        cov: 2-D array-like covariance matrix of the distribution.
        size: number of samples (or output shape) forwarded to NumPy.
        rng: optional ``np.random.Generator`` to draw from, which makes the
            samples reproducible. When ``None`` (the default) NumPy's global
            random state is used, exactly as before.

    Returns:
        The samples converted with ``tf.convert_to_tensor(..., dtype=tf.float32)``.
    """
    # np.random (module) and np.random.Generator expose the same
    # multivariate_normal(mean, cov, size) call signature.
    source = np.random if rng is None else rng
    samples = source.multivariate_normal(mean, cov, size)
    return tf.convert_to_tensor(samples, dtype=tf.float32)

# Parameters of the two Gaussian test distributions. Here the mean offset is
# 0.0, so both distributions have the same mean and identity covariance.
dimension = 3
mean_1 = np.zeros(dimension)
mean_2 = mean_1 + 0.0
cov_1 = np.eye(dimension)
cov_2 = cov_1


# Samplers for the two test distributions. The parameters are looked up when
# the sampler is called, not when it is defined.
def sampler_1(size):
    return tf_sampler(mean_1, cov_1, size)


def sampler_2(size):
    return tf_sampler(mean_2, cov_2, size)


# Smoke test: show three draws from each sampler.
print("samples from distribution #1:\n{}".format(sampler_1(3)))
print("samples from distribution #2:\n{}".format(sampler_2(3)))

samples from distribution #1:
[[-0.10625428 -1.1483074   0.7022027 ]
 [-1.4416115  -1.8117061   0.5113038 ]
 [ 0.02931112  0.3988494  -1.2213601 ]]
samples from distribution #2:
[[ 0.30590484  0.23010959 -2.0424216 ]
 [-0.6116984  -0.9098585  -0.5215559 ]
 [ 0.64031196  1.8631465   0.38402075]]


In [59]:
# Draw 500 samples from each sampler and subtract the per-coordinate sample
# mean (axis=0) from each set, so both point clouds are empirically centred.
# NOTE(review): this assumes sampler_1/sampler_2 are the TensorFlow samplers;
# the names are also bound to torch samplers elsewhere in this notebook, so
# the result depends on which cell was executed last.
num_samples_1 = 500
num_samples_2 = 500
samples_1 = sampler_1(num_samples_1)
samples_1 -= tf.reduce_mean(samples_1, axis=0)
samples_2 = sampler_2(num_samples_2)
samples_2 -= tf.reduce_mean(samples_2, axis=0)
# Sanity check: Euclidean distance between the two sample means, which should
# be ~0 (up to float rounding) after the centring above.
print(tf.sqrt(tf.reduce_sum((tf.reduce_mean(samples_1, axis=0) - tf.reduce_mean(samples_2, axis=0))**2)))
# NOTE(review): the positional arguments 0.01, num_samples_1 and 2000 are
# presumably the regularisation strength, the sample count and the iteration
# count -- confirm against modules.tf_wasserstein.sinkhorn_loss. Only
# num_samples_1 is passed, so equal sample sizes may be assumed -- verify.
loss = tfw.sinkhorn_loss(samples_1, samples_2, 0.01, num_samples_1, 2000, p=2)
# Square root of the returned loss value.
print(np.sqrt(loss))

tf.Tensor(3.3067206e-08, shape=(), dtype=float32)
0.45141235


<tf.Tensor: shape=(), dtype=float32, numpy=0.056634635>

In [54]:
# Subtract the per-coordinate mean (axis=0) from samples_1 again; this repeats
# the centring step applied where samples_1 is drawn.
samples_1 -= tf.reduce_mean(samples_1, axis=0)

In [58]:
# Display the per-coordinate mean of samples_1 (bare expression, so the
# notebook renders it as the cell output).
tf.reduce_mean(samples_1, axis=0)

<tf.Tensor: shape=(3,), dtype=float32, numpy=array([ 3.3378602e-09, -6.6757204e-09,  3.8146974e-09], dtype=float32)>