In [30]:
# add modules to Python's search path
import os
import sys
module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)


import numpy as np
import matplotlib.pyplot as plt
from sklearn.neighbors import KernelDensity  # display as density curves

import torch
from geomloss import SamplesLoss
import tensorflow as tf
from modules import tf_wasserstein as tfw

In [45]:
use_cuda = torch.cuda.is_available()
# N.B.: We use float64 numbers to get nice limits when blur -> +infinity
dtype    = torch.cuda.DoubleTensor if use_cuda else torch.DoubleTensor

# make a convenient wrapper for producing samples in form of a tensor
def torch_sampler(mean, cov, size):
    samples = np.random.multivariate_normal(mean, cov, size)
    return torch.from_numpy(samples)

# set up parameters for our two test distributions
dimension = 3
mean_1 = np.zeros(dimension)
mean_2 = mean_1 + 100.0 * np.ones(dimension)
cov_1 = np.identity(dimension)
cov_2 = cov_1

# finally create the samplers our test distributions
sampler_1 = lambda size: torch_sampler(mean_1, cov_1, size)
sampler_2 = lambda size: torch_sampler(mean_2, cov_2, size)

# test our samplers
print("samples from distribution #1:\n{}".format(sampler_1(3)))
print("samples from distribution #2:\n{}".format(sampler_2(3)))

samples from distribution #1:
tensor([[-0.1216, -0.8382,  0.1725],
        [ 0.7638, -1.3258, -1.3509],
        [-0.1109, -2.0803, -0.8771]], dtype=torch.float64)
samples from distribution #2:
tensor([[100.2360,  98.5904, 100.1034],
        [ 99.2627, 102.6004,  99.8691],
        [102.9425,  98.5901, 100.6022]], dtype=torch.float64)


In [46]:
num_samples_1 = 100
num_samples_2 = 100
samples_1 = sampler_1(num_samples_1)
samples_2 = sampler_2(num_samples_2)
loss = SamplesLoss("sinkhorn", p=2, blur=0.01, scaling=.99, backend="online")
print(np.sqrt(loss(samples_1, samples_2).item()))

122.54486069798962


In [49]:
# make a convenient wrapper for producing samples in form of a tensor
def tf_sampler(mean, cov, size):
    samples = np.random.multivariate_normal(mean, cov, size)
    return tf.convert_to_tensor(samples, dtype=tf.float32)

# set up parameters for our two test distributions
dimension = 4
mean_1 = np.zeros(dimension)
mean_2 = mean_1 + 10.0 * np.ones(dimension)
cov_1 = np.identity(dimension)
cov_2 = cov_1

# finally create the samplers our test distributions
sampler_1 = lambda size: tf_sampler(mean_1, cov_1, size)
sampler_2 = lambda size: tf_sampler(mean_2, cov_2, size)

# test our samplers
print("samples from distribution #1:\n{}".format(sampler_1(3)))
print("samples from distribution #2:\n{}".format(sampler_2(3)))

samples from distribution #1:
[[-0.8672439  -1.5351243   0.0637885  -0.2652631 ]
 [ 0.36599773  0.3301862   1.4789115   0.5465439 ]
 [ 0.18409443  3.2592974   0.869551   -0.11191697]]
samples from distribution #2:
[[10.203678  11.819411  11.632811  11.406468 ]
 [10.654344  10.208561  11.368092   7.2492557]
 [ 9.861263  10.898265   9.832393  10.872247 ]]


In [58]:
num_samples_1 = 1000
num_samples_2 = 1000
samples_1 = sampler_1(num_samples_1)
samples_2 = sampler_2(num_samples_2)
loss = tfw.sinkhorn_loss(samples_1, samples_2, 0.01, num_samples_1, 50, p=2)
print(np.sqrt(loss))

17.537973
