-
Notifications
You must be signed in to change notification settings - Fork 2
/
sinkhorn.py
76 lines (57 loc) · 2.43 KB
/
sinkhorn.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
import torch
import torch.nn as nn
use_cuda = torch.cuda.is_available()
dtype = torch.cuda.FloatTensor if use_cuda else torch.FloatTensor
dtypeint = torch.cuda.LongTensor if use_cuda else torch.LongTensor
# Adapted from https://github.com/gpeyre/SinkhornAutoDiff
# and from https://github.com/dfdazac/wassdistance/blob/master/layers.py
class SinkhornDistance(nn.Module):
def __init__(self, eps, max_iter, reduction='none'):
super(SinkhornDistance, self).__init__()
self.eps = eps
self.max_iter = max_iter
self.reduction = reduction
def forward(self, c):
C = -c
x_points = C.shape[-2]
y_points = C.shape[-1]
batch_size = C.shape[0]
# both marginals are fixed with equal weights
mu = torch.empty(batch_size, x_points, dtype=torch.float,
requires_grad=False, device=C.device).fill_(1.0 / x_points).squeeze()
nu = torch.empty(batch_size, y_points, dtype=torch.float,
requires_grad=False, device=C.device).fill_(1.0 / y_points).squeeze()
if mu.dim() < 2:
mu = mu.view(-1, 1)
if nu.dim() < 2:
nu = nu.view(-1, 1)
u = torch.zeros_like(mu)
v = torch.zeros_like(nu)
# Stopping criterion
thresh = 1e-12
# Sinkhorn iterations
for i in range(self.max_iter):
if i % 2 == 0:
u1 = u # useful to check the update
u = self.eps * (torch.log(mu) - torch.logsumexp(self.M(C, u, v), dim=-1)) + u
err = (u - u1).abs().sum(-1).mean()
else:
v = self.eps * (torch.log(nu) - torch.logsumexp(self.M(C, u, v).transpose(-2, -1), dim=-1)) + v
v = v.detach().requires_grad_(False)
v[v > 9 * 1e8] = 0.0
v = v.detach().requires_grad_(True)
if err.item() < thresh:
break
U, V = u, v
# Transport plan pi = diag(a)*K*diag(b)
pi = torch.exp(self.M(C, U, V))
# Sinkhorn distance
return pi, C, U, V
def M(self, C, u, v):
"Modified cost for logarithmic updates"
"$M_{ij} = (-c_{ij} + u_i + v_j) / \epsilon$"
return (-C + u.unsqueeze(-1) + v.unsqueeze(-2)) / self.eps
@staticmethod
def ave(u, u1, tau):
"Barycenter subroutine, used by kinetic acceleration through extrapolation."
return tau * u + (1 - tau) * u1