-
Notifications
You must be signed in to change notification settings - Fork 0
/
distance_metrics.py
112 lines (84 loc) · 2.89 KB
/
distance_metrics.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
# Meaning Representations from Trajectories in Autoregressive Models
# https://github.com/tianyu139/meaning-as-trajectories
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0
import numpy as np
from scipy.stats import entropy
class BaseDistanceMetric:
    """Common interface for distances between two vectors of log probabilities.

    The base class holds no state; concrete metrics override ``compute``.
    """

    def __init__(self):
        """Create a metric; the base class has nothing to configure."""

    def compute(self, a, b):
        """Return the distance between ``a`` and ``b``.

        Both arguments are log probabilities. This base implementation is a
        placeholder and always raises ``NotImplementedError``.
        """
        raise NotImplementedError()
class L1Distance(BaseDistanceMetric):
    """Distance given by ``np.linalg.norm(a - b, ord=1)``.

    For 1-D inputs this is the sum of absolute element-wise differences.
    """

    def __str__(self):
        return "l1"

    def compute(self, a, b):
        """Return the order-1 norm of the difference between ``a`` and ``b``."""
        difference = np.asarray(a) - np.asarray(b)
        return np.linalg.norm(difference, ord=1)
class L2Distance(BaseDistanceMetric):
    """Distance given by ``np.linalg.norm(a - b, ord=2)``.

    For 1-D inputs this is the Euclidean distance between the two vectors.
    """

    def __str__(self):
        return "l2"

    def compute(self, a, b):
        """Return the order-2 norm of the difference between ``a`` and ``b``."""
        difference = np.asarray(a) - np.asarray(b)
        return np.linalg.norm(difference, ord=2)
class LInfDistance(BaseDistanceMetric):
    """Distance given by ``np.linalg.norm(a - b, ord=np.inf)``.

    For 1-D inputs this is the largest absolute element-wise difference.
    """

    def __str__(self):
        return "linf"

    def compute(self, a, b):
        """Return the infinity norm of the difference between ``a`` and ``b``."""
        difference = np.asarray(a) - np.asarray(b)
        return np.linalg.norm(difference, ord=np.inf)
class SymmetricKLDivergence(BaseDistanceMetric):
    """Symmetrised KL divergence, ``KL(p || q) + KL(q || p)``.

    The inputs are log probabilities. Each is multiplied by ``temp``,
    exponentiated and renormalised to sum to one before the two divergences
    are evaluated with ``scipy.stats.entropy``.
    """

    def __init__(self, temp):
        # Multiplier applied to the log probabilities before exponentiation.
        self.temp = temp

    def _to_probs(self, log_probs):
        """Return ``exp(temp * log_probs)`` normalised to sum to one."""
        scaled = np.asarray(log_probs) * self.temp
        if scaled.size:
            # Shifting by the maximum does not change the normalised result,
            # but it keeps exp() from underflowing to all zeros (0 / 0 -> NaN)
            # or overflowing to inf when the scaled values are large.
            scaled = scaled - np.max(scaled)
        weights = np.exp(scaled)
        return weights / np.sum(weights)

    def compute(self, a, b):
        """Return the symmetric KL divergence between the two distributions.

        Args:
            a: Log probabilities of the first distribution (expected 1-D).
            b: Log probabilities of the second distribution, same length.

        Returns:
            ``entropy(p, qk=q) + entropy(q, qk=p)`` where ``p`` and ``q`` are
            the temperature-scaled, normalised distributions.
        """
        p = self._to_probs(a)
        q = self._to_probs(b)
        return entropy(p, qk=q) + entropy(q, qk=p)

    def __str__(self):
        return f"symkl-temp={self.temp}"
class TotalVariationDistance(BaseDistanceMetric):
    """Total variation distance between two discrete distributions.

    The inputs are log probabilities. Each is multiplied by ``temp``,
    exponentiated and renormalised to sum to one; the distance is then
    ``0.5 * sum(|p - q|)``.
    """

    def __init__(self, temp):
        # Multiplier applied to the log probabilities before exponentiation.
        self.temp = temp

    def _to_probs(self, log_probs):
        """Return ``exp(temp * log_probs)`` normalised to sum to one."""
        scaled = np.asarray(log_probs) * self.temp
        if scaled.size:
            # Shifting by the maximum does not change the normalised result,
            # but it keeps exp() from underflowing to all zeros (0 / 0 -> NaN)
            # or overflowing to inf when the scaled values are large.
            scaled = scaled - np.max(scaled)
        weights = np.exp(scaled)
        return weights / np.sum(weights)

    def compute(self, a, b):
        """Return the total variation distance, a value in ``[0, 1]``.

        Args:
            a: Log probabilities of the first distribution (expected 1-D).
            b: Log probabilities of the second distribution, same length.

        Returns:
            Half the L1 distance between the two normalised distributions.
        """
        p = self._to_probs(a)
        q = self._to_probs(b)
        # Total variation is half the L1 distance. The largest
        # single-coordinate gap (ord=np.inf) is only a lower bound on it and
        # coincides with it only in special cases such as two outcomes.
        return 0.5 * np.linalg.norm(p - q, ord=1)

    def __str__(self):
        return f"tv-temp={self.temp}"
class HellingerDistance(BaseDistanceMetric):
    """Hellinger distance between two discrete distributions.

    The inputs are log probabilities. Each is multiplied by ``temp``,
    exponentiated and renormalised to sum to one; the distance is then
    ``||sqrt(p) - sqrt(q)||_2 / sqrt(2)``.
    """

    def __init__(self, temp):
        # Multiplier applied to the log probabilities before exponentiation.
        self.temp = temp

    def _to_probs(self, log_probs):
        """Return ``exp(temp * log_probs)`` normalised to sum to one."""
        scaled = np.asarray(log_probs) * self.temp
        if scaled.size:
            # Shifting by the maximum does not change the normalised result,
            # but it keeps exp() from underflowing to all zeros (0 / 0 -> NaN)
            # or overflowing to inf when the scaled values are large.
            scaled = scaled - np.max(scaled)
        weights = np.exp(scaled)
        return weights / np.sum(weights)

    def compute(self, a, b):
        """Return the Hellinger distance, a value in ``[0, 1]``.

        Args:
            a: Log probabilities of the first distribution (expected 1-D).
            b: Log probabilities of the second distribution, same length.

        Returns:
            The Euclidean distance between the element-wise square roots of
            the two normalised distributions, divided by ``sqrt(2)``.
        """
        root_p = np.sqrt(self._to_probs(a))
        root_q = np.sqrt(self._to_probs(b))
        return np.linalg.norm(root_p - root_q, ord=2) / np.sqrt(2)

    def __str__(self):
        return f"hel-temp={self.temp}"
class CosineSimilarity(BaseDistanceMetric):
    """One minus the cosine similarity of the raw input vectors.

    Note: not really a metric.
    """

    def __str__(self):
        return "cossim"

    def compute(self, a, b):
        """Return ``1 - dot(a, b) / max(1e-6, |a| * |b|)``."""
        floor = 1e-6  # lower bound on the denominator, avoids dividing by zero
        u = np.asarray(a)
        v = np.asarray(b)
        norm_product = np.linalg.norm(u) * np.linalg.norm(v)
        denominator = np.max([floor, norm_product])
        return 1 - np.dot(u, v) / denominator
class CosineSimilarityProb(BaseDistanceMetric):
    """One minus the cosine similarity of two probability vectors.

    The inputs are log probabilities. Each is multiplied by ``temp``,
    exponentiated and renormalised to sum to one before the cosine
    similarity is taken.

    Note: not really a metric.
    """

    def __init__(self, temp):
        # Multiplier applied to the log probabilities before exponentiation.
        self.temp = temp

    def _to_probs(self, log_probs):
        """Return ``exp(temp * log_probs)`` normalised to sum to one."""
        scaled = np.asarray(log_probs) * self.temp
        if scaled.size:
            # Shifting by the maximum does not change the normalised result,
            # but it keeps exp() from underflowing to all zeros (0 / 0 -> NaN)
            # or overflowing to inf when the scaled values are large.
            scaled = scaled - np.max(scaled)
        weights = np.exp(scaled)
        return weights / np.sum(weights)

    def compute(self, a, b):
        """Return ``1 - dot(p, q) / max(1e-6, |p| * |q|)``.

        Args:
            a: Log probabilities of the first distribution (expected 1-D).
            b: Log probabilities of the second distribution, same length.

        Returns:
            One minus the cosine similarity of the normalised distributions.
        """
        EPS = 1e-6  # lower bound on the denominator, avoids dividing by zero
        p = self._to_probs(a)
        q = self._to_probs(b)
        denominator = np.max([EPS, np.linalg.norm(p) * np.linalg.norm(q)])
        return -(np.dot(p, q) / denominator) + 1

    def __str__(self):
        return f"cossim-temp={self.temp}"