In [1]:
import numpy as np

In [2]:
def euclidean_distance(v1, v2):
    """Computes the Euclidean distance between two vectors

    Parameters
    ----------
    v1, v2 : array-like of real numbers
        The two vectors (NumPy arrays, lists, tuples, ...). Their shapes
        must be equal or broadcastable against each other.

    Returns
    -------
    numpy.float64
        The square root of the sum of squared element-wise differences.
    """
    # Convert to float up front: this lets plain Python sequences through and
    # keeps the subtraction and the squaring from wrapping around when the
    # inputs are fixed-width (especially unsigned) integer arrays.
    diff = np.asarray(v1, dtype=float) - np.asarray(v2, dtype=float)
    return np.sqrt(np.sum(diff ** 2))

In [3]:
# Two sample 3-D vectors used by the distance examples
vec1 = np.array([1, 2, 3])
vec2 = np.array([1, -1, 0])

# sqrt(0**2 + 3**2 + 3**2) = sqrt(18)
euclidean_distance(vec1, vec2)

4.242640687119285

In [4]:
def manhattan_distance(v1, v2):
    """Computes the Manhattan distance between two vectors

    Parameters
    ----------
    v1, v2 : array-like of numbers
        The two vectors (NumPy arrays, lists, tuples, ...). Their shapes
        must be equal or broadcastable against each other.

    Returns
    -------
    NumPy scalar
        The sum of the absolute element-wise differences. Integer inputs
        give an integer result, float inputs a float result.
    """
    a, b = np.asarray(v1), np.asarray(v2)
    # Unsigned subtraction wraps around (uint8: 1 - 2 == 255) and boolean
    # arrays do not support `-` at all, so promote those dtypes to a signed
    # type wide enough to hold them before taking the difference.
    if a.dtype.kind in "ub":
        a = a.astype(np.result_type(a.dtype, np.int8))
    if b.dtype.kind in "ub":
        b = b.astype(np.result_type(b.dtype, np.int8))
    return np.sum(np.abs(a - b))

In [5]:
# |1 - 1| + |2 - (-1)| + |3 - 0| = 6
manhattan_distance(vec1, vec2)

6

In [6]:
from numpy.linalg import norm

def angular_distance(v1, v2):
    """Computes the angular distance between two vectors

    The angular distance is the angle between the vectors divided by pi,
    so it ranges from 0 (same direction) through 0.5 (orthogonal) to
    1 (opposite directions).

    Parameters
    ----------
    v1, v2 : array-like of real numbers
        The two vectors, of equal length.

    Returns
    -------
    numpy.float64
        The normalised angle in [0, 1]. The result is NaN if either vector
        has zero norm, because the cosine is then 0/0.
    """
    v1, v2 = np.asarray(v1), np.asarray(v2)
    sim = np.dot(v1, v2) / (norm(v1) * norm(v2))
    # Rounding can leave the cosine a hair outside [-1, 1] (for example
    # [1, 1, 1] against itself), which would make arccos return NaN.
    sim = np.clip(sim, -1.0, 1.0)
    return np.arccos(sim) / np.pi

In [7]:
# Angle between vec1 and vec2 expressed as a fraction of pi
angular_distance(vec1, vec2)

0.5605188591618384

In [8]:
# Identical vectors point in the same direction, so their angular distance is 0
angular_distance(vec1, vec1)

0.0

In [9]:
# Insensitive to magnitude: scaling a vector leaves the angle unchanged, so two
# distinct vectors can be at distance 0. Because of this it is technically not a
# metric as defined by mathematicians, except when restricted to unit vectors.
angular_distance(vec1, 2 * vec1)

0.0

In [10]:
def hamming_distance(s1, s2):
    """Count the positions at which two equal-length sequences differ.

    Raises ValueError when the two sequences do not have the same length.
    """
    if len(s1) == len(s2):
        # One flag per position; summing the flags counts the mismatches.
        mismatches = [left != right for left, right in zip(s1, s2)]
        return sum(mismatches)
    raise ValueError("Undefined for sequences of unequal length")

In [11]:
# The strings differ at (0-based) positions 2 and 3
hamming_distance("11101", "11011")

2

In [18]:
def jaccard_distance(s1, s2):
    """Computes the Jaccard distance between two sets

    Parameters
    ----------
    s1, s2 : iterable of hashable items
        The two collections. Each is converted to a set first, so
        duplicates and ordering are ignored.

    Returns
    -------
    float
        The number of items found in exactly one of the sets divided by the
        number of items in their union: 0.0 for identical sets, 1.0 for
        disjoint non-empty sets. Two empty sets are treated as identical
        and give 0.0.
    """
    s1, s2 = set(s1), set(s2)
    union = s1 | s2
    if not union:
        # Both inputs are empty: the ratio would be 0/0, so return the
        # distance of identical sets instead of raising ZeroDivisionError.
        return 0.0
    # |union| - |intersection| is the size of the symmetric difference.
    return len(s1 ^ s2) / len(union)

In [19]:
# Only "cow" is shared among the 5 distinct animals: (5 - 1) / 5 = 0.8
jaccard_distance(["cow", "pig", "horse"], ["cow", "donkey", "chicken"])

{'pig', 'cow', 'horse'} {'donkey', 'chicken', 'cow'}


0.8

In [20]:
# Sets formed from the contents of these strings are identical.
jaccard_distance("11101", "11011")

{'0', '1'} {'0', '1'}


0.0