In [5]:
import math
import numpy as np

In [6]:
def euclidean_distance(p, q):
  """Returns the straight-line (L2) distance between two points.

  Coordinates are paired positionally with `zip`, so if the inputs differ
  in length the surplus coordinates of the longer one are ignored.

  Args:
    p: Iterable of numeric coordinates for the first point.
    q: Iterable of numeric coordinates for the second point.

  Returns:
    The square root of the summed squared coordinate differences, as a float.
  """
  squared_gaps = [(a - b) ** 2 for a, b in zip(p, q)]
  return math.sqrt(sum(squared_gaps))

def manhattan_distance(p, q):
  """Returns the city-block (L1) distance between two points.

  Coordinates are paired positionally with `zip`, so if the inputs differ
  in length the surplus coordinates of the longer one are ignored.

  Args:
    p: Iterable of numeric coordinates for the first point.
    q: Iterable of numeric coordinates for the second point.

  Returns:
    The sum of the absolute coordinate differences.
  """
  absolute_gaps = [abs(a - b) for a, b in zip(p, q)]
  return sum(absolute_gaps)

def minkowski_distance(p, q, p_norm=2):
  """Calculates the Minkowski distance between two data points.

  The distance is (sum_i |p_i - q_i| ** p_norm) ** (1 / p_norm). With
  p_norm=1 this is the Manhattan distance and with p_norm=2 the Euclidean
  distance.

  Coordinates are paired positionally with `zip`, so if the inputs differ
  in length the surplus coordinates of the longer one are ignored.

  Args:
    p: A list of coordinates for the first data point.
    q: A list of coordinates for the second data point.
    p_norm: The order of the Minkowski distance. The default value is 2,
      which corresponds to the Euclidean distance. Must be non-zero.

  Returns:
    The Minkowski distance between the two data points, as a float.

  Raises:
    ZeroDivisionError: If p_norm is 0.
  """
  powered_sum = sum(abs(pi - qi) ** p_norm for pi, qi in zip(p, q))
  # The parentheses are essential: `x ** 1.0 / p_norm` parses as
  # `(x ** 1.0) / p_norm`, which divides by the order instead of taking
  # the p_norm-th root.
  return powered_sum ** (1.0 / p_norm)

In [8]:
def calculate_distance_matrix(data, metric="euclidean", p_norm=2):
  """Calculates the pairwise distance matrix between all rows of data.

  Args:
    data: A 2-D NumPy array (or array-like) with one data point per row.
    metric: The distance metric to use. Valid options are "euclidean",
      "manhattan", or "minkowski". Defaults to "euclidean".
    p_norm: The order of the Minkowski distance; only used when metric is
      "minkowski". Defaults to 2.

  Returns:
    A float NumPy array of shape (n_rows, n_rows) whose entry [i, j] is the
    distance between data[i] and data[j].

  Raises:
    ValueError: If metric is not one of the valid options, or if data is
      not two-dimensional.
  """
  if metric not in ("euclidean", "manhattan", "minkowski"):
    raise ValueError(
        'metric must be "euclidean", "manhattan", or "minkowski"; '
        "got {!r}".format(metric))

  points = np.asarray(data, dtype=float)
  if points.ndim != 2:
    raise ValueError("data must be a 2-D array with one data point per row")

  # Broadcasting (n, 1, d) against (1, n, d) yields every pairwise
  # coordinate difference at once. This needs O(n^2 * d) temporary memory.
  gaps = np.abs(points[:, np.newaxis, :] - points[np.newaxis, :, :])

  if metric == "euclidean":
    return np.sqrt((gaps ** 2).sum(axis=-1))
  if metric == "manhattan":
    return gaps.sum(axis=-1)
  # metric == "minkowski"
  return (gaps ** p_norm).sum(axis=-1) ** (1.0 / p_norm)

# Example usage: pairwise distances between three 2-D points.

data = np.array([[1, 2], [3, 4], [5, 6]])  # one row per data point
distance_matrix = calculate_distance_matrix(data)

# Entry [i, j] holds the distance between data[i] and data[j].
print(distance_matrix)

[[0.         2.82842712 5.65685425]
 [2.82842712 0.         2.82842712]
 [5.65685425 2.82842712 0.        ]]


In [9]:
# Jaccard similarity (the corresponding Jaccard distance is 1 - similarity)

def jaccard_similarity(set1, set2):
    """Calculates the Jaccard similarity |A & B| / |A | B| of two sets.

    Args:
        set1: The first set.
        set2: The second set.

    Returns:
        A float between 0.0 (no shared elements) and 1.0 (identical sets).
        Two empty sets are treated as identical and give 1.0.
    """
    # Size of the union of the two sets
    union = len(set1.union(set2))
    if union == 0:
        # Both sets are empty: the ratio would be 0 / 0, so return the
        # conventional value for identical sets instead of raising.
        return 1.0
    # Size of the intersection of the two sets
    intersection = len(set1.intersection(set2))

    return intersection / union
 
# Example: the repeated "Geeks" in set_a collapses to one element (sets
# de-duplicate), and "DSc" / "DSc." are distinct strings, so the two sets
# share only "for" and "Geeks": 2 common elements out of 8 in the union.
set_a = {"Geeks", "for", "Geeks", "NLP", "DSc"}
set_b = {"Geek", "for", "Geeks", "DSc.", 'ML', "DSA"}
 
similarity = jaccard_similarity(set_a, set_b)
print("Jaccard Similarity:", similarity)

Jaccard Similarity: 0.25
