# Euclidean Distance or L2 Norm

In [64]:
import scipy.spatial.distance as dist
from scipy.spatial.distance import euclidean
import numpy as np
import pandas as pd

In [50]:
# Prepare 2 vectors (data points) of 10 dimensions, each coordinate drawn
# uniformly from [0, 10).
# A seeded generator keeps A and B -- and every distance computed from them --
# identical across kernel restarts.
SEED = 42
rng = np.random.default_rng(SEED)
A = rng.uniform(0, 10, 10)
B = rng.uniform(0, 10, 10)


In [51]:
# An element-wise distance only makes sense when both vectors have the same shape.
assert np.shape(A) == np.shape(B), "A and B must have matching shapes"
# A tuple as the cell's last expression displays both shapes; two separate
# expression statements would only display the second one.
np.shape(A), np.shape(B)

(10,)

### Perform distance measurements
#### Using the SciPy library's dist.euclidean() function:

In [53]:
# Show a heading followed by the two input vectors, one item per line.
print('\n2 10-dimensional vectors', A, B, sep='\n')
# Perform distance measurements
print('\nDistance measurements with 10-dimensional vectors')
# `dist` here is the scipy.spatial.distance module alias.
print('\nEuclidean distance is', dist.euclidean(A, B))


2 10-dimensional vectors
[9.03301696 6.82472646 0.51051703 2.02789351 5.12946942 5.56112433
 1.61366682 1.00051017 7.58755653 1.91222797]
[9.19400108 4.19505241 2.42095315 3.50148604 1.11545913 4.19427289
 3.18805983 4.0287522  2.82612275 5.78247521]

Distance measurements with 10-dimensional vectors

Euclidean distance is 8.946615700232226


### Custom function for Euclidean distance:

In [54]:
from math import sqrt

In [55]:
# calculate euclidean distance

In [60]:
def euclidean_distance(a, b):
    """Return the Euclidean (L2) distance between two sequences of numbers.

    Coordinates are paired positionally with ``zip``, so when the inputs
    differ in length the trailing values of the longer one are ignored.
    """
    # Squared difference per coordinate pair.
    squared_diffs = [(p - q) ** 2 for p, q in zip(a, b)]
    # Euclidean formula: square root of the summed squared differences.
    return sqrt(sum(squared_diffs))

In [59]:
# Two sample 5-dimensional points.
e1 = [10, 20, 15, 10, 5]
e2 = [12, 24, 18, 8, 7]
# Keep the result under its own name: rebinding `dist` would replace the
# scipy.spatial.distance module alias imported at the top of the notebook,
# and any later re-run of a cell calling dist.euclidean() would fail.
custom_euclidean = euclidean_distance(e1, e2)
print(custom_euclidean)

6.082762530298219


In [63]:
row1 = [10, 20, 15, 10, 5]
row2 = [12, 24, 18, 8, 7]
# calculate distance with SciPy's euclidean()
# The result gets its own name: rebinding `dist` would replace the
# scipy.spatial.distance module alias that other cells call as dist.euclidean().
scipy_euclidean = euclidean(row1, row2)
print(scipy_euclidean)

6.082762530298219


# Manhattan Distance

In [65]:
from math import sqrt

In [67]:
def manhattan_distance(a, b):
    """Return the Manhattan (L1) distance between two sequences of numbers.

    Coordinates are paired positionally with ``zip``, so when the inputs
    differ in length the trailing values of the longer one are ignored.
    """
    # Absolute difference per coordinate pair.
    gaps = [abs(p - q) for p, q in zip(a, b)]
    return sum(gaps)

In [69]:
# define data
e1 = [10, 20, 15, 10, 5]
e2 = [12, 24, 18, 8, 7]
dist = manhattan_distance(e1, e2)
print(dist)

13


### Using SciPy's cityblock() function

In [70]:
from scipy.spatial.distance import cityblock

In [73]:
# define data
e1 = [10, 20, 15, 10, 5]
e2 = [12, 24, 18, 8, 7]
dist = cityblock(e1, e2)
print(dist)

13


# Minkowski distance

In [74]:
from math import sqrt

In [79]:
# calculate minkowski distance
def minkowski_distance(a, b, p):
    """Return the Minkowski distance of order ``p`` between two sequences.

    ``p=1`` gives the Manhattan distance, ``p=2`` the Euclidean distance and
    ``p=float('inf')`` the Chebyshev distance (largest absolute coordinate
    difference).

    Coordinates are paired positionally with ``zip``, so when the inputs
    differ in length the trailing values of the longer one are ignored.

    Raises:
        ZeroDivisionError: if ``p`` is 0, because the result is raised to ``1/p``.
    """
    abs_diffs = [abs(e1 - e2) for e1, e2 in zip(a, b)]
    if p == float('inf'):
        # With p = inf the exponent 1/p is 0.0, so the general formula would
        # return 1.0 for any input; the limit is the largest absolute difference.
        return float(max(abs_diffs, default=0))
    return sum(d ** p for d in abs_diffs) ** (1 / p)
# define data
e1 = [10, 20, 15, 10, 5]
e2 = [12, 24, 18, 8, 7]
# Results are kept under their own names: rebinding `dist` would replace the
# scipy.spatial.distance module alias imported at the top of the notebook.
# calculate distance (p=1)
minkowski_p1 = minkowski_distance(e1, e2, 1)
print(minkowski_p1)
# calculate distance (p=2)
minkowski_p2 = minkowski_distance(e1, e2, 2)
print(minkowski_p2)

13.0
6.082762530298219


### using the minkowski_distance() function

In [81]:
# define data
e1 = [10, 20, 15, 10, 5]
e2 = [12, 24, 18, 8, 7]
# calculate distance (p=1)
dist = minkowski_distance(e1, e2, 1)
print(dist)
# calculate distance (p=2)
dist = minkowski_distance(e1, e2, 2)
print(dist)

13.0
6.082762530298219


# Cosine Similarity

In [85]:
import numpy as np

In [99]:
def cosine_similarity(x, y):
    """Return the cosine of the angle between two 1-D vectors.

    Args:
        x, y: array-likes of numbers (NumPy arrays, lists, tuples, ...).

    Returns:
        ``dot(x, y) / (||x|| * ||y||)``; ``None`` when the inputs differ in
        length; NaN when either vector has zero magnitude, because the cosine
        is undefined there.
    """
    if len(x) != len(y):
        return None
    # Convert up front so plain Python sequences work too (`list ** 2` raises).
    x_vec = np.asarray(x, dtype=float)
    y_vec = np.asarray(y, dtype=float)
    norm_product = np.sqrt(np.sum(x_vec ** 2)) * np.sqrt(np.sum(y_vec ** 2))
    if norm_product == 0:
        # Return NaN explicitly rather than via a 0/0 division and its RuntimeWarning.
        return float('nan')
    return np.dot(x_vec, y_vec) / norm_product

In [100]:
# Toy corpus: three short documents that are compared pairwise below.
corpus = [
    'data science is one of the most important fields of science',
    'this is one of the best data science courses',
    'data scientists analyze data',
]

In [102]:
from sklearn.feature_extraction.text import CountVectorizer

In [103]:
# Vectorize the corpus with CountVectorizer and convert the result to a dense array.
X = (
    CountVectorizer()
    .fit_transform(corpus)
    .toarray()
)

In [105]:
# Cosine similarity calculate karein
cos_sim_1_2 = cosine_similarity(X[0, :], X[1, :])
cos_sim_1_3 = cosine_similarity(X[0, :], X[2, :])
cos_sim_2_3 = cosine_similarity(X[1, :], X[2, :])
print('Cosine Similarity between: ')
print('\tDocument 1 and Document 2: ', cos_sim_1_2)
print('\tDocument 1 and Document 3: ', cos_sim_1_3)
print('\tDocument 2 and Document 3: ', cos_sim_2_3)

Cosine Similarity between: 
	Document 1 and Document 2:  0.6885303726590962
	Document 1 and Document 3:  0.21081851067789195
	Document 2 and Document 3:  0.2721655269759087


In [107]:
import numpy as np
def jaccard_binary(x, y):
    """Return the Jaccard similarity of two equally shaped binary vectors.

    The similarity is ``|x AND y| / |x OR y|``, where elements are interpreted
    by truthiness (non-zero means "present").

    When neither vector has any set element the ratio is 0/0; the vectors are
    then treated as identical and 1.0 is returned. This matches SciPy's
    convention of a Jaccard *distance* of 0 for that case.
    """
    intersection = np.logical_and(x, y)
    union = np.logical_or(x, y)
    union_size = union.sum()
    if union_size == 0:
        # Avoid the 0/0 division (NaN plus a RuntimeWarning) for two empty sets.
        return 1.0
    similarity = intersection.sum() / float(union_size)
    return similarity

In [116]:
# Define some binary vectors
x = [0,1,0,0,0,1,0,0,1]
y = [0,0,1,0,0,0,0,0,1]
z = [1,1,0,0,0,1,0,0,0]
simxy = jaccard_binary(x,y)
simxz = jaccard_binary(x,z)
simyz = jaccard_binary(y,z)
print(' Similarity between x and y is', simxy, '\n Similarity between x and z is ', simxz, '\nSimilarity between x and z is ', simyz)

 Similarity between x and y is 0.25 
 Similarity between x and z is  0.5 
Similarity between x and z is  0.0
