Correlation, Cosine similarity, and Euclidean distance using NumPy

In [None]:
import numpy as np

# Input vectors shared by the cells of this notebook
vector1 = np.array([1, 2, 3, 4, 5])
vector2 = np.array([-1, 77, 5, -9, 8])

# Pearson correlation: off-diagonal entry of the correlation matrix
correlation_matrix = np.corrcoef(vector1, vector2)
correlation = correlation_matrix[0, 1]

# Cosine similarity: dot product divided by the product of the vector norms.
# NOTE: the name `cosine_similarity` is rebound later in this notebook by
# `from sklearn.metrics.pairwise import cosine_similarity`.
norm_product = np.linalg.norm(vector1) * np.linalg.norm(vector2)
cosine_similarity = np.dot(vector1, vector2) / norm_product

# Euclidean distance: norm of the element-wise difference
difference = vector1 - vector2
euclidean_distance = np.linalg.norm(difference)


# Report the NumPy results, one "label value" line per metric
print('Calculated using "NumPy"')
for label, value in (
    ('\nCorrelation: ', correlation),
    ('Cosine similarity: ', cosine_similarity),
    ('Euclidean distance: ', euclidean_distance),
):
    print(label, value)

Calculated using "NumPy"

Correlation:  -0.3097315126985083
Cosine similarity:  0.29694921713297506
Euclidean distance:  76.2299153875957


Correlation, Cosine similarity, and Euclidean distance using SciPy

---



Using SciPy

In [None]:
import scipy.stats
from scipy.spatial.distance import cosine, euclidean

# Pearson correlation: first element of the pearsonr result
pearson_result = scipy.stats.pearsonr(vector1, vector2)
correlation_sp = pearson_result[0]

# SciPy's `cosine` is a distance, so similarity is 1 minus that distance
cosine_distance_sp = cosine(vector1, vector2)
cosine_similarity_sp = 1 - cosine_distance_sp

# Euclidean distance between the two vectors
euclidean_distance_sp = euclidean(vector1, vector2)


# Report the SciPy results, one "label value" line per metric
print('Calculated using "SciPy"')
for label, value in (
    ('\nCorrelation: ', correlation_sp),
    ('Cosine similarity: ', cosine_similarity_sp),
    ('Euclidean distance: ', euclidean_distance_sp),
):
    print(label, value)

Calculated using "SciPy"

Correlation:  -0.3097315126985083
Cosine similarity:  0.29694921713297506
Euclidean distance:  76.2299153875957


Correlation, Cosine similarity, and Euclidean distance using Pandas (cosine similarity via scikit-learn)

In [None]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
import math

# Wrap the input vectors (defined in the NumPy cell) in pandas Series
series1 = pd.Series(vector1)
series2 = pd.Series(vector2)


# Correlation between the two Series (Series.corr defaults to Pearson)
correlation_pd = series1.corr(series2)

# Cosine similarity via scikit-learn. Each Series' values are wrapped in a
# list to form a one-row 2-D input, and [0][0] picks the single entry of the
# resulting similarity matrix.
cosine_similarity_pd = cosine_similarity([series1.values], [series2.values])[0][0]


# Euclidean distance: square root of the summed squared differences.
# Series.sum() keeps the reduction vectorized; the built-in sum() would
# iterate over the Series one element at a time in Python.
squared_differences = (series1 - series2) ** 2
euclidean_distance_pd = math.sqrt(squared_differences.sum())

# Report the Pandas results
print('Calculated using "Pandas"')
print('\nCorrelation: ', correlation_pd)
print('Cosine similarity: ', cosine_similarity_pd)
print('Euclidean distance: ', euclidean_distance_pd)

Calculated using "Pandas"

Correlation:  -0.3097315126985083
Cosine similarity:  0.296949217132975
Euclidean distance:  76.2299153875957
