Name: Temirbekova Dilnaz

Task: Tanimoto score.<br> 
"Find out what a Tanimoto similarity score is. In what cases
could this be used as the similarity metric instead of Euclidean distance or
Pearson coefficient? Create a new similarity function using the Tanimoto score."

In [1]:
import math
import numpy as np
import hashlib
from scipy.stats import pearsonr 

In [2]:
class Similarity:
    """Similarity and distance measures between two numeric vectors.

    Args:
        minimum: Lower bound applied to the (squared) Euclidean distance;
            stored as ``self.e``. Results below this value are replaced by it.
    """

    def __init__(self, minimum):
        self.e = minimum
        self.vector_operators = VectorOperations()

    def square_euclidean_distance(self, p_vec, q_vec):
        """Return the squared Euclidean distance, floored at ``self.e``.

        Inputs are converted with ``np.asarray`` so plain Python sequences
        are accepted in addition to numpy arrays.
        """
        diff = np.asarray(p_vec) - np.asarray(q_vec)
        return max(np.sum(diff**2), self.e)

    def euclidean_distance(self, p_vec, q_vec):
        """Return the Euclidean distance, floored at ``self.e``."""
        return max(math.sqrt(self.square_euclidean_distance(p_vec, q_vec)), self.e)

    def tanimoto_score(self, p_vec, q_vec):
        """Return the Tanimoto similarity ``p.q / (|p|^2 + |q|^2 - p.q)``.

        Args:
            p_vec: 1-D sequence of numbers.
            q_vec: 1-D sequence of numbers with the same length as ``p_vec``.

        Returns:
            The Tanimoto score. Two all-zero (or empty) vectors are treated
            as identical and score ``1.0``.

        Raises:
            ValueError: If the two vectors do not have the same shape.
        """
        p = np.asarray(p_vec, dtype=float)
        q = np.asarray(q_vec, dtype=float)
        # Explicit check instead of ``assert`` so it survives ``python -O``.
        if p.shape != q.shape:
            raise ValueError(
                f"vectors must have the same shape, got {p.shape} and {q.shape}"
            )

        pq = np.dot(p, q)
        denominator = np.dot(p, p) + np.dot(q, q) - pq
        # |p|^2 + |q|^2 - p.q >= (|p|^2 + |q|^2) / 2, so the denominator is
        # zero only when both vectors are all-zero (or empty).
        if denominator == 0:
            return 1.0
        return pq / denominator

    def pearson_coefficient(self, p_vec, q_vec):
        """Return the Pearson correlation coefficient computed by ``pearsonr``."""
        corr, _ = pearsonr(p_vec, q_vec)
        return corr

    @staticmethod
    def get_key(p_vec, q_vec):
        """Return a deterministic key for the ordered pair of vectors.

        The key is the SHA-1 hex digest of each vector's raw bytes,
        concatenated in argument order (80 hex characters in total).
        """
        # ``hexdigest()`` is required: ``str()`` of a hash object is only its
        # repr (including a memory address), not the digest of the data.
        return "".join(
            hashlib.sha1(np.asarray(vec).tobytes()).hexdigest()
            for vec in (p_vec, q_vec)
        )
class VectorOperations:
    """Stateless helpers that apply basic arithmetic to vector operands."""

    @staticmethod
    def product(p_vec, q_vec):
        """Return ``p_vec * q_vec`` (element-wise for numpy arrays)."""
        multiplied = p_vec * q_vec
        return multiplied

    @staticmethod
    def square(p_vec):
        """Return ``p_vec`` raised to the power of two."""
        squared = p_vec**2
        return squared

    @staticmethod
    def norm(p_vec):
        """Return ``np.sqrt(p_vec)``.

        NOTE(review): despite the name this is an element-wise square root,
        not a vector norm; presumably it is meant to be applied to an
        already-summed square -- confirm with the author.
        """
        root = np.sqrt(p_vec)
        return root

In [3]:
# Two sample vectors of 10 evenly spaced points over [-pi, pi]; v2 runs in the
# opposite direction to v1.
v1 = np.linspace(-np.pi, np.pi, 10)
v2 = np.linspace(np.pi, -np.pi, 10)
print(v1, v2, sep="\n")

[-3.14159265 -2.44346095 -1.74532925 -1.04719755 -0.34906585  0.34906585
  1.04719755  1.74532925  2.44346095  3.14159265]
[ 3.14159265  2.44346095  1.74532925  1.04719755  0.34906585 -0.34906585
 -1.04719755 -1.74532925 -2.44346095 -3.14159265]


In [4]:
# Print the three measures for the sample vectors, one per line, in the order:
# Euclidean distance, Tanimoto score, Pearson coefficient.
sim = Similarity(-4)
for metric in (sim.euclidean_distance, sim.tanimoto_score, sim.pearson_coefficient):
    print(metric(v1, v2))

12.682192146761622
-0.33333333333333337
-1.0
