<a href="https://colab.research.google.com/github/shrutikpawale18/Similarity_Measure/blob/main/SimilarityMeasure.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [12]:
text_1 = f"The easiest way to earn points with Fetch Rewards is to just shop for the products you already love. If you have any participating brands on your receipt, you'll get points based on the cost of the products. You don't need to clip any coupons or scan individual barcodes. Just scan each grocery receipt after you shop and we'll find the savings for you."

text_2 = f"The easiest way to earn points with Fetch Rewards is to just shop for the items you already buy. If you have any eligible brands on your receipt, you will get points based on the total cost of the products. You do not need to cut out any coupons or scan individual UPCs. Just scan your receipt after you check out and we will find the savings for you."

text_3 = f"We are always looking for opportunities for you to earn more points, which is why we also give you a selection of Special Offers. These Special Offers are opportunities to earn bonus points on top of the regular points you earn every time you purchase a participating brand. No need to pre-select these offers, we'll give you the points whether or not you knew about the offer. We just think it is easier that way."

In [13]:
import math

class TFIDFVectorizer:
    def __init__(self, *texts):
        self.documents = [self.text_to_words(text) for text in texts]
        self.tf_dicts = [self.calculate_tf(doc) for doc in self.documents]
        self.idf_dict = self.calculate_idf(self.documents)
        self.tfidf_vectors = [self.vectorize(doc, self.idf_dict) for doc in self.tf_dicts]

    def text_to_words(self, text):
        words = []
        current_word = ''
        for char in text:
            if char != " ":
                current_word += char
            else:
                words.append(current_word.lower())
                current_word = ''
        words.append(current_word.lower())  # Append the last word
        return words

    def calculate_tf(self, words):
        word_count = {}
        for word in words:
            word_count[word] = word_count.get(word, 0) + 1

        total_words = len(words)
        tf_dict = {word: count / total_words for word, count in word_count.items()}
        return tf_dict

    def calculate_idf(self, documents):
        total_documents = len(documents)
        word_document_count = {}

        for document in documents:
            for word in set(document):
                word_document_count[word] = word_document_count.get(word, 0) + 1

        idf_dict = {word: math.log(total_documents / count) for word, count in word_document_count.items()}
        return idf_dict

    def vectorize(self, tf_dict, idf_dict):
        return [tf_dict[word] * idf_dict[word] for word in tf_dict]


tfidf_vectorizer = TFIDFVectorizer(text_1, text_2, text_3)

# Access TF-IDF vectors for each text
tfidf_text1 = tfidf_vectorizer.tfidf_vectors[0]
tfidf_text2 = tfidf_vectorizer.tfidf_vectors[1]
tfidf_text3 = tfidf_vectorizer.tfidf_vectors[2]

In [15]:
import math

class CosineSimilarity:
    def __init__(self, vec1, vec2):
        self.vec1 = vec1
        self.vec2 = vec2

    def dot_product(self):
        return sum(x * y for x, y in zip(self.vec1, self.vec2))

    def magnitude(self, vec):
        return math.sqrt(sum(x ** 2 for x in vec))

    def calculate_similarity(self):
        dot_prod = self.dot_product()
        mag_vec1 = self.magnitude(self.vec1)
        mag_vec2 = self.magnitude(self.vec2)

        if mag_vec1 == 0 or mag_vec2 == 0:
            return 0  # To handle division by zero

        return dot_prod / (mag_vec1 * mag_vec2)


# Create an instance of CosineSimilarity
cosine_similarity_calculator_1_2 = CosineSimilarity(tfidf_text1, tfidf_text2)
cosine_similarity_calculator_1_3 = CosineSimilarity(tfidf_text1, tfidf_text3)

# Calculate cosine similarity
similarity_1_2 = cosine_similarity_calculator_1_2.calculate_similarity()
similarity_1_3 = cosine_similarity_calculator_1_3.calculate_similarity()

# Print the similarity
print(f"Cosine Similarity between 1 and 2: {similarity_1_2}")
print(f"Cosine Similarity between 1 and 3: {similarity_1_3}")

Cosine Similarity between 1 and 2: 0.7449676327257305
Cosine Similarity between 1 and 3: 0.6308880817004958
