In [None]:
!pip install sentence-transformers



In [None]:
# First, let's implement the Node class for our linked lists
class Node:
    """One key/value entry in a singly linked list."""

    def __init__(self, key, value):
        # A new node starts detached; the owning list sets ``next`` when it
        # links the node in.
        self.key, self.value = key, value
        self.next = None

# Next, implement a simplified LinkedList class
class LinkedList:
    """Singly linked list of key/value nodes in which each key appears once."""

    def __init__(self):
        self.head = None

    def _nodes(self):
        """Yield every node from the head to the tail, in order."""
        node = self.head
        while node is not None:
            yield node
            node = node.next

    def insert(self, key, value):
        """Store ``value`` under ``key``.

        An existing node with an equal key is overwritten in place; otherwise
        a new node is appended at the end of the list.
        """
        tail = None
        for node in self._nodes():
            if node.key == key:
                node.value = value
                return
            tail = node

        fresh = Node(key, value)
        if tail is None:
            # Nothing was visited, so the list was empty.
            self.head = fresh
        else:
            tail.next = fresh

    def search(self, key):
        """Return the value stored under ``key``, or ``None`` if absent."""
        for node in self._nodes():
            if node.key == key:
                return node.value
        return None

    def delete(self, key):
        """Unlink the node for ``key`` and return its value.

        Returns ``None`` when the list is empty or the key is not present.
        """
        previous = None
        for node in self._nodes():
            if node.key == key:
                if previous is None:
                    # The match is the head: advance the head pointer.
                    self.head = node.next
                else:
                    previous.next = node.next
                return node.value
            previous = node
        return None

# Now, implement the HashMap class
class HashMap:
    """Fixed-size hash map that resolves collisions by separate chaining.

    The table is a list of ``size`` buckets, each a ``LinkedList``. Keys that
    map to the same index share a bucket. The table is never resized.
    """

    def __init__(self, size=16):
        """Create an empty map with ``size`` buckets.

        Raises:
            ValueError: if ``size`` is not positive. A table with no buckets
                cannot store anything, and the modulo in ``hash_function``
                would otherwise fail later with a less helpful error.
        """
        if size <= 0:
            raise ValueError("size must be a positive integer")
        self.size = size
        self.buckets = [LinkedList() for _ in range(size)]

    def hash_function(self, key):
        """Convert a key to an index in our array of buckets.

        Strings use a polynomial rolling hash so that character order
        matters; a plain sum of code points would send every permutation of
        the same characters ("ab"/"ba") to the same bucket. Any other key
        falls back to the built-in ``hash``.

        Returns:
            An int in ``range(self.size)``.
        """
        if isinstance(key, str):
            hashcode = 0
            for char in key:
                # Reducing at every step keeps the intermediate value small
                # and gives the same result as reducing once at the end.
                hashcode = (hashcode * 31 + ord(char)) % self.size
            return hashcode
        # For non-string keys, use their hash value
        return hash(key) % self.size

    def put(self, key, value):
        """Add a key-value pair to the hashmap, replacing any existing value."""
        bucket_index = self.hash_function(key)
        self.buckets[bucket_index].insert(key, value)

    def get(self, key):
        """Retrieve a value by key; returns ``None`` when the key is absent."""
        bucket_index = self.hash_function(key)
        return self.buckets[bucket_index].search(key)

    def remove(self, key):
        """Remove a key-value pair and return its value (``None`` if absent)."""
        bucket_index = self.hash_function(key)
        return self.buckets[bucket_index].delete(key)

# Import the necessary library for sentence embeddings
from sentence_transformers import SentenceTransformer
import numpy as np
from typing import Dict, List, Tuple

class BookMetadata:
    """Descriptive record for a single book, plus a slot for its embedding."""

    def __init__(self, title, author, genre, description):
        self.title, self.author = title, author
        self.genre, self.description = genre, description
        # Placeholder only; a vector representation is stored here later.
        self.embedding = None

class SemanticBookRecommender:
    """Recommend books by cosine similarity between description embeddings.

    Books are stored in a custom ``HashMap`` keyed by book ID.
    ``generate_embeddings`` computes one vector per stored book, and
    ``find_similar_books`` ranks the other books against a query book.
    """

    def __init__(self, embedding_dim=384):  # Typical dimension for sentence embeddings
        self.book_map = HashMap(size=64)  # Custom HashMap to store book data
        self.book_embeddings = []  # (book_id, embedding) pairs
        self.embedding_dim = embedding_dim  # stored only; not read by this class

    def add_book(self, book_id, metadata):
        """Add a book to the recommender system (replaces an existing ID)."""
        self.book_map.put(book_id, metadata)

    def get_book(self, book_id):
        """Retrieve book metadata, or ``None`` if the ID is unknown."""
        return self.book_map.get(book_id)

    def generate_embeddings(self, model_name="all-MiniLM-L6-v2"):
        """Generate embeddings for all books using a sentence transformer.

        Loads ``model_name`` with ``SentenceTransformer``, encodes every
        stored book's ``description``, stores the vector on the book's
        ``embedding`` attribute and rebuilds ``self.book_embeddings``.
        Entries whose value is falsy or has no ``description`` are skipped.
        """
        # Load the sentence transformer model
        model = SentenceTransformer(model_name)

        # Build a fresh list rather than appending to the existing one, so a
        # second call does not leave every book listed twice.
        embeddings = []
        for bucket in self.book_map.buckets:
            current = bucket.head
            while current:
                book = current.value
                if book and hasattr(book, 'description'):
                    book.embedding = model.encode(book.description)
                    embeddings.append((current.key, book.embedding))
                current = current.next
        self.book_embeddings = embeddings

    def find_similar_books(self, query_book_id, top_n=5):
        """Find books similar to the query book.

        Args:
            query_book_id: ID of the book to compare against.
            top_n: maximum number of results. ``None`` returns every other
                book; zero or a negative number returns an empty list.

        Returns:
            A list of ``(book_id, similarity)`` tuples, most similar first,
            excluding the query book itself.

        Raises:
            ValueError: if the book is unknown or has no embedding yet.
        """
        query_book = self.get_book(query_book_id)
        if not query_book or query_book.embedding is None:
            raise ValueError("Book not found or embeddings not generated")

        # A negative slice bound would drop items from the tail instead of
        # limiting the result, so treat non-positive limits as "nothing".
        if top_n is not None and top_n <= 0:
            return []

        # Calculate cosine similarity with all books
        similarities = [
            (book_id, self._cosine_similarity(query_book.embedding, embedding))
            for book_id, embedding in self.book_embeddings
            if book_id != query_book_id
        ]

        # Sort by similarity (descending) and return top_n
        similarities.sort(key=lambda x: x[1], reverse=True)
        return similarities[:top_n]

    def _cosine_similarity(self, vec1, vec2):
        """Calculate cosine similarity between two vectors.

        Returns 0.0 when either vector has zero norm, where the cosine is
        undefined and the division would otherwise produce NaN.
        """
        denominator = np.linalg.norm(vec1) * np.linalg.norm(vec2)
        if denominator == 0:
            return 0.0
        return np.dot(vec1, vec2) / denominator

# Example usage
def example_recommender_system():
    """Build a four-book catalog, embed it, and print the books closest to '1984'."""
    # Sample books keyed by ID; dict order is the order they are registered in.
    catalog = {
        "book1": BookMetadata(
            "1984",
            "George Orwell",
            "Dystopian",
            "A dystopian social science fiction novel that examines the consequences of totalitarianism and mass surveillance."
        ),
        "book2": BookMetadata(
            "Brave New World",
            "Aldous Huxley",
            "Dystopian",
            "A dystopian social science fiction novel that examines a genetically modified society striving for happiness at the expense of individuality."
        ),
        "book3": BookMetadata(
            "The Great Gatsby",
            "F. Scott Fitzgerald",
            "Classic",
            "A novel that follows a cast of characters living in the fictional towns of West Egg and East Egg on Long Island in the summer of 1922."
        ),
        "book4": BookMetadata(
            "Animal Farm",
            "George Orwell",
            "Political Satire",
            "A farm fable that serves as an allegory for the Russian Revolution and totalitarianism."
        ),
    }

    recommender = SemanticBookRecommender()
    for identifier, metadata in catalog.items():
        recommender.add_book(identifier, metadata)

    # Embedding generation loads the model, hence the heads-up message.
    print("Generating embeddings (this might take a moment)...")
    recommender.generate_embeddings()

    # Rank the other books against "book1" and report the top three.
    matches = recommender.find_similar_books("book1", top_n=3)
    print("\nBooks similar to '1984':")
    for match_id, score in matches:
        match = recommender.get_book(match_id)
        print(f"- {match.title} by {match.author} (Similarity: {score:.4f})")

# Run the example only when this code is executed directly (as a script or a
# notebook cell), so importing it does not trigger a model download.
if __name__ == "__main__":
    example_recommender_system()

Generating embeddings (this might take a moment)...


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.7k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

1_Pooling%2Fconfig.json:   0%|          | 0.00/190 [00:00<?, ?B/s]


Books similar to '1984':
- Brave New World by Aldous Huxley (Similarity: 0.4804)
- Animal Farm by George Orwell (Similarity: 0.3613)
- The Great Gatsby by F. Scott Fitzgerald (Similarity: 0.2512)
