In [6]:
import sys
import os

# Make the directory one level above the current working directory importable
parent_dir = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
if sys.path.count(parent_dir) == 0:
    sys.path.append(parent_dir)

# With the parent directory on sys.path, the project package can be imported
from llm_engineering.database.mongo_connection import connection
from llm_engineering.model.settings import settings


In [15]:
import uuid
from abc import ABC
from typing import Generic, Type, TypeVar
from loguru import logger
from pydantic import UUID4, BaseModel, Field
from pymongo import errors

# Resolve the database handle once, when this cell runs; the document class
# defined below looks up its collections on this `_database` object.
try:
    _database = connection.get_database(settings.MONGO_DATABASE_NAME)
except Exception as e:
    logger.error(f"Failed to connect to MongoDB: {e}")
    # Bare `raise` re-raises the active exception as-is after it has been logged
    raise

"""
### Object-Document Mapper (ODM) Overview
The code below defines a **MongoDB-compatible ODM**, similar to an ORM but designed for NoSQL databases.
It provides a **base document class** that can be extended for different MongoDB collections.

We use a **TypeVar (`T`)** for generic typing, so that methods called on a subclass are type-hinted as returning that subclass.

TypeVar Reference:
- https://realpython.com/python-type-checking/
- https://realpython.com/python312-typing/
"""

# TypeVar bound to `NoSQLBaseDocument`. The bound is given as a string (forward
# reference) because the class is only defined further down; the class uses `T`
# to annotate `self`/`cls` and the return types of its methods.
T = TypeVar("T", bound="NoSQLBaseDocument")


class NoSQLBaseDocument(BaseModel, Generic[T], ABC):
    """Base class for MongoDB documents using Pydantic and UUIDs.

    Subclasses declare their fields as Pydantic attributes and must define a
    nested ``Settings`` class whose ``name`` attribute is the MongoDB
    collection name::

        class UserDocument(NoSQLBaseDocument):
            name: str

            class Settings:
                name = "users"

    Instances are written with ``_id`` set to the string form of ``id`` and
    are rebuilt from such documents by ``from_mongo()``.
    """

    # Auto-generated UUID4 identifier; written to MongoDB as the string `_id`.
    id: UUID4 = Field(default_factory=uuid.uuid4)

    # Identity-based comparison and hashing so instances work in dicts and sets.
    def __eq__(self, value: object) -> bool:
        """Return True when `value` is an instance of this class with the same `id`."""
        logger.debug(f"Comparing {self.__class__.__name__} with {value.__class__.__name__}")
        if not isinstance(value, self.__class__):
            return False
        return self.id == value.id

    def __hash__(self) -> int:
        """Hash on `id`, consistent with `__eq__`."""
        return hash(self.id)

    @classmethod
    def from_mongo(cls: Type[T], data: dict) -> T:
        """
        Build an instance of `cls` from a MongoDB document.

        The document's string `_id` is converted to a `uuid.UUID` and passed
        to the model as `id`; all other keys are passed through unchanged.
        The caller's dict is not modified.

        Raises:
            ValueError: if `data` is empty, or `_id` is not a valid UUID string.
            KeyError: if `data` has no `_id` key.
        """
        if not data:
            raise ValueError("Data is empty.")

        # Work on a shallow copy so the caller's document keeps its `_id` key.
        document = dict(data)
        document["id"] = uuid.UUID(document.pop("_id"))

        return cls(**document)

    def to_mongo(self: T, **kwargs) -> dict:
        """
        Serialize the instance into a dict suitable for insertion into MongoDB.

        - `exclude_unset` (default False) and `by_alias` (default True) are
          taken from `kwargs`; any remaining kwargs are forwarded to
          Pydantic's `model_dump()`.
        - `id` is renamed to `_id` and stored as a string.
        - Any other top-level UUID values are converted to strings
          (nested containers are not traversed).
        """
        exclude_unset = kwargs.pop("exclude_unset", False)
        by_alias = kwargs.pop("by_alias", True)

        parsed = self.model_dump(exclude_unset=exclude_unset, by_alias=by_alias, **kwargs)

        # Move `id` to `_id` (as a string) unless the dump already has an `_id`.
        if "_id" not in parsed and "id" in parsed:
            parsed["_id"] = str(parsed.pop("id"))

        # Only existing keys are reassigned, so mutating while iterating is safe here.
        for key, value in parsed.items():
            if isinstance(value, uuid.UUID):
                parsed[key] = str(value)

        return parsed

    def save(self: T, **kwargs) -> T | None:
        """
        Insert this document into its MongoDB collection.

        `kwargs` are forwarded to `to_mongo()`.

        Returns:
            The instance itself on success, or None if the insert raised a
            pymongo `WriteError` (the exception is logged).
        """
        # Resolve the name once; it is needed for both the lookup and the log line.
        collection_name = self.get_collection_name()
        collection = _database[collection_name]
        try:
            logger.debug(f"Inserting document into collection: {collection_name}")
            collection.insert_one(self.to_mongo(**kwargs))
            return self
        except errors.WriteError:
            logger.exception("Failed to insert document.")
            return None

    @classmethod
    def get_or_create(cls: Type[T], **filter_options) -> T | None:
        """
        Return the first document matching `filter_options`, creating it if absent.

        - If a match is found it is converted with `from_mongo()`.
        - Otherwise a new instance is built from `filter_options` and saved.

        Returns:
            The existing or newly saved instance. None is returned when the
            new instance could not be inserted, because `save()` returns None
            on a write error.

        Raises:
            pymongo.errors.OperationFailure: re-raised after logging.
        """
        collection = _database[cls.get_collection_name()]
        try:
            instance = collection.find_one(filter_options)
            if instance:
                return cls.from_mongo(instance)

            # No match: create the document from the filter values and persist it.
            return cls(**filter_options).save()
        except errors.OperationFailure:
            logger.exception(f"Failed to retrieve document with filter options: {filter_options}")
            raise

    @classmethod
    def bulk_insert(cls: Type[T], documents: list[T], **kwargs) -> bool:
        """
        Insert multiple documents into the MongoDB collection.

        Each instance is converted with `to_mongo(**kwargs)` before insertion.

        Returns:
            True if the insert succeeded or there was nothing to insert;
            False if pymongo raised `WriteError` or `BulkWriteError`.
        """
        collection = _database[cls.get_collection_name()]

        # Materialise the batch so an empty input can be detected up front:
        # PyMongo's insert_many raises on an empty batch instead of doing nothing.
        payload = [doc.to_mongo(**kwargs) for doc in documents]
        if not payload:
            return True

        try:
            collection.insert_many(payload)
            return True
        except (errors.WriteError, errors.BulkWriteError):
            logger.error(f"Failed to insert documents of type {cls.__name__}")
            return False

    @classmethod
    def find(cls: Type[T], **filter_options) -> T | None:
        """
        Find a **single** document matching `filter_options`.

        Returns:
            The matching instance, or None if nothing matched or pymongo
            raised `OperationFailure` (logged).
        """
        collection = _database[cls.get_collection_name()]
        try:
            instance = collection.find_one(filter_options)
            return cls.from_mongo(instance) if instance else None
        except errors.OperationFailure:
            logger.error("Failed to retrieve document")
            return None

    @classmethod
    def bulk_find(cls: Type[T], **filter_options) -> list[T]:
        """
        Find **all** documents matching `filter_options`.

        Returns:
            A list of instances; an empty list if nothing matched or pymongo
            raised `OperationFailure` (logged).
        """
        collection = _database[cls.get_collection_name()]
        try:
            instances = collection.find(filter_options)
            # from_mongo() either returns an instance or raises, so no None filtering is needed.
            return [cls.from_mongo(instance) for instance in instances]
        except errors.OperationFailure:
            logger.error("Failed to retrieve documents")
            return []

    @classmethod
    def get_collection_name(cls: Type[T]) -> str:
        """
        Return the collection name declared in the nested `Settings` class.

        Raises:
            ValueError: if the class has no `Settings`, or `Settings` has no `name`.
        """
        # Check for `Settings` before touching it so each failure gets its own message.
        if not hasattr(cls, "Settings"):
            raise ValueError("Settings class is not defined in the document class.")
        try:
            name = cls.Settings.name
        except AttributeError as exc:
            raise ValueError("Missing 'name' attribute in the Settings configuration class.") from exc

        logger.debug(f"Using collection name: {name}")
        return name

In [8]:
# Concrete example model: a user document that inherits from NoSQLBaseDocument
class UserModel(NoSQLBaseDocument):
    """Example user document with `name` and `email` fields.

    No nested `Settings` class is declared here, so methods that call
    `get_collection_name()` (e.g. `save()`, `find()`) raise ValueError for
    this model; `from_mongo()` and `to_mongo()` do not need it.
    """
    name: str
    email: str


In [9]:
# NOTE(review): "123" is not a valid UUID string, so UserModel.from_mongo(mongo_doc)
# would raise ValueError on this value. The next cell reassigns `mongo_doc` before
# it is used — confirm whether this cell is still needed.
mongo_doc = {"_id": "123", "name": "Michael", "email": "michael@example.com"}

In [10]:
# Stand-in for a stored document: `_id` is the string form of a UUID
mongo_doc = dict(
    _id=str(uuid.uuid4()),
    name="Michael",
    email="michael@example.com",
)

# Hydrate a typed UserModel from the raw document
user_instance = UserModel.from_mongo(mongo_doc)

# Show the resulting fields, one per line
print(
    f"User ID (UUID4): {user_instance.id}",
    f"User Name: {user_instance.name}",
    f"User Email: {user_instance.email}",
    sep="\n",
)

User ID (UUID4): 7df7b763-a017-4a88-880c-4be1237fdf63
User Name: Michael
User Email: michael@example.com


In [11]:
# Build a fresh model; `id` is filled in by its default factory
user = UserModel(name="Michael", email="michael@example.com")

# Serialise to the dict shape used for MongoDB (`_id` as a string)
mongo_data = user.to_mongo()

# Show the heading and the dict on separate lines
print("MongoDB-Compatible Dictionary:", mongo_data, sep="\n")


MongoDB-Compatible Dictionary:
{'name': 'Michael', 'email': 'michael@example.com', '_id': '72bc0ef3-956b-4b2e-85bc-a6f8d9d75dfd'}


In [17]:
class UserDocument(NoSQLBaseDocument):
    """Example user document persisted in the `users` MongoDB collection."""

    name: str
    email: str

    # `get_collection_name()` reads `Settings.name` to choose the MongoDB collection
    class Settings:
        name = "users"  # MongoDB collection name


# Build a user (a fresh UUID4 `id` is generated) and insert it into the `users` collection.
# `save()` returns the instance, or None if the insert raised a pymongo WriteError;
# as the last expression in the cell, that return value is what the notebook displays.
user = UserDocument(name="Michael", email="michael@example.com")
user.save()  # Saves user to MongoDB

[32m2025-06-03 17:16:48.797[0m | [34m[1mDEBUG   [0m | [36m__main__[0m:[36msave[0m:[36m99[0m - [34m[1mInserting document into collection: users[0m


Using collection name: users
Using collection name: users


UserDocument(id=UUID('25329301-1a65-486f-9e7c-74c085b37d1f'), name='Michael', email='michael@example.com')

In [18]:
# Look up a `users` document matching both fields; a new one is created and saved
# only when no match is found. The cell above already saved this user, so the
# existing document is expected back.
user = UserDocument.get_or_create(name="Michael", email="michael@example.com")
print(user.id, user.name, user.email)

Using collection name: users
25329301-1a65-486f-9e7c-74c085b37d1f Michael michael@example.com


In [19]:
# Build one UserDocument per (name, email) pair
users = [
    UserDocument(name=name, email=email)
    for name, email in (
        ("Alice", "alice@example.com"),
        ("Bob", "bob@example.com"),
    )
]
# Insert both in a single call; the returned bool is shown as the cell output
UserDocument.bulk_insert(users)

Using collection name: users


True

In [20]:
# Fetch every `users` document whose email matches; `bulk_find` returns a list
# (empty when nothing matches or on a pymongo OperationFailure).
users = UserDocument.bulk_find(email="alice@example.com")
print(users)

Using collection name: users
[UserDocument(id=UUID('b90fa9cd-45f0-4a6b-a849-8ffcef6b429f'), name='Alice', email='alice@example.com')]


In [None]:
# The document already exists, so this should return it with the same UUID
# instead of creating a new one.
UserDocument.get_or_create(name="Michael", email="michael@example.com")

Using collection name: users


UserDocument(id=UUID('25329301-1a65-486f-9e7c-74c085b37d1f'), name='Michael', email='michael@example.com')