In [53]:
import docker
from pymongo import MongoClient
import time

def setup_or_connect_mongo_container():
    # Initialize Docker client
    client = docker.from_env()
    container_name = "local2817"
    host_port = 50882
    container_port = 27017
    image_name = "mongodb/mongodb-atlas-local:8.0"

    try:
        # Check if the container already exists
        print("Checking for existing MongoDB container...")
        containers = client.containers.list(all=True, filters={"name": container_name})

        if containers:
            # If the container exists, check its status
            container = containers[0]
            if container.status != "running":
                print(f"Starting existing container '{container_name}'...")
                container.start()
            else:
                print(f"Container '{container_name}' is already running.")

        else:
            # Pull the image if it doesn't exist locally
            print(f"Pulling the Docker image '{image_name}'...")
            client.images.pull(image_name)

            # Create and start a new MongoDB container
            print(f"Creating and starting a new MongoDB container '{container_name}'...")
            container = client.containers.run(
                image_name,
                name=container_name,
                ports={f"{container_port}/tcp": host_port},
                volumes={"mongo-data": {"bind": "/data/db", "mode": "rw"}},
                detach=True,
                remove=False  # Do not auto-remove to allow reconnection later
            )

        print(f"MongoDB container '{container_name}' is running on port {host_port}.")
        return f"mongodb://localhost:{host_port}/?directConnection=true"

    except docker.errors.DockerException as e:
        print(f"Docker error: {e}\nVerify Docker Desktop or Docker Daemon is running!")
        return None


def test_mongo_connection(connection_string):
    try:
        # Connect to MongoDB using the provided connection string
        conn = MongoClient(connection_string)
        
        # Test connection by fetching server info
        server_info = conn.server_info()
        print("Successfully connected to MongoDB!")
        print("MongoDB version:", server_info["version"])
        
        # Access a database and collection (example)
        db = conn["query_responses"]
        collection = db["responses"]
        
        # Print available databases as a test
        print("Databases:", conn.list_database_names())
        
        return db, collection

    except Exception as e:
        print(f"Connection failed: {e}")
        return None, None


# Main function to set up or connect to MongoDB and test the connection
def main():
    connection_string = setup_or_connect_mongo_container()
    
    if connection_string:
        print("Waiting for MongoDB to initialize...")
        time.sleep(10)  # Wait for initialization
        
        db, collection = test_mongo_connection(connection_string)
        
        # Explicitly check if db and collection are not None
        if db is not None and collection is not None:
            print("MongoDB is ready to use!")
            
            # Example: Insert a document into the collection
            example_data = {
                "question": "What are the advantages of using MongoDB?",
                "response": "MongoDB offers flexibility and scalability.",
                "timestamp": time.time()
            }
            
            insert_result = collection.insert_one(example_data)
            print(f"Inserted document ID: {insert_result.inserted_id}")
            
            # Example: Query the inserted document
            query_result = collection.find_one({"question": "What are the advantages of using MongoDB?"})
            print("Query result:", query_result)
        
        else:
            print("Failed to connect to MongoDB.")
    else:
        print("Failed to set up or connect to MongoDB.")


# Run the main function
if __name__ == "__main__":
    main()


Checking for existing MongoDB container...
Container 'local2817' is already running.
MongoDB container 'local2817' is running on port 50882.
Waiting for MongoDB to initialize...
Successfully connected to MongoDB!
MongoDB version: 8.0.4
Databases: ['admin', 'config', 'local']
MongoDB is ready to use!
Inserted document ID: 679f2bb770ae42d7ab3b2037
Query result: {'_id': ObjectId('679f2bb770ae42d7ab3b2037'), 'question': 'What are the advantages of using MongoDB?', 'response': 'MongoDB offers flexibility and scalability.', 'timestamp': 1738484663.3823261}


In [55]:
#One at a time submission of queries to Deepseek and write response to MongoDB 

import requests
from pymongo import MongoClient
from datetime import datetime

# 1. Connect to MongoDB
def connect_to_mongodb():
    try:
        # Connect to the local MongoDB instance
        client = MongoClient("mongodb://localhost:50882/?directConnection=true")
        print("Successfully connected to MongoDB!")
        return client
    except Exception as e:
        print(f"Error connecting to MongoDB: {e}")
        return None

# 2. Save question, thinking, and response to MongoDB
def save_to_mongodb(client, question, thinking, response):
    try:
        # Access the database and collection
        db = client["query_responses"]
        collection = db["responses"]

        # Create a document to insert
        document = {
            "question": question,
            "thinking": thinking,
            "response": response,
            "timestamp": datetime.now()
        }

        # Insert the document into the collection
        result = collection.insert_one(document)
        print(f"Saved to MongoDB with document ID: {result.inserted_id}")
    except Exception as e:
        print(f"Error saving to MongoDB: {e}")

# 3. Extract "thinking" and "response" from raw content
def process_response(raw_content):
    try:
        # Split the content at "</think>"
        parts = raw_content.split("</think>")
        
        if len(parts) == 2:
            thinking = parts[0].strip()  # Everything up to "</think>"
            response = parts[1].strip()  # Everything after "</think>"
        else:
            thinking = None
            response = raw_content.strip()  # If no "</think>", treat entire content as response
        
        return thinking, response
    
    except Exception as e:
        print(f"Error processing response: {e}")
        return None, raw_content.strip()

# 4. Define function to query DeepSeek API and save results
def ask_question_and_save_to_mongodb(client, question):
    # Define the DeepSeek API endpoint
    url = "http://127.0.0.1:11434/v1/chat/completions"

    # Define the message payload
    data = {
        "model": "deepseek-r1:7b",
        "messages": [{"role": "user", "content": question}]
    }

    try:
        # Send the request
        response = requests.post(url, json=data)

        # Handle the response
        if response.status_code == 200:
            # Extract the assistant's message content
            raw_content = response.json()['choices'][0]['message']['content']

            # Process the raw content to separate thinking and response
            thinking, formatted_response = process_response(raw_content)

            # Print a nicely formatted output
            print("### Assistant's Thinking ###\n")
            print(thinking)
            print("\n### Assistant's Response ###\n")
            print(formatted_response)

            # Save the question, thinking, and response to MongoDB
            save_to_mongodb(client, question, thinking, formatted_response)
        
        else:
            print("Error:", response.text)
    
    except Exception as e:
        print(f"An error occurred while querying DeepSeek API: {e}")

# 5. Main Execution in Jupyter Notebook
# Connect to MongoDB (run this cell first)
client = connect_to_mongodb()

# Ask a question and save it to MongoDB (run this cell for each question)
if client:
    question = "we in Technical Services are seeking comparative analysis or benchmark data on MongoDB vs our competition for pricing of Technical Services. Competitors such as DataDog, DataBricks, Snowflake, some Postgres vendors and the database offerings for the Hyperscalers (Dynamo in AWS etc.)"
    ask_question_and_save_to_mongodb(client, question)

# Close the connection when you're done (optional)
client.close()


Successfully connected to MongoDB!
### Assistant's Thinking ###

<think>
Alright, so I need to figure out how to approach this request about comparing MongoDB's pricing against our competition. The user is in Technical Services and wants comparative analysis or benchmark data specifically on service pricing. They mentioned DataDog, DataBricks, Snowflake, some Postgres vendors, and hyperscaler offerings like Dynamo in AWS.

First, I should break down the user's objective: they're likely looking to position MongoDB as more cost-effective than their competitors. This could be for proposal purposes or to market MongoDB better internally. They might not just want raw numbers but also insights into when and why we offer what we do.

I need to think about who the key players are in this space. DataDog is known for monitoring, so maybe they use it alongside MongoDB. Snowflake and AWS DynamoDB have pricing calculators on their websites. Postgres offers pricing models based on usage or queries p

In [54]:
#Multiple async question submission and async write to MongoDB

# Install required libraries
#!pip install motor==3.3 pymongo==4.5 asyncio aiohttp nest_asyncio

import asyncio
import aiohttp
from motor.motor_asyncio import AsyncIOMotorClient
from datetime import datetime
import nest_asyncio

# Apply nest_asyncio to allow nested event loops in Jupyter Notebook
nest_asyncio.apply()

# 1. Connect to MongoDB asynchronously
async def connect_to_mongodb():
    try:
        # Use the async MongoDB client from motor
        client = AsyncIOMotorClient("mongodb://localhost:50882/?directConnection=true")
        print("Successfully connected to MongoDB!")
        return client
    except Exception as e:
        print(f"Error connecting to MongoDB: {e}")
        return None

# 2. Save question, thinking, and response to MongoDB asynchronously
async def save_to_mongodb(client, question, thinking, response):
    try:
        # Access the database and collection
        db = client["query_responses"]
        collection = db["responses"]

        # Create a document to insert
        document = {
            "question": question,
            "thinking": thinking,
            "response": response,
            "timestamp": datetime.now()
        }

        # Insert the document into the collection asynchronously
        result = await collection.insert_one(document)
        print(f"Saved to MongoDB with document ID: {result.inserted_id}")
    except Exception as e:
        print(f"Error saving to MongoDB: {e}")

# 3. Extract "thinking" and "response" from raw content
def process_response(raw_content):
    try:
        # Split the content at "</think>"
        parts = raw_content.split("</think>")
        
        if len(parts) == 2:
            thinking = parts[0].strip()  # Everything up to "</think>"
            response = parts[1].strip()  # Everything after "</think>"
        else:
            thinking = None
            response = raw_content.strip()  # If no "</think>", treat entire content as response
        
        return thinking, response
    
    except Exception as e:
        print(f"Error processing response: {e}")
        return None, raw_content.strip()

# 4. Fetch a single question's response asynchronously
async def fetch_response(session, url, question):
    data = {
        "model": "deepseek-r1:7b",
        "messages": [{"role": "user", "content": question}]
    }
    try:
        async with session.post(url, json=data) as response:
            if response.status == 200:
                raw_content = await response.json()
                raw_message = raw_content['choices'][0]['message']['content']
                thinking, formatted_response = process_response(raw_message)
                return question, thinking, formatted_response
            else:
                print(f"Error for question '{question}': {await response.text()}")
                return question, None, None
    except Exception as e:
        print(f"An error occurred for question '{question}': {e}")
        return question, None, None

# 5. Main function to handle multiple questions asynchronously
async def ask_questions_and_save(questions):
    url = "http://127.0.0.1:11434/v1/chat/completions"

    # Connect to MongoDB asynchronously
    client = await connect_to_mongodb()
    if not client:
        return

    async with aiohttp.ClientSession() as session:
        # Create tasks for fetching responses asynchronously
        fetch_tasks = [fetch_response(session, url, question) for question in questions]

        # Wait for all fetch tasks to complete
        responses = await asyncio.gather(*fetch_tasks)

        # Create tasks for saving responses to MongoDB asynchronously
        save_tasks = [
            save_to_mongodb(client, question, thinking, response)
            for question, thinking, response in responses if thinking is not None and response is not None
        ]

        # Wait for all save tasks to complete
        await asyncio.gather(*save_tasks)

    print("All questions processed and saved.")
    client.close()

# 6. Run the script with multiple questions in Jupyter Notebook
questions_list = [
    "What are the advantages of using MongoDB over a relational database",
    "we in Technical Services are seeking comparative analysis or benchmark data on MongoDB vs our competition for pricing of Technical Services. Competitors such as DataDog, DataBricks, Snowflake, some Postgres vendors and the database offerings for the Hyperscalers (Dynamo in AWS etc.)",
    "MongoDB EA license cost and Feature comparison with Microsoft SQL Enterprise with Software Assurance?",
    "Evaluate Atlas to see if there's a path forward compared to DocumentDB for a customer called Allegiant Air",
    "my customer is currently using both MongoDB and BigTable. They are reviewing and needing a cost analysis on having the bigtable nwl running on Mongodb and also review potential areas of optimization",
    "do we have proof points with quantifiable metrics to support Oracle, Sybase or AzureSQL offload to MongoDB?",
    "Do we have proof points (banks) where MongoDB have beaten Cockroach DB & Aerospike DB in # ODS, # Customer journies (Credit Card + Loans) # Content Management System"
    ]

await ask_questions_and_save(questions_list)

Successfully connected to MongoDB!
An error occurred for question 'we in Technical Services are seeking comparative analysis or benchmark data on MongoDB vs our competition for pricing of Technical Services. Competitors such as DataDog, DataBricks, Snowflake, some Postgres vendors and the database offerings for the Hyperscalers (Dynamo in AWS etc.)': 
An error occurred for question 'my customer is currently using both MongoDB and BigTable. They are reviewing and needing a cost analysis on having the bigtable nwl running on Mongodb and also review potential areas of optimization': 
Saved to MongoDB with document ID: 679f2d4870ae42d7ab3b203a
Saved to MongoDB with document ID: 679f2d4870ae42d7ab3b2039
Saved to MongoDB with document ID: 679f2d4870ae42d7ab3b203c
Saved to MongoDB with document ID: 679f2d4870ae42d7ab3b203b
Saved to MongoDB with document ID: 679f2d4870ae42d7ab3b203d
All questions processed and saved.


In [57]:
#Connect to the MongoDB collection create a search index on

conn = MongoClient("mongodb://localhost:50882/?directConnection=true")

db = conn["query_responses"]  # Database name
collection = db["responses"]  # Collection name

def create_search_index(collection):
    try:
        # Define the search index configuration
        index_config = {
            "mappings": {
                "dynamic": False,  # Disable dynamic mapping to avoid field limit issues
                "fields": {
                    "response": {"type": "string"},  # Index specific fields
                    "thinking": {"type": "string"}
                }
            }
        }

        # Send the createSearchIndexes command
        result = db.command(
            "createSearchIndexes",
            collection.name,
            indexes=[
                {
                    "name": "default",   # Name of the search index
                    "definition": index_config,
                }
            ]
        )
        print("Search index created:", result)
    except Exception as e:
        print("Error creating search index:", e)

# Call the function to create a search index on 'responses' collection
create_search_index(collection)

Search index created: {'indexesCreated': [{'id': '67a01650e65e610d34eb395f', 'name': 'default'}], 'ok': 1.0, '$clusterTime': {'clusterTime': Timestamp(1738544717, 1), 'signature': {'hash': b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00', 'keyId': 0}}, 'operationTime': Timestamp(1738544717, 1)}


In [58]:
#Example search

def run_search_query(collection, search_term):
    try:
        pipeline = [
            {
                "$search": {
                    "index": "default",
                    "text": {
                        "query": search_term,
                        "path": "response"  # Field to search in
                    }
                }
            },
            {
                "$project": {
                    "_id": 0,
                    "response": 1  # Include only the response field in results
                }
            }
        ]

        results = collection.aggregate(pipeline)
        print(f"Search results for '{search_term}':")
        for result in results:
            print(result)

    except Exception as e:
        print(f"Error running search query: {e}")

# Run a full-text search query on 'responses' collection
run_search_query(collection, "MongoDB advantages")


Search results for 'MongoDB advantages':
{'response': "**Advantages of Using MongoDB Over Relational Databases**\n\n1. **Flexible Data Modeling:**\n   - MongoDB is a NoSQL database that supports schema-less operations, allowing data to be structured without predefined schemas, which is ideal for unstructured or semi-structured data.\n\n2. **Scalability and Performance:**\n   - MongoDB scales horizontally using document stores (JSON-based), enabling it to handle large datasets efficiently with in-Memory storage and index-free queries, making it faster for big data applications.\n\n3. **Full Document Support:**\n   - Ideal for JSON-like data, handling mixed formats natively without the need for ETL processes or upfront table structures.\n\n4. **High Availability:**\n   - Supports high availability through replication systems, ensuring data redundancy and preventing single-point failures when properly configured.\n\n5. **Ease of Integration:**\n   - Works seamlessly with modern applicatio