In [None]:
from langchain_google_genai import ChatGoogleGenerativeAI
from dotenv import load_dotenv, find_dotenv
import os
import base64
from io import BytesIO
from PIL import Image
import matplotlib.pyplot as plt
from typing import List
from pydantic import BaseModel, Field
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.messages import HumanMessage, SystemMessage



# Step 1: environment + chat model.
# override=True lets values from the located .env file replace variables that
# are already present in the process environment.
load_dotenv(find_dotenv(), override=True)

# The API key is read from the environment; it is never printed or hardcoded.
llm = ChatGoogleGenerativeAI(
    google_api_key=os.getenv("GOOGLE_API_KEY"),
    model="gemini-2.0-flash-lite",
    temperature=0,
    max_retries=3,
    max_tokens=None,
    timeout=None,
)

# Step 2: parser that turns the model's message into a plain string.
output_parser = StrOutputParser()

# step 3: Structured output

# Schema for a structured phone review. Each field's `description` is part of
# the schema, so field names and descriptions are left exactly as declared.
class MobileReview(BaseModel):
    phone_model: str = Field(description="The model of the phone")
    # Declared as a score out of 5, per the description below.
    rating: float = Field(description="Overall rating out of 5")
    pros: List[str] = Field(description="List of positive aspects")
    # NOTE(review): `cos` looks like a typo for `cons` (the description says
    # "negative aspects"). Renaming changes the schema and the attribute name
    # callers read, so confirm no code depends on `.cos` before fixing it.
    cos: List[str] = Field(description="List of negative aspects")
    summary: str = Field(description="Brief summary of the review")

# Sample free-text review used as input for the structured-output demo.
# Note the reviewer scores the phone "8 out of 10", while MobileReview.rating
# is described as "out of 5" — the two scales differ.
review_text = """
Just got my hands on the new Galaxy S21 and wow, this thing is slick! The screen is gorgeous,
colors pop like crazy. Camera's insane too, especially at night - my Insta game's never been
stronger. Battery life's solid, lasts me all day no problem.
Not gonna lie though, it's pretty pricey. And what's with ditching the charger? C'mon Samsung.
Also, still getting used to the new button layout, keep hitting Bixby by mistake.
Overall, I'd say it's a solid 8 out of 10. Great phone, but a few annoying quirks keep it from
being perfect. If you're due for an upgrade, definitely worth checking out!
"""

# structured_llm = llm.with_structured_output(
#     MobileReview,
# )
# output = structured_llm.invoke(review_text)
# print(output)
# print(output.pros)

# Step 4: prompt template.
# The single variable {input} is substituted in both places. The template text
# keeps a space before each placeholder so the substituted value does not get
# glued to the preceding word.
prompt = ChatPromptTemplate.from_template(
    "Tell me a short remark about {input} and give your thoughts about {input}"
)

# Pipeline: fill the template -> call the model -> extract the text.
chain = prompt | llm | output_parser
# result = chain.invoke({"input": "Naruto Uzumaki"})
# print(result)

# Step 5: an explicit message list — one system instruction followed by one
# user question — that can be passed straight to the chat model.
messages = [
    SystemMessage(content="You are a helpful assistant the help answer my question accurately and succintly and yet concisely!"),
    HumanMessage(content="Tell me about Naruto's childhood and what makes him so obsessed with becoming Hokage."),
]
# Uncomment to send the messages and show the reply text:
# response = llm.invoke(messages)
# print(response.content)

# Same pipeline as above, but built from role-tagged messages.
template = ChatPromptTemplate.from_messages([
    ("system", "You are a helpful assistant."),
    ("human", "Tell me about {input}")
])
chain = template | llm | output_parser

# The value for {input} must be a plain string. Wrapping the string in an extra
# pair of braces builds a Python set, whose repr (braces and quotes included)
# would be formatted into the prompt instead of the intended text.
result = chain.invoke({"input": "Naruto's Nindo, his ninja way"})
print(result)

Testing

In [4]:
import os
from dotenv import load_dotenv, find_dotenv
from langchain_google_genai import ChatGoogleGenerativeAI
# Reload the .env file (overriding variables already set) and read the key once.
load_dotenv(find_dotenv(), override=True)
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")

from langchain_google_vertexai import ChatVertexAI

# Chat model used by the RAG chain further down; the key read above is reused
# rather than looked up a second time.
llm = ChatGoogleGenerativeAI(
    google_api_key=GOOGLE_API_KEY,
    model="gemini-2.0-flash-lite",
    temperature=0,
    max_retries=3,
    max_tokens=None,
    timeout=None,
)

In [11]:
import bs4
from langchain import hub

from langchain_community.document_loaders import WebBaseLoader
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter

from langchain_mongodb import MongoDBAtlasVectorSearch
from pymongo import MongoClient
from pymongo.operations import SearchIndexModel

from langchain_core.documents import Document
import logging

# Embedding client handed to the vector store below. The index defined in
# create_index() declares 768 dimensions.
# NOTE(review): confirm "models/text-embedding-004" emits 768-dimensional vectors.
gemini_embeddings = GoogleGenerativeAIEmbeddings(
    google_api_key=GOOGLE_API_KEY,
    model="models/text-embedding-004",
)
gemini_embeddings  # bare expression so the notebook displays the configured client

GoogleGenerativeAIEmbeddings(client=<google.ai.generativelanguage_v1beta.services.generative_service.client.GenerativeServiceClient object at 0x796ec81bbce0>, model='models/text-embedding-004', task_type=None, google_api_key=SecretStr('**********'), credentials=None, client_options=None, transport=None, request_options=None)

In [30]:
# --- MongoDB Atlas configuration ---
DB_NAME = "RAG-Chatbot-Cluster"
COLLECTION_NAME = "RAG-Chatbot-Collection-Test"
ATLAS_VECTOR_SEARCH_INDEX_NAME = "RAG-Chatbot-Index-Test"

# NOTE(review): os.getenv returns None when the variable is unset, and that
# None is passed straight to MongoClient — confirm the resulting default
# connection target is acceptable, or fail fast here.
MONGODB_ATLAS_CLUSTER_URI = os.getenv("MONGODB_ATLAS_CLUSTER_URI")
client = MongoClient(MONGODB_ATLAS_CLUSTER_URI)

# Handle to the collection that stores the chunks and their embeddings.
MONGODB_COLLECTION = client[DB_NAME][COLLECTION_NAME]

# Vector store backed by that collection, using the Gemini embeddings.
vector_store = MongoDBAtlasVectorSearch(
    collection=MONGODB_COLLECTION,
    embedding=gemini_embeddings,
    index_name=ATLAS_VECTOR_SEARCH_INDEX_NAME,
    relevance_score_fn="cosine",
)

def initialize_vector_store():
    """Prepare the MongoDB collection and smoke-test the vector store.

    Steps, in order:
      1. Ping the server with ``client.server_info()``.
      2. Create ``COLLECTION_NAME`` inside ``DB_NAME`` when it is not listed yet.
      3. Call ``create_index()`` to set up the search index.
      4. Insert one dummy document through ``vector_store``, read it back,
         delete it, and confirm that it is gone.

    The round-trip in step 4 addresses the dummy document by the ``_id`` that
    ``add_documents`` returns. Filtering on ``file_id == 0`` is not safe here:
    the ingestion cell tags real chunks with the same ``file_id``, so a
    ``file_id`` filter could read or delete a real chunk instead of the dummy.

    Raises:
        Exception: any failure is logged and then re-raised unchanged.
    """
    # Local import keeps this cell self-contained; bson ships with pymongo,
    # which this notebook already depends on.
    from bson.objectid import ObjectId

    try:
        # Verify MongoDB connection; raises if the server cannot be reached.
        client.server_info()
        logging.info("MongoDB connection established successfully")

        # Create the collection only when it does not exist yet.
        if COLLECTION_NAME not in client[DB_NAME].list_collection_names():
            client[DB_NAME].create_collection(COLLECTION_NAME)
            logging.info(f"Created collection {COLLECTION_NAME}")
        else:
            logging.info(f"Collection {COLLECTION_NAME} already exists")

        logging.info(f"Ensure vector search index '{ATLAS_VECTOR_SEARCH_INDEX_NAME}' is configured in MongoDB Atlas for collection {COLLECTION_NAME}")
        create_index()
        # Alternative kept for reference:
        # vector_store.create_vector_search_index(
        #     dimensions=768,
        #     filters=[{"type":"filter", "path": "source"}],
        #     update=True
        # )

        # Smoke test: insert a dummy document and remember the id it was given.
        dummy_doc = Document(page_content="Test document", metadata={"file_id": 0})
        inserted_ids = vector_store.add_documents([dummy_doc])
        logging.info("Added test document to vector store")

        # add_documents returns the ids as strings; the stored _id is an ObjectId.
        test_doc_filter = {"_id": ObjectId(inserted_ids[0])}

        # Read the document back to confirm it was stored.
        inserted_doc = vector_store._collection.find_one(test_doc_filter)
        if inserted_doc:
            logging.info(f"Inserted test document: {inserted_doc.get('file_id')}")
        else:
            logging.error("Test document not found after insertion")

        # Delete exactly the dummy document.
        result = vector_store._collection.delete_one(test_doc_filter)
        if result.deleted_count > 0:
            logging.info("Successfully deleted test document")
        else:
            logging.warning("No test document was deleted; check document structure or query")

        # Verify deletion using the same filter that was used to delete.
        remaining_doc = vector_store._collection.find_one(test_doc_filter)
        if remaining_doc:
            logging.error(f"Test document still exists after deletion attempt: {remaining_doc}")
        else:
            logging.info("Confirmed test document was deleted")

    except Exception as e:
        logging.error(f"Failed to initialize vector store: {str(e)}")
        raise

def create_index():
    """Create the search index named ``ATLAS_VECTOR_SEARCH_INDEX_NAME`` if missing.

    The index is defined on ``MONGODB_COLLECTION`` with dynamic mappings plus a
    768-dimension, cosine-similarity ``knnVector`` mapping for the
    ``embedding`` field. When an index with that name is already listed for
    the collection, nothing is created, so re-running the notebook cell does
    not submit a duplicate definition.

    Returns:
        None. The outcome is reported through ``logging``.
    """
    # Idempotency guard: look the index up by name before creating it.
    existing_indexes = list(
        MONGODB_COLLECTION.list_search_indexes(ATLAS_VECTOR_SEARCH_INDEX_NAME)
    )
    if existing_indexes:
        logging.info(
            f"Atlas Search Index '{ATLAS_VECTOR_SEARCH_INDEX_NAME}' already exists; skipping creation"
        )
        return

    # NOTE(review): this is an Atlas Search index with a knnVector field
    # mapping. If the vector store queries through `$vectorSearch`, it may
    # need an index of type "vectorSearch" instead — confirm against the
    # langchain-mongodb / Atlas documentation.
    search_index_model = SearchIndexModel(
        definition={
            "mappings": {
                "dynamic": True,
                "fields": {
                    # The key is the document field that holds the vector.
                    "embedding": {
                        "type": "knnVector",
                        "dimensions": 768,
                        "similarity": "cosine"
                    }
                }
            }
        },
        name=ATLAS_VECTOR_SEARCH_INDEX_NAME,
    )

    result = MONGODB_COLLECTION.create_search_index(model=search_index_model)
    logging.info(f"Succesfully creating Atlas Search Index: {result}")


def delete_collection():
    """Drop ``COLLECTION_NAME`` from ``DB_NAME``.

    Returns:
        bool: True when the drop call completed, False when it raised (the
        error is logged rather than propagated).
    """
    try:
        client[DB_NAME].drop_collection(COLLECTION_NAME)
    except Exception as exc:
        logging.error(f"Error deleting collection {COLLECTION_NAME}: {str(exc)}")
        return False
    else:
        logging.info(f"Successfully deleted collection {COLLECTION_NAME}")
        return True
    
# Run the setup now: connection check, collection creation, index creation and
# the insert/delete smoke test all happen inside this call.
initialize_vector_store()
# create_index() is already invoked by initialize_vector_store(); call it
# directly only when the index alone needs to be (re)created.
# create_index()

In [31]:
# Sanity check: confirm the server responds, then display the collections
# currently present in DB_NAME (the last expression is the cell's output).
client.server_info()
client[DB_NAME].list_collection_names()
# Destructive — drops the whole collection; left commented out on purpose.
# delete_collection()

['RAG-Chatbot-Collection-Test']

In [32]:
from uuid import uuid4
from bson.objectid import ObjectId

# Fetch the blog post, parsing only the elements whose CSS class is one of
# post-content / post-title / post-header.
loader = WebBaseLoader(
    web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
    bs_kwargs={
        "parse_only": bs4.SoupStrainer(
            class_=("post-content", "post-title", "post-header")
        )
    },
)
docs = loader.load()

# Split the page into overlapping chunks and report how many were produced.
text_splitter = RecursiveCharacterTextSplitter(chunk_overlap=200, chunk_size=1000)
splits = text_splitter.split_documents(docs)
print(len(splits))

# Tag every chunk with the id of the file it came from.
for chunk in splits:
    chunk.metadata.update(file_id=0)

# Embed and store the chunks; the returned list of ids is the cell's output.
# To supply explicit ids instead:
# vector_store.add_documents(documents=splits, ids=[str(uuid4()) for _ in splits])
vector_store.add_documents(splits)



66


['6808793bce177c45570f34ee',
 '6808793bce177c45570f34ef',
 '6808793bce177c45570f34f0',
 '6808793bce177c45570f34f1',
 '6808793bce177c45570f34f2',
 '6808793bce177c45570f34f3',
 '6808793bce177c45570f34f4',
 '6808793bce177c45570f34f5',
 '6808793bce177c45570f34f6',
 '6808793bce177c45570f34f7',
 '6808793bce177c45570f34f8',
 '6808793bce177c45570f34f9',
 '6808793bce177c45570f34fa',
 '6808793bce177c45570f34fb',
 '6808793bce177c45570f34fc',
 '6808793bce177c45570f34fd',
 '6808793bce177c45570f34fe',
 '6808793bce177c45570f34ff',
 '6808793bce177c45570f3500',
 '6808793bce177c45570f3501',
 '6808793bce177c45570f3502',
 '6808793bce177c45570f3503',
 '6808793bce177c45570f3504',
 '6808793bce177c45570f3505',
 '6808793bce177c45570f3506',
 '6808793bce177c45570f3507',
 '6808793bce177c45570f3508',
 '6808793bce177c45570f3509',
 '6808793bce177c45570f350a',
 '6808793bce177c45570f350b',
 '6808793bce177c45570f350c',
 '6808793bce177c45570f350d',
 '6808793bce177c45570f350e',
 '6808793bce177c45570f350f',
 '6808793bce17

In [41]:
len(['6808793bce177c45570f34ee',
 '6808793bce177c45570f34ef',
 '6808793bce177c45570f34f0',
 '6808793bce177c45570f34f1',
 '6808793bce177c45570f34f2',
 '6808793bce177c45570f34f3',
 '6808793bce177c45570f34f4',
 '6808793bce177c45570f34f5',
 '6808793bce177c45570f34f6',
 '6808793bce177c45570f34f7',
 '6808793bce177c45570f34f8',
 '6808793bce177c45570f34f9',
 '6808793bce177c45570f34fa',
 '6808793bce177c45570f34fb',
 '6808793bce177c45570f34fc',
 '6808793bce177c45570f34fd',
 '6808793bce177c45570f34fe',
 '6808793bce177c45570f34ff',
 '6808793bce177c45570f3500',
 '6808793bce177c45570f3501',
 '6808793bce177c45570f3502',
 '6808793bce177c45570f3503',
 '6808793bce177c45570f3504',
 '6808793bce177c45570f3505',
 '6808793bce177c45570f3506',
 '6808793bce177c45570f3507',
 '6808793bce177c45570f3508',
 '6808793bce177c45570f3509',
 '6808793bce177c45570f350a',
 '6808793bce177c45570f350b',
 '6808793bce177c45570f350c',
 '6808793bce177c45570f350d',
 '6808793bce177c45570f350e',
 '6808793bce177c45570f350f',
 '6808793bce177c45570f3510',
 '6808793bce177c45570f3511',
 '6808793bce177c45570f3512',
 '6808793bce177c45570f3513',
 '6808793bce177c45570f3514',
 '6808793bce177c45570f3515',
 '6808793bce177c45570f3516',
 '6808793bce177c45570f3517',
 '6808793bce177c45570f3518',
 '6808793bce177c45570f3519',
 '6808793bce177c45570f351a',
 '6808793bce177c45570f351b',
 '6808793bce177c45570f351c',
 '6808793bce177c45570f351d',
 '6808793bce177c45570f351e',
 '6808793bce177c45570f351f',
 '6808793bce177c45570f3520',
 '6808793bce177c45570f3521',
 '6808793bce177c45570f3522',
 '6808793bce177c45570f3523',
 '6808793bce177c45570f3524',
 '6808793bce177c45570f3525',
 '6808793bce177c45570f3526',
 '6808793bce177c45570f3527',
 '6808793bce177c45570f3528',
 '6808793bce177c45570f3529',
 '6808793bce177c45570f352a',
 '6808793bce177c45570f352b',
 '6808793bce177c45570f352c',
 '6808793bce177c45570f352d',
 '6808793bce177c45570f352e',
 '6808793bce177c45570f352f']
)

66

In [51]:
# Look up one stored chunk by its ObjectId (the id is copied from the output of
# the ingestion cell, so it is only valid for that particular run).
res = vector_store._collection.find({"_id": ObjectId('6808793bce177c45570f34ee')})
# Despite the name, `methods` holds the field names (dict keys) of one stored
# document, not methods.
methods = MONGODB_COLLECTION.find_one().keys()
print(methods)
# Show the _id of the first match to confirm the lookup hit the document.
res.to_list()[0].get('_id')

dict_keys(['_id', 'text', 'embedding', 'source', 'file_id'])


ObjectId('6808793bce177c45570f34ee')

In [36]:

# Retrieve and generate using the relevant snippets of the blog.
# as_retriever() is called without arguments, so the vector store's default
# retrieval settings apply.
retriever = vector_store.as_retriever()
# NOTE: this rebinds `prompt`, which an earlier cell used for a different
# ChatPromptTemplate; from here on `prompt` is the hub's "rlm/rag-prompt".
prompt = hub.pull("rlm/rag-prompt")

def format_docs(docs):
    """Concatenate the text of the given documents into one context string.

    Args:
        docs: iterable of objects exposing a ``page_content`` string.

    Returns:
        str: the ``page_content`` values joined by a blank line, or an empty
        string when ``docs`` is empty.
    """
    contents = [document.page_content for document in docs]
    return "\n\n".join(contents)

# RAG pipeline. The input question is fanned out into two prompt variables:
#   - "context": the question goes through the retriever, and the retrieved
#     documents are joined into one string by format_docs;
#   - "question": the question itself, passed through unchanged.
# The filled prompt is sent to the chat model and the reply is reduced to text.
rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)
# The last expression's return value (the answer string) is the cell's output.
rag_chain.invoke("What is Task Decomposition?")

'Task decomposition is the process of breaking down a complex task into smaller, simpler steps. This is often done to improve model performance on complex tasks. It can be achieved through techniques like Chain of Thought and Tree of Thoughts.'

##  []