In [36]:
from langchain_community.document_loaders import WebBaseLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_core.prompts import ChatPromptTemplate
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_chroma import Chroma
from langchain_groq import ChatGroq

import bs4

from langchain_classic.chains import create_retrieval_chain
from langchain_classic.chains.combine_documents import create_stuff_documents_chain

import os
from dotenv import load_dotenv
load_dotenv()

True

In [37]:
# bringing in the data about black holes from wikipedia
loader = WebBaseLoader(
    web_path="https://en.wikipedia.org/wiki/Black_hole"
)
docs = loader.load()
docs

[Document(metadata={'source': 'https://en.wikipedia.org/wiki/Black_hole', 'title': 'Black hole - Wikipedia', 'language': 'en'}, page_content='\n\n\n\nBlack hole - Wikipedia\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nJump to content\n\n\n\n\n\n\n\nMain menu\n\n\n\n\n\nMain menu\nmove to sidebar\nhide\n\n\n\n\t\tNavigation\n\t\n\n\nMain pageContentsCurrent eventsRandom articleAbout WikipediaContact us\n\n\n\n\n\n\t\tContribute\n\t\n\n\nHelpLearn to editCommunity portalRecent changesUpload fileSpecial pages\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nSearch\n\n\n\n\n\n\n\n\n\n\n\nSearch\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nAppearance\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nDonate\n\nCreate account\n\nLog in\n\n\n\n\n\n\n\n\nPersonal tools\n\n\n\n\n\nDonate Create account Log in\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nContents\nmove to sidebar\nhide\n\n\n\n\n(Top)\n\n\n\n\n\n1\nHistory\n\n\n\n\nToggle History subsection\n\n\n\n\n\n1.1\nGeneral relativity\n\n\n\

In [38]:
# breaking down the documents into smaller chunks
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
)
texts = text_splitter.split_documents(docs)
texts

[Document(metadata={'source': 'https://en.wikipedia.org/wiki/Black_hole', 'title': 'Black hole - Wikipedia', 'language': 'en'}, page_content='Black hole - Wikipedia\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nJump to content\n\n\n\n\n\n\n\nMain menu\n\n\n\n\n\nMain menu\nmove to sidebar\nhide\n\n\n\n\t\tNavigation\n\t\n\n\nMain pageContentsCurrent eventsRandom articleAbout WikipediaContact us\n\n\n\n\n\n\t\tContribute\n\t\n\n\nHelpLearn to editCommunity portalRecent changesUpload fileSpecial pages\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nSearch\n\n\n\n\n\n\n\n\n\n\n\nSearch\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nAppearance\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nDonate\n\nCreate account\n\nLog in\n\n\n\n\n\n\n\n\nPersonal tools\n\n\n\n\n\nDonate Create account Log in\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nContents\nmove to sidebar\nhide\n\n\n\n\n(Top)\n\n\n\n\n\n1\nHistory\n\n\n\n\nToggle History subsection\n\n\n\n\n\n1.1\nGeneral relativity\n\n\n\n\n\n\n\

In [39]:
# creating the embeddings model
embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
embeddings

HuggingFaceEmbeddings(model_name='all-MiniLM-L6-v2', cache_folder=None, model_kwargs={}, encode_kwargs={}, query_encode_kwargs={}, multi_process=False, show_progress=False)

In [40]:
# storing the split documents in a vector database
vectorstore = Chroma.from_documents(
    documents = texts,
    embedding = embeddings,
)
vectorstore

<langchain_chroma.vectorstores.Chroma at 0x2085407ec40>

In [41]:
# converting the vectorstore into a retriever
retriever = vectorstore.as_retriever()
retriever

VectorStoreRetriever(tags=['Chroma', 'HuggingFaceEmbeddings'], vectorstore=<langchain_chroma.vectorstores.Chroma object at 0x000002085407EC40>, search_kwargs={})

In [42]:
# defining the llm
llm = ChatGroq(
    api_key = os.getenv("GROQ_API_KEY"),
    model = "groq/compound-mini",
)
llm

ChatGroq(profile={}, client=<groq.resources.chat.completions.Completions object at 0x0000020892CDBAD0>, async_client=<groq.resources.chat.completions.AsyncCompletions object at 0x0000020892CDAD50>, model_name='groq/compound-mini', model_kwargs={}, groq_api_key=SecretStr('**********'))

In [46]:
# defining the prompt template
system_prompt = ("""
    You are a helpful AI assistant that helps people find information about black holes from wikipedia.
    You're a experienced research scientist that has spent years studying black holes and other cosmic phenomena.
    You have *extensive* knowledge on black holes and can provide detailed explanations on the topic if asked.
    When not explicitly asked for a detailed answer, you keep your answers concise and to the point with three sentences or less.
    You have the ability to form those three sentences in a way that is easy for a layman to understand.
                 
    
    You use this context to answer the question: {context}
""")

prompt_template = ChatPromptTemplate.from_messages([
    ("system", system_prompt),
    ("user", "{input}"),
])
prompt_template

ChatPromptTemplate(input_variables=['context', 'input'], input_types={}, partial_variables={}, messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, template="\n    You are a helpful AI assistant that helps people find information about black holes from wikipedia.\n    You're a experienced research scientist that has spent years studying black holes and other cosmic phenomena.\n    You have *extensive* knowledge on black holes and can provide detailed explanations on the topic if asked.\n    When not explicitly asked for a detailed answer, you keep your answers concise and to the point with three sentences or less.\n    You have the ability to form those three sentences in a way that is easy for a layman to understand.\n\n\n    You use this context to answer the question: {context}\n"), additional_kwargs={}), HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['input'], input_types={}, partial_variab

In [47]:
q_a_chain = create_stuff_documents_chain(llm, prompt_template)
rag_chain = create_retrieval_chain(retriever, q_a_chain)
rag_chain

RunnableBinding(bound=RunnableAssign(mapper={
  context: RunnableBinding(bound=RunnableLambda(lambda x: x['input'])
           | VectorStoreRetriever(tags=['Chroma', 'HuggingFaceEmbeddings'], vectorstore=<langchain_chroma.vectorstores.Chroma object at 0x000002085407EC40>, search_kwargs={}), kwargs={}, config={'run_name': 'retrieve_documents'}, config_factories=[])
})
| RunnableAssign(mapper={
    answer: RunnableBinding(bound=RunnableBinding(bound=RunnableAssign(mapper={
              context: RunnableLambda(format_docs)
            }), kwargs={}, config={'run_name': 'format_inputs'}, config_factories=[])
            | ChatPromptTemplate(input_variables=['context', 'input'], input_types={}, partial_variables={}, messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, template="\n    You are a helpful AI assistant that helps people find information about black holes from wikipedia.\n    You're a experienced research 

In [48]:
response = rag_chain.invoke({"input": "What is a black hole?"})
response

{'input': 'What is a black hole?',
 'context': [Document(id='068eadaf-5d13-4ab3-82be-f87f53d474bc', metadata={'title': 'Black hole - Wikipedia', 'language': 'en', 'source': 'https://en.wikipedia.org/wiki/Black_hole'}, page_content='^ "Pioneering Physicist John Wheeler Dies at 96". Scientific American. Archived from the original on 28 November 2016. Retrieved 27 November 2016.\n\n^ Overbye, Dennis (14 April 2008). "John A. Wheeler, Physicist Who Coined the Term \'Black Hole,\' Is Dead at 96". The New York Times. Archived from the original on 22 November 2016. Retrieved 27 November 2016.\n\n^ a b Frolov, Valeri P.; Zelnikov, Andrei (1 December 2011). Introduction to Black Hole Physics (1st\xa0ed.). Oxford University Press. p.\xa01. ISBN\xa0978-0-19-969229-3.\n\n^ a b Booth, Ivan (2005). "Black-hole boundaries". Canadian Journal of Physics. 83 (11): 1073‚Äì1099. arXiv:gr-qc/0508107. Bibcode:2005CaJPh..83.1073B. doi:10.1139/p05-063.\n\n^ a b Curiel, Erik (2019). "The many definitions of a 

In [49]:
response["answer"]

'A black hole is an extremely dense object whose gravity is so strong that nothing‚Äînot even light‚Äîcan escape once it passes a surface called the event horizon. According to Einstein‚Äôs general relativity, any mass compressed enough will form such a region, and at its center lies a singularity where spacetime curvature becomes infinite. Though it absorbs all incoming light, a black hole can emit faint Hawking radiation, but this is far too weak to see for ordinary stellar‚Äëmass black holes.'

In [52]:
response = rag_chain.invoke({"input": "what did you justs explain to me?"})
response["answer"]


'I haven‚Äôt provided any explanation yet‚Äîso there‚Äôs nothing to summarize. Let me know what aspect of black holes you‚Äôd like to learn about, and I‚Äôll give you a concise answer.'

### Adding Chat History to this bot

In [80]:
from langchain_classic.chains import create_history_aware_retriever
from langchain_core.prompts import MessagesPlaceholder
from langchain_core.messages import AIMessage, HumanMessage

In [81]:
context_system_prompt = (
    "Given the chat history and the latest user message"
    "which might reference previous parts of the conversation,"
    "formulate a standalone question that captures the user's intent"
    "without the chat history. Do not answer the question just reformulate it if needed,"
    "otherwise return the user message as is."
)
context_prompt = ChatPromptTemplate.from_messages([
    ("system", context_system_prompt),
    MessagesPlaceholder(variable_name="chat_history"),
    ("user", "{input}")
])

In [82]:
history_aware_retriever = create_history_aware_retriever(llm, retriever, context_prompt)
history_aware_retriever

RunnableBinding(bound=RunnableBranch(branches=[(RunnableLambda(lambda x: not x.get('chat_history', False)), RunnableLambda(lambda x: x['input'])
| VectorStoreRetriever(tags=['Chroma', 'HuggingFaceEmbeddings'], vectorstore=<langchain_chroma.vectorstores.Chroma object at 0x000002085407EC40>, search_kwargs={}))], default=ChatPromptTemplate(input_variables=['chat_history', 'input'], input_types={'chat_history': list[typing.Annotated[typing.Union[typing.Annotated[langchain_core.messages.ai.AIMessage, Tag(tag='ai')], typing.Annotated[langchain_core.messages.human.HumanMessage, Tag(tag='human')], typing.Annotated[langchain_core.messages.chat.ChatMessage, Tag(tag='chat')], typing.Annotated[langchain_core.messages.system.SystemMessage, Tag(tag='system')], typing.Annotated[langchain_core.messages.function.FunctionMessage, Tag(tag='function')], typing.Annotated[langchain_core.messages.tool.ToolMessage, Tag(tag='tool')], typing.Annotated[langchain_core.messages.ai.AIMessageChunk, Tag(tag='AIMessag

In [None]:
q_a_chain = create_stuff_documents_chain(llm, context_prompt)
rag_chain = create_retrieval_chain(history_aware_retriever, q_a_chain)
rag_chain

RunnableBinding(bound=RunnableAssign(mapper={
  context: RunnableBinding(bound=RunnableBranch(branches=[(RunnableLambda(lambda x: not x.get('chat_history', False)), RunnableLambda(lambda x: x['input'])
           | VectorStoreRetriever(tags=['Chroma', 'HuggingFaceEmbeddings'], vectorstore=<langchain_chroma.vectorstores.Chroma object at 0x000002085407EC40>, search_kwargs={}))], default=ChatPromptTemplate(input_variables=['chat_history', 'input'], input_types={'chat_history': list[typing.Annotated[typing.Union[typing.Annotated[langchain_core.messages.ai.AIMessage, Tag(tag='ai')], typing.Annotated[langchain_core.messages.human.HumanMessage, Tag(tag='human')], typing.Annotated[langchain_core.messages.chat.ChatMessage, Tag(tag='chat')], typing.Annotated[langchain_core.messages.system.SystemMessage, Tag(tag='system')], typing.Annotated[langchain_core.messages.function.FunctionMessage, Tag(tag='function')], typing.Annotated[langchain_core.messages.tool.ToolMessage, Tag(tag='tool')], typing.An

In [91]:
chat_history = []
question = "What is the envent horizon? and just tell me that"

response = rag_chain.invoke({"input": question, "chat_history": chat_history})

chat_history.extend([
    HumanMessage(content=question),
    AIMessage(content=response["answer"]),
])

another_question = "Can you tell me more about that? and also tell me the topic you're expanding on"
another_response = rag_chain.invoke({"input": another_question, "chat_history": chat_history})

for element in chat_history:
    print(element)
print("\n\n")

print(f"{response['answer']}\n\n{another_response['answer']}")

content='What is the envent horizon? and just tell me that' additional_kwargs={} response_metadata={}
content='The event horizon is the invisible boundary surrounding a black hole where the escape speed exceeds the speed of light. Once anything‚Äîmatter, radiation, or information‚Äîcrosses this limit, it can never get out again. Locally it feels like ordinary space; you only notice it because you can‚Äôt send signals back once you‚Äôve passed it.' additional_kwargs={} response_metadata={}



The event horizon is the invisible boundary surrounding a black hole where the escape speed exceeds the speed of light. Once anything‚Äîmatter, radiation, or information‚Äîcrosses this limit, it can never get out again. Locally it feels like ordinary space; you only notice it because you can‚Äôt send signals back once you‚Äôve passed it.

**Topic:**‚ÄØThe nature of black‚Äëhole event horizons and what an infalling observer experiences (the Schwarzschild surface, Finkelstein‚Äôs coordinates, and the