In [1]:
from dotenv import load_dotenv,find_dotenv
from typing import Optional,List

from langchain_core.pydantic_v1 import BaseModel, Field
_ = load_dotenv(find_dotenv())
from langchain_openai import ChatOpenAI

chatbot = ChatOpenAI(model="gpt-4o-mini")

In [4]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_community.document_loaders import TextLoader
from langchain_chroma import Chroma
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain

loader=TextLoader("./data/be_good.txt")
docs=loader.load()

splitter=RecursiveCharacterTextSplitter(chunk_size=1000,chunk_overlap=100)
chunks=splitter.split_documents(docs)
vs=Chroma.from_documents(chunks,embedding=OpenAIEmbeddings())
r=vs.as_retriever()

system_prompt = (
    "You are an assistant for question-answering tasks. "
    "Use the following pieces of retrieved context to answer "
    "the question. If you don't know the answer, say that you "
    "don't know. Use three sentences maximum and keep the "
    "answer concise."
    "\n\n"
    "{context}"
)
p=ChatPromptTemplate.from_messages([
    ("system",system_prompt),
    ("human","{input}")
])
doc_chain=create_stuff_documents_chain(chatbot,p)
rag_chain=create_retrieval_chain(r,doc_chain)
o=rag_chain.invoke({"input": "What is this article about?"})
print(o)

{'input': 'What is this article about?', 'context': [Document(metadata={'source': './data/be_good.txt'}, page_content='Be good'), Document(metadata={'source': './data/be_good.txt'}, page_content='Be good'), Document(metadata={'source': './data/be_good.txt'}, page_content="or both of the principles we began with is false.  Or we have a new\nidea.I suspect it's the latter, because as soon as this thought occurred\nto me, a whole bunch of other things fell into place.ExamplesFor example, Craigslist.  It's not a charity, but they run it like\none.  And they're astoundingly successful.  When you scan down the\nlist of most popular web sites, the number of employees at Craigslist\nlooks like a misprint. Their revenues aren't as high as they could\nbe, but most startups would be happy to trade places with them.In Patrick O'Brian's novels, his captains always try to get upwind\nof their opponents.  If you're upwind, you decide when and if to\nengage the other ship.  Craigslist is effectively u

In [5]:
o["answer"]

'The article discusses the success of Craigslist, highlighting its unique operational approach that resembles a charity despite being a for-profit entity. It emphasizes how Craigslist manages to thrive with fewer employees and lower revenues compared to other startups, allowing it to maintain control over its business decisions. Additionally, it uses a metaphor comparing Craigslist\'s strategic position to being "upwind" in a naval battle, suggesting it has advantageous leverage in the market.'

In [6]:
rag_chain.invoke({"input": "What was my previous question"})

{'input': 'What was my previous question',
 'context': [Document(metadata={'source': './data/be_good.txt'}, page_content='Be good'),
  Document(metadata={'source': './data/be_good.txt'}, page_content='Be good'),
  Document(metadata={'source': './data/be_good.txt'}, page_content="about malaria.  But I've been kicking ideas around long enough to\nknow when I come across a powerful one.One way to guess how far an idea extends is to ask yourself at what\npoint you'd bet against it.  The thought of betting against benevolence\nis alarming in the same way as saying that something is technically\nimpossible.  You're just asking to be made a fool of, because these\nare such powerful forces.  [2]For example, initially I thought maybe this principle only applied\nto Internet startups.  Obviously it worked for Google, but what\nabout Microsoft?  Surely Microsoft isn't benevolent?  But when I\nthink back to the beginning, they were.  Compared to IBM they were\nlike Robin Hood.  When IBM introduced

Create a ChatPromptTemplate able to contextualize inputs

In [8]:
from langchain_core.prompts import MessagesPlaceholder
context_system_prompt=("Given a chat history and the latest user question "
    "which might reference context in the chat history, "
    "formulate a standalone question which can be understood "
    "without the chat history. Do NOT answer the question, "
    "just reformulate it if needed and otherwise return it as is.")
context_prompt=ChatPromptTemplate.from_messages([
    ("system",context_system_prompt),
    ("human","{input}"),
    MessagesPlaceholder("chat_history"),
    ])

In [9]:
from langchain.chains import create_history_aware_retriever

history_aware_retriever = create_history_aware_retriever(
    chatbot, r, context_prompt
)

In [10]:
qa_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        MessagesPlaceholder("chat_history"),
        ("human", "{input}"),
    ]
)
question_answer_chain = create_stuff_documents_chain(chatbot, qa_prompt)

rag_chain_context = create_retrieval_chain(history_aware_retriever, question_answer_chain)

In [14]:
from langchain_core.messages import AIMessage, HumanMessage

chat_history = []

question = "What is this article about?"

ai_msg_1 = rag_chain_context.invoke({"input": question, "chat_history": chat_history})

chat_history.extend(
    [
        HumanMessage(content=question),
        AIMessage(content=ai_msg_1["answer"]),
    ]
)
ai_msg_1


{'input': 'What is this article about?',
 'chat_history': [HumanMessage(content='What is this article about?'),
  AIMessage(content='The article discusses the success of Craigslist, highlighting how it operates similarly to a charity and has a surprisingly low number of employees compared to its popularity. It suggests that Craigslist is in a strong position ("upwind") regarding its revenues, implying that they could choose to pursue greater profits if desired. Overall, it reflects on the unconventional yet effective business model of Craigslist.')],
 'context': [Document(metadata={'source': './data/be_good.txt'}, page_content='Be good'),
  Document(metadata={'source': './data/be_good.txt'}, page_content='Be good'),
  Document(metadata={'source': './data/be_good.txt'}, page_content="or both of the principles we began with is false.  Or we have a new\nidea.I suspect it's the latter, because as soon as this thought occurred\nto me, a whole bunch of other things fell into place.ExamplesFo

In [15]:
second_question = "What was my previous question about?"

ai_msg_2 = rag_chain_context.invoke({"input": second_question, "chat_history": chat_history})

print(ai_msg_2["answer"])

Your previous question was about the topic or content of the article.


In [16]:
ai_msg_2

{'input': 'What was my previous question about?',
 'chat_history': [HumanMessage(content='What is this article about?'),
  AIMessage(content='The article discusses the success of Craigslist, highlighting how it operates similarly to a charity and has a surprisingly low number of employees compared to its popularity. It suggests that Craigslist is in a strong position ("upwind") regarding its revenues, implying that they could choose to pursue greater profits if desired. Overall, it reflects on the unconventional yet effective business model of Craigslist.')],
 'context': [Document(metadata={'source': './data/be_good.txt'}, page_content="term the most important may be the potential employees.  I think\neveryone knows now that \ngood hackers are much better than mediocre\nones.  If you can attract the best hackers to work for you, as\nGoogle has, you have a big advantage.  And the very best hackers\ntend to be idealistic.  They're not desperate for a job.  They can\nwork wherever they wa

In [20]:
from langchain_community.chat_message_histories import ChatMessageHistory
from langchain_core.chat_history import BaseChatMessageHistory
from langchain_core.runnables.history import RunnableWithMessageHistory

store = {}


def get_session_history(session_id: str) -> BaseChatMessageHistory:
    if session_id not in store:
        store[session_id] = ChatMessageHistory()
    return store[session_id]


conversational_rag_chain = RunnableWithMessageHistory(
    rag_chain_context,
    get_session_history,
    input_messages_key="input",
    history_messages_key="chat_history",
    output_messages_key="answer",
)

In [21]:
conversational_rag_chain.invoke(
    {"input": "What is this article about?"},
    config={
        "configurable": {"session_id": "001"}
    },  # constructs a key "001" in `store`.
)["answer"]

'The article discusses the unique business model of Craigslist, highlighting its charitable-like operations and successful outcomes despite lower revenues compared to potential earnings. It compares Craigslist\'s strategic advantage to being "upwind" in naval tactics, allowing it to control engagement with users without forcing a subpar product on them. Overall, it suggests that Craigslist has found a new approach to success that deviates from traditional principles.'

In [22]:
conversational_rag_chain.invoke(
    {"input": "What was my last question?"},
    config={
        "configurable": {"session_id": "001"}
    },  # constructs a key "001" in `store`.
)["answer"]

'Your last question was, "What is this article about?"'

In [23]:
conversational_rag_chain.invoke(
    {"input": "What was my last question?"},
    config={
        "configurable": {"session_id": "002"}
    },  # constructs a key "001" in `store`.
)["answer"]

"I don't know."