In [None]:
from dotenv import load_dotenv

# Load variables from a local .env file into the process environment
# (presumably the OpenAI credentials used by the chat models below — confirm).
load_dotenv()

### Chatbot with no memory

In [2]:
from langchain_openai import ChatOpenAI
from langchain_core.messages import HumanMessage, AIMessage

In [3]:
# Chat model shared by the following sections; "gpt-4o-mini" is the model name
# passed to the OpenAI chat wrapper.
model = ChatOpenAI(model="gpt-4o-mini")

In [None]:
# No memory wrapper is involved here: the earlier turns are replayed by hand in
# the message list, so everything the model knows about the conversation is
# passed explicitly in this single call.
model.invoke(
    [
        HumanMessage(content="Hi! I'm Bob"),
        AIMessage(content="Hello Bob! How can I assist you today?"),
        HumanMessage(content="What's my name?"),
    ]
)

### Chatbot with memory

In [5]:
from langchain_core.chat_history import (
    BaseChatMessageHistory,
    InMemoryChatMessageHistory,
)
from langchain_core.runnables.history import RunnableWithMessageHistory

# In-process registry of chat histories, keyed by session id.
# It is filled lazily by get_session_history and is lost when the kernel restarts.
store = {}


def get_session_history(session_id: str) -> BaseChatMessageHistory:
    """Return the chat history registered under ``session_id``.

    The first time a session id is seen, an empty
    ``InMemoryChatMessageHistory`` is created and recorded in the module-level
    ``store``; later calls with the same id return that same object.
    """
    try:
        return store[session_id]
    except KeyError:
        # Unknown session: register a fresh history and hand it back.
        history = store[session_id] = InMemoryChatMessageHistory()
        return history


# Wrap the bare model so that each call uses the history object returned by
# get_session_history for the session id supplied in the call's config.
with_message_history = RunnableWithMessageHistory(model, get_session_history)

In [None]:
# The session id in this config selects which stored history the wrapper uses;
# "abc2" has not been used before in this notebook, so it starts out empty.
config = {"configurable": {"session_id": "abc2"}}

# First turn of the "abc2" conversation.
response = with_message_history.invoke(
    [HumanMessage(content="Hi! I'm Bob")],
    config=config,
)

# Last expression of the cell: display only the text of the reply.
response.content

In [None]:
# Follow-up turn that reuses `config` from the previous cell (session "abc2" when
# the cells are run in order), so the stored greeting is available to the model.
# The prompt asks for the names in a fixed bracketed-list format.
response = with_message_history.invoke(
    [
        HumanMessage(
            content="""list the names of people who have said hello to you in our conversation so far.\n
List the names following the below format:\n

[<Name 1>, <Name 2>, <Name 3>, ...]

For example, if the names of people who have said hello to you in our conversation so far are Alice and Bob, you should list them as follows:
    
[Alice, Bob]                  
"""
        )
    ],
    config=config,
)

# Display only the text of the reply.
response.content

In [None]:
# Switch to a different session id. get_session_history creates a separate,
# empty history for "abc3", so nothing from the "abc2" conversation is supplied.
config = {"configurable": {"session_id": "abc3"}}

response = with_message_history.invoke(
    [HumanMessage(content="What's my name?")],
    config=config,
)

# Display only the text of the reply.
response.content

### Chatbot with memory and system prompt

In [None]:
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder

# Prompt = a fixed system instruction (deliberately asking for plausible-sounding
# but wrong answers) followed by a placeholder that receives the conversation
# messages under the variable name "messages".
prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            """You are an unhelpful assisstant. You should make up incorrect answers to questions.\n
              Do not use negatives in your answer, but instead create a made up answer.\n
              Make your answers sound as real as possible, whilst being completely incorrect.""",
        ),
        MessagesPlaceholder(variable_name="messages"),
    ]
)

# Pipe the rendered prompt into the chat model.
chain = prompt | model

# Rebinds `with_message_history`: from here on it wraps the prompt+model chain
# instead of the bare model, still using get_session_history for storage.
with_message_history = RunnableWithMessageHistory(chain, get_session_history)

# New session id for the system-prompt chain, so it starts from an empty history.
config = {"configurable": {"session_id": "abc5"}}

# First turn: a plain list of messages is passed as the chain input.
response = with_message_history.invoke(
    [HumanMessage(content="Hi! I'm Jim")],
    config=config,
)

# Display only the text of the reply.
response.content

In [None]:
# Second turn; reuses `config` from the previous cell (session "abc5" when the
# cells are run in order), so the system prompt and stored history both apply.
response = with_message_history.invoke(
    [HumanMessage(content="what is the purpose of a knife and fork?")],
    config=config,
)

# Display only the text of the reply.
response.content

### Chatbot with memory, system prompt and inputs to prompt template

In [None]:
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder

# Rebinds `prompt`: the system message now has a `{tone}` template variable, so
# the chain input must be a dict providing both "messages" and "tone".
prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            """You are an helpful assisstant. You should provide helpful answers to questions.\n
              Use the tone of {tone} in your answers.\n""",
        ),
        MessagesPlaceholder(variable_name="messages"),
    ]
)

# Pipe the rendered prompt into the chat model.
chain = prompt | model

# Because the input is now a dict with more than one key, input_messages_key
# tells the history wrapper which entry holds the conversation messages.
with_message_history = RunnableWithMessageHistory(
    chain,
    get_session_history,
    input_messages_key="messages",
)

# New session id for the tone-parameterised chain.
config = {"configurable": {"session_id": "abc11"}}

# "tone" fills the {tone} slot of the system prompt; "messages" is the new turn.
response = with_message_history.invoke(
    {"messages": [HumanMessage(content="hi! I'm todd")], "tone": "pirate"},
    config=config,
)

# Display only the text of the reply.
response.content

In [None]:
# Follow-up turn that reuses `config` from the previous cell (session "abc11"
# when the cells are run in order); the same tone is supplied again because the
# prompt requires it on every call.
response = with_message_history.invoke(
    {
        "messages": [
            HumanMessage(content="hole mi amigo, cual es mi nombre?"),
        ],
        "tone": "pirate",
    },
    config=config,
)

# Display only the text of the reply.
response.content

## Trimmer to limit the number of tokens in memory

In [None]:
from langchain_core.messages import SystemMessage, trim_messages
from operator import itemgetter

from langchain_core.runnables import RunnablePassthrough

# Message trimmer. No message list is passed here, so the result is used as a
# pipeline step (see `itemgetter("messages") | trimmer` where the chain is built).
trimmer = trim_messages(
    max_tokens=65,  # token budget for the messages that are kept
    strategy="last",  # keep the most recent messages
    token_counter=model,  # the chat model is used to count tokens
    include_system=True,  # keep the system message
    allow_partial=False,  # do not split individual messages
    start_on="human",  # the kept history should start on a human message
)

# Hand-written conversation used as input for the trimmed chain. With the
# 65-token budget above, the oldest turns (such as the "I'm bob" introduction)
# are presumably dropped — TODO confirm against actual token counts.
messages = [
    SystemMessage(content="you're a good assistant"),
    HumanMessage(content="hi! I'm bob"),
    AIMessage(content="hi!"),
    HumanMessage(content="I like vanilla ice cream"),
    AIMessage(content="nice"),
    HumanMessage(content="whats 2 + 2"),
    AIMessage(content="4"),
    HumanMessage(content="thanks"),
    AIMessage(content="no problem!"),
    HumanMessage(content="having fun?"),
    AIMessage(content="yes!"),
]

# Pipeline: replace the "messages" entry of the input dict with its trimmed
# version, render the prompt (the `prompt` currently in scope, which expects
# "messages" and "tone"), then call the model.
chain = (
    RunnablePassthrough.assign(messages=itemgetter("messages") | trimmer)
    | prompt
    | model
)

# Add per-session history handling around the trimmed chain; the conversation
# messages are found under the "messages" key of the input dict.
with_message_history = RunnableWithMessageHistory(
    chain,
    get_session_history,
    input_messages_key="messages",
)

# New session id for the trimmed chain.
config = {"configurable": {"session_id": "abc20"}}

# Send the whole hand-written conversation plus one new question; the trimmer
# decides how much of it actually reaches the model.
response = with_message_history.invoke(
    {
        "messages": messages + [HumanMessage(content="whats my name?")],
        "tone": "alien",
    },
    config=config,
)

# Display only the text of the reply.
response.content

### Streaming 

In [None]:
# Stream a reply for a new session ("abc15") using whichever
# `with_message_history` is currently in scope, printing each chunk's text as it
# arrives; end="" keeps the pieces on one continuous line.
config = {"configurable": {"session_id": "abc15"}}
stream_input = {
    "messages": [HumanMessage(content="hi! I'm todd.")],
    "tone": "dungeon final enemy boss at start of epic battle",
}
for chunk in with_message_history.stream(stream_input, config=config):
    print(chunk.content, end="")

### Chatbot with memory and RAG

The chatbot uses the retriever to fetch the information most relevant to the query from the vector store, and it does so for every query.

This means each response takes a while, because the information has to be retrieved first, but we get the most relevant information for that query.

This is probably not the most useful approach for our application: for now, we will normally just want to get the information related to energy anomaly detection and use it as context every time.

We may want to give the chatbot agent the ability to decide whether or not to retrieve information from the vector store.

In [18]:
import json

from dotenv import load_dotenv
from langchain.chains import create_history_aware_retriever, create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.globals import set_debug
from langchain_community.chat_message_histories import ChatMessageHistory
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.runnables.history import RunnableWithMessageHistory
from langchain_core.vectorstores import InMemoryVectorStore
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_text_splitters import RecursiveJsonSplitter

# Chat model for the RAG section; temperature=0 is passed to the model wrapper
# (presumably to keep answers as repeatable as possible — confirm intent).
llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)

### Construct retriever ###
# JSON text is UTF-8; name the encoding explicitly so the file is decoded the
# same way on every platform instead of with the locale's default encoding.
with open("../src/example_customer_documents.json", encoding="utf-8") as f:
    json_data = json.load(f)

# Split the parsed JSON into chunks (max_chunk_size=300) and wrap each chunk in
# a document object.
splitter = RecursiveJsonSplitter(max_chunk_size=300)
docs = splitter.create_documents(texts=[json_data])

# Embed the chunks with OpenAI embeddings and index them in an in-memory vector
# store; the retriever is the query interface used by the chains below.
vectorstore = InMemoryVectorStore.from_documents(
    documents=docs,
    embedding=OpenAIEmbeddings(),
)
retriever = vectorstore.as_retriever()


### Contextualize question ###
# System instruction for the question-rewriting step. The trailing backslashes
# join the source lines, so the text is one paragraph without line breaks.
contextualize_q_system_prompt = """Given a chat history and the latest user question \
which might reference context in the chat history, formulate a standalone question \
which can be understood without the chat history. \
Include information relevant to energy anomaly detection in the question, if needed. \
Do NOT answer the question, just reformulate it if needed and otherwise return it as is."""
# Prompt layout: system instruction, then the prior turns ("chat_history"),
# then the latest user input ("input").
contextualize_q_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", contextualize_q_system_prompt),
        MessagesPlaceholder("chat_history"),
        ("human", "{input}"),
    ]
)
# Retriever that is driven by the LLM and the rewriting prompt above, built on
# top of the plain vector-store retriever.
history_aware_retriever = create_history_aware_retriever(llm, retriever, contextualize_q_prompt)


### Answer question ###
# System prompt for the answering step; `{context}` is the slot that receives
# the retrieved documents.
# NOTE(review): the prompt text starts with the doubled word "an an".
qa_system_prompt = """You are an an energy usage anomaly detection assistant. \
You are helping a user to detect anomalies in their energy usage. \
The user will describe their energy usage and you will help them to detect anomalies. \
You will also help the user to identify the causes of the anomalies \
and suggest ways to fix them. \

Use the following pieces of retrieved context to answer the question if needed. \
Follow up on previous parts of customer service chatbot and agent conversations that are not yet resolved. \

{context}"""

# Prompt layout: system instruction (with context), prior turns, latest input.
qa_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", qa_system_prompt),
        MessagesPlaceholder("chat_history"),
        ("human", "{input}"),
    ]
)
# Chain that places the retrieved documents into `{context}` and calls the LLM.
question_answer_chain = create_stuff_documents_chain(llm, qa_prompt)

# Full RAG chain: history-aware retrieval followed by the answering chain.
rag_chain = create_retrieval_chain(history_aware_retriever, question_answer_chain)


### Statefully manage chat history ###
# Histories are kept in a dedicated per-session registry instead of a single
# module-level object that is looked up by name at call time. Rebinding an
# unrelated global in another cell therefore cannot swap the history out from
# under the chain, and distinct session ids get distinct conversations.
rag_session_histories = {}


def get_rag_session_history(session_id: str) -> ChatMessageHistory:
    """Return the chat history for ``session_id``, creating it on first use.

    Args:
        session_id: Value of ``config["configurable"]["session_id"]`` for the call.

    Returns:
        The ``ChatMessageHistory`` registered for that session; repeated calls
        with the same id return the same object.
    """
    if session_id not in rag_session_histories:
        rag_session_histories[session_id] = ChatMessageHistory()
    return rag_session_histories[session_id]


# Wrap the RAG chain with history handling: the user's text is read from
# "input", prior turns are injected as "chat_history", and the "answer" entry of
# the chain output is what gets stored as the assistant's reply.
conversational_rag_chain = RunnableWithMessageHistory(
    rag_chain,
    get_rag_session_history,
    input_messages_key="input",
    history_messages_key="chat_history",
    output_messages_key="answer",
)

In [None]:
from langchain.globals import set_debug

# Keep LangChain's global debug output switched off for the calls below.
set_debug(False)

# First turn of the RAG conversation; only the "answer" entry of the result is
# displayed as the cell output.
conversational_rag_chain.invoke(
    {"input": "I increased my thermostat temperature during the day"},
    {"configurable": {"session_id": "unused"}},
)["answer"]

In [None]:
# Follow-up turn with the same session id as the previous call, so it builds on
# the stored conversation; only the "answer" entry is displayed.
conversational_rag_chain.invoke(
    {
        "input": "It reduced my energy consumption by 5%, is this now inline with normal levels?",
    },
    {"configurable": {"session_id": "unused"}},
)["answer"]

### Chatbot that is passed a fixed RAG context

Simplest implementation of RAG: just passing a fixed context from the vector store using the retriever and a fixed query. 

In [None]:
import json
from langchain_openai import ChatOpenAI
from langchain_core.vectorstores import InMemoryVectorStore
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import RecursiveJsonSplitter
from langchain.chains import create_history_aware_retriever
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.messages import HumanMessage, AIMessage

from dotenv import load_dotenv

# Load variables from a local .env file again, so this cell does not rely on the
# first cell of the notebook having been run.
load_dotenv()

# Rebinds `model` with the same model name as earlier in the notebook.
model = ChatOpenAI(model="gpt-4o-mini")

# Local document retrieval.
# JSON text is UTF-8; name the encoding explicitly so the file is decoded the
# same way on every platform instead of with the locale's default encoding.
with open("../src/example_customer_documents.json", encoding="utf-8") as f:
    json_data = json.load(f)

# Split the parsed JSON into chunks (max_chunk_size=300) and wrap each chunk in
# a document object.
splitter = RecursiveJsonSplitter(max_chunk_size=300)
docs = splitter.create_documents(texts=[json_data])

# Embed the chunks with OpenAI embeddings and index them in an in-memory vector
# store.
vectorstore = InMemoryVectorStore.from_documents(
    documents=docs,
    embedding=OpenAIEmbeddings(),
)
retriever = vectorstore.as_retriever()

# Documents retrieved once for a fixed query.
# NOTE(review): `context` is not referenced again in the code visible in this
# cell; the retrieval chain built below runs its own retrieval on every call.
context = retriever.invoke("energy anomaly detection smart meter energy consumption")

# Prompt that asks the LLM to turn the conversation into a search query:
# prior turns ("chat_history"), the latest user input ("input"), and a final
# instruction to produce a query.
prompt_search_query = ChatPromptTemplate.from_messages(
    [
        MessagesPlaceholder(variable_name="chat_history"),
        ("user", "{input}"),
        (
            "user",
            "Given the above conversation, generate a search query to look up to get information relevant to the conversation",
        ),
    ]
)

# Sends the prompt (chat history + user input) to the LLM to generate a search
# query; the retriever then uses that query to fetch the relevant documents.
retriever_chain = create_history_aware_retriever(model, retriever, prompt_search_query)

# Prompt used to answer with the retrieved documents: system instruction with
# the `{context}` slot, prior turns ("chat_history"), then the user input.
# The system text uses real newlines ("\n\n") to separate the instruction from
# the context; the doubled backslashes used previously put the literal
# characters backslash + "n" into the prompt instead of a blank line.
prompt_get_answer = ChatPromptTemplate.from_messages(
    [
        ("system", "Answer the user's questions based on the below context:\n\n{context}"),
        MessagesPlaceholder(variable_name="chat_history"),
        ("user", "{input}"),
    ]
)

# Chain that places the retrieved documents into `{context}` and calls the LLM.
document_chain = create_stuff_documents_chain(model, prompt_get_answer)

# Full chain: history-aware retrieval followed by the answering chain.
retrieval_chain = create_retrieval_chain(retriever_chain, document_chain)

# Hand-written prior conversation, passed explicitly to the chain as a plain
# list of messages (no history wrapper is used in this cell).
chat_history = [
    HumanMessage(content="what have i tried to do already to fix my energy anomaly?"),
    AIMessage(content="Yes"),
]
# Ask a follow-up that only makes sense in light of the history above, then
# print the "answer" entry of the chain's result.
response = retrieval_chain.invoke({"chat_history": chat_history, "input": "How?"})
print(response["answer"])