In [1]:
import openai
from dotenv import load_dotenv
import os
from IPython.display import display, HTML, JSON, Markdown
from langchain import PromptTemplate
from langchain_core.messages import HumanMessage
from langchain_openai import AzureChatOpenAI
from langchain.chains import LLMChain

# Load variables from a .env file (if present) into the process environment,
# then read the three settings this notebook defines. Any variable that is not
# set comes back as None.
load_dotenv()
OPENAI_API_KEY, OPENAI_DEPLOYMENT_ENDPOINT, OPENAI_GPT35_DEPLOYMENT_NAME = (
    os.environ.get(variable_name)
    for variable_name in (
        "OPENAI_API_KEY",
        "OPENAI_DEPLOYMENT_ENDPOINT",
        "OPENAI_GPT35_DEPLOYMENT_NAME",
    )
)


In [2]:
def init_llm(
    model=OPENAI_GPT35_DEPLOYMENT_NAME,
    deployment_name=OPENAI_GPT35_DEPLOYMENT_NAME,
    openai_api_version="2024-02-15-preview",
    temperature=0,
    max_tokens=400,
):
    """Create an AzureChatOpenAI chat model.

    Args:
        model: Model name forwarded to AzureChatOpenAI. Defaults to the value
            of OPENAI_GPT35_DEPLOYMENT_NAME read when this function is defined.
        deployment_name: Azure deployment to call. Same default as ``model``.
        openai_api_version: API version string forwarded to the client.
        temperature: Sampling temperature forwarded to the client.
        max_tokens: Completion token limit forwarded to the client.

    Returns:
        The constructed AzureChatOpenAI instance.
    """
    # The endpoint is not a parameter: it always comes from the module-level
    # OPENAI_DEPLOYMENT_ENDPOINT setting.
    client_options = {
        "deployment_name": deployment_name,
        "model": model,
        "openai_api_version": openai_api_version,
        "azure_endpoint": OPENAI_DEPLOYMENT_ENDPOINT,
        "temperature": temperature,
        "max_tokens": max_tokens,
    }
    return AzureChatOpenAI(**client_options)

# Build the chat model with init_llm's default arguments; the chain created
# further down receives this object as its `llm`.
llm = init_llm()

### **Retain conversation history** 

##### Large Language Models (LLMs) are stateless: they don’t retain any information about the conversation history.
Each request is independent of the previous one. Chatbots keep the conversation history in memory and use it to generate the next response, which is why they are able to produce more coherent replies.

Previously we saw that a model fails to answer the question that requires context. We can solve this problem by retaining the conversation history. We can do this by using the LangChain ConversationBufferMemory.

In [3]:
# Role-based prompt with {profession}, {expertise}, {history} and {question}
# placeholders.
# NOTE(review): in the visible notebook this string is never formatted or
# passed to a chain before `template` is reassigned in the next cell — confirm
# whether this cell is still needed.
template = """You are a {profession} answering users questions. 
            More specifically, you are an expert in {expertise}. Answer in a clear and concise manner. Assume that the user is not a subject expert.
            If a question is not clear or not related to {expertise} say: it's not clear or the question is not related to {expertise}.

            Current conversation:
            {history}
            
            USER: {question}
            ASSISTANT:
            """

In [4]:
# Supply a custom prompt to ConversationChain and label the AI speaker "AI Assistant".
from langchain.prompts.prompt import PromptTemplate
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationChain

# Conversation prompt: {history} is filled from the chain's memory and
# {input} with the new user message.
template = (
    "The following is a friendly conversation between a human and an AI. "
    "The AI is talkative and provides lots of specific details from its context. "
    "If the AI does not know the answer to a question, it truthfully says it does not know.\n"
    "\n"
    "Current conversation:\n"
    "{history}\n"
    "Human: {input}\n"
    "AI Assistant:"
)

PROMPT = PromptTemplate(template=template, input_variables=["history", "input"])

# The memory's speaker prefixes mirror the "Human:" / "AI Assistant:" labels
# used in the prompt, and its memory_key matches the {history} placeholder.
conversation = ConversationChain(
    llm=llm,
    prompt=PROMPT,
    memory=ConversationBufferMemory(
        human_prefix="Human",
        ai_prefix="AI Assistant",
        memory_key="history",
    ),
    verbose=False,
)

# First turn of the conversation; render the model's reply as Markdown.
res = conversation.invoke(input="Hello!")
display(Markdown(res["response"]))

Hello! How can I assist you today?

In [5]:
# Second turn, sent through the same `conversation` chain (and therefore the
# same memory object) as the greeting above.
res = conversation.invoke(input="I need you to tell me a joke!")
display(Markdown(res["response"]))

Sure, I'd be happy to tell you a joke! Here it is: Why don't scientists trust atoms? Because they make up everything!

In [6]:
# Follow-up that never mentions "joke": it is only answerable using the
# earlier turns held by the chain's memory.
res = conversation.invoke(input="Now do one about Portugal!")
display(Markdown(res["response"]))

Of course! Here's a joke about Portugal: Why did the Portuguese football team bring string to the game? Because they wanted to tie the score!

In [7]:
# Another context-dependent follow-up: "it" refers back to the joke requested
# in the previous turns.
res = conversation.invoke(input="I want it to be about Lisbon.")
display(Markdown(res["response"]))

Sure, here's a joke about Lisbon: Why did the baker in Lisbon always win awards? Because he always kneaded the dough-lightful pastries!

In [8]:
# Show the message objects currently stored in the chain's memory.
conversation.memory.chat_memory.messages

[HumanMessage(content='Hello!'),
 AIMessage(content='Hello! How can I assist you today?'),
 HumanMessage(content='I need you to tell me a joke!'),
 AIMessage(content="Sure, I'd be happy to tell you a joke! Here it is: Why don't scientists trust atoms? Because they make up everything!"),
 HumanMessage(content='Now do one about Portugal!'),
 AIMessage(content="Of course! Here's a joke about Portugal: Why did the Portuguese football team bring string to the game? Because they wanted to tie the score!"),
 HumanMessage(content='I want it to be about Lisbon.'),
 AIMessage(content="Sure, here's a joke about Lisbon: Why did the baker in Lisbon always win awards? Because he always kneaded the dough-lightful pastries!")]

#### More conversation memory types.

Keeping full conversation history could be expensive and we can hit the Azure API limits. LangChain provides different types of conversation memory that can be used to keep the conversation history and mitigate the limits issues.

**1. ConversationBufferWindowMemory** - keeps the last N messages in the conversation history.

**2. ConversationTokenBufferMemory** - keeps the last N tokens in the conversation history.

**3. ConversationSummaryBufferMemory** - keeps a summary of the conversation over time.

Additional Memory Type includes:

**4. Vector data memory** - stores the text in a vector DB and retrieves the most semantically similar text.

**5. Entity memories** - use an LLM that remembers details about specific entities.

Note: You can use multiple memories at the same time. 

Finally, you can store the conversation history in a conventional database such as SQL or NoSQL.


In [9]:
from langchain.memory import ConversationBufferWindowMemory

# Window memory configured with k=3; two exchanges are recorded by hand below.
memory = ConversationBufferWindowMemory(k=3)

sample_exchanges = (
    ("hi", "hello"),
    ("What's your name", "My name is John"),
)
for human_text, ai_text in sample_exchanges:
    memory.save_context({"input": human_text}, {"output": ai_text})

# Last expression of the cell: displays what the memory would hand to a prompt.
memory.load_memory_variables({})

{'history': "Human: hi\nAI: hello\nHuman: What's your name\nAI: My name is John"}