In [33]:
from langchain.chat_models import ChatOpenAI
from langchain.prompts import PromptTemplate, ChatPromptTemplate
from langchain.callbacks import StreamingStdOutCallbackHandler
from dotenv import dotenv_values

# Read the key/value pairs stored in the local .env file
env_vars = dotenv_values('.env')

# OpenAI API key used by the models created in this notebook
api_key = env_vars.get('OPENAI_API_KEY')

# Chat model with streaming enabled and a stdout streaming callback attached
chat = ChatOpenAI(
    openai_api_key=api_key,
    temperature=0.9,
    streaming=True,
    callbacks=[StreamingStdOutCallbackHandler()],
)




# Chat Prompt Template Practice

In [None]:
# Role-based (system/human) prompts are built with ChatPromptTemplate;
# PromptTemplate is the plain-string prompt class and has no from_messages().
medic_prompt = ChatPromptTemplate.from_messages([
    ("system", "You are a high-speed combat medic. You strictly follow TCCC guideline to treat patients in the battlefields. Be specific with treatment such as the dose of medication and timeline to treat patietn"),
    ("human", "I want to treat patients with {injury} injury"),
])

medic_chain = medic_prompt | chat

infantry_prompt = ChatPromptTemplate.from_messages([
    ("system", "You are an infantry man. You are in a battlefield. You can choose very specific injuries given types of injury. Be creative and detailed when making injuries"),
    ("human", "our platoon got a patient with {type} injury, possible {complication} within 5 min without treatment"),
])

infantry_chain = infantry_prompt | chat

# The infantry chain produces a chat message; pass only its text on, so that
# {injury} is filled with the description itself rather than the message's repr.
final_chain = {"injury": infantry_chain | (lambda message: message.content)} | medic_chain

final_chain.invoke({
    "type": "gun shot wounds to the chest",
    "complication": "tension pneumothorax",
})

Create a chain (think about why the library is called LangChain).<br>
Internally, LangChain formats the template, calls chat.predict, and runs the parser for you.<br>
The '|' syntax allows much more than this: <br>
For example, suppose there are chain_1 and chain_2, each consisting of a different template and an output parser.<br>
all = chain_1 | chain_2 | new_outputparser() combines all of the chains.<br>
chain = template | chat | CommaOutputParser()<br>
The template takes a dictionary as input and outputs a PromptValue | the chat model takes the template's output as input and returns a ChatMessage<br>
| the output parser takes the ChatMessage as input and returns the final output. <br>
By using a chain with the invoke method, we can remove many lines of code. <br>

# FewShotPrompt Template<br>
Few-shot means giving the model a few examples so that it answers in the format you want.

In [None]:
# Explicit constructor form: without the from_template() shortcut, both the
# template string and its input variables are spelled out by hand.
t = PromptTemplate(
    input_variables=["country"],
    template="What is the capital of {country}",
)

In [None]:
# Shortcut form: build the prompt from the template string alone.
# A prompt template can also be saved to disk and loaded back later.
t = PromptTemplate.from_template("What is the capital of {country}?")

# Calling t.format() with no arguments raises a validation error because the
# template expects a value for {country}; supply it as a keyword argument.
t.format(country="France")


# Step No.1
 - set an example, you can bring the example from a database

In [None]:
# Few-shot examples: each entry pairs a question with an answer written in the
# layout the model is expected to imitate.
examples = [
    {
        "question": "What do you know about France?",
        "answer": """
            Here is what I know:
            Capital: Paris
            Language: French
            Food: Wine and Cheese
            Currency: Euro
            """,
    },
    {
        "question": "What do you know about Italy?",
        "answer": """
            I know this:
            Capital: Rome
            Language: Italian
            Food: Pizza and Pasta
            Currency: Euro
            """,
    },
    {
        "question": "What do you know about Greece?",
        "answer": """
            I know this:
            Capital: Athens
            Language: Greek
            Food: Souvlaki and Feta Cheese
            Currency: Euro
            """,
    },
]

In [None]:
"""
chat.predict("What do you know about France?") gives a very long answer.
We want the model to answer in the same format as the examples above."""

# Example

# FewShotPrompt Template

In [None]:
from langchain.prompts.few_shot import FewShotPromptTemplate

In [None]:

# Each example is rendered with a small template whose placeholders, {question}
# and {answer}, match the keys of the example dicts. It could equally be written
# as a multi-line string:
#
#   example_template = """
#       Human: {question}
#       AI: {answer}
#   """
example_prompt = PromptTemplate.from_template("Human:{question}\nAI:{answer}")

prompt = FewShotPromptTemplate(
    examples=examples,
    example_prompt=example_prompt,
    # The suffix comes after the formatted examples; here it carries the actual question.
    suffix="Human: What do you know about {country}?",
    # Variables used by the suffix; formatting without them raises a validation error.
    input_variables=["country"],
)

chain = prompt | chat

chain.invoke({"country": "Turkey"})

# FewShotChatMessagePromptTemplate

In [None]:
from langchain.prompts.few_shot import FewShotChatMessagePromptTemplate

In [None]:
# Template for one example, expressed as a human/AI message pair
example_prompt = ChatPromptTemplate.from_messages(
    [
        ("human", "{question}"),
        ("ai", "{answer}"),
    ]
)
# Chat-message few-shot prompts need neither a suffix nor input variables
example_prompt = FewShotChatMessagePromptTemplate(
    examples=examples,
    example_prompt=example_prompt,
)
# The final human message carries only the country name (no fully worded
# question), and the examples all concern other countries.
final_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", "You are a geography expert. You only give short answers"),
        example_prompt,
        ("human", "{country}"),
    ]
)
chain = final_prompt | chat
# Even so, the reply comes back in the format illustrated by the examples.
chain.invoke({"country": "South Africa"})

# Length Based Example Selector

In [None]:
from langchain.prompts.example_selector import LengthBasedExampleSelector

In [None]:
# How each selected example is rendered inside the prompt
example_prompt = PromptTemplate.from_template("Human:{question}\nAI:{answer}")

# The selector caps the total length of the examples, which determines how many
# of them end up in the prompt.
example_selector = LengthBasedExampleSelector(
    example_prompt=example_prompt,
    examples=examples,
    max_length=100,
)

prompt = FewShotPromptTemplate(
    example_selector=example_selector,
    example_prompt=example_prompt,
    input_variables=["country"],
    suffix="Human: What do you know about {country}?",
)
prompt.format(country="Brazil")

# Random Example Selector

In [None]:
from langchain.prompts.example_selector.base import BaseExampleSelector

In [None]:
# Custom selector: choose one example at random
import random


class RandomExampleSelector(BaseExampleSelector):
    """Example selector that returns a single, randomly chosen example."""

    def __init__(self, examples):
        """Create the selector from ``examples``, a list of example dicts."""
        # Keep a private copy so add_example() never grows the caller's list
        # (the same `examples` list is shared with other prompts in this notebook).
        self.examples = list(examples)

    def add_example(self, example):
        """Add ``example`` to the pool of selectable examples."""
        self.examples.append(example)

    def select_examples(self, input_variables):
        """Return a list containing one randomly chosen example.

        ``input_variables`` is part of the selector interface and is not used
        here. The result is always a list (possibly empty), because the caller
        expects a list of examples.
        """
        # random.choice raises IndexError on an empty sequence; an empty pool
        # simply yields no examples.
        if not self.examples:
            return []
        return [random.choice(self.examples)]

In [None]:
# How the chosen example is rendered inside the prompt
example_prompt = PromptTemplate.from_template("Human:{question}\nAI:{answer}")

# Selector that picks from the shared example list
example_selector = RandomExampleSelector(examples=examples)

prompt = FewShotPromptTemplate(
    example_selector=example_selector,
    example_prompt=example_prompt,
    input_variables=["country"],
    suffix="Human: What do you know about {country}?",
)
prompt.format(country="Brazil")

# Serialization and Composition

In [None]:
from langchain.prompts import load_prompt


Two types of prompt template:
- .json
- .yaml (easier)

In [None]:
# Load the prompt definition stored in the JSON file
prompt = load_prompt("./prompt.json")

# Streaming chat model, configured as at the top of the notebook
chat = ChatOpenAI(
    openai_api_key=api_key,
    temperature=0.9,
    streaming=True,
    callbacks=[StreamingStdOutCallbackHandler()],
)
prompt.format(country="Germany")

In [None]:
# Load the prompt definition stored in the YAML file
prompt = load_prompt("./prompt.yaml")

# Streaming chat model, configured as at the top of the notebook
chat = ChatOpenAI(
    openai_api_key=api_key,
    temperature=0.9,
    streaming=True,
    callbacks=[StreamingStdOutCallbackHandler()],
)
prompt.format(country="Germany")

### How to gather multiple prompts

In [None]:
from langchain.prompts.pipeline import PipelinePromptTemplate

There are 4 prompts: 3 are the individual prompts, and the last one combines the other three.

In [None]:
# Sub-prompt 1: sets the role; needs {character}
intro = PromptTemplate.from_template(
    """
    You are a role playing assistant.
    And you are impersonating a {character}
"""
)

# Sub-prompt 2: shows one sample exchange; needs {example_question} and {example_answer}
example = PromptTemplate.from_template(
    """
    This is an example of how you talk:

    Human: {example_question}
    You: {example_answer}
"""
)

# Sub-prompt 3: poses the actual question; needs {question}
start = PromptTemplate.from_template(
    """
    Start now!

    Human: {question}
    You:
"""
)

# Outer template with one placeholder per sub-prompt (intro, example, start).
# The separator lines are truly empty: the stray trailing spaces that an editor
# had left on them were being sent to the model as part of the prompt.
final = PromptTemplate.from_template(
    """
    {intro}

    {example}

    {start}
"""
)

In [None]:

# (placeholder name used in `final`, prompt object) pairs
prompts = [("intro", intro), ("example", example), ("start", start)]

full_prompt = PipelinePromptTemplate(
    pipeline_prompts=prompts,
    final_prompt=final,
)
chain = full_prompt | chat

# One dict supplies the variables of all three sub-prompts
chain.invoke(
    {
        "character": "Pirates",
        "example_question": "What is your location?",
        "example_answer": "Arrrr! That is a secret!! Arg rg",
        "question": "What is your favorite food?",
    }
)

# Caching
Disable it when working on others.

In [34]:
from langchain.globals import set_llm_cache, set_debug
from langchain.cache import InMemoryCache, SQLiteCache

In [None]:
# InMemoryCache keeps the model's answers in memory
set_llm_cache(InMemoryCache())

# Debug mode leaves a log of what the model/chain is doing
set_debug(True)

# Chat model without streaming and without the stdout callback
chat = ChatOpenAI(openai_api_key=api_key, temperature=0.9)
chat.predict("How do you make an Italian pasta")

In [None]:
# Asking the identical question again returns the same answer immediately,
# without any further cost.
chat.predict("How do you make an Italian pasta")

In [None]:
# SQLiteCache saves the cached answers to a database file instead of memory
set_llm_cache(SQLiteCache('cache.db'))
# See the integrations section of the LangChain documentation for other database options if SQLite does not suit you.

In [None]:
# Generating an answer creates a database file with the name that was passed to SQLiteCache.
chat.predict("How do you make an Italian pasta")

# Serialization
- How to know how much money spent on the model
- How to save and load the model

### Check how much it costs to generate an answer

In [None]:
from langchain.callbacks import get_openai_callback

In [None]:
# Everything called inside the block is tracked by `usage`, which is printed
# after both predictions have run.
with get_openai_callback() as usage:
    a = chat.predict("What is the recipe of soju?")
    b = chat.predict("What is the recipe of bread?")
    print(usage)

### Serialization: 

In [None]:
from langchain.llms.openai import OpenAI

# OpenAI is the text-completion wrapper, so it needs a completion model.
# Chat models such as gpt-3.5-turbo-16k belong with ChatOpenAI instead.
chat2 = OpenAI(
    openai_api_key=api_key,
    temperature=0.1,
    max_tokens=450,
    model="gpt-3.5-turbo-instruct",
)
# Save the model configuration to disk so that it can be loaded back later
# with load_llm("model.json").
chat2.save("model.json")

In [None]:
# Load a saved model
from langchain.llms.loading import load_llm
# NOTE(review): the original author reported that this cell does not work.
# load_llm reads model.json from the working directory, so that file must have
# been written beforehand (e.g. with chat2.save("model.json")) — TODO confirm
# that a missing file is the cause.
chat3 = load_llm("model.json")
print(chat3)


# ConversationBufferMemory
 - There are 5 types of memory:
 1. Conversation buffer memory: saves the whole conversation, so the memory keeps growing.
 - Without memory, a chatbot cannot remember the previous conversation, which makes follow-up questions impossible.

In [None]:
from langchain.memory import ConversationBufferMemory
# Buffer memory that stores the conversation as it is
memory = ConversationBufferMemory(return_messages=True)

# Record one exchange: the human input and the AI output
memory.save_context({"input": "Hi!"}, {"output": "How are you?"})

# Show the conversation history. Every repetition makes the memory grow,
# which costs more money.
memory.load_memory_variables({})

# ConversationBufferWindowMemory
- Save a certain part of the conversation ex. a most recent few messages set by you. 

In [None]:
from langchain.memory import ConversationBufferWindowMemory

# Window memory: k is the number of most recent exchanges to keep
# (one exchange = one human input plus one AI output).
memory = ConversationBufferWindowMemory(k=4, return_messages=True)

In [None]:
def add_message(input, output):
    """Save one human/AI exchange into the notebook-level ``memory`` object."""
    human_side = {"input": input}
    ai_side = {"output": output}
    memory.save_context(human_side, ai_side)

In [None]:
# Save five exchanges, numbered 1 to 5, into a window of four
for turn in range(1, 6):
    add_message(turn, turn)
# With k = 4, the first exchange falls outside the window and is no longer returned.
memory.load_memory_variables({})

# ConversationSummaryMemory
 - Makes use of an LLM.
 - Makes a summary of the conversation.
 - Uses more tokens in the beginning, but fewer tokens as the conversation progresses.

In [None]:
from langchain.memory import ConversationSummaryMemory

# The summary memory is given the chat model, which it uses to summarize the conversation.
memory = ConversationSummaryMemory(llm=chat)

In [None]:
def add_message(input, output):
    """Save one human/AI exchange into the notebook-level ``memory`` object."""
    human_side = {"input": input}
    ai_side = {"output": output}
    memory.save_context(human_side, ai_side)


def get_history():
    """Return what ``memory`` currently holds."""
    history = memory.load_memory_variables({})
    return history


add_message("Hi, I am Minho, I live in Olympia, WA", "Wow, that is so cool")

In [None]:
# Add a second exchange, then display what the memory now holds.
add_message("This place is really rainy", "Wow, that is also cool")
get_history()

# ConversationSummaryBufferMemory
- a mix of summary memory and buffer window memory
- keeps the exchanged messages as they are until the buffer reaches the limit (max_token_limit)
- then summarizes the older part of the conversation

In [35]:
from langchain.memory import ConversationSummaryBufferMemory

# Summary-buffer memory with a 150-token limit; the chat model is passed in as its llm
memory = ConversationSummaryBufferMemory(
    return_messages=True,
    max_token_limit=150,
    llm=chat,
)

In [None]:
def add_message(input, output):
    """Save one human/AI exchange into the notebook-level ``memory`` object."""
    human_side = {"input": input}
    ai_side = {"output": output}
    memory.save_context(human_side, ai_side)


def get_history():
    """Return what ``memory`` currently holds."""
    history = memory.load_memory_variables({})
    return history

In [None]:
# Save several exchanges, then display what the memory holds.
# NOTE(review): the last exchange is saved twice — presumably to add tokens
# toward max_token_limit; confirm that the repetition is intentional.
add_message("Hi, I am Minho, I live in Olympia, WA", "Wow, that is so cool")
add_message("This place is really rainy", "Wow, that is also cool")
add_message("Where do you live?", "I live in Brazil")
add_message("Where do you live?", "I live in Brazil")

get_history()

# ConversationKGMemory
- builds a knowledge graph of the entities mentioned over the course of the conversation

In [None]:
from langchain.memory import ConversationKGMemory

# Knowledge-graph memory; the chat model is passed in as its llm
memory = ConversationKGMemory(return_messages=True, llm=chat)

In [None]:
def add_message(input, output):
    """Save one human/AI exchange into the notebook-level ``memory`` object."""
    human_side = {"input": input}
    ai_side = {"output": output}
    memory.save_context(human_side, ai_side)


add_message("Hi, I am Minho, I live in Olympia, WA", "Wow, that is so cool")

In [None]:
# Query the memory with a question about Minho (unlike the earlier cells, an input is passed here).
memory.load_memory_variables({"input":"who is Minho"})

In [None]:
# Save another exchange about Minho, then query the memory about what he likes.
add_message("Minho likes kimchi, jabchae, bulgogi", "Wow, that is so cool")
memory.load_memory_variables({"input":"what does minho like?"})

# Memory on LLM chain
- how to connect memory to a chain
 1. LLMChain: an off-the-shelf chain (a good way to start, but in the end you will want to customize it with the LangChain Expression Language)

In [None]:
from langchain.chains import LLMChain

memory = ConversationSummaryBufferMemory(
    llm=chat,
    max_token_limit=120,
    # memory_key is the template variable that receives the stored history
    memory_key="chat_history",
    # return_messages is deliberately not set: this chain uses a plain string
    # template, so the history has to arrive as text. With return_messages=True
    # the {chat_history} slot would be filled with a list of message objects.
)
# Leave a space for the memory in the template
template = """
    You are a helpful AI talking to a human.

    {chat_history}
    Human:{question}
    You:
"""

# Off-the-shelf chain that wires the model, the memory and the prompt together
chain = LLMChain(
    llm=chat,
    memory=memory,
    prompt=PromptTemplate.from_template(template),
    verbose=True,
)



In [None]:
# Give the chain two facts; its attached memory stores the conversation.
chain.predict(question="My name is Minho")
chain.predict(question="I live in Olympia, WA")

In [None]:
# Follow-up question that depends on the earlier conversation.
chain.predict(question="What is my name?")

In [None]:
# Display what the memory holds after the conversation above.
memory.load_memory_variables({})

# ChatbasedMemory
- sending memory as strings or messages
- How can we change it as a chatbased memory?


In [36]:
from langchain.prompts import MessagesPlaceholder
# Same memory settings as before, this time returning messages
memory = ConversationSummaryBufferMemory(
    llm=chat,
    max_token_limit=120,
    return_messages=True,
    memory_key="chat_history",
)

# The prompt is now a chat prompt instead of a string template
prompt = ChatPromptTemplate.from_messages(
    [
        # (original note) the summary from the summary memory is added to the system message
        ("system", "You are a helpful AI talking to a human"),
        # The chat history is passed into the template through a message placeholder
        MessagesPlaceholder(variable_name="chat_history"),
        ("human", "{question}"),
    ]
)

chain = LLMChain(llm=chat, memory=memory, prompt=prompt, verbose=True)

In [None]:
# First turn: tell the chain a name.
chain.predict(question="My name is Minho")

In [None]:
# Second turn: add another fact to the conversation.
chain.predict(question="I live in Olympia, WA")

In [None]:
# Follow-up question that depends on the earlier turns.
chain.predict(question="What is my name?")

# Langchain expression language
 - customize the chain; it is manual but not hard
 - LLMChain is not used, but the memory code and the prompt code do not change either

In [37]:
from langchain.schema.runnable import RunnablePassthrough

# Chat prompt with a slot for the stored history
prompt = ChatPromptTemplate.from_messages(
    [
        # (original note) the summary from the summary memory is added to the system message
        ("system", "You are a helpful AI talking to a human"),
        # The chat history is passed into the template through a message placeholder
        MessagesPlaceholder(variable_name="chat_history"),
        ("human", "{question}"),
    ]
)

def load_memory(input):
    """Return the stored chat history; the ``input`` argument is not used."""
    stored = memory.load_memory_variables({})
    return stored["chat_history"]

# load_memory is called first and its result goes into "chat_history";
# the input then flows on to the prompt and the chat model.
chain = RunnablePassthrough.assign(chat_history= load_memory) | prompt | chat


def invoke_chain(question):
    """Run the chain on ``question``, save the exchange in memory, and print the reply."""
    reply = chain.invoke({"question": question})
    # Store this turn so that later questions can refer to it
    memory.save_context(
        {"input": question},
        {"output": reply.content},
    )
    print(reply)
    
# First turn: the memory is still empty at this point.
invoke_chain("My name is Minho")

[32;1m[1;3m[chain/start][0m [1m[1:chain:RunnableSequence] Entering Chain run with input:
[0m{
  "question": "My name is Minho"
}
[32;1m[1;3m[chain/start][0m [1m[1:chain:RunnableSequence > 2:chain:RunnableParallel] Entering Chain run with input:
[0m{
  "question": "My name is Minho"
}
[32;1m[1;3m[chain/start][0m [1m[1:chain:RunnableSequence > 2:chain:RunnableParallel > 3:chain:load_memory] Entering Chain run with input:
[0m{
  "question": "My name is Minho"
}
[36;1m[1;3m[chain/end][0m [1m[1:chain:RunnableSequence > 2:chain:RunnableParallel > 3:chain:load_memory] [0ms] Exiting Chain run with output:
[0m{
  "output": []
}
[36;1m[1;3m[chain/end][0m [1m[1:chain:RunnableSequence > 2:chain:RunnableParallel] [3ms] Exiting Chain run with output:
[0m{
  "chat_history": []
}
[32;1m[1;3m[chain/start][0m [1m[1:chain:RunnableSequence > 4:prompt:ChatPromptTemplate] Entering Prompt run with input:
[0m{
  "question": "My name is Minho",
  "chat_history": []
}
[36;1m[1;3m

In [38]:
# Follow-up turn: the previous exchange is loaded from memory as chat_history.
invoke_chain("Whay is my name?")

[32;1m[1;3m[chain/start][0m [1m[1:chain:RunnableSequence] Entering Chain run with input:
[0m{
  "question": "Whay is my name?"
}
[32;1m[1;3m[chain/start][0m [1m[1:chain:RunnableSequence > 2:chain:RunnableParallel] Entering Chain run with input:
[0m{
  "question": "Whay is my name?"
}
[32;1m[1;3m[chain/start][0m [1m[1:chain:RunnableSequence > 2:chain:RunnableParallel > 3:chain:load_memory] Entering Chain run with input:
[0m{
  "question": "Whay is my name?"
}
[36;1m[1;3m[chain/end][0m [1m[1:chain:RunnableSequence > 2:chain:RunnableParallel > 3:chain:load_memory] [0ms] Exiting Chain run with output:
[0m{
  "output": [
    {
      "lc": 1,
      "type": "constructor",
      "id": [
        "langchain",
        "schema",
        "messages",
        "HumanMessage"
      ],
      "kwargs": {
        "content": "My name is Minho"
      }
    },
    {
      "lc": 1,
      "type": "constructor",
      "id": [
        "langchain",
        "schema",
        "messages",
       