In [None]:
!pip install langchain==0.0.240rc4


### OutputParser


In [None]:
from langchain.prompts import PromptTemplate, ChatPromptTemplate, HumanMessagePromptTemplate
from langchain.llms import OpenAI

from langchain.output_parsers import PydanticOutputParser
from pydantic import BaseModel, Field, validator
from typing import List, Dict
import json


def chinese_friendly(string):
    """Re-serialize single-line JSON objects so non-ASCII text stays readable.

    Every line of ``string`` that starts with ``{`` and ends with ``}`` is
    parsed as JSON and dumped again with ``ensure_ascii=False``, so escaped
    Unicode sequences come out as the characters they stand for. Lines that
    are not valid JSON are kept unchanged.

    Args:
        string: Text that may contain JSON objects, one per line.

    Returns:
        The text with every parseable JSON-object line re-serialized; all
        other lines are returned as they were.
    """
    def _readable(line):
        # Only lines that look like a complete JSON object are candidates.
        if not (line.startswith('{') and line.endswith('}')):
            return line
        try:
            return json.dumps(json.loads(line), ensure_ascii=False)
        except ValueError:
            # json.JSONDecodeError is a ValueError: a brace-delimited line
            # that is not JSON is left as-is. Anything else (for example
            # KeyboardInterrupt) is allowed to propagate.
            return line

    return '\n'.join(_readable(line) for line in string.split('\n'))


# Model settings for this cell: model name and sampling temperature.
model_name = 'gpt-4'
temperature = 0.0
# NOTE(review): 'gpt-4' is a chat model while OpenAI is the completion-style
# wrapper — confirm this combination behaves as intended with the pinned
# langchain version.
model = OpenAI(model_name=model_name, temperature=temperature)


class Command(BaseModel):
    # Schema the output parser targets: a command name plus named arguments.
    # Comments (not a class docstring) are used on purpose so the generated
    # schema / format instructions stay exactly as they are.
    command: str = Field(description="linux shell command")
    arguments: Dict[str, str] = Field(description="parameter (name:value)")

    @validator('command')
    def no_space(cls, field):
        # Reject a command name that contains a space, tab or newline.
        for blank in (" ", "\t", "\n"):
            if blank in field:
                raise ValueError("command could not contain space!")
        return field


# Parser that turns the model's text output into a Command instance.
parser = PydanticOutputParser(pydantic_object=Command)

# Prompt with the parser's format instructions pre-filled; only {query} is
# left to supply at call time.
prompt = PromptTemplate(
    template="convert user command to linux.\n{format_instructions}\n{query}",
    input_variables=["query"],
    partial_variables={"format_instructions": parser.get_format_instructions()}
)

# Show the format instructions, passed through chinese_friendly for readability.
print(chinese_friendly(parser.get_format_instructions()))


# Render the prompt for one example query.
query = "set system time is 2023-04-01"
model_input = prompt.format_prompt(query=query)

#print(parser.get_format_instructions())

print("====Prompt=====")
print(chinese_friendly(model_input.to_string()))

# Send the rendered prompt to the model, print the raw reply, then parse it
# into a Command and print the parsed object.
output = model(model_input.to_string())
print("====Output=====")
print(output)
print("====Parsed=====")
cmd = parser.parse(output)
print(cmd)


## 二、Encapsulation of Documents

### 2.1 Document Loaders


In [None]:
!pip install pypdf


In [None]:
from langchain.document_loaders import PyPDFLoader

# Load the PDF and split it into documents; `pages` is reused by later cells.
loader = PyPDFLoader("WhatisChatGPT.pdf")
pages = loader.load_and_split()

# Show the text of the first resulting document.
print(pages[0].page_content)


### 2.2 Document Processor

例 1：TextSplitter


In [None]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Splitter configured for chunks of 50 with an overlap of 10, measured with
# len(); add_start_index is enabled.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=50,
    chunk_overlap=10, 
    length_function=len,
    add_start_index=True,
)

# Split only the first page's text; `paragraphs` is reused by later cells.
paragraphs = text_splitter.create_documents([pages[0].page_content])
# Print the first five chunks, each followed by a separator line.
for para in paragraphs[:5]:
    print(para.page_content)
    print('-------')


例 2：Doctran


In [None]:
!pip install doctran


In [None]:
from langchain.document_transformers import DoctranTextTranslator

# Translator configured with the gpt-3.5-turbo model and "chinese" as the
# target language.
translator = DoctranTextTranslator(
    openai_api_model="gpt-3.5-turbo", language="chinese")

# Top-level `await`: relies on the notebook kernel's async support.
# All documents in `pages` are passed to the translator.
translated_document = await translator.atransform_documents(pages)

# Show the text of the first translated document.
print(translated_document[0].page_content)


### 2.3 Text Embeddings


In [None]:
from langchain.embeddings import OpenAIEmbeddings

# Embed the same sentence once through the query API and once through the
# document API (the latter takes a list of texts).
embeddings = OpenAIEmbeddings()
text = "it is an apple"
document = "it is an apple"
query_vec = embeddings.embed_query(text)
doc_vec = embeddings.embed_documents([document])


### 2.4 Vectorstores


In [None]:
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import FAISS

# Build a FAISS store from the chunks in `paragraphs` (created in the
# TextSplitter cell) using OpenAI embeddings; `db` is reused by later cells.
embeddings = OpenAIEmbeddings()
db = FAISS.from_documents(paragraphs, embeddings)

# Run a similarity search and print the first returned document.
query = "What can ChatGPT do?"
docs = db.similarity_search(query)
print(docs[0].page_content)


### 2.5 Retrievers


In [None]:
# Wrap the existing `db` vector store as a retriever and query it.
retriever = db.as_retriever()
docs = retriever.get_relevant_documents("What can ChatGPT do?")

# Print the first returned document.
print(docs[0].page_content)


In [None]:
from langchain.retrievers import TFIDFRetriever

# Same query as the previous cell, this time through a TFIDFRetriever built
# from `paragraphs`. Note that this rebinds `retriever`.
retriever = TFIDFRetriever.from_documents(paragraphs)
docs = retriever.get_relevant_documents("What can ChatGPT do?")

# Print the first returned document.
print(docs[0].page_content)


## 三、Memory

### 3.1 ConversationBufferWindowMemory


In [None]:
from langchain.memory import ConversationBufferWindowMemory

# Window memory created with k=2; three input/output pairs are saved and the
# loaded memory variables are printed.
# NOTE(review): presumably only the most recent k exchanges are kept —
# confirm against the printed output.
window = ConversationBufferWindowMemory(k=2)
window.save_context({"input": "question 1"}, {"output": "answer 1"})
window.save_context({"input": "question 2"}, {"output": "answer 2"})
window.save_context({"input": "question 3"}, {"output": "answer 3"})
print(window.load_memory_variables({}))

### 3.2 ConversationSummaryMemory


In [None]:
from langchain.memory import ConversationSummaryMemory
from langchain.llms import OpenAI

# Summary memory backed by an OpenAI LLM at temperature 0, created with an
# initial buffer value.
memory = ConversationSummaryMemory(
    llm=OpenAI(temperature=0),
    # buffer="The conversation is between a customer and a sales."
    buffer="english"
)
# NOTE(review): the output text ends in "today7" — looks like a typo for
# "today?"; confirm before changing, since this string is fed to the memory.
memory.save_context(
    {"input": "hello"}, {"output": "Hello, how are you doing today7"})

# Print what the memory exposes after saving one exchange.
print(memory.load_memory_variables({}))


## 四、链架构：Chain


### 4.1 Chain example


In [None]:
from langchain.chat_models import ChatOpenAI
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain

# Chat model and a one-variable prompt combined into an LLMChain.
llm = ChatOpenAI(model='gpt-3.5-turbo', temperature=0.9)
prompt = PromptTemplate(
    input_variables=["product"],
    template="give me the {product} price",
)

chain = LLMChain(llm=llm, prompt=prompt)

# Run the chain with "apple" as the value for {product} and print the reply.
print(chain.run("apple"))


### 4.2 Chain Memory


In [None]:
from langchain.memory import ConversationBufferMemory, ConversationSummaryMemory
from langchain.llms import OpenAI
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain

# Prompt template with two slots: {memory} (filled by the memory object via
# memory_key="memory") and {human_input} (the user's message).
template = """you are a chatbot。

{memory}
Human: {human_input}
AI:"""

prompt = PromptTemplate(
    input_variables=["memory", "human_input"], template=template
)

#memory = ConversationBufferMemory(memory_key="memory")

# Summary memory whose key matches the {memory} slot of the template.
memory = ConversationSummaryMemory(llm=OpenAI(
    temperature=0), buffer="english", memory_key="memory")

llm_chain = LLMChain(
    llm=OpenAI(),
    prompt=prompt,
    verbose=True,
    memory=memory,
)

# Two consecutive runs on the same chain; the second question refers back to
# the first one.
print(llm_chain.run("who are you"))
print("---------------")
output = llm_chain.run("what I asked？")
print(output)


### 4.3 Another Chain Example


<img src="stuffdocchain.png" style="margin-left: 0px" width=500px>

In [None]:
!pip install unstructured faiss-cpu


In [None]:
from langchain.document_loaders import UnstructuredMarkdownLoader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.llms import OpenAI
from langchain.chains import RetrievalQA

# Load a markdown file and split it with chunk_size=200 / chunk_overlap=50.
loader = UnstructuredMarkdownLoader("ChatALL.md")
documents = loader.load()
text_splitter = CharacterTextSplitter(chunk_size=200, chunk_overlap=50)
texts = text_splitter.split_documents(documents)

# Index the chunks in FAISS and build a RetrievalQA chain of type "stuff"
# on top of the store's retriever. This rebinds `embeddings`, `db` and
# `text_splitter` from earlier cells.
embeddings = OpenAIEmbeddings()
db = FAISS.from_documents(texts, embeddings)
qa_chain = RetrievalQA.from_chain_type(llm=OpenAI(
    temperature=0), chain_type="stuff", retriever=db.as_retriever())

# The last expression is the cell's displayed output.
query = "ChatALL"
qa_chain.run(query)


In [None]:
# Inspect the qa_chain built in the previous cell: the chain itself, the
# document prompt of its combine-documents chain, and the prompt template of
# the inner LLM chain.
print('================qa_chain===============')
print(qa_chain)
print('======combine_documents_chain==========')
print(qa_chain.combine_documents_chain.document_prompt)
print('==============llm_chain================')
print(qa_chain.combine_documents_chain.llm_chain.prompt.template)



### 4.4 Sequential


In [None]:
from langchain.chat_models import ChatOpenAI
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
from langchain.chains import SimpleSequentialChain

llm = ChatOpenAI(model='gpt-3.5-turbo', temperature=0.9)
# First step: ask for a company name given a product.
name_prompt = PromptTemplate(
    input_variables=["product"],
    template="come up with a name for a company which produce {product}",
)

name_chain = LLMChain(llm=llm, prompt=name_prompt)

# Second step: ask for a slogan given a company name.
slogan_prompt = PromptTemplate(
    input_variables=["name"],
    template="come up a  Slogan for {name} company, output format: name:slogan",
)

slogan_chain = LLMChain(llm=llm, prompt=slogan_prompt)

# Chain the two steps in order: name_chain first, then slogan_chain.
overall_chain = SimpleSequentialChain(
    chains=[name_chain, slogan_chain], verbose=True)

print(overall_chain.run("computer"))


### 4.5 Transform


In [None]:
import re
from langchain.chains import TransformChain, LLMChain, SimpleSequentialChain
from langchain.chat_models import ChatOpenAI
from langchain.prompts import PromptTemplate


def anonymize(inputs: dict) -> dict:
    """Mask 11-digit phone-like numbers found in ``inputs["text"]``.

    The text is searched repeatedly for the pattern below; each time a match
    is found, every occurrence of the matched string is replaced by eleven
    asterisks, until no match remains.

    Args:
        inputs: Mapping that holds the text to process under the key "text".

    Returns:
        A dict with the masked text under the key "output_text".
    """
    phone_pattern = re.compile(
        r'1(3\d|4[4-9]|5[0-35-9]|6[67]|7[013-8]|8[0-9]|9[0-9])\d{8}')
    masked = inputs["text"]
    found = phone_pattern.search(masked)
    while found is not None:
        # str.replace swaps out every occurrence of the matched number at once.
        masked = masked.replace(found.group(), '***********')
        found = phone_pattern.search(masked)
    return {"output_text": masked}


# Wrap anonymize as a chain step: reads "text", produces "output_text".
transform_chain = TransformChain(
    input_variables=["text"], output_variables=["output_text"], transform=anonymize
)

llm = ChatOpenAI(model='gpt-3.5-turbo', temperature=0.9)
prompt = PromptTemplate(
    input_variables=["input"],
    template="find out what is the career of the person below:\n{input}\n output JSON, job is the key",
)

task_chain = LLMChain(llm=llm, prompt=prompt)

# Run the anonymizing step first, then the LLM step.
overall_chain = SimpleSequentialChain(
    chains=[transform_chain, task_chain], verbose=True)

print(overall_chain.run("I am a dentist"))


### 4.6 Router


In [None]:
from langchain.chains.router.multi_prompt_prompt import MULTI_PROMPT_ROUTER_TEMPLATE
from langchain.chains.router.llm_router import LLMRouterChain, RouterOutputParser
from langchain.prompts import PromptTemplate
from langchain.chains.llm import LLMChain
from langchain.chains import ConversationChain
from langchain.llms import OpenAI
from langchain.chains.router import MultiPromptChain
import warnings
# NOTE(review): this silences every warning for the rest of the kernel
# session, not only for this cell — confirm that is intended.
warnings.filterwarnings("ignore")


# Prompt for the Windows destination; {input} receives the user question.
windows_template = """ you can write DOS and Windows Shell script, You can not write other language

question:
{input}
"""

# Prompt for the Linux destination; {input} receives the user question.
linux_template = """ you can write linux Shell script, You can not write other language

question:
{input}
"""

# One entry per destination: a name, a description, and the prompt to use.
prompt_infos = [
    {
        "name": "WindowsExpert",
        "description": "Windows Shell related questions",
        "prompt_template": windows_template,
    },
    {
        "name": "LinuxExpert",
        "description": "Linux Shell related question",
        "prompt_template": linux_template,
    },
]

llm = OpenAI()

# Build one LLMChain per destination, keyed by destination name.
destination_chains = {}
for p_info in prompt_infos:
    name = p_info["name"]
    prompt_template = p_info["prompt_template"]
    prompt = PromptTemplate(template=prompt_template,
                            input_variables=["input"])
    chain = LLMChain(llm=llm, prompt=prompt)
    destination_chains[name] = chain
# Chain passed to MultiPromptChain as its default_chain.
default_chain = ConversationChain(llm=llm, output_key="text")

# Fill the router template with one "name: description" line per destination.
destinations = [f"{p['name']}: {p['description']}" for p in prompt_infos]
destinations_str = "\n".join(destinations)
router_template = MULTI_PROMPT_ROUTER_TEMPLATE.format(
    destinations=destinations_str)
router_prompt = PromptTemplate(
    template=router_template,
    input_variables=["input"],
    output_parser=RouterOutputParser(),
)
router_chain = LLMRouterChain.from_llm(llm, router_prompt)

# Combine the router, the destination chains and the default chain.
# This rebinds `chain`, which the loop above used for the per-destination chains.
chain = MultiPromptChain(
    router_chain=router_chain,
    destination_chains=destination_chains,
    default_chain=default_chain,
    verbose=True,
)

print(chain.run("help me to write a script and let windows to recalibrate time at 0 AM"))


### 4.7 APIChain

In [None]:
from langchain.chains import APIChain
from langchain.prompts.prompt import PromptTemplate


from langchain.llms import OpenAI

llm = OpenAI(temperature=0)
# API documentation text that is handed to the chain below.
from langchain.chains.api import open_meteo_docs
# Build an APIChain from the LLM and the Open-Meteo docs, then run one query;
# the last expression is the cell's displayed output.
chain_new = APIChain.from_llm_and_api_docs(llm, open_meteo_docs.OPEN_METEO_DOCS, verbose=True)
chain_new.run('Seattle temperature')

### 4.8 OpenAI Function Calling

In [None]:
from pydantic import BaseModel, Field
from typing import Optional
from langchain.prompts import ChatPromptTemplate, HumanMessagePromptTemplate
from langchain.schema import HumanMessage, SystemMessage

from langchain.chains.openai_functions import (
    create_openai_fn_chain,
    create_structured_output_chain,
)
from langchain.chat_models import ChatOpenAI
from langchain.llms import OpenAI
from langchain.prompts import PromptTemplate

class Contact(BaseModel):
    """Extracting information about a contact person."""

    name: str = Field(..., description="The person's name")
    address: str = Field(..., description="The person's address")
    # tel has None as its default, so it is annotated Optional[str] to match.
    tel: Optional[str] = Field(None, description="The person's telephone/mobile number")

# Message sequence for the chat prompt: a system instruction, a fixed human
# instruction, the templated user input, and a closing reminder.
prompt_msgs = [
    SystemMessage(
        content="You are a world class algorithm for extracting information in structured formats."
    ),
    HumanMessage(
        content="Use the given format to extract information from the following input:"
    ),
    HumanMessagePromptTemplate.from_template("{input}"),
    HumanMessage(content="Tips: Make sure to answer in the correct format"),
]
prompt = ChatPromptTemplate(messages=prompt_msgs)
llm = ChatOpenAI(model="gpt-4-0613", temperature=0)

# Chain built with Contact as the output structure; `chain` is reused by the
# next cell.
chain = create_structured_output_chain(Contact, llm, prompt, verbose=True)

# The last expression is the cell's displayed output.
chain.run("Mail to my address and call me，13012345678")

In [None]:
from langchain.chains import TransformChain, LLMChain, SimpleSequentialChain
from typing import Dict

def process(inputs: Dict[str, "Contact"]) -> Dict[str, str]:
    """Render the contact found under ``inputs["contact"]`` as vCard text.

    Args:
        inputs: Mapping whose "contact" entry exposes ``name``, ``address``
            and ``tel`` attributes.

    Returns:
        A dict with the vCard text under the key "text". The TEL line is
        left out when ``tel`` is None, so the card never contains the
        literal text "TEL:None".
    """
    person = inputs["contact"]
    card_lines = [
        "BEGIN:VCARD",
        "VERSION:2.1",
        f"N:{person.name}",
        f"ADR:{person.address}",
    ]
    # tel is the only Contact field with a None default, so it may be absent.
    if person.tel is not None:
        card_lines.append(f"TEL:{person.tel}")
    card_lines.append("END:VCARD")
    return {"text": "\n".join(card_lines)}


# Wrap process as a chain step: reads "contact", produces "text".
transform_chain = TransformChain(
    input_variables=["contact"], output_variables=["text"], transform=process
)

# Run `chain` (built in the previous cell) first, then the vCard formatting step.
overall_chain = SimpleSequentialChain(
    chains=[chain, transform_chain], verbose=True)

print(overall_chain.run("Mail to my address and call me，13012345678"))

### 4.9 Document Chains


In [None]:
from langchain.callbacks import StdOutCallbackHandler
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chat_models import ChatOpenAI
from langchain.chains import RetrievalQA
from langchain.chains.base import Chain
from langchain.document_loaders import PyPDFLoader

def set_verbose_recusively(chain):
    """Set ``verbose = True`` on ``chain`` and on its nested sub-chains.

    A sub-chain is any attribute whose name ends in ``_chain`` and whose
    value is a ``Chain`` instance; each one is processed the same way.
    """
    chain.verbose = True
    for attr_name in dir(chain):
        if not attr_name.endswith('_chain'):
            continue
        candidate = getattr(chain, attr_name)
        if isinstance(candidate, Chain):
            set_verbose_recusively(candidate)

# Load the PDF and split it into documents.
loader = PyPDFLoader("SIGDIAL2023.pdf")
documents = loader.load_and_split()

# Splitter configured for chunks of 500 with an overlap of 50, measured with len().
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=500,
    chunk_overlap=50,
    length_function=len,
    add_start_index=True,
)

# Re-split the text of every loaded document. This rebinds `paragraphs`,
# `embeddings`, `db` and `qa_chain` from earlier cells.
paragraphs = text_splitter.create_documents(
    [d.page_content for d in documents])
# print(paragraphs)
embeddings = OpenAIEmbeddings(model='text-embedding-ada-002')
db = FAISS.from_documents(paragraphs, embeddings)
# RetrievalQA chain of type "map_rerank" on top of the FAISS retriever.
qa_chain = RetrievalQA.from_chain_type(
    llm=ChatOpenAI(temperature=0),
    chain_type="map_rerank",
    retriever=db.as_retriever(),
    verbose=True
)
# Turn on verbose output for the chain and its nested sub-chains.
set_verbose_recusively(qa_chain)

# The last expression is the cell's displayed output.
query = "When is the regular submission deadline? When is the ARR submission deadline?"
qa_chain.run(query)


## 五、Agent


In [None]:
from langchain import SerpAPIWrapper
# Tool is used below, so it is imported in this cell; otherwise a fresh
# kernel running the cells top to bottom fails here with a NameError.
from langchain.tools import Tool

# Expose the search wrapper's run method as a tool named "Search".
search = SerpAPIWrapper()
tools = [
    Tool.from_function(
        func=search.run,
        name="Search",
        description="useful for when you need to answer questions about current events"
    ),
]


In [None]:
from langchain.tools import Tool, tool
import calendar
# Imported under the alias date_parser so that this cell does not rebind the
# global name `parser`, which the OutputParser cell assigns.
import dateutil.parser as date_parser
from datetime import date


@tool("weekday")
def weekday(date_str: str) -> str:
    """Convert date to weekday name"""
    # Parse the free-form date string, then map its weekday index to a name.
    d = date_parser.parse(date_str)
    return calendar.day_name[d.weekday()]


In [None]:
from langchain.agents import load_tools

# Rebuild `tools` from the built-in "serpapi" tool (replacing the list from
# the earlier cell) and append the custom weekday tool.
tools = load_tools(["serpapi"])
tools += [weekday]


### 5.3 ReAct


<img src="ReAct.png" style="margin-left: 0px" width=500px>


In [None]:
!pip install google-search-results


In [None]:
from langchain.chat_models import ChatOpenAI
from langchain.llms import OpenAI
from langchain.agents import AgentType
from langchain.agents import initialize_agent

llm = ChatOpenAI(model_name='gpt-4', temperature=0)

# Agent of type ZERO_SHOT_REACT_DESCRIPTION over the tools assembled in the
# earlier cells.
agent = initialize_agent(
    tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=True)
# The last expression is the cell's displayed output.
agent.run("which date is Michael Jackson's birthday")


### 5.4 OpenAI Function Calling


In [None]:
from langchain.chat_models import ChatOpenAI
from langchain.llms import OpenAI
from langchain.agents import AgentType
from langchain.agents import initialize_agent

llm = ChatOpenAI(model_name='gpt-4-0613', temperature=0)

# Agent of type OPENAI_FUNCTIONS over the same tools, limited to two
# iterations with early_stopping_method="generate".
agent = initialize_agent(
    tools,
    llm,
    agent=AgentType.OPENAI_FUNCTIONS,
    verbose=True,
    max_iterations=2,
    early_stopping_method="generate",
)
# The last expression is the cell's displayed output.
agent.run("which date is Michael Jackson's birthday")


### 5.5 SelfAskWithSearch

In [None]:
from langchain import OpenAI, SerpAPIWrapper
from langchain.agents import initialize_agent, Tool
from langchain.agents import AgentType

llm = OpenAI(temperature=0)
search = SerpAPIWrapper()
# A single search tool registered under the name "Intermediate Answer".
# This rebinds `tools` from the earlier cells.
tools = [
    Tool(
        name="Intermediate Answer",
        func=search.run,
        description="useful for when you need to ask with search",
    )
]

# Agent of type SELF_ASK_WITH_SEARCH built on that one tool.
self_ask_with_search = initialize_agent(
    tools, llm, agent=AgentType.SELF_ASK_WITH_SEARCH, verbose=True
)
# The last expression is the cell's displayed output.
self_ask_with_search.run(
    "who is the main actors of Titanic"
)

### 5.6 Plan-and-Execute


<img src="PlanExec.png" style="margin-left: 0px" width=500px>


In [None]:
!pip install langchain-experimental


In [None]:
from langchain.utilities.wolfram_alpha import WolframAlphaAPIWrapper
from langchain.agents import load_tools
from langchain import SerpAPIWrapper
from langchain.agents.tools import Tool
from langchain.llms import OpenAI
from langchain_experimental.plan_and_execute import PlanAndExecute, load_agent_executor, load_chat_planner
from langchain.chat_models import ChatOpenAI
from langchain.memory import ConversationSummaryMemory

llm = ChatOpenAI(model_name='gpt-4', temperature=0)

# Search wrapper with explicit request parameters (engine, country, Google
# domain and interface language).
search = SerpAPIWrapper(params={
    'engine': 'google', 
    'gl': 'cn', 
    'google_domain': 'google.com.hk', 
    'hl': 'zh-cn'
})

# A single "Search" tool; this rebinds `tools` from the earlier cells.
tools = [
    Tool(
        name="Search",
        func=search.run,
        description="useful for when you need to answer questions about current events"
    )
]

# Planner and executor are both built from the same chat model and are
# combined into a PlanAndExecute agent.
planner = load_chat_planner(llm)
executor = load_agent_executor(llm, tools, verbose=True)
agent = PlanAndExecute(planner=planner, executor=executor, verbose=True)

# The last expression is the cell's displayed output.
agent.run("give me a report of the weather of seattle")


## 六、Callbacks


In [None]:
from typing import TYPE_CHECKING, Any, Dict, List, Union

if TYPE_CHECKING:
    # Needed only by type checkers: the annotations below name these types as
    # strings, so the class can be defined without importing them at runtime.
    from langchain.schema import AgentAction, AgentFinish, BaseMessage, LLMResult


class BaseCallbackHandler:
    """Base callback handler that can be used to handle callbacks from langchain.

    Every hook below has only a docstring as its body, so calling it does
    nothing and returns None. Each hook also accepts arbitrary extra keyword
    arguments.
    """

    def on_llm_start(
        self, serialized: Dict[str, Any], prompts: List[str], **kwargs: Any
    ) -> Any:
        """Run when LLM starts running."""

    def on_chat_model_start(
        self, serialized: Dict[str, Any], messages: List[List["BaseMessage"]], **kwargs: Any
    ) -> Any:
        """Run when Chat Model starts running."""

    def on_llm_new_token(self, token: str, **kwargs: Any) -> Any:
        """Run on new LLM token. Only available when streaming is enabled."""

    def on_llm_end(self, response: "LLMResult", **kwargs: Any) -> Any:
        """Run when LLM ends running."""

    def on_llm_error(
        self, error: Union[Exception, KeyboardInterrupt], **kwargs: Any
    ) -> Any:
        """Run when LLM errors."""

    def on_chain_start(
        self, serialized: Dict[str, Any], inputs: Dict[str, Any], **kwargs: Any
    ) -> Any:
        """Run when chain starts running."""

    def on_chain_end(self, outputs: Dict[str, Any], **kwargs: Any) -> Any:
        """Run when chain ends running."""

    def on_chain_error(
        self, error: Union[Exception, KeyboardInterrupt], **kwargs: Any
    ) -> Any:
        """Run when chain errors."""

    def on_tool_start(
        self, serialized: Dict[str, Any], input_str: str, **kwargs: Any
    ) -> Any:
        """Run when tool starts running."""

    def on_tool_end(self, output: str, **kwargs: Any) -> Any:
        """Run when tool ends running."""

    def on_tool_error(
        self, error: Union[Exception, KeyboardInterrupt], **kwargs: Any
    ) -> Any:
        """Run when tool errors."""

    def on_text(self, text: str, **kwargs: Any) -> Any:
        """Run on arbitrary text."""

    def on_agent_action(self, action: "AgentAction", **kwargs: Any) -> Any:
        """Run on agent action."""

    def on_agent_finish(self, finish: "AgentFinish", **kwargs: Any) -> Any:
        """Run on agent end."""


In [None]:
from langchain.callbacks import StdOutCallbackHandler
from langchain.callbacks.base import BaseCallbackHandler
from langchain.chains import LLMChain
from langchain.llms import OpenAI
from langchain.prompts import PromptTemplate
from typing import List, Dict, Any


class myhandler(BaseCallbackHandler):
    """Callback handler that echoes selected LLM and chain events to stdout."""

    def on_llm_start(self, serialized: Dict[str, Any], prompts: List[str], **kwargs: Any) -> Any:
        """Print the prompts handed to the LLM."""
        print(f"Feed LLM with {prompts}")

    def on_chain_start(self, serialized: Dict[str, Any], inputs: Dict[str, Any], **kwargs: Any) -> Any:
        """Print the inputs the chain was started with."""
        print(f"Chain Start: {inputs}")

    def on_chain_end(self, outputs: Dict[str, Any], **kwargs: Any) -> Any:
        """Print a fixed completion marker; the outputs themselves are not shown."""
        print(f"Done!")

    def on_text(self, text: str, **kwargs: Any) -> Any:
        """Print the text passed to the hook."""
        print(f"On text: {text}")


handler = myhandler()

llm = OpenAI()
prompt = PromptTemplate.from_template("1 + {number} = ")

# First variant: the handler is attached when the chain is constructed.
chain = LLMChain(llm=llm, prompt=prompt, callbacks=[handler])
x = chain.run(number=1)

# Second variant: the chain has no handler of its own; it is supplied only
# for this run() call.
chain = LLMChain(llm=llm, prompt=prompt)
x = chain.run(number=2, callbacks=[handler])
