# 回调

## 介绍

In [None]:
from langchain.llms import OpenAI
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
from langchain.callbacks import StdOutCallbackHandler


# Shared LLM, prompt template and stdout callback handler used by the three
# ways of attaching callbacks demonstrated below.
llm = OpenAI()
prompt = PromptTemplate.from_template('1 + {number} = ')
handler = StdOutCallbackHandler()

# Option 1: verbose=True implicitly uses StdOutCallbackHandler.
chain = LLMChain(llm=llm, prompt=prompt, verbose=True)
chain.run(number=2)

# Option 2: define the callbacks when the chain is constructed.
chain = LLMChain(llm=llm, prompt=prompt, callbacks=[handler])
chain.run(number=2)

# Option 3: define the callbacks when the chain is executed.
chain = LLMChain(llm=llm, prompt=prompt)
chain.run(number=2, callbacks=[handler])

## 自定义回调处理

In [None]:
from langchain.chat_models import ChatOpenAI
from langchain.schema import HumanMessage
from langchain.callbacks.base import BaseCallbackHandler


class MyCustomHandler(BaseCallbackHandler):
    """Callback handler that echoes every new LLM token to stdout."""

    def on_llm_new_token(self, token: str, **kwargs) -> None:
        """Print ``token`` behind a fixed prefix; extra kwargs are ignored."""
        message = f'My custom handler, token: {token}'
        print(message)


# Enable streaming output with streaming=True;
# on_llm_new_token only takes effect for streamed output.
chat = ChatOpenAI(
    max_tokens=25,
    streaming=True,
    callbacks=[MyCustomHandler()]
)

# Send a single human message and print the returned result.
res = chat([HumanMessage(content='Tell me a joke')])
print(res)

## 将日志记录到文件中

In [None]:
from loguru import logger

from langchain.globals import set_debug
from langchain.llms import OpenAI
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
from langchain.callbacks import FileCallbackHandler

# Global debug switch: prints more detailed output.
# langchain.globals also provides set_verbose, a global verbose switch.
set_debug(True)

logfile = 'output.log'

# Register the log file with loguru and hand the same path to the
# LangChain file callback handler.
logger.add(logfile, colorize=True, enqueue=True)
handler = FileCallbackHandler(logfile)

llm = OpenAI()
prompt = PromptTemplate.from_template('1 + {number} = ')

chain = LLMChain(
    llm=llm,
    prompt=prompt,
    callbacks=[handler],
    verbose=True
)
answer = chain.run(number=2)
# Also record the chain's answer through loguru.
logger.info(answer)

In [None]:
from ansi2html import Ansi2HTMLConverter
from IPython.display import HTML, display

# Read the whole log file back into memory.
with open('output.log', 'r') as f:
    content = f.read()

# Convert the log text to HTML with ansi2html.
conv = Ansi2HTMLConverter()
html = conv.convert(content, full=True)

# Render the converted HTML via IPython's display machinery.
display(HTML(html))

## Token使用量跟踪

In [None]:
from langchain.chat_models import ChatOpenAI
from langchain.callbacks import get_openai_callback
from langchain.agents import (
    AgentType, initialize_agent, load_tools
)

llm = ChatOpenAI(
    model_name='gpt-4-1106-preview',
    temperature=0
)
# Load the llm-math tool and build an agent around it.
tools = load_tools(['llm-math'], llm=llm)
agent = initialize_agent(
    tools, llm,
    agent=AgentType.CHAT_ZERO_SHOT_REACT_DESCRIPTION,
    verbose=True,
    handle_parsing_errors=True
)

# Run the agent inside the OpenAI callback context; `cb` exposes the
# token counts and cost printed below.
with get_openai_callback() as cb:
    response = agent.run('5的6次方是多少？')

    print(f'Total Tokens: {cb.total_tokens}')
    print(f'Prompt Tokens: {cb.prompt_tokens}')
    print(f'Completion Tokens: {cb.completion_tokens}')
    print(f'Total Cost (USD): ${cb.total_cost}')

# 隐私与安全

## 隐私

In [None]:
from langchain_experimental.data_anonymizer import (
    PresidioReversibleAnonymizer)

# Initialise the reversible anonymizer.
anonymizer = PresidioReversibleAnonymizer(
    # Fields that should be anonymized.
    analyzed_fields=[
        'PERSON', 'PHONE_NUMBER',
        'EMAIL_ADDRESS', 'CREDIT_CARD'
    ]
)

# Anonymize the content.
anonymized_text = anonymizer.anonymize(
    'My name is Slim Shady, call me at 313-666-7440 or '
    'email me at real.slim.shady@gmail.com. '
    'By the way, my card number is: 4916 0387 9536 0861'
)
print(f'anonymized:\n{anonymized_text}\n')

# anonymizer.deanonymizer_mapping is a dict holding the information about
# the fields before and after anonymization.
for code, info in anonymizer.deanonymizer_mapping.items():
    print(f'code: {code} data:{info}')

# De-anonymize.
text = anonymizer.deanonymize(anonymized_text)
print(f'\ndeanonymized:\n{text}')

In [None]:
from langchain.chat_models import ChatOpenAI
from langchain.prompts.prompt import PromptTemplate
from langchain.schema.runnable import RunnablePassthrough
from langchain_experimental.data_anonymizer import (
    PresidioReversibleAnonymizer)

# Only e-mail addresses are anonymized in this example.
anonymizer = PresidioReversibleAnonymizer(
    analyzed_fields=['EMAIL_ADDRESS']
)

text = 'My email is "foo@gmail.com". I live in Hangzhou.'
template = """Please answer the question based on the information:

{anonymized_text}

question: {question}
"""
prompt = PromptTemplate.from_template(
    template,
    partial_variables={
        # Anonymize the sensitive information before it enters the prompt.
        'anonymized_text': anonymizer.anonymize(text)
    }
)
llm = ChatOpenAI(temperature=0)

# Pipeline: wrap the question, fill the prompt, call the LLM, then
# de-anonymize the reply.
chain = (
        {'question': RunnablePassthrough()} |
        prompt | llm |
        # De-anonymize the information returned by the LLM.
        (lambda ai_message: anonymizer.deanonymize(
            ai_message.content
        ))
)
response = chain.invoke('What is my email?')
print(response)

In [None]:
from langchain_experimental.data_anonymizer import (
    PresidioReversibleAnonymizer)

# Anonymize a sample text so that the anonymizer holds a mapping to persist.
anonymizer = PresidioReversibleAnonymizer()
anonymized_text = anonymizer.anonymize(
    'My name is Slim Shady, call me at 313-666-7440 or '
    'email me at real.slim.shady@gmail.com. '
    'By the way, my card number is: 4916 0387 9536 0861'
)

# Save the deanonymizer mapping to a local file.
# JSON and YAML formats are supported.
anonymizer.save_deanonymizer_mapping('deanonymizer_mapping.json')

# Load the mapping from the local file into a fresh anonymizer and restore
# the text with it. Both calls must target the new instance: the original
# anonymizer already holds the mapping in memory, so using it here would not
# exercise the saved file at all.
anonymizer_loader = PresidioReversibleAnonymizer()
anonymizer_loader.load_deanonymizer_mapping('deanonymizer_mapping.json')
anonymizer_loader.deanonymize(anonymized_text)

In [None]:
from faker import Faker
from presidio_anonymizer.entities import OperatorConfig
from langchain_experimental.data_anonymizer import (
    PresidioReversibleAnonymizer)

fake = Faker()

# Custom operator applied to PERSON entities.
new_operators = {
    'PERSON': OperatorConfig(
        # Reverse the generated fake first name.
        'custom', {'lambda': lambda _: fake.first_name_female()[::-1]}
    ),
}
# Register the custom operator, then anonymize a sample sentence.
anonymizer = PresidioReversibleAnonymizer()
anonymizer.add_operators(new_operators)
anonymizer.anonymize('My name is LiaoKong')


## 安全

In [None]:
from langchain.chains import OpenAIModerationChain

# When error is False, text with inappropriate content does not raise an
# exception; the chain instead returns the string
# "Text was found that violates OpenAI's content policy."
# NOTE(review): this cell passes error=True, the opposite of the setting
# described above — presumably flagged text raises here; confirm the intent.
moderation_chain = OpenAIModerationChain(error=True)

res = moderation_chain.run('This is okay')
print(res + '\n')
res = moderation_chain.run('I will hit you')
print(res)

In [None]:
from langchain.llms import OpenAI
from langchain.prompts import PromptTemplate
from langchain.chains import (
    OpenAIModerationChain, SequentialChain, LLMChain
)

# Instantiate the LLM chain.
prompt = PromptTemplate.from_template(
    template='{setup}{new_input}Person2:'
)
llm_chain = LLMChain(llm=OpenAI(temperature=0), prompt=prompt)

# Instantiate the moderation chain.
moderation_chain = OpenAIModerationChain()
# The LLM chain's output key is "text", so the input key of the next
# chain (the moderation chain) has to be set to "text" as well.
moderation_chain.input_key = 'text'

# Join the two chains with a SequentialChain.
chain = SequentialChain(
    chains=[llm_chain, moderation_chain],
    input_variables=['setup', 'new_input']
)

# Few-shot "repeat after me" setup; new_input is appended after it by the
# prompt template.
setup = '''We are playing a game of repeat after me.

Person 1: Hi
Person 2: Hi

Person 1: How's your day
Person 2: How's your day

Person 1:'''
new_input = 'I will hit you'
inputs = {'setup': setup, 'new_input': new_input}

res = chain(inputs, return_only_outputs=True)
print(res)

# Evaluation（评估）

## 字符串评估器

In [None]:
from langchain.chat_models import ChatOpenAI
from langchain.evaluation import load_evaluator
from langchain.evaluation import Criteria, EvaluatorType

# Initialise the LLM object used for evaluation.
llm = ChatOpenAI(model='gpt-4-1106-preview', temperature=0)

# Load the criteria evaluator.
evaluator = load_evaluator(
    EvaluatorType.CRITERIA,
    # If not set, a GPT-4 model is used.
    llm=llm,
    # Evaluate conciseness.
    criteria=Criteria.CONCISENESS
)
eval_result = evaluator.evaluate_strings(
    # The content to evaluate (the output).
    prediction=(
        "What's 2+2? That's an elementary question. "
        "The answer you're looking for is that two and two is four."
    ),
    # The question the evaluated content answers.
    input="What's 2+2?",
)
# Print every field of the evaluation result.
for key,value in eval_result.items():
    print(f'{key}: {value}')

In [None]:
from langchain.chat_models import ChatOpenAI
from langchain.evaluation import load_evaluator
from langchain.evaluation import Criteria, EvaluatorType

llm = ChatOpenAI(model='gpt-4-1106-preview', temperature=0)

evaluator = load_evaluator(
    EvaluatorType.LABELED_CRITERIA,
    llm=llm,
    # Evaluate correctness.
    criteria=Criteria.CORRECTNESS
)
eval_result = evaluator.evaluate_strings(
    input='中国的首都是哪里？',
    prediction='中国的首都是北京',
    # Provide the reference answer.
    reference=(
        '1949年9月27日，中国人民政治协商会议第一届全体会议'
        '一致通过中华人民共和国的国都定于北平，即日起北平改名为北京。'
    ),
)
# Print every field of the evaluation result.
for key, value in eval_result.items():
    print(f'{key}: {value}')


In [None]:
from langchain.chat_models import ChatOpenAI
from langchain.evaluation import load_evaluator
from langchain.evaluation import EvaluatorType

llm = ChatOpenAI(model='gpt-4-1106-preview', temperature=0)

# Custom criterion: maps the criterion name to the question to be judged.
custom_criterion = {
    'numeric': '输出是否包含数字或数学信息？'
}

evaluator = load_evaluator(
    EvaluatorType.CRITERIA,
    # Pass the LLM configured above; previously it was created but never
    # used, so the evaluator silently fell back to its default model.
    llm=llm,
    criteria=custom_criterion,
)

eval_result = evaluator.evaluate_strings(
    prediction='正方形有4条相等的边',
    input='正方形有几条边？'
)
# Print every field of the evaluation result.
for key, value in eval_result.items():
    print(f'{key}: {value}')


In [None]:
from langchain.evaluation import load_evaluator
from langchain.evaluation import EvaluatorType
from langchain.evaluation import EmbeddingDistance

evaluator = load_evaluator(
    EvaluatorType.EMBEDDING_DISTANCE,
    # Similarity metric; the default is EmbeddingDistance.COSINE.
    distance_metric=EmbeddingDistance.EUCLIDEAN,
)

# Compare the prediction against the reference and print the result.
eval_result = evaluator.evaluate_strings(
    prediction='I shall go',
    reference='I will go'
)
print(eval_result)

In [None]:
from typing import Any, Optional
from evaluate import load
from langchain.evaluation import StringEvaluator

class PerplexityEvaluator(StringEvaluator):
    """String evaluator that scores a prediction by its perplexity."""

    def __init__(self, model_id: str = 'gpt2'):
        """Remember ``model_id`` and load the ``perplexity`` metric for it."""
        self.model_id = model_id
        load_options = {
            'module_type': 'metric',
            'model_id': self.model_id,
            'pad_token': 0,
        }
        self.metric_fn = load('perplexity', **load_options)

    def _evaluate_strings(
            self, *,
            prediction: str,
            reference: Optional[str] = None,
            input: Optional[str] = None,
            **kwargs: Any,
    ) -> dict:
        """Return ``{'score': ppl}`` for ``prediction``.

        ``reference``, ``input`` and any extra keyword arguments are
        accepted but not used.
        """
        computed = self.metric_fn.compute(
            predictions=[prediction],
            model_id=self.model_id,
        )
        # Only one prediction is submitted, so take the first perplexity.
        first_perplexity = computed['perplexities'][0]
        return {'score': first_perplexity}

# Score a sample sentence with the evaluator's default model_id.
evaluator = PerplexityEvaluator()
evaluator.evaluate_strings(
    prediction='西班牙的雨水主要落在平原上'
)


## 比较评估器

In [None]:
# ChatOpenAI is imported here as well so this cell does not depend on an
# earlier cell having been run first.
from langchain.chat_models import ChatOpenAI
from langchain.evaluation import load_evaluator, EvaluatorType

llm = ChatOpenAI(model='gpt-4-1106-preview', temperature=0)
evaluator = load_evaluator(
    EvaluatorType.LABELED_PAIRWISE_STRING,
    llm=llm
)

result = evaluator.evaluate_string_pairs(
    # Content A to evaluate.
    prediction='这有3只狗',
    # Content B to evaluate.
    prediction_b='4',
    # The question used for the evaluation.
    input='公园里有几只狗？',
    # The reference answer.
    reference="四只",
)

# Print every field of the comparison result.
for key, value in result.items():
    print(f'{key}: {value}')

In [None]:
from langchain.chat_models import ChatOpenAI
from langchain.evaluation import load_evaluator
from langchain.evaluation import Criteria, EvaluatorType

llm = ChatOpenAI(model='gpt-4-1106-preview', temperature=0)

evaluator = load_evaluator(
    EvaluatorType.PAIRWISE_STRING,
    llm=llm,
    # Evaluate conciseness.
    criteria=Criteria.CONCISENESS
)
# Compare two candidate answers to the same question.
eval_result = evaluator.evaluate_string_pairs(
    prediction="The answer you're looking for is that two and two is four.",
    prediction_b="two plus two equals four",
    input="What's 2+2?",
)
# Print every field of the comparison result.
for key,value in eval_result.items():
    print(f'{key}: {value}')

In [None]:
from langchain.chat_models import ChatOpenAI
from langchain.evaluation import load_evaluator, EvaluatorType


# Custom criteria: each key names a criterion, each value is the question
# describing it.
custom_criteria = {
    'simplicity': 'Is the language straightforward and unpretentious?',
    'clarity': 'Are the sentences clear and easy to understand?',
    'truthfulness': 'Does the writing feel honest and sincere?',
    'subtext': 'Does the writing suggest deeper meanings or themes?',
}

llm = ChatOpenAI(model='gpt-4-1106-preview', temperature=0)
evaluator = load_evaluator(
    EvaluatorType.PAIRWISE_STRING,
    criteria=custom_criteria,
    llm=llm,
)

# Compare two pieces of prose written for the same instruction.
result = evaluator.evaluate_string_pairs(
    prediction=(
        'Every cheerful household shares a similar rhythm of joy; '
        'but sorrow, in each household, plays a unique, '
        'haunting melody.'
    ),
    prediction_b=(
        'Where one finds a symphony of joy, '
        'every domicile of happiness resounds in harmonious,'
        ' identical notes; yet, every abode of despair conducts a '
        'dissonant orchestra, each playing an elegy of grief '
        'that is peculiar and profound to its own existence.'
    ),
    input='Write some prose about families.',
)

# Print every field of the comparison result.
for key,value in result.items():
    print(f'{key}: {value}')

## 轨迹评估器

In [None]:
from langchain.chat_models import ChatOpenAI
from langchain.agents import AgentType, initialize_agent, load_tools
from langchain.evaluation import load_evaluator, EvaluatorType

# Define the tools.
llm = ChatOpenAI(model='gpt-4-1106-preview', temperature=0)
tools = load_tools(['ddg-search', 'llm-math'], llm=llm)

# Initialise the agent.
agent = initialize_agent(
    llm=llm,
    tools=tools,
    agent=AgentType.CHAT_ZERO_SHOT_REACT_DESCRIPTION,
    # Returning the intermediate steps is essential: the evaluator needs
    # the intermediate-step data.
    return_intermediate_steps=True,
)

result = agent('OpenAI开发者大会召开的年份除以4，最后得到的数字是多少？')

print('-------------result---------------')
for key, value in result.items():
    print(f'{key}: {value}')
print('-------------evaluate---------------')

# Evaluate the agent.
evaluator = load_evaluator(
    EvaluatorType.AGENT_TRAJECTORY,
    llm=llm,
    # Pass the list of tools in use to give the evaluator more context.
    agent_tools=tools
)
eval_result = evaluator.evaluate_agent_trajectory(
    prediction=result['output'],
    input=result['input'],
    # Pass in the intermediate steps.
    agent_trajectory=result['intermediate_steps'],
)

# Print every field of the evaluation result.
for key, value in eval_result.items():
    print(f'{key}: {value}')

In [None]:
from typing import Any, Optional, Sequence, Tuple

from langchain.chains import LLMChain
from langchain.schema import AgentAction
from langchain.chat_models import ChatOpenAI
from langchain.evaluation import AgentTrajectoryEvaluator


class StepNecessityEvaluator(AgentTrajectoryEvaluator):
    """Trajectory evaluator asking an LLM whether any step was unnecessary."""

    def __init__(self) -> None:
        """Build the LLM chain that delivers the Y/N verdict."""
        llm = ChatOpenAI(model='gpt-4-1106-preview', temperature=0.0)
        template = '''Are any of the following steps unnecessary
        in answering {input}? Provide the verdict on a new line
        as a single "Y" for yes or "N" for no.

        DATA
        ------
        Steps: {trajectory}
        ------

        Verdict:'''
        self.chain = LLMChain.from_string(llm, template)

    def _evaluate_agent_trajectory(
            self, *,
            prediction: str,
            input: str,
            agent_trajectory: Sequence[Tuple[AgentAction, str]],
            reference: Optional[str] = None,
            **kwargs: Any,
    ) -> dict:
        """Score the trajectory from the chain's verdict.

        Each (action, observation) pair is rendered as one numbered line,
        the lines are sent to the chain together with ``input``, and the
        last line of the reply is taken as the verdict. ``prediction`` and
        ``reference`` are accepted but not used.

        Returns a dict with ``score`` (1 when the verdict is "Y", else 0),
        ``value`` (the verdict line) and ``reasoning`` (the full reply).
        """
        step_lines = []
        for index, (action, observation) in enumerate(agent_trajectory):
            step_lines.append(
                f'{index}: Action=[{action.tool}] '
                f'returned observation = [{observation}]'
            )
        chain_inputs = {
            'trajectory': '\n'.join(step_lines),
            'input': input,
        }
        response = self.chain.run(chain_inputs, **kwargs)
        decision = response.split('\n')[-1].strip()
        return {
            # 1 when some step is unnecessary, 0 when all steps are necessary.
            'score': int(decision == 'Y'),
            'value': decision,
            'reasoning': response,
        }


evaluator = StepNecessityEvaluator()

# Evaluate a hand-written two-step trajectory.
res = evaluator.evaluate_agent_trajectory(
    prediction='BC 221',
    input='今天几号了？',
    agent_trajectory=[
        (
            AgentAction(tool='ask', tool_input='今天几号了？', log=''),
            '明天就是昨天',
        ),
        (
            AgentAction(tool='foo', tool_input='看电视半小时', log=''),
            'foo',
        ),
    ],
)

# Print every field of the evaluation result.
for key, value in res.items():
    print(f'{key}: {value}')

# LangSmith

## 收集与追踪

In [None]:
import os

# Configure LangSmith through environment variables.
os.environ.update({
    'LANGCHAIN_TRACING_V2': 'true',
    'LANGCHAIN_ENDPOINT': 'https://api.smith.langchain.com',
    # The API key can be obtained on the Settings page.
    'LANGCHAIN_API_KEY': 'cl__xxxx',
    # The project can be created beforehand on the Projects page; if the
    # name set here does not exist yet, the project is created automatically.
    'LANGCHAIN_PROJECT': 'langchain_learn',
})

# LangSmith is non-intrusive, so no LangSmith-specific code has to be added.
from langchain.chat_models import ChatOpenAI
from langchain.agents import AgentType, initialize_agent, load_tools

# Define the tools.
llm = ChatOpenAI(model='gpt-3.5-turbo', temperature=0)
tools = load_tools(['ddg-search', 'llm-math'], llm=llm)

# Initialise the agent.
agent = initialize_agent(
    llm=llm,
    tools=tools,
    agent=AgentType.CHAT_ZERO_SHOT_REACT_DESCRIPTION
)

agent('OpenAI开发者大会召开的年份除以4，最后得到的数字是多少？')

In [None]:
from langsmith import Client

# (question, expected answer) pairs used to fill the dataset.
example_inputs = [
    ("What is the largest mammal?", "The blue whale"),
    ("What do mammals and birds have in common?",
     "They are both warm-blooded"),
    ("What are reptiles known for?", "Having scales"),
    ("What's the main characteristic of amphibians?",
     "They live both in water and on land"),
]

client = Client()
dataset_name = "Elementary Animal Questions"

# Create the dataset.
dataset = client.create_dataset(
    dataset_name=dataset_name,
    description="Questions and answers about animal phylogenetics",
)

# Add the examples to the dataset one by one.
for input_prompt, output_answer in example_inputs:
    client.create_example(
        inputs={"question": input_prompt},
        outputs={"answer": output_answer},
        dataset_id=dataset.id,
    )

# Fetch the dataset contents. list_examples yields its results lazily, so
# materialise them into a list; printing the bare iterator would only show
# the iterator object instead of the examples.
examples = list(client.list_examples(dataset_name=dataset_name))
print(examples)

## 评估

In [None]:
from langsmith import Client

# NOTE(review): this cell sits under the evaluation heading but repeats the
# dataset-creation cell of the previous section. Creating a dataset whose
# name already exists is presumably rejected by LangSmith — confirm whether
# evaluation code was intended here.

# (question, expected answer) pairs used to fill the dataset.
example_inputs = [
    ("What is the largest mammal?", "The blue whale"),
    ("What do mammals and birds have in common?",
     "They are both warm-blooded"),
    ("What are reptiles known for?", "Having scales"),
    ("What's the main characteristic of amphibians?",
     "They live both in water and on land"),
]

client = Client()
dataset_name = "Elementary Animal Questions"

# Create the dataset.
dataset = client.create_dataset(
    dataset_name=dataset_name,
    description="Questions and answers about animal phylogenetics",
)

# Add the examples to the dataset one by one.
for input_prompt, output_answer in example_inputs:
    client.create_example(
        inputs={"question": input_prompt},
        outputs={"answer": output_answer},
        dataset_id=dataset.id,
    )

# Fetch the dataset contents. list_examples yields its results lazily, so
# materialise them into a list; printing the bare iterator would only show
# the iterator object instead of the examples.
examples = list(client.list_examples(dataset_name=dataset_name))
print(examples)


## Hub

In [None]:
from langchain import hub
from langchain.chat_models import ChatOpenAI

# Pull the prompt template from the Hub.
prompt = hub.pull('liaokong/my-first-prompt')

model = ChatOpenAI()

# Build a chain from the prompt and the model, then run it.
runnable = prompt | model
runnable.invoke({
    # Indented with spaces; the original line used a tab here.
    'profession': 'biologist',
    'question': 'What is special about parrots?',
})

In [None]:
from langchain import hub
from langchain.prompts.chat import ChatPromptTemplate

# Prompt template with a single {topic} variable.
prompt = ChatPromptTemplate.from_template(
    'tell me a joke about {topic}'
)

# Change the path prefix to your own user name.
hub.push('liaokong/topic-joke-generator', prompt)

# LangServe

## 构建

In [None]:
from fastapi import FastAPI
from langchain.prompts import ChatPromptTemplate
from langchain.chat_models import ChatOpenAI
from langserve import add_routes

# language and content are automatically turned into the two request
# parameters of the endpoint.
prompt = ChatPromptTemplate.from_template(
    'You are a capable assistant in translating the '
    'following content into {language}.\n{content}'
)

app = FastAPI(title='LangChain Server')

# Add an endpoint used for translation.
add_routes(app, prompt | ChatOpenAI(), path='/translate')

# Start the server with uvicorn only when executed as the main module.
if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host='0.0.0.0', port=8000)

## 调用

In [None]:
import requests
# Call the translate route's invoke endpoint over plain HTTP; the prompt
# variables are sent under the "input" key of the JSON body.
response = requests.post(
    'http://127.0.0.1:8000/translate/invoke',
    json={
        'input': {
            'content': '你是谁？',
            'language': 'english'
        }
    }
)
print(response.json())

In [None]:
from langserve import RemoteRunnable

# Wrap the remote translate route in a RemoteRunnable and invoke it.
chain = RemoteRunnable('http://127.0.0.1:8000/translate/')
print(chain.invoke(
    {'content': '你是谁？', 'language': 'english'}
))

# Streaming output is supported as well.
import asyncio

async def run():
    """Stream the remote chain's output and print each chunk as it arrives."""
    payload = {'content': '你是谁？', 'language': 'english'}
    async for chunk in chain.astream(payload):
        print(chunk, end='', flush=True)


# NOTE(review): asyncio.run() cannot be called while an event loop is already
# running in the same thread (as in a Jupyter cell) — presumably this is
# meant to run as a script; in a notebook use `await run()` instead.
asyncio.run(run())