In [1]:
import os
import os
from dotenv import load_dotenv
# Load environment variables from the deepseek.env file.
load_dotenv("deepseek.env")

# Read the DeepSeek and SerpAPI settings from the environment.
api_key = os.getenv("DEEPSEEK_API_KEY")
api_base = os.getenv("DEEPSEEK_API_BASE")
serp_api_key = os.getenv("SERPAPI_API_KEY")

# Fail fast with a readable message: assigning None into os.environ would
# otherwise raise an opaque "str expected, not NoneType" TypeError below.
_missing_vars = [
    name
    for name, value in (
        ("DEEPSEEK_API_KEY", api_key),
        ("DEEPSEEK_API_BASE", api_base),
        ("SERPAPI_API_KEY", serp_api_key),
    )
    if value is None
]
if _missing_vars:
    raise RuntimeError(
        "Missing environment variable(s): "
        + ", ".join(_missing_vars)
        + ". Define them in deepseek.env or in the process environment."
    )

# Mirror the DeepSeek credentials under the OPENAI_* names (presumably so that
# OpenAI-compatible clients created without explicit arguments pick them up).
os.environ["OPENAI_API_KEY"] = api_key
os.environ["OPENAI_API_BASE"] = api_base
os.environ["SERPAPI_API_KEY"] = serp_api_key

# Keyword arguments for building a chat model against DeepSeek
# (used as ChatOpenAI(**DEEPSEEK_CONFIG)).
DEEPSEEK_CONFIG = {
    "model": "deepseek-chat",
    "openai_api_base": api_base,
    "openai_api_key": api_key,
}

# LCEL(LangChain Expression Language)
- 一个最简单示例
<hr>

In [7]:
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
# Import ChatOpenAI from langchain_openai, the same package the later cells of
# this notebook use, instead of the deprecated langchain_community location.
from langchain_openai import ChatOpenAI

# Prompt template with a single {topic} placeholder.
prompt = ChatPromptTemplate.from_template("给我讲一个关于 {topic}的笑话")
# Chat model built from DEEPSEEK_CONFIG (model name, API base URL, API key).
model = ChatOpenAI(**DEEPSEEK_CONFIG)
# Turns the model's reply into a plain string.
output_parser = StrOutputParser()

# LCEL: `|` composes the steps so each one's output feeds the next.
chain = prompt | model | output_parser

chain.invoke({"topic": "冰激凌"})

'好的！这里有一个关于冰淇淋的冷笑话，保证让你“透心凉”：\n\n---\n\n**顾客**：老板，你们这冰淇淋怎么融化得这么快啊？  \n**老板**：哦，因为它用的是“热”带水果做的！  \n\n（注：热带水果本身并不“热”，但老板故意用双关语制造幽默，暗示冰淇淋“太热”所以化了。）\n\n---\n\n希望这个笑话能给你带来一丝凉快的快乐！ 😄🍦'

Prompt

In [8]:
# Run only the prompt step (no model call) to see what the chain hands to the model.
prompt_value = prompt.invoke({"topic": "刺猬"})
prompt_value

ChatPromptValue(messages=[HumanMessage(content='给我讲一个关于 刺猬的笑话', additional_kwargs={}, response_metadata={})])

In [9]:
# View the prompt value as a list of chat messages.
prompt_value.to_messages()

[HumanMessage(content='给我讲一个关于 刺猬的笑话', additional_kwargs={}, response_metadata={})]

In [10]:
# View the same prompt value flattened into a single string.
prompt_value.to_string()

'Human: 给我讲一个关于 刺猬的笑话'

Model

In [11]:
# Call the chat model directly with the prompt value; the displayed result is
# an AIMessage carrying the reply text plus response metadata.
message = model.invoke(prompt_value)
message

AIMessage(content='好的！来一个关于刺猬的可爱笑话：\n\n---\n\n**刺猬去面试**  \n刺猬去应聘快递员，面试官看完简历摇摇头说："你的技能不错，但有个问题——你每次送货都会把包装盒扎满洞。"  \n\n刺猬很委屈："这不能怪我……是你们非要让我‘带刺’上岗的！"  \n\n---\n\n（笑点解析：双关语——"带刺"既指刺猬的生理特点，又暗指职场中说话尖锐的性格，结果闹出物理意义上的误会😂）  \n\n需要更冷或更热门的动物笑话风格可以告诉我哦！', additional_kwargs={}, response_metadata={'token_usage': {'completion_tokens': 134, 'prompt_tokens': 13, 'total_tokens': 147, 'completion_tokens_details': None, 'prompt_tokens_details': {'audio_tokens': None, 'cached_tokens': 0}, 'prompt_cache_hit_tokens': 0, 'prompt_cache_miss_tokens': 13}, 'model_name': 'deepseek-chat', 'system_fingerprint': 'fp_8802369eaa_prod0623_fp8_kvcache', 'finish_reason': 'stop', 'logprobs': None}, id='run--f38b0118-df5a-4433-95e9-648354d4f7b6-0')

使用 LLM 的区别

In [12]:
from langchain_openai.llms import OpenAI

# Contrast case: feed the same prompt value to the `OpenAI` class from
# `langchain_openai.llms` (an LLM rather than a chat model, per the heading).
# NOTE(review): the setup cell points OPENAI_API_BASE at the DeepSeek endpoint,
# and the recorded output of this cell is an UnprocessableEntityError
# ("prompt: invalid type: sequence, expected a string"). Presumably that
# endpoint does not accept this request/model — confirm, then either target a
# compatible completions endpoint or catch and display the error so that
# "Run All" does not stop here.
llm = OpenAI(model="gpt-3.5-turbo-instruct")
llm.invoke(prompt_value)

UnprocessableEntityError: Failed to deserialize the JSON body into the target type: prompt: invalid type: sequence, expected a string at line 1 column 43

Output parser

In [None]:
# Run only the output parser on the model's message to get its text as a plain string.
# NOTE(review): this cell has no execution count and the stored output does not
# match the `message` shown earlier — re-run the notebook to refresh it.
output_parser.invoke(message)

'一只兔子和一只刺猬赛跑，刺猬以微弱的优势赢了。\n\n兔子很不服气，说：“你只是运气好，我一定能赢你。”\n\n刺猬笑了笑，说：“好啊，那我们再比一次。”\n\n兔子问：“你为什么笑？”\n\n刺猬回答：“因为我知道，无论我跑多慢，你都会在我背后。” \n\n兔子：“为什么？”\n\n刺猬：“因为你不敢超过我，怕被我刺到。”'

LCEL的Pipeline：
- ![Alt Text](lcel01.png)

# RAG Search Example
- 建立向量数据
- 使用RAG增强
<hr>

In [13]:
! pip install --upgrade --quiet  langchain langchain-openai faiss-cpu tiktoken

[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
doctran 0.0.14 requires openai<0.28.0,>=0.27.8, but you have openai 1.99.1 which is incompatible.
doctran 0.0.14 requires pydantic<2.0.0,>=1.10.9, but you have pydantic 2.11.7 which is incompatible.
doctran 0.0.14 requires tiktoken<0.6.0,>=0.5.0, but you have tiktoken 0.11.0 which is incompatible.[0m[31m
[0m

In [10]:
from operator import itemgetter

from langchain_community.vectorstores import FAISS
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnableLambda, RunnablePassthrough
from langchain_openai import ChatOpenAI, OpenAIEmbeddings

# Build a tiny in-memory FAISS index over a single sentence.
# NOTE(review): OpenAIEmbeddings() is created with default settings while the
# setup cell points OPENAI_API_BASE at the DeepSeek endpoint — confirm that
# endpoint serves an embeddings API, or give the embeddings their own
# provider settings.
vectorstore = FAISS.from_texts(
    ["harrison worked at kensho"], embedding=OpenAIEmbeddings()
)
retriever = vectorstore.as_retriever()

# Prompt that restricts the answer to the retrieved context.
template = """Answer the question based only on the following context:
{context}

Question: {question}
"""
prompt = ChatPromptTemplate.from_template(template)

# Use the same DeepSeek configuration as the first chain instead of the
# library's default model, so every chat call in this notebook targets the
# model that the configured endpoint is set up for.
model = ChatOpenAI(**DEEPSEEK_CONFIG)

# The chain input is the raw question string: it is sent to the retriever to
# fill {context} and passed through unchanged to fill {question}.
chain = (
    {"context": retriever, "question": RunnablePassthrough()}
    | prompt
    | model
    | StrOutputParser()
)

In [11]:
# Ask a question; the chain takes the question text directly as its input.
chain.invoke("where did harrison work?")

'Harrison worked at Kensho.'

自定义也非常简单

In [12]:
# Same context-restricted prompt, with one extra slot for the reply language.
template = (
    "Answer the question based only on the following context:\n"
    "{context}\n"
    "\n"
    "Question: {question}\n"
    "\n"
    "Answer in the following language: {language}\n"
)
prompt = ChatPromptTemplate.from_template(template)

# The chain input is now a dict, so each prompt slot pulls its own key;
# only the "question" value is forwarded to the retriever.
_prompt_inputs = {
    "context": itemgetter("question") | retriever,
    "question": itemgetter("question"),
    "language": itemgetter("language"),
}
chain = _prompt_inputs | prompt | model | StrOutputParser()

In [13]:
# The input is a dict whose keys match what the chain extracts: "question" and "language".
chain.invoke({"question": "where did harrison work", "language": "chinese"})

'Harrison 在 Kensho 工作。'