In [None]:
from dotenv import load_dotenv
import os

# 关键：这一行代码会去查找 .env 文件，并把里面的键值对加载到 os.environ 中
load_dotenv() 

# --- 加载完成后，你就不需要你原先那两行代码了 ---
# 删掉：os.environ["LANGSMITH_TRACING"] = "true"
# 删掉：os.environ["LANGSMITH_API_KEY"] = getpass.getpass()

# 你现在可以检查一下变量是否已经成功加载
deepseek_apikey = os.getenv("DEEPSEEK_API_KEY")
if not deepseek_apikey:
    print("错误：未能在 .env 文件中找到 DEEPSEEK_API_KEY。")

key = os.getenv("LANGSMITH_API_KEY")
tracing = os.getenv("LANGSMITH_TRACING")
base_url="https://api.deepseek.com"

if key:
    print("LANGSMITH_API_KEY 已成功加载！")
    # 为了安全，只打印一部分
    print(f"Key: {key[:4]}...") 
else:
    print("错误：未能在 .env 文件中找到 LANGSMITH_API_KEY。")

print(f"Tracing 状态: {tracing}")

# LangChain 的代码现在就可以正常工作了
# ... 你的 LangChain/LangSmith 初始化代码 ...

LANGSMITH_API_KEY 已成功加载！
Key: lsv2...
Tracing 状态: true


In [2]:
from langchain_community.document_loaders import PyPDFLoader

file_path = "../data/rag.pdf"
loader = PyPDFLoader(file_path)

docs = loader.load()

print(len(docs))

  from .autonotebook import tqdm as notebook_tqdm
incorrect startxref pointer(1)
parsing for Object Streams


5


In [3]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000, chunk_overlap=200, add_start_index=True
)
all_splits = text_splitter.split_documents(docs)

print(len(all_splits))

16


In [4]:
import torch
# 运行这个命令，看它是否返回 True
torch.cuda.is_available()

True

In [5]:
torch.cuda.get_device_name(0)

'NVIDIA GeForce RTX 2060'

In [6]:
from langchain_community.embeddings import HuggingFaceEmbeddings

model_name = "Qwen/Qwen3-Embedding-0.6B"

model_kwargs = {'device': 'cuda'}

embeddings = HuggingFaceEmbeddings(
    model_name=model_name,
    model_kwargs=model_kwargs
)

  embeddings = HuggingFaceEmbeddings(


In [7]:
from langchain_chroma import Chroma

vector_store_1000_qwen = Chroma(
    collection_name="example_collection",
    embedding_function=embeddings,
    persist_directory="../db/chroma_chunk_1000_db",  # Where to save data locally, remove if not necessary
)

vector_store_1000_qwen.add_documents(documents=all_splits)

['194c90df-003d-47ee-9465-2afe2a99d110',
 '1c9e83ec-22cd-4f4d-b4cf-a28112a10ed4',
 '7473bea1-2745-49d7-ab05-7fb097a22f6e',
 'c464ef6b-12de-42ae-a792-ac1f85aab091',
 '0dfa60b6-eea1-4a5b-92a6-aa3e8242d299',
 '16af46c1-0999-40ce-9efd-85e3d4550151',
 'ae047841-8c34-4e78-91f4-cf698bf9e534',
 '53f850a2-553d-4e56-a4c9-28724ac68ec0',
 '86f1dae0-455c-44c6-bc79-d99110071d8f',
 '9e7b5747-314a-4b54-96e6-2f5689153969',
 '64cc0e51-e907-46f1-b0fe-f15ebf54acac',
 '1229386b-9c9d-4083-8907-88612265b3ed',
 '1209c2dd-ab5e-4f00-a601-562e9cc9d6d8',
 '99c1ce32-8ec0-42e5-a919-6d0bc5850b6b',
 '06c726b1-a338-49e6-92c7-b9bd3412f3f0',
 'c5776851-2aac-4d41-8917-d57740dc4cbc']

In [8]:
from typing import List

from langchain_core.documents import Document
from langchain_core.runnables import chain


@chain
def retriever(query: str) -> List[Document]:
    return vector_store_1000_qwen.similarity_search(query, k=1)



In [9]:
# --- 1. 导入必要的“链接”工具 ---
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
from langchain_openai import ChatOpenAI # 注意：使用 ChatOpenAI，而不是 OpenAI

# --- 初始化的DeepSeek 模型 ---

llm = ChatOpenAI(
    model="deepseek-chat",
    api_key=deepseek_apikey, 
    base_url="https://api.deepseek.com" 
)

# --- 定义 RAG 链 ---
# A. Prompt 模板：告诉 LLM 如何使用上下文
template = """
你是一个问答助手。请根据下面提供的“上下文”来回答问题。
如果你在上下文中找不到答案，就说你不知道。

上下文:
{context}

问题:
{question}
"""
prompt = ChatPromptTemplate.from_template(template)

# B. 定义格式化函数：把 Document 列表转换成普通字符串
def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)

# C. 把所有“零件”用“|”(管道)串联起来
rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

print("✅ RAG 链创建成功！")
print(rag_chain)

✅ RAG 链创建成功！
first={
  context: RunnableLambda(retriever)
           | RunnableLambda(format_docs),
  question: RunnablePassthrough()
} middle=[ChatPromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, template='\n你是一个问答助手。请根据下面提供的“上下文”来回答问题。\n如果你在上下文中找不到答案，就说你不知道。\n\n上下文:\n{context}\n\n问题:\n{question}\n'), additional_kwargs={})]), ChatOpenAI(client=<openai.resources.chat.completions.completions.Completions object at 0x000001DECDC81420>, async_client=<openai.resources.chat.completions.completions.AsyncCompletions object at 0x000001DF2E48E680>, root_client=<openai.OpenAI object at 0x000001DECDC80EE0>, root_async_client=<openai.AsyncOpenAI object at 0x000001DF2E48E650>, model_name='deepseek-chat', model_kwargs={}, openai_api_key=SecretStr('**********'), openai_api_base='https://api.deepseek.com')] last=StrOutputP

In [15]:
# # 运行这条链！
# question = "今天早上吃什么"
# response = rag_chain.invoke(question)

# print(response)

In [10]:
# 实验组 A：小块
text_splitter_300 = RecursiveCharacterTextSplitter(
    chunk_size=300, chunk_overlap=50, add_start_index=True
)
all_splits_300 = text_splitter_300.split_documents(docs) # 'docs' 来自 [cell 2]

# 关键：创建一个*新*的数据库集合来存放
vector_store_300 = Chroma(
    collection_name="rag_pdf_chunk_300", # 新的名字
    embedding_function=embeddings, # 还是用 Qwen
    persist_directory="../db/chroma_chunk_300_db", # 新的目录
)
vector_store_300.add_documents(documents=all_splits_300)

['773f9386-60d4-4ac0-85af-fae71065d531',
 'fd963fda-a299-4f56-b05b-ec43928d47d7',
 '8f6b3ab9-0a74-4760-ace9-10c52da238fc',
 '9c48ac48-6c18-4c75-8926-ab1c4c410742',
 'a56d5314-8694-4680-8842-c43019fd6e21',
 'c7032830-8b6a-4db5-bbd3-e11f761eb666',
 'f22b2da9-ef40-4693-af71-d98ba8d31fa0',
 '9aff31b6-f956-484d-91bc-64761de7f8de',
 'bae53c4f-d884-4cae-8d39-f3169652608c',
 '829d25c4-a6ed-467d-855e-6d4f36d2cc23',
 '12513ff8-4ed7-4df6-a587-87a2b8e7f546',
 '8e587783-7c3e-4d89-960c-e7ae7f18f742',
 '840328c4-04f7-408a-8bf8-edae19e4bcfb',
 'c3d125ad-03bd-4078-9b5c-1443d7198158',
 '68295870-40c0-41f2-aecc-cd3ca15995ba',
 '43263ef2-d081-4640-8f7f-1b7cc452d3ad',
 '884d693b-b186-491b-ad7d-aa9d985dd188',
 '9ad65168-6a3d-431e-ab81-b684a3966237',
 '0fd5de33-3728-40de-8557-2a49e65ca79d',
 'f11ec5b5-aa42-41ab-83e5-6d4ecbdd1e41',
 '241d5ba2-8b30-4402-879b-4ae3ed26569d',
 'b5fdd203-c5bc-42e7-a7b0-edeb5799710b',
 'e151f26d-810d-4c27-abba-a6a45c778d64',
 '3b29b06f-220c-4bcd-8789-71bf6d143d31',
 'e0cff7cf-1563-

In [12]:
# 原始查询 (k=1)
retriever_1000 = vector_store_1000_qwen.as_retriever(search_kwargs={"k": 1})
print("--- 1000 Chunk 结果 ---")
print(retriever_1000.invoke("What did Ars Technica say about RAG?"))

# 新查询 (k=1)
retriever_300 = vector_store_300.as_retriever(search_kwargs={"k": 1})
print("--- 300 Chunk 结果 ---")
print(retriever_300.invoke("What did Ars Technica say about RAG?"))

--- 1000 Chunk 结果 ---
[Document(id='1c9e83ec-22cd-4f4d-b4cf-a28112a10ed4', metadata={'source': '../data/rag.pdf', 'total_pages': 5, 'page_label': '1', 'creationdate': '2025-10-31T03:11:04+08:00', 'page': 0, 'producer': 'jsPDF 3.0.1', 'creator': 'PyPDF', 'start_index': 758}, page_content='relevant text from databases, uploaded documents, or web  sources.[1] According to Ars \nTechnica, "RAG  is a way of improving LLM  performance, in essence by blending the LLM  \nprocess with a web  search or other document look-up process to help LLMs  stick to the facts."\nThis method helps reduce AI hallucinations,[3] which have caused chatbots to describe \npolicies that don\'t exist, or recommend  nonexistent legal cases to lawyers that are looking for \ncitations to support their arguments.[4]\nRAG  also reduces the need to retrain LLMs  with new  data, saving on computational and \nfinancial costs.[1] Beyond efficiency gains, RAG  also allows LLMs  to include sources in their \nresponses, so use

你现在必须站在**下一步（LLM，即你的 DeepSeek 模型）**的角度来思考。

对于 300 块 (Chunk=300) 的结果：

你（作为 DeepSeek）收到的上下文是："relevant text... Ars Technica..."

这是一个极其干净、高度聚焦的“小纸条”。

你的任务很简单：“嘿，DeepSeek，请总结一下 Ars Technica 说了什么。”

结果： DeepSeek 会给你一个非常棒的、直接的答案。

对于 1000 块 (Chunk=1000) 的结果：

你（作为 DeepSeek）收到的上下文是：

"relevant text... Ars Technica..."（这是你要的答案）

"...This method helps reduce AI hallucinations..."（这是无关噪音）

"...RAG also reduces the need to retrain LLMs..."（这是无关噪音）

"...RAG was first introduced in a 2020..."（这是无关噪音）

这是一个**非常“吵闹”、充满“噪音”**的“大文档”。

结果： DeepSeek 可能会“犯迷糊”。它在回答“Ars Technica 说了什么”时，很有可能会被后面“减少幻觉”、“2020年论文”等噪音带偏，给你一个冗长且跑题的答案。

总结
你这个实验完美证明了那个为期 2 天的计划的核心：

Chunking (分块) 是 RAG 中最重要的“权衡” (Trade-off) 之一。

小块 (Chunk=300)： 在回答**“具体细节”问题时，能提供更干净、更精确**的上下文。

大块 (Chunk=1000)： 可能会提供**“过多”的上下文，反而“污染”**了 LLM 的思考。

In [13]:
# 原始查询 (k=1)
retriever_1000 = vector_store_1000_qwen.as_retriever(search_kwargs={"k": 1})
print("--- 1000 Chunk 结果 ---")
print(retriever_1000.invoke("What is RAG and give me an example?"))

# 新查询 (k=1)
retriever_300 = vector_store_300.as_retriever(search_kwargs={"k": 1})
print("--- 300 Chunk 结果 ---")
print(retriever_300.invoke("What is RAG and give me an example?"))


--- 1000 Chunk 结果 ---
[Document(id='194c90df-003d-47ee-9465-2afe2a99d110', metadata={'page_label': '1', 'source': '../data/rag.pdf', 'start_index': 0, 'page': 0, 'producer': 'jsPDF 3.0.1', 'creator': 'PyPDF', 'total_pages': 5, 'creationdate': '2025-10-31T03:11:04+08:00'}, page_content='Retrieval-augmented generation (RAG) is a technique that enables large language models \n(LLMs) to retrieve and incorporate new  information.[1] With RAG,  LLMs  do not respond to user\nqueries until they refer to a specified set of documents. These documents supplement \ninformation from the LLM\'s pre-existing training data.[2] This allows LLMs  to use \ndomain-specific and/or updated information that is not available in the training data.[2] For \nexample, this helps LLM-based chatbots access internal company data or generate responses\nbased on authoritative sources.\nRAG  improves large language models (LLMs) by incorporating information retrieval before \ngenerating responses.[3] Unlike traditional

In [18]:
%pip install -U sentence-transformers

Collecting sympy==1.13.1 (from torch>=1.11.0->sentence-transformers)
  Downloading sympy-1.13.1-py3-none-any.whl.metadata (12 kB)
Downloading sympy-1.13.1-py3-none-any.whl (6.2 MB)
   ---------------------------------------- 0.0/6.2 MB ? eta -:--:--
   - -------------------------------------- 0.3/6.2 MB ? eta -:--:--
   --------------- ------------------------ 2.4/6.2 MB 9.0 MB/s eta 0:00:01
   ------------------------------ --------- 4.7/6.2 MB 9.8 MB/s eta 0:00:01
   ---------------------------------------- 6.2/6.2 MB 10.0 MB/s  0:00:00
Installing collected packages: sympy
  Attempting uninstall: sympy
    Found existing installation: sympy 1.14.0
    Uninstalling sympy-1.14.0:
      Successfully uninstalled sympy-1.14.0
Successfully installed sympy-1.13.1
Note: you may need to restart the kernel to use updated packages.


In [14]:
# 实验组 B：新模型
model_name_mini = "sentence-transformers/all-MiniLM-L6-v2"

embeddings_mini = HuggingFaceEmbeddings(
    model_name=model_name_mini,
    model_kwargs={'device': 'cuda'} # 同样用 GPU
)

# 用回你 1000 大小的切块
vector_store_mini = Chroma(
    collection_name="rag_pdf_minilm", # 新的名字
    embedding_function=embeddings_mini, # 传入新模型
    persist_directory="../db/chroma_minilm_db", # 新的目录
)
vector_store_mini.add_documents(documents=all_splits) # 'all_splits' 来自 [cell 3]

['fefbeacd-e3c0-4df4-b831-7f2336d6961a',
 'ba4398b7-88cd-481a-8885-3a23173a6dda',
 'dca30fa8-05e4-4694-b225-508f34c35229',
 'cec2561a-073c-4178-8de0-e5dd58caa510',
 'cc400814-6df9-4692-97e6-aae7b0134e8c',
 '5dc88865-52ce-4c6b-8898-d3d0972acbb0',
 'c0d759a7-11c6-4442-a5b4-cf8e59c58332',
 '4f58d890-b5ea-47e6-9165-7987bb3c7ac6',
 '0efb8ad4-0858-4bbc-8c37-60166a16d7fc',
 'aa72ea73-32b2-4833-b639-a5a757851421',
 '634bcc74-bea3-4d28-a107-93df487175d4',
 'cf3963ce-0633-4f65-be6c-e68270134c75',
 '1d57b038-6b7b-4668-ba53-a66a6434968a',
 '93a9b0ef-dafd-4e91-b318-56b3541116ea',
 'fc0944aa-ad52-4147-94d1-53ab3b0f5e13',
 'cf73cdba-9fdb-438a-9510-08b94bc8a4a7']

In [15]:
# Qwen (1024维)
results_qwen = vector_store_1000_qwen.similarity_search_with_score("What is RAG and give me an example?")
print(f"--- Qwen 结果 (Score: {results_qwen[0][1]}) ---")
print(results_qwen[0][0].page_content)

# MiniLM (384维)
results_mini = vector_store_mini.similarity_search_with_score("What is RAG and give me an example?")
print(f"--- MiniLM 结果 (Score: {results_mini[0][1]}) ---")
print(results_mini[0][0].page_content)

--- Qwen 结果 (Score: 0.4299754798412323) ---
Retrieval-augmented generation (RAG) is a technique that enables large language models 
(LLMs) to retrieve and incorporate new  information.[1] With RAG,  LLMs  do not respond to user
queries until they refer to a specified set of documents. These documents supplement 
information from the LLM's pre-existing training data.[2] This allows LLMs  to use 
domain-specific and/or updated information that is not available in the training data.[2] For 
example, this helps LLM-based chatbots access internal company data or generate responses
based on authoritative sources.
RAG  improves large language models (LLMs) by incorporating information retrieval before 
generating responses.[3] Unlike traditional LLMs  that rely on static training data, RAG  pulls 
relevant text from databases, uploaded documents, or web  sources.[1] According to Ars 
Technica, "RAG  is a way of improving LLM  performance, in essence by blending the LLM
--- MiniLM 结果 (Score: 1