<a href="https://colab.research.google.com/github/sunshine66980/LLM/blob/main/RAG_basic.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
# 1. 安装依赖（重启运行时后运行此单元格）
!pip install -U transformers FlagEmbedding faiss-cpu sentence-transformers langchain accelerate requests python-dotenv langchain-community


Collecting langchain-community
  Downloading langchain_community-0.3.26-py3-none-any.whl.metadata (2.9 kB)
Collecting dataclasses-json<0.7,>=0.5.7 (from langchain-community)
  Downloading dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)
Collecting pydantic-settings<3.0.0,>=2.4.0 (from langchain-community)
  Downloading pydantic_settings-2.10.1-py3-none-any.whl.metadata (3.4 kB)
Collecting httpx-sse<1.0.0,>=0.4.0 (from langchain-community)
  Downloading httpx_sse-0.4.1-py3-none-any.whl.metadata (9.4 kB)
Collecting marshmallow<4.0.0,>=3.18.0 (from dataclasses-json<0.7,>=0.5.7->langchain-community)
  Downloading marshmallow-3.26.1-py3-none-any.whl.metadata (7.3 kB)
Collecting typing-inspect<1,>=0.4.0 (from dataclasses-json<0.7,>=0.5.7->langchain-community)
  Downloading typing_inspect-0.9.0-py3-none-any.whl.metadata (1.5 kB)
Collecting mypy-extensions>=0.3.0 (from typing-inspect<1,>=0.4.0->dataclasses-json<0.7,>=0.5.7->langchain-community)
  Downloading mypy_extensions-1.1.0-py3-n

In [5]:
 # 临时设置
%env SILICONFLOW_API_KEY=sk-zqervmplsjboenurtvppdvsnuhfkhxkefmyowxcmfvgtcvbx
!echo $SILICONFLOW_API_KEY           # 验证

import os
print("当前API_KEY:", os.getenv("SILICONFLOW_API_KEY"))  # 确认密钥加载

env: SILICONFLOW_API_KEY=sk-zqervmplsjboenurtvppdvsnuhfkhxkefmyowxcmfvgtcvbx
sk-zqervmplsjboenurtvppdvsnuhfkhxkefmyowxcmfvgtcvbx
当前API_KEY: sk-zqervmplsjboenurtvppdvsnuhfkhxkefmyowxcmfvgtcvbx


In [6]:

# 2. 导入库
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceBgeEmbeddings
import requests
import os
from dotenv import load_dotenv
import numpy as np

# 3. 初始化BGE-M3嵌入模型（使用T4 GPU加速）
model_name = "BAAI/bge-m3"
model_kwargs = {"device": "cuda"}  # 使用GPU加速
encode_kwargs = {
    "normalize_embeddings": True,  # 归一化向量提升精度
    "query_instruction": ""        # BGE-M3需要空指令
}

embeddings = HuggingFaceBgeEmbeddings(
    model_name=model_name,
    model_kwargs=model_kwargs,
    encode_kwargs=encode_kwargs
)
print("✅ BGE-M3嵌入模型加载完成，使用T4 GPU加速")

# 4. 配置硅基流动API（在Colab左侧钥匙图标中添加环境变量SILICONFLOW_API_KEY）
load_dotenv()
api_key = os.getenv("SILICONFLOW_API_KEY")
if not api_key:
    raise ValueError("请添加SILICONFLOW_API_KEY环境变量")

# 硅基流动API调用函数
def call_siliconflow(prompt: str, model="deepseek-ai/DeepSeek-R1"):
    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json"
    }
    data = {
        "model": model,
        "messages": [{"role": "user", "content": prompt}],
        "max_tokens": 500
    }
    try:
        response = requests.post(
            "https://api.siliconflow.cn/v1/chat/completions",
            headers=headers,
            json=data,
            timeout=30
        )
        response.raise_for_status()  # 检查HTTP状态码
        response_data = response.json()
        if "choices" in response_data:
            return response_data["choices"][0]["message"]["content"]
        else:
            raise ValueError(f"无效响应结构: {response_data}")
    except requests.exceptions.RequestException as e:
        print(f"API请求失败: {e}\n响应文本: {response.text if 'response' in locals() else '无响应'}")
        return "请求失败，请检查网络或API配置"
    except ValueError as e:
        print(f"API响应解析失败: {e}\n原始响应: {response.text}")
        return "解析响应时发生错误"

# 5. 准备知识库文档
documents = [
    "BGE-M3支持多向量检索和稀疏检索，适用于长文档处理（最大支持8192 tokens）",
    "在知识图谱构建中，BGE-M3能有效合并相似实体（如‘孙悟空’和‘悟空’）",
    "硅基流动的DeepSeek-R1模型支持128K上下文长度，适合处理超长文本",
    "RAG通过检索外部知识库增强生成结果，减少模型幻觉",
    "教学时建议分块大小为500-1000字符，重叠50字符以保持语义连贯"
]

# 6. 文本分块处理
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=300,      # 分块大小
    chunk_overlap=50,    # 重叠字符数
    length_function=len  # 长度计算函数
)
texts = text_splitter.create_documents(documents)
print(f"📚 知识库分割为 {len(texts)} 个文本块")

# 7. 创建向量数据库（FAISS）
vector_db = FAISS.from_documents(texts, embeddings)
retriever = vector_db.as_retriever(search_kwargs={"k": 3})  # 返回Top3结果
print("🔍 FAISS向量数据库构建完成")

# 8. 检索增强生成函数
def rag_query(question: str):
    # 检索相关文档
    retrieved_docs = retriever.get_relevant_documents(question)
    context = "\n".join([doc.page_content for doc in retrieved_docs])

    # 构造增强提示
    prompt = f"""基于以下上下文回答问题：
{context}
问题：{question}
要求：回答需简洁准确，并标注引用来源编号[1-3]"""

    # 调用大模型生成答案
    answer = call_siliconflow(prompt)

    # 打印结果
    print("="*50)
    print(f"❓ 问题: {question}")
    print("-"*50)
    print(f"📄 检索到的上下文:\n{context}")
    print("-"*50)
    print(f"💡 生成的答案:\n{answer}")
    print("="*50)
    return answer

# 9. 测试RAG流程
rag_query("BGE-M3如何处理长文档？")
rag_query("RAG在教学中有哪些优势？")

  embeddings = HuggingFaceBgeEmbeddings(
The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/123 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/54.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/687 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/2.27G [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/2.27G [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/444 [00:00<?, ?B/s]

sentencepiece.bpe.model:   0%|          | 0.00/5.07M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.1M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/964 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/191 [00:00<?, ?B/s]

✅ BGE-M3嵌入模型加载完成，使用T4 GPU加速
📚 知识库分割为 5 个文本块
🔍 FAISS向量数据库构建完成


  retrieved_docs = retriever.get_relevant_documents(question)


❓ 问题: BGE-M3如何处理长文档？
--------------------------------------------------
📄 检索到的上下文:
BGE-M3支持多向量检索和稀疏检索，适用于长文档处理（最大支持8192 tokens）
在知识图谱构建中，BGE-M3能有效合并相似实体（如‘孙悟空’和‘悟空’）
硅基流动的DeepSeek-R1模型支持128K上下文长度，适合处理超长文本
--------------------------------------------------
💡 生成的答案:

基于上下文，**BGE-M3 通过支持长达 ```8192 tokens``` 的文档长度来处理长文档**。

**引用来源：**[1]
API请求失败: HTTPSConnectionPool(host='api.siliconflow.cn', port=443): Read timed out. (read timeout=30)
响应文本: 无响应
❓ 问题: RAG在教学中有哪些优势？
--------------------------------------------------
📄 检索到的上下文:
RAG通过检索外部知识库增强生成结果，减少模型幻觉
硅基流动的DeepSeek-R1模型支持128K上下文长度，适合处理超长文本
在知识图谱构建中，BGE-M3能有效合并相似实体（如‘孙悟空’和‘悟空’）
--------------------------------------------------
💡 生成的答案:
请求失败，请检查网络或API配置


'请求失败，请检查网络或API配置'