In [29]:
import os
import sys
from dotenv import load_dotenv
from langchain_core.documents import Document
from langchain_chroma import Chroma
from langchain_openai import OpenAIEmbeddings

# 加载.env文件中的环境变量
load_dotenv()
# print(os.environ["LANGCHAIN_TRACING_V2"])

# embedding = OpenAIEmbeddings(api_key=os.environ["API_KEY"], base_url="https://api.siliconflow.cn/v1", model="BAAI/bge-large-zh-v1.5")

embedding = OpenAIEmbeddings(base_url="http://192.168.88.15:9001")

documents = [
    Document(
        page_content="狗是忠诚和友善的动物，是人类的好朋友。",
        metadata={"source": "哺乳动物类宠物文档"},
    ),
    Document(
        page_content="猫是独立性很强的动物，通常喜欢自己的空间。",
        metadata={"source": "哺乳动物类宠物文档"},
    ),
    Document(
        page_content="金鱼是相对容易饲养的鱼类，是很受新手欢迎的宠物。",
        metadata={"source": "鱼类宠物文档"},
    ),
    Document(
        page_content="鹦鹉是具有高智商的鸟类，能够模仿人类说话。",
        metadata={"source": "鸟类宠物文档"},
    ),

]

vectorstore = Chroma.from_documents(
    documents,
    embedding=embedding,
    persist_directory="./chroma_db"
)

In [30]:
vectorstore.similarity_search("金鱼")

[Document(id='af540e7c-ab1a-4b9d-bd9d-b76cbcdf74f5', metadata={'source': '鱼类宠物文档'}, page_content='金鱼是相对容易饲养的鱼类，是很受新手欢迎的宠物。'),
 Document(id='6502aed4-54d4-42fc-8313-8a566ffb122d', metadata={'source': '鸟类宠物文档'}, page_content='鹦鹉是具有高智商的鸟类，能够模仿人类说话。'),
 Document(id='73d8123c-74af-49df-b30c-cdff212df7b9', metadata={'source': '哺乳动物类宠物文档'}, page_content='猫是独立性很强的动物，通常喜欢自己的空间。'),
 Document(id='ef976202-9942-4c12-8fce-e3644ec779e0', metadata={'source': '哺乳动物类宠物文档'}, page_content='狗是忠诚和友善的动物，是人类的好朋友。')]

### 返回分数

In [31]:
vectorstore.similarity_search_with_score("猫")

[(Document(id='73d8123c-74af-49df-b30c-cdff212df7b9', metadata={'source': '哺乳动物类宠物文档'}, page_content='猫是独立性很强的动物，通常喜欢自己的空间。'),
  0.6830877661705017),
 (Document(id='6502aed4-54d4-42fc-8313-8a566ffb122d', metadata={'source': '鸟类宠物文档'}, page_content='鹦鹉是具有高智商的鸟类，能够模仿人类说话。'),
  0.7485527396202087),
 (Document(id='af540e7c-ab1a-4b9d-bd9d-b76cbcdf74f5', metadata={'source': '鱼类宠物文档'}, page_content='金鱼是相对容易饲养的鱼类，是很受新手欢迎的宠物。'),
  0.8727548718452454),
 (Document(id='ef976202-9942-4c12-8fce-e3644ec779e0', metadata={'source': '哺乳动物类宠物文档'}, page_content='狗是忠诚和友善的动物，是人类的好朋友。'),
  1.6835222244262695)]

In [32]:
output_embedding = embedding.embed_query("金鱼")

vectorstore.similarity_search_by_vector(output_embedding)

[Document(id='af540e7c-ab1a-4b9d-bd9d-b76cbcdf74f5', metadata={'source': '鱼类宠物文档'}, page_content='金鱼是相对容易饲养的鱼类，是很受新手欢迎的宠物。'),
 Document(id='6502aed4-54d4-42fc-8313-8a566ffb122d', metadata={'source': '鸟类宠物文档'}, page_content='鹦鹉是具有高智商的鸟类，能够模仿人类说话。'),
 Document(id='73d8123c-74af-49df-b30c-cdff212df7b9', metadata={'source': '哺乳动物类宠物文档'}, page_content='猫是独立性很强的动物，通常喜欢自己的空间。'),
 Document(id='ef976202-9942-4c12-8fce-e3644ec779e0', metadata={'source': '哺乳动物类宠物文档'}, page_content='狗是忠诚和友善的动物，是人类的好朋友。')]

In [33]:
from langchain_core.documents import Document
from langchain_core.runnables import RunnableLambda

retriever = RunnableLambda(vectorstore.similarity_search).bind(k=1)  # select top result

retriever.batch(["鹦鹉"])

[[Document(id='6502aed4-54d4-42fc-8313-8a566ffb122d', metadata={'source': '鸟类宠物文档'}, page_content='鹦鹉是具有高智商的鸟类，能够模仿人类说话。')]]

In [34]:
retriever = vectorstore.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 4},
)

retriever.batch(["狗"])

[[Document(id='ef976202-9942-4c12-8fce-e3644ec779e0', metadata={'source': '哺乳动物类宠物文档'}, page_content='狗是忠诚和友善的动物，是人类的好朋友。'),
  Document(id='6502aed4-54d4-42fc-8313-8a566ffb122d', metadata={'source': '鸟类宠物文档'}, page_content='鹦鹉是具有高智商的鸟类，能够模仿人类说话。'),
  Document(id='73d8123c-74af-49df-b30c-cdff212df7b9', metadata={'source': '哺乳动物类宠物文档'}, page_content='猫是独立性很强的动物，通常喜欢自己的空间。'),
  Document(id='af540e7c-ab1a-4b9d-bd9d-b76cbcdf74f5', metadata={'source': '鱼类宠物文档'}, page_content='金鱼是相对容易饲养的鱼类，是很受新手欢迎的宠物。')]]

In [35]:
import getpass
import os

# from langchain_deepseek import ChatDeepSeek
#
# llm = ChatDeepSeek(
#         model="deepseek-chat",
#         temperature=0,
#         max_tokens=1024,
#         timeout=None,
#         max_retries=2)

# from langchain_openai import ChatOpenAI
#
# llm = ChatOpenAI(base_url="https://api.siliconflow.cn/v1/", model="Qwen/Qwen3-8B")

from langchain_openai import ChatOpenAI

llm = ChatOpenAI(base_url="http://192.168.88.15:1025/v1/", model="qwen3-4b")

In [36]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough

message = """
Answer this question using the provided context only.

{question}

Context:
{context}
"""

prompt = ChatPromptTemplate.from_messages([("human", message)])

rag_chain = {"context": retriever, "question": RunnablePassthrough()} | prompt | llm

print(retriever)

tags=['Chroma', 'OpenAIEmbeddings'] vectorstore=<langchain_chroma.vectorstores.Chroma object at 0x000001B3361B4910> search_kwargs={'k': 4}


In [40]:
response = rag_chain.invoke("请介绍一下金鱼")

print(response.content)

金鱼是相对容易饲养的鱼类，是很受新手欢迎的宠物。
