In [26]:
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain.document_loaders import TextLoader

# Hard-coded absolute Windows paths to the two input text files;
# adjust them when running this notebook in another environment.
byd_file_path = r"D:\appChat_demo\backend\data\docs\byd.txt"

tesla_file_path = r"D:\appChat_demo\backend\data\docs\tesla.txt"

def make_docs(file_path, chunk_size=500, chunk_overlap=100):
    """Load a text file and split it into overlapping chunks.

    Args:
        file_path: Path of the text file to load.
        chunk_size: Maximum chunk length handed to the splitter
            (defaults to 500, the value this notebook has always used).
        chunk_overlap: Overlap between consecutive chunks handed to the
            splitter (defaults to 100).

    Returns:
        The chunk documents produced by ``split_documents``. Because
        ``add_start_index=True`` is set, each chunk's metadata carries a
        ``start_index`` entry alongside the ``source`` path.
    """
    # autodetect_encoding lets the loader cope with files whose encoding
    # is not the default one.
    loader = TextLoader(file_path, autodetect_encoding=True)
    data = loader.load()

    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        add_start_index=True,
    )

    return text_splitter.split_documents(data)


# Chunk both source files with the shared loader/splitter helper
# (BYD first, then Tesla).
byd_docs, tesla_docs = make_docs(byd_file_path), make_docs(tesla_file_path)

In [27]:
# Attach brand/country tags to every chunk so that later searches can be
# filtered by metadata. Each chunk gets its own fresh list object.
for doc in tesla_docs:
    doc.metadata.update(tags=['tesla', 'usa'])

for doc in byd_docs:
    doc.metadata.update(tags=['byd', 'china'])

In [35]:
# Split each chunk list into parallel lists: one of metadata dicts and one
# of raw chunk texts.
byd_metadata, byd_texts = (
    [chunk.metadata for chunk in byd_docs],
    [chunk.page_content for chunk in byd_docs],
)

tesla_metadata, tesla_texts = (
    [chunk.metadata for chunk in tesla_docs],
    [chunk.page_content for chunk in tesla_docs],
)

In [42]:
import dotenv
# Locate a .env file and load its variables into the process environment
# (presumably this is where the OpenAI API key comes from — confirm).
dotenv.load_dotenv(dotenv.find_dotenv())


from langchain.vectorstores import FAISS
from langchain_openai import OpenAIEmbeddings
import numpy as np
import os

# Initialise the vector database: start with the embedding client
# (uses OpenAI's embedding service).
embedding = OpenAIEmbeddings()

# Directory in which the FAISS index is persisted.
PATH = r"D:\appChat_demo\backend\data\docs\faiss"

# Embed the BYD chunks and build a fresh FAISS index from them.
vectorstore = FAISS.from_documents(byd_docs, embedding)

# Save the vector database to disk.
vectorstore.save_local(PATH)



In [43]:
# Load the existing vector DB that was saved at PATH.
# NOTE: allow_dangerous_deserialization=True opts in to pickle-based loading;
# only use it on index files you created yourself.
loaded_vectorstore = FAISS.load_local(
    folder_path=PATH,
    embeddings=embedding,
    allow_dangerous_deserialization=True,
)

# Embed and append the Tesla chunks; the call returns the ids assigned to them.
docs_vertor_index = loaded_vectorstore.add_documents(documents=tesla_docs)

In [44]:
# Display the ids returned when the Tesla chunks were added in the previous
# cell. Calling add_documents(tesla_docs) again here would insert every Tesla
# chunk a second time and leave duplicates in the saved index.
docs_vertor_index

['238d76cb-d9c7-4801-8ee0-408a13fbb4a3',
 'f6e8eaba-0779-47d7-8d6e-24046f325457',
 'c2a0b647-c43d-492d-85b9-7d22b9bb2a4d',
 '4d10b06f-0da8-426f-a8cb-912f98317524',
 'a3076411-3714-40e8-9274-6e86661fa7b0',
 'facd4074-3e3e-4410-abae-8884acd212ee',
 '4d7f28b3-0a3c-4b4a-a02f-f29f6cbf1417',
 '6180ac48-2ffc-4ffe-b3c4-06d4d97375e0',
 '62a76449-9258-4f9a-a529-9985613eab9b',
 'eb6ad10b-009a-4e66-99c7-4b7eafd09b20',
 '0f1fd469-a15e-4ad8-b70c-320fccb9d66b',
 'b5e76c28-6d1d-47ec-a74c-175d6a928264',
 '66ab644d-2c21-40f0-b463-b8b9e6dc3596',
 'e41304ec-223f-49dd-9bea-d6d2ed4596f9',
 'eb78ee9d-b9f6-44e6-9f9b-df66abee57a1',
 'e768f3ba-8904-48bd-97a0-221d8fa0016e',
 '31b62bdc-470a-4013-8950-1e50b0bad58d',
 '66d81f79-0970-4141-b7a8-38ca3e5ed88c',
 '690bf472-8d5d-411e-8b42-757842ad8520',
 'aacc34f8-9ba1-40c7-8fcf-1e66bc8e73aa',
 'e6dd26ef-ea14-4cb0-8909-6ea684df3caf',
 'e1a3bc63-9b50-4aaf-87b3-c9802effd83e',
 '57b8b6c0-373a-40c3-a9e5-da7a688603f1',
 '9711b184-8340-4915-8330-7c17da485ad2',
 '9fff7bed-b1aa-

In [45]:
# Write the index (BYD chunks plus the added Tesla chunks) back to PATH,
# replacing the earlier save made to the same directory.
loaded_vectorstore.save_local(folder_path=PATH)

In [46]:
# Load the previously saved vector database from disk.
loaded_vectorstore = FAISS.load_local(
    PATH,
    embedding,
    allow_dangerous_deserialization=True,
)

In [68]:
# Metadata filtering: restrict the similarity search to chunks tagged "tesla".
# NOTE(review): the FAISS store appears to apply the filter to the first
# `fetch_k` candidates (default 20 per the LangChain docs), so fewer than k
# hits may come back — confirm and raise fetch_k if results look sparse.

query_text = "what is tesla"
docs = loaded_vectorstore.similarity_search_with_score(
    query_text,
    k=5,
    # .get() keeps chunks without a "tags" entry from raising KeyError;
    # such chunks simply do not match.
    filter=lambda x: "tesla" in x.get("tags", ()),
)

# Print the results: content, metadata and score of each hit.
for doc, score in docs:
    print(f"Content: {doc.page_content}, Metadata: {doc.metadata}, Score: {score}")

Content: Founding (2003–2004)
The company was incorporated as Tesla Motors, Inc. on July 1, 2003, by Martin Eberhard and Marc Tarpenning.[10][11] They served as chief executive officer and chief financial officer, respectively.[12] Eberhard said that he wanted to build "a car manufacturer that is also a technology company", with its core technologies as "the battery, the computer software, and the proprietary motor".[13], Metadata: {'source': 'D:\\appChat_demo\\backend\\data\\docs\\tesla.txt', 'start_index': 0, 'tags': ['tesla', 'usa']}, Score: 0.3647381067276001
Content: Roadster (2005–2009)
Main article: Tesla Roadster (first generation)
Elon Musk took an active role within the company, but was not deeply involved in day-to-day business operations.[18] The company's strategy was to start with a premium sports car aimed at early adopters and then move into more mainstream vehicles, including sedans and affordable compacts.[19], Metadata: {'source': 'D:\\appChat_demo\\backend\\data\\do

In [72]:
from langchain_chroma import Chroma
from langchain_community.vectorstores import FAISS
from langchain_openai import OpenAIEmbeddings

# Initialise the vector database: embedding client first.
embedding = OpenAIEmbeddings()

# Chroma store backed by this on-disk directory.
chroma_persist_directory = r'D:\appChat_demo\backend\data\vector_db'
vectordb = Chroma(
    embedding_function=embedding,
    persist_directory=chroma_persist_directory,
)

# Disabled example: the same query run against the Chroma store.
# query_text = "what is tesla"
# docs = vectordb.similarity_search_with_score(query_text, k=5)

# # Print the results
# for doc, score in docs:
#     print(f"Content: {doc.page_content}, Metadata: {doc.metadata}, Score: {score}")

# Show the concrete class of the FAISS store loaded in an earlier cell.
type(loaded_vectorstore)

langchain_community.vectorstores.faiss.FAISS