In [1]:
import sys
import os
from dotenv import load_dotenv


load_dotenv()

OPENAI_API_BASE = os.environ.get("OPENAI_API_BASE")
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")

sys.path.append(os.path.abspath(os.path.join(os.getcwd(), '..')))

In [2]:
from langchain_openai import ChatOpenAI
from helper_functions import get_local_embedding_model


llm = ChatOpenAI(model="qwen-max", temperature=0)
embedding_model = get_local_embedding_model(model_name="C:\\Home\\Documents\\Projects\\models\\BAAI\\bge-large-en-v1.5")

# 加载 CSV 文件

In [3]:
import pandas as pd

file_path = "../data/customers-100.csv"
data = pd.read_csv(file_path)

data.head()

Unnamed: 0,Index,Customer Id,First Name,Last Name,Company,City,Country,Phone 1,Phone 2,Email,Subscription Date,Website
0,1,DD37Cf93aecA6Dc,Sheryl,Baxter,Rasmussen Group,East Leonard,Chile,229.077.5154,397.884.0519x718,zunigavanessa@smith.info,2020-08-24,http://www.stephenson.com/
1,2,1Ef7b82A4CAAD10,Preston,Lozano,Vega-Gentry,East Jimmychester,Djibouti,5153435776,686-620-1820x944,vmata@colon.com,2021-04-23,http://www.hobbs.com/
2,3,6F94879bDAfE5a6,Roy,Berry,Murillo-Perry,Isabelborough,Antigua and Barbuda,+1-539-402-0259,(496)978-3969x58947,beckycarr@hogan.com,2020-03-25,http://www.lawrence.com/
3,4,5Cef8BFA16c5e3c,Linda,Olsen,"Dominguez, Mcmillan and Donovan",Bensonview,Dominican Republic,001-808-617-6467x12895,+1-813-324-8756,stanleyblackwell@benson.org,2020-06-02,http://www.good-lyons.com/
4,5,053d585Ab6b3159,Joanna,Bender,"Martin, Lang and Andrade",West Priscilla,Slovakia (Slovak Republic),001-234-203-0635x76146,001-199-446-3860x3486,colinalvarado@miles.net,2021-04-17,https://goodwin-ingram.com/


In [4]:
from langchain_community.document_loaders.csv_loader import CSVLoader


csv_loader = CSVLoader(file_path=file_path)
docs = csv_loader.load_and_split()

len(docs)

100

In [5]:
print(docs[0].page_content)

Index: 1
Customer Id: DD37Cf93aecA6Dc
First Name: Sheryl
Last Name: Baxter
Company: Rasmussen Group
City: East Leonard
Country: Chile
Phone 1: 229.077.5154
Phone 2: 397.884.0519x718
Email: zunigavanessa@smith.info
Subscription Date: 2020-08-24
Website: http://www.stephenson.com/


# 将 CSV 文件插入向量数据库

In [6]:
from uuid import uuid4
from langchain_qdrant import QdrantVectorStore
from qdrant_client import QdrantClient
from qdrant_client.http.models import Distance, VectorParams


client = QdrantClient(host="127.0.0.1", port=6333)

collection_name = "csv_file"

if not client.collection_exists(collection_name=collection_name):
    client.create_collection(
        collection_name=collection_name,
        vectors_config=VectorParams(size=1024, distance=Distance.COSINE),
    )

vector_store = QdrantVectorStore(
    client=client,
    collection_name=collection_name,
    embedding=embedding_model,
)

ids = [str(uuid4()) for _ in range(len(docs))]
vector_store.add_documents(documents=docs, ids=ids)

['f3bc9bed-5e11-4c67-8cc8-9f276c11079e',
 '16ad13a3-8c7c-463a-948a-6148514163e9',
 '813fbcb4-203b-4cb1-86e1-8672c0997f3f',
 'bb869f82-c666-4c40-b4b0-e5c54cfefc5d',
 '7f27891b-6f54-430c-9359-9b49eee75b08',
 'a93c4a95-305f-4822-a41c-71cb725bbcb9',
 '470b869b-ed27-4051-a75f-7ef67337d540',
 '1f2e76c1-30f8-45e3-b349-55e6ce88f4db',
 'f454fe9f-801b-4aae-95ea-a6d9af649a8c',
 'e43c1691-0d3a-476a-8a59-7728a0c65c5a',
 'cd401506-53c6-47e5-86a8-af283b006123',
 '935c04e4-d96c-4620-9f56-c0fba01afb0e',
 '0fec2e1e-7eaa-4eaf-a788-6ae6be2929d7',
 'e0e3486b-43d7-47e6-a3c2-3ff3fd8e4608',
 '686426b1-4378-446a-afa4-f29c74ab4b2a',
 'b17b4303-47cd-4d04-accd-2bab447972a6',
 'f2bc61c8-b0f8-40bb-a057-e24e92252216',
 '992c5ebc-6070-41fb-9d24-23c468bddc3e',
 '46e4fc63-f1aa-42bf-8eaf-040039c2d858',
 '2d998885-85c9-4a0c-96f9-3affa9d93791',
 '672f8da3-5bac-48cc-855c-8a9072d84c87',
 '5c7f6a07-926f-4ac8-8730-ef9bc9e1b310',
 'cbfc5ed4-00b7-4d08-b20f-8c738aee9127',
 'debed951-d4a0-49d8-ba7c-108c6bc74b89',
 '762a5eac-7b04-

# 创建 RAG 链

In [7]:
retriever=vector_store.as_retriever(search_kwargs={"k": 3})

retriever.invoke("which company does sheryl Baxter work for?")

[Document(metadata={'source': '../data/customers-100.csv', 'row': 0, '_id': 'f3bc9bed-5e11-4c67-8cc8-9f276c11079e', '_collection_name': 'csv_file'}, page_content='Index: 1\nCustomer Id: DD37Cf93aecA6Dc\nFirst Name: Sheryl\nLast Name: Baxter\nCompany: Rasmussen Group\nCity: East Leonard\nCountry: Chile\nPhone 1: 229.077.5154\nPhone 2: 397.884.0519x718\nEmail: zunigavanessa@smith.info\nSubscription Date: 2020-08-24\nWebsite: http://www.stephenson.com/'),
 Document(metadata={'source': '../data/customers-100.csv', 'row': 8, '_id': 'f454fe9f-801b-4aae-95ea-a6d9af649a8c', '_collection_name': 'csv_file'}, page_content='Index: 9\nCustomer Id: C2dE4dEEc489ae0\nFirst Name: Sheryl\nLast Name: Meyers\nCompany: Browning-Simon\nCity: Robersonstad\nCountry: Cyprus\nPhone 1: 854-138-4911x5772\nPhone 2: +1-448-910-2276x729\nEmail: mariokhan@ryan-pope.org\nSubscription Date: 2020-01-13\nWebsite: https://www.bullock.net/'),
 Document(metadata={'source': '../data/customers-100.csv', 'row': 42, '_id': '443

In [None]:
from langchain_core.prompts import ChatPromptTemplate
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain


# 设置系统提示
system_prompt = (
    "You are an assistant for question-answering tasks. "
    "Use the following pieces of retrieved context to answer "
    "the question. If you don't know the answer, say that you "
    "don't know. Use three sentences maximum and keep the "
    "answer concise."
    "\n\n"
    "{context}"
)

prompt = ChatPromptTemplate.from_messages([
    ("system", system_prompt),
    ("human", "{input}"),
    
])

combine_docs_chain = create_stuff_documents_chain(llm, prompt)
rag_chain = create_retrieval_chain(retriever, combine_docs_chain)

In [12]:
rag_chain.invoke({"input": "which company does sheryl Baxter work for?"})

{'input': 'which company does sheryl Baxter work for?',
 'context': [Document(metadata={'source': '../data/customers-100.csv', 'row': 0, '_id': 'f3bc9bed-5e11-4c67-8cc8-9f276c11079e', '_collection_name': 'csv_file'}, page_content='Index: 1\nCustomer Id: DD37Cf93aecA6Dc\nFirst Name: Sheryl\nLast Name: Baxter\nCompany: Rasmussen Group\nCity: East Leonard\nCountry: Chile\nPhone 1: 229.077.5154\nPhone 2: 397.884.0519x718\nEmail: zunigavanessa@smith.info\nSubscription Date: 2020-08-24\nWebsite: http://www.stephenson.com/'),
  Document(metadata={'source': '../data/customers-100.csv', 'row': 8, '_id': 'f454fe9f-801b-4aae-95ea-a6d9af649a8c', '_collection_name': 'csv_file'}, page_content='Index: 9\nCustomer Id: C2dE4dEEc489ae0\nFirst Name: Sheryl\nLast Name: Meyers\nCompany: Browning-Simon\nCity: Robersonstad\nCountry: Cyprus\nPhone 1: 854-138-4911x5772\nPhone 2: +1-448-910-2276x729\nEmail: mariokhan@ryan-pope.org\nSubscription Date: 2020-01-13\nWebsite: https://www.bullock.net/'),
  Document(m