使用 \test_dir\milvus_docs\en\faq 目录下的四个 Markdown 文档进行测试。

先创建两个函数，将文本内容转化为向量，这个步骤和embeddings_test.ipynb中差不多

In [10]:
from pymilvus import model

# Shared dense embedding function built on the sentence-transformers model
# "all-MiniLM-L6-v2", configured to run on the CPU. The helper functions in
# this notebook use it to turn text into vectors.
sentence_transformer_ef = model.dense.SentenceTransformerEmbeddingFunction(
    model_name="all-MiniLM-L6-v2", device="cpu"
)

def encode_document(doc):
    """Return the embedding vector for a single document string.

    The document is wrapped in a one-element list before being handed to
    ``sentence_transformer_ef.encode_documents``; the first (and only)
    entry of the result is returned.
    """
    return sentence_transformer_ef.encode_documents([doc])[0]

def encode_query(query):
    """Return the embedding vector for a single query string.

    Args:
        query: The question text to embed.

    Returns:
        The first (and only) vector produced by
        ``sentence_transformer_ef.encode_queries`` for ``query``.
    """
    # encode_queries works on a batch (a list of strings), just like
    # encode_documents. A bare string is not a valid batch: it would be
    # iterated character by character instead of being embedded as one query,
    # so wrap it in a one-element list.
    query_embedding = sentence_transformer_ef.encode_queries([query])
    # Debug output: show the vector that will be used for the search.
    print(query_embedding[0])
    return query_embedding[0]

读取md文件中的数据，用上面的两个函数生成嵌入向量，然后用milvus存储

In [2]:
from pymilvus import MilvusClient
from pymilvus import connections,db
# Open the pymilvus connection to the local Milvus server.
conn = connections.connect(host="127.0.0.1", port=19530)
# Create the "learn" database only when it does not exist yet, so that
# re-running this cell does not fail on an already existing database.
database = None
if "learn" not in db.list_database():
    database = db.create_database("learn")

In [11]:
from glob import glob

def loaddata():
    """Embed the FAQ markdown chunks and store them in Milvus.

    Every file matched by ``milvus_docs/en/faq/*.md`` (relative to the current
    working directory) is read and split on ``"# "``. The collection
    ``my_rag_collection`` in database ``learn`` is dropped if it already
    exists, re-created, and filled with one row per chunk containing the
    fields ``id``, ``vector`` and ``text``.
    """
    # Dimension given to create_collection; it has to match the length of the
    # vectors returned by encode_document.
    embedding_dim = 384

    # Text chunks of all files are collected in this list.
    text_lines = []
    for file_path in glob("milvus_docs/en/faq/*.md", recursive=True):
        # Decode explicitly as UTF-8 (the docs are assumed to be UTF-8) so the
        # result does not depend on the platform's default encoding.
        with open(file_path, "r", encoding="utf-8") as file:
            file_text = file.read()
        # Split at markdown headings ("# ").
        text_lines += file_text.split("# ")

    milvus_client = MilvusClient(uri='http://localhost:19530', db_name='learn')
    collection_name = "my_rag_collection"

    # Start from a clean collection on every run.
    if milvus_client.has_collection(collection_name):
        milvus_client.drop_collection(collection_name)

    milvus_client.create_collection(
        collection_name=collection_name,
        dimension=embedding_dim,
        # Inner product (IP) as the vector similarity metric.
        metric_type="IP",
        # Strong consistency: reads are meant to see the latest writes.
        consistency_level="Strong",
    )

    # One row per chunk: running index as id, embedding, and the raw text.
    data = [
        {"id": i, "vector": encode_document(text), "text": text}
        for i, text in enumerate(text_lines)
    ]
    milvus_client.insert(collection_name=collection_name, data=data)
    

In [12]:
loaddata()

执行成功，说明数据已成功导入；打开 Attu 查看，可以看到数据已经在 collection 里了。

下一步构建RAG
这里先写一个简单的html界面，用于测试，然后用flask部署。

实现Flask接口

In [None]:
from flask import Flask, render_template_string, request
from pymilvus import MilvusClient
import json
from langchain_core.messages import HumanMessage, SystemMessage
from langchain_community.llms import Ollama
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser

# Flask application object; the @app.route decorators in this cell register
# their handlers on it.
app = Flask(__name__)

# Inline HTML page for the demo. The script at the bottom posts the question
# to "/" as form data and shows the returned "prompt" and "ai_answer" fields.
# It uses the browser's built-in fetch API because the page does not load
# jQuery, so a `$(...)`-based script would fail with "$ is not defined".
html_template = """
<!doctype html>
<html lang="en" data-bs-theme="auto">

<head>
    <meta charset="utf-8">
    <title>简陋的测试</title>
</head>

<body class="bg-body-tertiary">

    <div class="container">
        <main>
            <div class="py-5 text-center">
                <h2>RAG DEMO</h2>
                <p class="lead">救救软件课设</p>
            </div>

            <div class="row mb-3">
                <h4 class="mb-3">请输入您的问题</h4>
                <form>
                    <div class="row mb-3">
                        <div class="col-12">
                            <label for="question" class="form-label">问题内容</label>
                            <input type="text" class="form-control" id="question" required
                                value="How is data stored in milvus?">
                        </div>
                    </div>

                    <button id="submit" class="w-100 btn btn-primary btn-lg" type="button">提问</button>
                </form>
            </div>
            <div class="row mb-3">
                <h4 class="mb-3">Prompt</h4>
                <textarea id="prompt" rows="10" style="margin-left: 10px;margin-right: 10px;">请先提问</textarea>
            </div>
            <div class="row mb-3">
                <h4 class="mb-3">回复</h4>
                <div id="response">请先提问</div>
            </div>
        </main>

        <footer class="my-5 pt-5 text-body-secondary text-center text-small">
            <p class="mb-1">&copy;2024 易俊哲</p>
        </footer>
    </div>
    <script>
        document.getElementById('submit').addEventListener('click', function () {
            var params = new URLSearchParams();
            params.append('question', document.getElementById('question').value);
            fetch('/', { method: 'POST', body: params })
                .then(function (resp) { return resp.json(); })
                .then(function (data) {
                    document.getElementById('prompt').value = data.prompt;
                    document.getElementById('response').innerHTML = data.ai_answer;
                });
        });
    </script>
</body>

</html>
"""

@app.route('/', methods=['GET'])
def index():
    """Serve the demo page by rendering the inline ``html_template`` string."""
    page = render_template_string(html_template)
    return page

@app.route('/', methods=['POST'])
def indexPost():
    """Answer the submitted question with retrieval-augmented generation.

    The form field ``question`` is embedded with ``encode_query``, the three
    best matching chunks are fetched from the ``my_rag_collection``
    collection, and the chunks plus the question are sent to the local
    Ollama model ``qwen:7b`` through a LangChain chain.

    Returns:
        A JSON string with the keys ``"prompt"`` (the rendered user prompt)
        and ``"ai_answer"`` (the model's reply). When no question was
        submitted, the same JSON shape is returned with HTTP status 400.
    """
    question = request.form.get('question')
    if not question:
        # Nothing to embed or search for; reply in the usual JSON shape
        # instead of failing later with a server error.
        return json.dumps({"prompt": "", "ai_answer": "请先输入问题"}), 400

    milvus_client = MilvusClient(uri='http://localhost:19530', db_name='learn')
    collection_name = "my_rag_collection"

    # Encode the user's question into a vector.
    emb = encode_query(question)
    # Search the database for relevant documents.
    search_res = milvus_client.search(
        collection_name=collection_name,
        data=[emb],
        limit=3,  # Return 3 reference results
        search_params={"metric_type": "IP", "params": {}},
        output_fields=["text"],  # Return the text field
    )

    # Concatenate the returned documents into a single string as context.
    context = "\n".join(res["entity"]["text"] for res in search_res[0])

    SYSTEM_PROMPT = """
    Human: You are an AI assistant. You are able to find answers to the questions from the contextual passage snippets provided.
    """

    # Deliberately NOT an f-string: {context} and {question} stay template
    # variables that are filled in at invoke time. Interpolating the values
    # first would make any "{" or "}" in the retrieved text or the question
    # look like a template variable to ChatPromptTemplate and break invoke().
    USER_PROMPT_TEMPLATE = """
    Use the following pieces of information enclosed in <context> tags to provide an answer to the question enclosed in <question> tags.
    <context>
    {context}
    </context>
    <question>
    {question}
    </question>
    """
    prompt_values = {"context": context, "question": question}
    # Rendered copy of the user prompt, only used for display in the page.
    USER_PROMPT = USER_PROMPT_TEMPLATE.format(**prompt_values)

    # Call the local LLM model using LangChain.
    ollama = Ollama(base_url='http://localhost:11434', model="qwen:7b")
    parser = StrOutputParser()

    # Create the prompt template from the system and user messages.
    prompt_template = ChatPromptTemplate.from_messages(
        [("system", SYSTEM_PROMPT), ("user", USER_PROMPT_TEMPLATE)]
    )

    chain = prompt_template | ollama | parser

    return json.dumps({
        "prompt": USER_PROMPT,
        # Fill the template variables, call the LLM and parse its reply.
        "ai_answer": chain.invoke(prompt_values),
    })

# Start the Flask server when this code runs as the main module.
if __name__ == '__main__':
    app.run()

In [None]:
from flask import Flask,render_template,request
from pymilvus import MilvusClient
from glob import glob
import json
from langchain_core.messages import HumanMessage, SystemMessage
from langchain_community.llms import Ollama
from langchain_core.prompts import ChatPromptTemplate #注意引用了这个包
from langchain_core.output_parsers import StrOutputParser

# Flask application object; the @app.route decorators in this cell register
# their handlers on it.
app = Flask(__name__)

@app.route('/', methods=['GET'])
def index():
    """Serve the demo page by rendering the ``htmltest.html`` template."""
    page = render_template("htmltest.html")
    return page

@app.route('/', methods=['POST'])
def indexPost():
    """Answer the submitted question with retrieval-augmented generation.

    The form field ``question`` is embedded with ``encode_query``, the three
    best matching chunks are fetched from the ``my_rag_collection``
    collection, and the chunks plus the question are sent to the local
    Ollama model ``qwen:7b`` through a LangChain chain.

    Returns:
        A JSON string with the keys ``"prompt"`` (the rendered user prompt)
        and ``"ai_answer"`` (the model's reply). When no question was
        submitted, the same JSON shape is returned with HTTP status 400.
    """
    question = request.form.get('question')
    if not question:
        # Nothing to embed or search for; reply in the usual JSON shape
        # instead of failing later with a server error.
        return json.dumps({"prompt": "", "ai_answer": "请先输入问题"}), 400

    milvus_client = MilvusClient(uri='http://localhost:19530', db_name='learn')
    collection_name = "my_rag_collection"

    # Turn the user's question into a vector.
    emb = encode_query(question)
    # Search the database for material related to the question.
    search_res = milvus_client.search(
        collection_name=collection_name,
        data=[emb],
        limit=3,  # Return 3 reference results
        search_params={"metric_type": "IP", "params": {}},
        output_fields=["text"],  # Return the text field
    )

    # Join the returned reference chunks into one large context string.
    context = "\n".join(res["entity"]["text"] for res in search_res[0])

    # Next, use LangChain to have the LLM generate the reply.
    SYSTEM_PROMPT = """
    Human: You are an AI assistant. You are able to find answers to the questions from the contextual passage snippets provided.
    """

    # Deliberately NOT an f-string: {context} and {question} stay template
    # variables that are filled in at invoke time. Interpolating the values
    # first would make any "{" or "}" in the retrieved text or the question
    # look like a template variable to ChatPromptTemplate and break invoke().
    USER_PROMPT_TEMPLATE = """
    Use the following pieces of information enclosed in <context> tags to provide an answer to the question enclosed in <question> tags.
    <context>
    {context}
    </context>
    <question>
    {question}
    </question>
    """
    prompt_values = {"context": context, "question": question}
    # Rendered copy of the user prompt, only used for display in the page.
    USER_PROMPT = USER_PROMPT_TEMPLATE.format(**prompt_values)

    # Call the locally hosted LLM through LangChain (qwen:7b on this machine).
    ollama = Ollama(base_url='http://localhost:11434', model="qwen:7b")
    parser = StrOutputParser()

    # Prompt template built from the system and user messages.
    prompt_template = ChatPromptTemplate.from_messages(
        [("system", SYSTEM_PROMPT), ("user", USER_PROMPT_TEMPLATE)]
    )

    chain = prompt_template | ollama | parser

    return json.dumps({
        "prompt": USER_PROMPT,
        # invoke fills the template variables, assembles the system and user
        # prompts, submits them to the LLM and parses the LLM's response.
        "ai_answer": chain.invoke(prompt_values),
    })
# Start the Flask server when this code runs as the main module.
if __name__ == '__main__':
    app.run()