## ChatGLM For RAG

In [None]:
# Clone the ChatGLM3 repository; later cells reference files under ./ChatGLM3.
!git clone https://github.com/nullskymc/ChatGLM3.git

In [None]:
!pip innstall modelscope

from modelscope import snapshot_download
model_dir = snapshot_download("ZhipuAI/chatglm3-6b", revision = "v1.0.0")

In [None]:
# Install the dependencies listed in the cloned repository's requirements file.
!pip install -r ./ChatGLM3/requirements.txt 

In [None]:
# Launch the API server script from the cloned repo's openai_api_demo folder.
# NOTE(review): a `!` command runs in the foreground, so this cell presumably
# stays busy for as long as the server is up — confirm, and consider starting
# the server from a separate terminal if the cells below must run meanwhile.
!python ./ChatGLM3/openai_api_demo/api_server.py

In [None]:
from langchain.chains import LLMChain
from langchain.schema.messages import AIMessage
from langchain_community.llms.chatglm3 import ChatGLM3
from langchain_community.embeddings import ModelScopeEmbeddings
from langchain_core.prompts import PromptTemplate

In [None]:
# Chat-completions URL on localhost port 8000.
# NOTE(review): presumably served by the api_server.py cell — confirm the port.
endpoint_url = "http://127.0.0.1:8000/v1/chat/completions" 

# LLM wrapper that sends its requests to endpoint_url.
llm = ChatGLM3(
    endpoint_url=endpoint_url,
    # NOTE(review): 80000 looks larger than the model's context window —
    # confirm how the server treats an oversized max_tokens.
    max_tokens=80000,
    top_p=0.9,
)


# Embedding model identifier; `embeddings` is reused by the vector-store cells.
model_id = "iic/nlp_corom_sentence-embedding_chinese-base"
embeddings = ModelScopeEmbeddings(model_id=model_id)

# Pass-through template: the prompt consists only of the {question} variable.
# `prompt` is reassigned by the retrieval-chain cell further down.
template = """{question}"""
prompt = PromptTemplate.from_template(template)

In [None]:
from langchain_community.document_loaders import PyPDFLoader

def pdf_loader(url):
    """Load a PDF and return its contents as split documents.

    Args:
        url: Location of the PDF, passed unchanged to ``PyPDFLoader``.

    Returns:
        The result of ``PyPDFLoader(url).load_and_split()``.
    """
    return PyPDFLoader(url).load_and_split()

In [None]:
from langchain_community.vectorstores import FAISS
from langchain_text_splitters import CharacterTextSplitter

# Build a FAISS-backed vector database from a collection of texts. The
# resulting database is fairly large, so be patient while it is created.


def create_vector_db(data_path, db_path, loader):
    """Load documents, split them into chunks, embed them and save the index.

    Args:
        data_path: Source location handed to ``loader``.
        db_path: Destination passed to ``save_local``.
        loader: Callable that takes ``data_path`` and returns the documents
            to index.

    The chunks are embedded with the module-level ``embeddings`` object.
    """
    splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=50)
    chunks = splitter.split_documents(loader(data_path))
    # Build the index and persist it at db_path in one step.
    FAISS.from_documents(chunks, embeddings).save_local(db_path)

create_vector_db("./data_text.pdf", "./vector_db/water_db", pdf_loader)  # Example: create the database

In [None]:
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains.retrieval import create_retrieval_chain
from langchain_community.vectorstores.faiss import FAISS
from langchain_core.prompts import ChatPromptTemplate
# Build the retrieval chain

# Question passed to the chain as its "input" value (placeholder text).
input_text = '你的问题'
# Prompt with a {context} slot and an {input} slot; the first line of the
# template is placeholder preset text.
prompt = ChatPromptTemplate.from_template(
        """
        你的预设

        <context>
        {context}
        </context>

        Question: {input}
        """)

# Load the index saved under ./vector_db/water_db with the same embeddings object.
# NOTE(review): allow_dangerous_deserialization=True opts in to loading
# serialized data — only point this at index files you created yourself.
new_db = FAISS.load_local("./vector_db/water_db", embeddings, allow_dangerous_deserialization=True)
# Assemble the combine-documents chain
document_chain = create_stuff_documents_chain(llm, prompt)

retriever = new_db.as_retriever()  # Retrieve from the vector database
retrieval_chain = create_retrieval_chain(retriever, document_chain)

# Run the chain on the question and print the "answer" entry of the result.
response = retrieval_chain.invoke({"input": input_text})
print(response['answer'])