In [1]:
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.document_loaders import TextLoader, PyPDFLoader
from langchain.text_splitter import  RecursiveCharacterTextSplitter



In [2]:
# 讀取檔案
file_path = r"C:\Users\mindy\桌面\張茗溱.pdf"
# file_path = r"E:\腸易激.pdf"
loader = file_path.endswith(".pdf") and PyPDFLoader(file_path) or TextLoader(file_path)


In [3]:
# 選擇 splitter 並將文字切分成多個 chunk 
splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=0) 
texts = loader.load_and_split(splitter)


In [5]:
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma

# 建立本地 db
# 建立本地 db
embeddings = HuggingFaceEmbeddings(model_name="GanymedeNil/text2vec-large-chinese",  model_kwargs={"device": "cuda:0"})
db = Chroma.from_documents(
    texts, 
    embeddings, 
    persist_directory="./chroma_db"
)
retriever = db.as_retriever(search_kwargs={"k": 2})

No sentence-transformers model found with name GanymedeNil/text2vec-large-chinese. Creating a new one with mean pooling.


In [6]:
# Setup the pipeline

from llama_cpp import Llama

llm = Llama.from_pretrained(
	repo_id="taide/TAIDE-LX-7B-Chat-4bit",
	filename="taide-7b-a.2-q4_k_m.gguf",
    n_ctx=4096,                                      # 上下文长度
    max_tokens=3000,                                 # 最大生成 token
    use_mlock=True,                                  # 使用锁定内存（推荐开启）
    n_gpu_layers=40                                  # 分配给 GPU 的 transformer 层数
)

llama_model_loader: loaded meta data with 22 key-value pairs and 291 tensors from C:\Users\mindy\.cache\huggingface\hub\models--taide--TAIDE-LX-7B-Chat-4bit\snapshots\9063ed154144775841f3953b69b534c6e2d564d8\.\taide-7b-a.2-q4_k_m.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = LLaMA v2
llama_model_loader: - kv   2:                       llama.context_length u32              = 4096
llama_model_loader: - kv   3:                     llama.embedding_length u32              = 4096
llama_model_loader: - kv   4:                          llama.block_count u32              = 32
llama_model_loader: - kv   5:                  llama.feed_forward_length u32              = 11008
llama_model_loader: - kv   6:                 llama.ro

In [None]:
from langchain.prompts import PromptTemplate
prompt_template = """
根據以下檢索資料，請提供一個的回答，並補充必要的背景信息和實例。請確保回答全面且深入。
資料：
{context}
問題：
{question}
"""
prompt = PromptTemplate(input_variables=["context", "question"], template=prompt_template)
question='張茗溱有哪些實務專案經驗'
retrieved_docs = retriever.get_relevant_documents(question)
context = "\n".join([doc.page_content[:1000] for doc in retrieved_docs])  

response = llm(f"{prompt.format(context=context, question=question)}", max_tokens=2048, temperature=0.8, stream=True)
for chunk in response:
    print(chunk["choices"][0]["text"], end="", flush=True)

  retrieved_docs = retriever.get_relevant_documents(question)
