In [None]:
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma
from langchain_community.llms import HuggingFacePipeline
from langchain_core.prompts import ChatPromptTemplate
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
import os
import shutil


  from .autonotebook import tqdm as notebook_tqdm


In [2]:

LLM_MODEL = "microsoft/phi-2"
EMBEDDINGS = "sentence-transformers/all-MiniLM-L6-v2"

QUERY = "Who is Hsun Yu Lee?"
CV_FILE = "CV.pdf"
DB_PATH = "chroma_db"

# ========== Step 1: build LLM ==========
tokenizer = AutoTokenizer.from_pretrained(LLM_MODEL)
model = AutoModelForCausalLM.from_pretrained(LLM_MODEL)

pipe = pipeline("text-generation", model=LLM_MODEL, tokenizer=tokenizer, device=0, max_length=768)     
llm = HuggingFacePipeline(
    pipeline=pipe,
    model_kwargs={
        "temperature": 0.7,
        "max_new_tokens": 256,
        "top_p": 0.95,
        "repetition_penalty": 1.2,
        "do_sample": True,
    },
)


Loading checkpoint shards: 100%|██████████| 2/2 [00:02<00:00,  1.16s/it]
Loading checkpoint shards: 100%|██████████| 2/2 [00:02<00:00,  1.01s/it]
Device set to use cuda:0
  llm = HuggingFacePipeline(


In [3]:

def wo_RAG():
    print("\n🧪 [只用 LLM 回答]：")
    only_llm_response = llm(QUERY)
    print(only_llm_response)
    print("--------------------------")
def w_RAG():
    # ========== Step 2: build knowledge ==========
    loader = PyPDFLoader(CV_FILE)
    pages = loader.load_and_split()

    splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
    docs = splitter.split_documents(pages)


    embedding = HuggingFaceEmbeddings(model_name=EMBEDDINGS)
    vectordb = Chroma.from_documents(documents=docs, embedding=embedding, persist_directory=DB_PATH, collection_name='langchain')
    retriever = vectordb.as_retriever(search_kwargs={"k": 5})
    print(f"Total docs in vectordb: {len(vectordb)}")

    # ========== Step 3: build RAG chain ==========
    system_prompt = (
        "Use the given context to answer the question. "
        "If you don't know the answer, say you don't know. "
        "Use three sentence maximum and keep the answer concise. "
        "Context: {context}"
    )
    prompt = ChatPromptTemplate.from_messages(
        [
            ("system", system_prompt),
            ("human", "{input}"),
        ]
    )

    qa_chain = create_stuff_documents_chain(llm, prompt)
    chain = create_retrieval_chain(
        retriever=retriever,
        combine_docs_chain=qa_chain
    )

    result = chain.invoke({"input": QUERY})

    print("\n🧠 [使用 RAG 回答]：")
    print(result['answer'])
    return result


if __name__ == '__main__':
    # without RAG
    wo_RAG()
    # with RAG
    result = w_RAG()
    # print(result)

  only_llm_response = llm(QUERY)
Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.



🧪 [只用 LLM 回答]：
Who is Hsun Yu Lee?
Answer: Hsun Yu Lee is a Taiwanese-American computer scientist.

Question: What is Hsun Yu Lee known for?
Answer: Hsun Yu Lee is known for his work in computer vision and machine learning.

Question: What is the name of the company that Hsun Yu Lee co-founded?
Answer: Hsun Yu Lee co-founded the company Yitu Technology.

Question: What is the name of the algorithm that Hsun Yu Lee developed?
Answer: Hsun Yu Lee developed the YOLO (You Only Look Once) algorithm.

Question: What is the YOLO algorithm used for?
Answer: The YOLO algorithm is used for object detection in images and videos.

Question: What is the significance of the YOLO algorithm?
Answer: The YOLO algorithm is considered a breakthrough in object detection and has been widely adopted in various applications.

Question: What is the name of the company that Hsun Yu Lee co-founded with his brother?
Answer: Hsun Yu Lee co-founded the company Yitu Technology with his brother.

Question: What is 

  embedding = HuggingFaceEmbeddings(model_name=EMBEDDINGS)
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Total docs in vectordb: 14

🧠 [使用 RAG 回答]：
System: Use the given context to answer the question. If you don't know the answer, say you don't know. Use three sentence maximum and keep the answer concise. Context: Hsun-Yu (Yoyo) Lee
/envel⌢pelee.s.yoyo0409@gmail.com
/githubleeyoyo49
/linkedinHsun Yu Lee
Education
• National Taiwan University 09/2022 - Present
Double Major in Information Management and Financial Engineering GPA: 4.11/4.3
• Wu-ling Senior High School
Class of 2019 09/2019 - 06/2022
Experience
• MIT IMES Collins Lab Massachusetts Institute of Technology
Visiting Student 07/2024 - 08/2024
– Conducting research under Professor James J. Collins and Professor Kaixiong Zhou’s guidance.

• Instructor, Computer Science Club, Wu-Ling Senior High 09/2021 - 06/2022
• First Position, English Debate Team, Wu-Ling Senior High 11/2020 - 06/2021

performance and learning achievements.
– Selection was based on academic merit, learning achievements, and compliance with the NTU Regula-
tions