In [None]:
from langchain_community.document_loaders import UnstructuredMarkdownLoader
from langchain.text_splitter import MarkdownTextSplitter, RecursiveCharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.chains import RetrievalQA
from langchain.llms import OpenAI
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
from langchain.chains.question_answering import load_qa_chain
from langchain_openai import OpenAIEmbeddings
from dotenv import load_dotenv
import os

In [None]:
# Load variables from a local .env file (if present) into the process environment.
load_dotenv()

# Environment variable names are case-sensitive on Linux/macOS, so look up the
# project's own mixed-case name first and then fall back to the conventional
# OPENAI_API_KEY name. Without the fallback, api_key would silently be None on
# those platforms whenever the key is stored under the standard name.
api_key = os.getenv('OpenAI_API_Key') or os.getenv('OPENAI_API_KEY')

In [None]:
# Path of the markdown document to index.
markdown_path = "prompt_engineering_tech.md"

# Load the markdown file into LangChain Document objects.
loader = UnstructuredMarkdownLoader(markdown_path)
data = loader.load()

# First pass: split with the markdown-aware splitter (library defaults).
markdown_splitter = MarkdownTextSplitter()
markdown_split_docs = markdown_splitter.split_documents(data)

# Second pass: cap every chunk at `chunk_size` characters, with a small overlap
# between neighbouring chunks.
chunk_size = 1000
chunk_overlap = 30
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=chunk_size, chunk_overlap=chunk_overlap
)
char_split_docs = text_splitter.split_documents(markdown_split_docs)

# Plain text of every chunk, in document order.
texts = [doc.page_content for doc in char_split_docs]

# Embedding model; receives the same key as the LLM so both clients are
# configured consistently.
embedding_model = OpenAIEmbeddings(api_key=api_key)

# Embed every chunk exactly once and build the FAISS index from those vectors.
# The previous version called embed_documents() and then FAISS.from_texts(),
# which embedded all chunks a second time and doubled the API calls.
embeddings = embedding_model.embed_documents(texts)
vector_store = FAISS.from_embeddings(
    text_embeddings=list(zip(texts, embeddings)),
    embedding=embedding_model,  # still needed to embed queries at search time
)

# Retriever over the index, using the vector store's default search settings.
retriever = vector_store.as_retriever()

In [None]:
# Persona definition: the character the model is asked to play.
persona_description = (
    "\n"
    "From now on, You'll act an professional korean prompt engineer, 'Mr.P'. "
    "Mr.P is nerdy speaker and love teaching prompt engineering techniques to user.\n"
    "Mr.P always tried to talk about prompt engineering, User could feel he looks mad.\n"
)

# Full RAG prompt: task instruction, the persona text, fallback guidance, a
# Korean-language requirement, and the {context} / {question} placeholders
# that are filled in later by a prompt template.
RAG_Persona_template = "".join([
    "\nYou should act an persona, and explain user's question based on context.\n",
    persona_description,
    "\n\n",
    "When you get qusetions not related context, You don't need to try find answer about that. answer based on your knowledges.\n",
    "\n",
    "그리고 한글로 설명해주세요, 반드시!\n",
    "Answer the question based on the following context: {context}\n",
    "Question: {question}\n",
    "Answer:",
])

In [None]:
# Generation model.
llm = OpenAI(api_key=api_key)

# Prompt template wrapping the persona prompt; it expects the retrieved
# context and the user's question.
prompt_template = PromptTemplate(
    input_variables=["context", "question"],
    template=RAG_Persona_template
)
# Stand-alone persona chain. The retrieval chain below does not use it; it is
# kept so it remains available for direct calls.
llm_chain = LLMChain(llm=llm, prompt=prompt_template)

# QA chain. The map step keeps the library's default per-chunk prompt, while
# the final (reduce) step uses the persona prompt, with the per-chunk results
# placed in its {context} slot. Previously load_qa_chain() was called without
# any prompt, so the persona and the Korean-answer instruction never reached
# the model.
qa_chain = load_qa_chain(
    llm,
    chain_type="map_reduce",
    combine_prompt=prompt_template,
    combine_document_variable_name="context",
)

# RAG chain: retrieve chunks for the query, then answer with the QA chain.
rag_chain = RetrievalQA(
    retriever=retriever,
    combine_documents_chain=qa_chain,
    input_key="query",
)

# Ask a question and print the answer. invoke() replaces the deprecated run();
# RetrievalQA returns its answer under the "result" key.
question = "먼저 자기소개를 해주세요. 그리고 나서 CoT를 설명해줘."
input_data = {"query": question}
answer = rag_chain.invoke(input_data)["result"]
print(answer)

In [None]:
# Show the chain object so its configuration can be inspected.
rag_chain

# 예제 코드


In [None]:
import dotenv
import bs4
from langchain import hub
from langchain_community.document_loaders import WebBaseLoader
from langchain_chroma import Chroma
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter


dotenv.load_dotenv()

# --- Load: fetch the blog post, parsing only elements whose CSS class is one
# of the listed post classes.
blog_urls = ("https://lilianweng.github.io/posts/2023-06-23-agent/",)
post_only = bs4.SoupStrainer(class_=("post-content", "post-title", "post-header"))
loader = WebBaseLoader(web_paths=blog_urls, bs_kwargs={"parse_only": post_only})
docs = loader.load()

# --- Chunk: 1000-character pieces with a 200-character overlap.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
)
splits = text_splitter.split_documents(docs)

# --- Index: embed the chunks and store them in a Chroma vector store.
chunk_embedder = OpenAIEmbeddings()
vectorstore = Chroma.from_documents(documents=splits, embedding=chunk_embedder)

# --- Retrieval side: retriever over the index plus the shared RAG prompt
# pulled from the LangChain hub.
retriever = vectorstore.as_retriever()
prompt = hub.pull("rlm/rag-prompt")

def format_docs(docs):
    """Concatenate the text of each document, separated by a blank line.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string attribute.

    Returns:
        A single string with every ``page_content`` joined by two newlines;
        an empty string when ``docs`` is empty.
    """
    contents = [document.page_content for document in docs]
    separator = "\n\n"
    return separator.join(contents)


# Inputs for the prompt: "context" is the retrieved documents formatted as one
# string, "question" is the user's input passed through unchanged.
rag_inputs = {
    "context": retriever | format_docs,
    "question": RunnablePassthrough(),
}

# Pipeline: gather inputs -> fill the prompt -> call the LLM -> parse to str.
# NOTE: `llm` is not defined in this cell; it comes from an earlier cell.
rag_chain = rag_inputs | prompt | llm | StrOutputParser()