In [10]:
from langchain.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.retrievers import BM25Retriever, EnsembleRetriever
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_community.document_loaders import WebBaseLoader
import bs4
from langchain import hub

In [17]:
# 단계 1: 문서 로드(Load Documents)
# 뉴스기사 내용을 로드하고, 청크로 나누고, 인덱싱합니다.
url = "https://dl.acm.org/doi/10.1145/3173574.3174223"
loader = WebBaseLoader(
    web_paths=(url,),
    bs_kwargs=dict(parse_only=bs4.SoupStrainer("div")
    ),
)
docs = loader.load()


# 단계 2: 문서 분할(Split Documents)
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=50)

splits = text_splitter.split_documents(docs)

# 단계 3: 임베딩 & 벡터스토어 생성(Create Vectorstore)
# 벡터스토어를 생성합니다.
vectorstore = FAISS.from_documents(documents=splits, embedding=OpenAIEmbeddings())

# 단계 4: 검색(Search)
# 뉴스에 포함되어 있는 정보를 검색하고 생성합니다.
retriever = vectorstore.as_retriever()

# 단계 5: 프롬프트 생성(Create Prompt)
# 프롬프트를 생성합니다.
prompt = hub.pull("rlm/rag-prompt")

# 단계 6: 언어모델 생성(Create LLM)
# 모델(LLM) 을 생성합니다.
llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0)


def format_docs(docs):
    # 검색한 문서 결과를 하나의 문단으로 합쳐줍니다.
    return "\n\n".join(doc.page_content for doc in docs)


# 단계 7: 체인 생성(Create Chain)
rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

# 단계 8: 체인 실행(Run Chain)
# 문서에 대한 질의를 입력하고, 답변을 출력합니다.
question = "이 논문에서 만든 프로토타입은?"
response = rag_chain.invoke(question)

# 결과 출력
print(f"URL: {url}")
print(f"문서의 수: {len(docs)}")
print("===" * 20)
print(f"[HUMAN]\n{question}\n")
print(f"[AI]\n{response}")

URL: https://dl.acm.org/doi/10.1145/3173574.3174223
문서의 수: 1
[HUMAN]
이 논문에서 만든 프로토타입은?

[AI]
AI avatars.


In [16]:
print(docs)

[Document(metadata={'source': 'https://dl.acm.org/doi/10.1145/3173574.3174223'}, page_content='\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nAdvanced Search\n\nBrowse\n\nAbout\n\n\n\n\n\n                Sign in\n            \n\n\n\n                        Register\n                    \n\n\n\n\n\n\n\n\nAdvanced SearchJournalsMagazinesProceedingsBooksSIGsConferencesPeopleMore\nSearch ACM Digital LibrarySearchSearch\nAdvanced Search\n\n\n\n10.1145/3173574.3174223acmconferencesArticle/Chapter ViewAbstractPublication PageschiConference Proceedingsconference-collectionschiConferenceProceedingsUpcoming EventsAuthorsAffiliationsAward WinnersMore\n\n\n\nHomeConferencesCHIProceedingsCHI \'18I Lead, You Help but Only with Enough Details: Understanding User Experience of Co-Creation with Artificial Intelligence\n\n\n\n\n\n\nExport CitationsSelect Citation formatBibTeXEndNoteACM RefPlease download or close your previous search result export first before starting a new bulk export.Preview is not availa

In [18]:
# 단계 8: 체인 실행(Run Chain)
# 문서에 대한 질의를 입력하고, 답변을 출력합니다.
question = "이 논문의 저자 이름은??"
response = rag_chain.invoke(question)

# 결과 출력
print(f"URL: {url}")
print(f"문서의 수: {len(docs)}")
print("===" * 20)
print(f"[HUMAN]\n{question}\n")
print(f"[AI]\n{response}")

URL: https://dl.acm.org/doi/10.1145/3173574.3174223
문서의 수: 1
[HUMAN]
이 논문의 저자 이름은??

[AI]
Changhoon Oh, Jungwoo Song, Jinhan Choi


In [20]:
question = "이 논문의 제목은??"
response = rag_chain.invoke(question)

print(f"[HUMAN]\n{question}\n")
print(f"[AI]\n{response}")

[HUMAN]
이 논문의 제목은??

[AI]
I Lead, You Help but Only with Enough Details: Understanding User Experience of Co-Creation with Artificial Intelligence.


In [21]:
question = "이 논문을 간단하게 요약해주세요."
response = rag_chain.invoke(question)

print(f"[HUMAN]\n{question}\n")
print(f"[AI]\n{response}")

[HUMAN]
이 논문을 간단하게 요약해주세요.

[AI]
이 논문은 인공지능 아바타와 사용자 경험에 대해 다룹니다. (This paper discusses AI avatars and user experience.)


In [24]:
question = "What is the prototype developed in this paper?" 
response = rag_chain.invoke(question)

print(f"[HUMAN]\n{question}\n")
print(f"[AI]\n{response}")

[HUMAN]
What is the prototype developed in this paper?

[AI]
The prototype developed in this paper is AI avatars, which are human-like.


In [25]:
question = "Can you tell me more about DuetDraw?"
response = rag_chain.invoke(question)

print(f"[HUMAN]\n{question}\n")
print(f"[AI]\n{response}")

[HUMAN]
Can you tell me more about DuetDraw?

[AI]
DuetDraw is an AI interface that allows users and the AI agent to draw pictures collaboratively. A user study found that users were more content with DuetDraw when the tool gave detailed instructions and when the AI explained its intentions only when users wanted it to do so. The study employed both quantitative and qualitative methods to understand the user experience in this new user-AI collaboration.
