In [1]:
from dotenv import load_dotenv
import bs4
from langchain import hub
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import FAISS
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain.document_loaders import TextLoader
from langchain_openai import ChatOpenAI, OpenAIEmbeddings # Now this import should work
import openai
import faiss 
load_dotenv()

import os

openai.api_key = ''

USER_AGENT environment variable not set, consider setting it to identify your requests.


In [3]:
from langchain.document_loaders import CSVLoader

# 파일 경로 설정
input_file_path = 'KakaoTalk_Chat_그룹채팅_2024-08-14-16-34-07.csv'

# CSVLoader를 사용하여 파일을 로드합니다.
loader = CSVLoader(input_file_path)
datas = loader.load()

# 로드된 문서를 출력합니다.
datas

[Document(metadata={'source': 'KakaoTalk_Chat_그룹채팅_2024-08-14-16-34-07.csv', 'row': 0}, page_content='\ufeffDate: 2024-04-06 20:16:58\nUser: 김호준\nMessage: 정신나갈거 같아'),
 Document(metadata={'source': 'KakaoTalk_Chat_그룹채팅_2024-08-14-16-34-07.csv', 'row': 1}, page_content='\ufeffDate: 2024-04-06 20:17:01\nUser: 김호준\nMessage: ~~~~'),
 Document(metadata={'source': 'KakaoTalk_Chat_그룹채팅_2024-08-14-16-34-07.csv', 'row': 2}, page_content='\ufeffDate: 2024-04-06 20:17:08\nUser: 조찬규\nMessage: ㅋㅋ'),
 Document(metadata={'source': 'KakaoTalk_Chat_그룹채팅_2024-08-14-16-34-07.csv', 'row': 3}, page_content='\ufeffDate: 2024-04-06 20:17:09\nUser: 조찬규\nMessage: 이모티콘'),
 Document(metadata={'source': 'KakaoTalk_Chat_그룹채팅_2024-08-14-16-34-07.csv', 'row': 4}, page_content='\ufeffDate: 2024-04-06 20:17:21\nUser: 김호준\nMessage: 본선진출 100만원'),
 Document(metadata={'source': 'KakaoTalk_Chat_그룹채팅_2024-08-14-16-34-07.csv', 'row': 5}, page_content='\ufeffDate: 2024-04-06 20:17:23\nUser: 김호준\nMessage: 이게 엘지?'),
 Document(me

In [4]:
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=50)

splits = text_splitter.split_documents(datas)
splits

[Document(metadata={'source': 'KakaoTalk_Chat_그룹채팅_2024-08-14-16-34-07.csv', 'row': 0}, page_content='\ufeffDate: 2024-04-06 20:16:58\nUser: 김호준\nMessage: 정신나갈거 같아'),
 Document(metadata={'source': 'KakaoTalk_Chat_그룹채팅_2024-08-14-16-34-07.csv', 'row': 1}, page_content='\ufeffDate: 2024-04-06 20:17:01\nUser: 김호준\nMessage: ~~~~'),
 Document(metadata={'source': 'KakaoTalk_Chat_그룹채팅_2024-08-14-16-34-07.csv', 'row': 2}, page_content='\ufeffDate: 2024-04-06 20:17:08\nUser: 조찬규\nMessage: ㅋㅋ'),
 Document(metadata={'source': 'KakaoTalk_Chat_그룹채팅_2024-08-14-16-34-07.csv', 'row': 3}, page_content='\ufeffDate: 2024-04-06 20:17:09\nUser: 조찬규\nMessage: 이모티콘'),
 Document(metadata={'source': 'KakaoTalk_Chat_그룹채팅_2024-08-14-16-34-07.csv', 'row': 4}, page_content='\ufeffDate: 2024-04-06 20:17:21\nUser: 김호준\nMessage: 본선진출 100만원'),
 Document(metadata={'source': 'KakaoTalk_Chat_그룹채팅_2024-08-14-16-34-07.csv', 'row': 5}, page_content='\ufeffDate: 2024-04-06 20:17:23\nUser: 김호준\nMessage: 이게 엘지?'),
 Document(me

In [5]:
# 벡터스토어를 생성합니다.
vectorstore = FAISS.from_documents(documents=splits, embedding=OpenAIEmbeddings(openai_api_key=openai.api_key))

# 대화에 포함되어 있는 정보를 검색하고 생성합니다.
retriever = vectorstore.as_retriever()
print(vectorstore)
print(retriever)

<langchain_community.vectorstores.faiss.FAISS object at 0x0000024AF98228D0>
tags=['FAISS', 'OpenAIEmbeddings'] vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x0000024AF98228D0>


In [6]:
prompt = hub.pull("rlm/rag-prompt",api_key=openai.api_key)
prompt

  warn_beta(


ChatPromptTemplate(input_variables=['context', 'question'], metadata={'lc_hub_owner': 'rlm', 'lc_hub_repo': 'rag-prompt', 'lc_hub_commit_hash': '50442af133e61576e74536c6556cefe1fac147cad032f4377b60c436e6cdcb6e'}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], template="You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.\nQuestion: {question} \nContext: {context} \nAnswer:"))])

In [7]:
llm = ChatOpenAI(model_name="gpt-4o", temperature=0,api_key=openai.api_key)


def format_docs(docs):
    # 검색한 문서 결과를 하나의 문단으로 합쳐줍니다.
    return "\n\n".join(doc.page_content for doc in docs)


# 체인을 생성합니다.
rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

In [8]:
rag_chain.invoke(
    """해당 대화에서 김호준의 언어습관을 분석해줘.  """
)  #문서에 대한 질의를 입력하고, 답변을 출력합니다.

'김호준의 언어습관은 줄임말과 인터넷 용어를 자주 사용하는 특징이 있습니다. 예를 들어, "현기증온거임?"과 "소개팅함? ㄹㅈㄷ" 같은 표현에서 이를 확인할 수 있습니다.'