# In [13]:
from dotenv import load_dotenv
from PyPDF2 import PdfReader
import streamlit as st
from langchain_text_splitters import CharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain.chains.question_answering import load_qa_chain
from langchain_openai import ChatOpenAI
from langchain_community.callbacks.manager import get_openai_callback
from langchain_huggingface import HuggingFaceEmbeddings

def process_text(
    text: str,
    *,
    chunk_size: int = 1000,
    chunk_overlap: int = 200,
    model_name: str = "sentence-transformers/all-MiniLM-L6-v2",
) -> "FAISS":
    """Split text into chunks, embed them, and index them in a FAISS store.

    Args:
        text: Raw text to index.
        chunk_size: Target maximum chunk length, measured with ``len``.
        chunk_overlap: Target overlap between consecutive chunks.
        model_name: Hugging Face embedding model used to vectorize chunks.

    Returns:
        The FAISS vector store built from the embedded chunks.

    Raises:
        ValueError: If ``text`` is empty or whitespace-only, or if splitting
            produces no chunks.
    """
    # Fail early with a clear message: an empty chunk list would otherwise
    # surface as an opaque error from inside the vector-store construction.
    if not text or not text.strip():
        raise ValueError("text must contain non-whitespace characters")

    text_splitter = CharacterTextSplitter(
        separator='\n',
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        length_function=len,
    )
    chunks = text_splitter.split_text(text)
    if not chunks:
        raise ValueError("text produced no chunks to index")

    embeddings = HuggingFaceEmbeddings(model_name=model_name)
    return FAISS.from_texts(chunks, embeddings)

def _extract_pdf_text(pdf_reader):
    """Return the concatenated extracted text of every page in *pdf_reader*."""
    # Defensive: treat a falsy extraction result (None or "") as "no text"
    # so a single text-less page cannot break the concatenation.
    return "".join(page.extract_text() or "" for page in pdf_reader.pages)


def main():
    """Render the Streamlit page that summarizes an uploaded PDF with an LLM.

    Flow: load environment variables, wait for a PDF upload, extract its
    text, index the text with ``process_text``, retrieve the chunks most
    similar to the summary prompt, and display the LLM's answer.

    Returns:
        None. All output goes to the Streamlit page, except the OpenAI
        callback summary, which is printed to stdout.
    """
    st.title("📄PDF 요약 웹사이트")
    st.divider()
    st.text("LLM을 이용해 PDF 요약 기능을 제공합니다.")

    # Load variables from a .env file (presumably the OpenAI API key --
    # confirm against deployment configuration).
    try:
        load_dotenv()
    except Exception as exc:  # top-level UI boundary: report and stop
        st.error(str(exc))
        return

    pdf = st.file_uploader('PDF 파일을 업로드해주세요', type='pdf')
    if pdf is None:
        # Nothing uploaded yet; the page is re-rendered once a file arrives.
        return

    text = _extract_pdf_text(PdfReader(pdf))
    if not text.strip():
        # E.g. image-only PDFs: there is nothing to index or summarize.
        st.warning("PDF에서 텍스트를 추출하지 못했습니다.")
        return

    documents = process_text(text)

    query = "Please summary uploaded PDF File text into 3 ~ 5 sentences. Also, please answer in Korean."
    docs = documents.similarity_search(query)

    llm = ChatOpenAI(
        temperature=0.1,
        model_name="gpt-3.5-turbo-16k",
    )
    chain = load_qa_chain(llm, chain_type="stuff")

    with get_openai_callback() as cost:
        response = chain.invoke({'input_documents': docs, 'question': query})['output_text']
        # Usage summary goes to the server console, not to the page.
        print(cost)

    st.subheader("📍 요약 결과")
    st.write(response)

# Run the app only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()