In [None]:
!pip install langchain-community pypdf

In [None]:
import os
from dotenv import load_dotenv
import PyPDF2
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_openai import ChatOpenAI
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate

# Load variables from a local .env file (if one exists) into the process
# environment so the API key does not have to be written into the notebook.
load_dotenv()

# Use the key from the environment when it is set and non-empty; otherwise
# fall back to the original placeholder so the name is always defined.
openai_api_key = os.getenv("OPENAI_API_KEY") or "your_openai_api_key"

# Extract text from a PDF
def extract_text_from_pdf(pdf_path) -> str:
    """Return the text of every page of the PDF at *pdf_path*, concatenated.

    Args:
        pdf_path: Path of the PDF file to read. The file is opened in
            binary mode and closed again before the function returns.

    Returns:
        One string holding the extracted text of all pages, in page order,
        with nothing inserted between pages. A page whose extraction yields
        no text contributes an empty string.
    """
    with open(pdf_path, 'rb') as file:
        reader = PyPDF2.PdfReader(file)
        # Build the result with a single join instead of repeated `+=`.
        # `or ""` is defensive: if a page's extract_text() returns None,
        # treat it as empty text rather than failing on str + None.
        return "".join(page.extract_text() or "" for page in reader.pages)

# Build the vector store
def create_vector_store(pdf_text, *, chunk_size=1000, chunk_overlap=200):
    """Split *pdf_text* into chunks, embed them and index them with FAISS.

    Args:
        pdf_text: The full text to index.
        chunk_size: Chunk size passed to RecursiveCharacterTextSplitter.
            Defaults to 1000, the value that was previously hard-coded.
        chunk_overlap: Chunk overlap passed to the splitter. Defaults to
            200, the value that was previously hard-coded.

    Returns:
        The FAISS vector store built from the chunks and their OpenAI
        embeddings.

    Raises:
        ValueError: If *pdf_text* is empty or whitespace-only, or if the
            splitter produces no chunks. There is nothing to embed in that
            case, so fail early with an explicit message.
    """
    # Guard first, before any splitter or embedding object is created.
    if not pdf_text or not pdf_text.strip():
        raise ValueError(
            "No text to index: the PDF text is empty or whitespace-only."
        )

    # Split the text into chunks
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    chunks = text_splitter.split_text(pdf_text)
    if not chunks:
        raise ValueError("No text to index: the splitter produced no chunks.")

    # Create the embeddings and store them in a FAISS vector store
    embeddings = OpenAIEmbeddings(openai_api_key=openai_api_key)
    return FAISS.from_texts(chunks, embeddings)

# Generate an answer to an interview question
def answer_interview_question(vector_store, question):
    """Answer *question* from CV passages retrieved out of *vector_store*.

    Builds a prompt template with ``context`` and ``question`` variables,
    wires a "gpt-4" ChatOpenAI model and a retriever over *vector_store*
    (``search_kwargs={"k": 4}``) into a "stuff" RetrievalQA chain, and
    invokes that chain with the question.

    Args:
        vector_store: Object providing ``as_retriever``; used as the
            retrieval source for the chain.
        question: The interview question, sent to the chain as ``query``.

    Returns:
        The ``"result"`` entry of the chain's response.
    """
    # Prompt text; {context} and {question} are filled in by the chain.
    prompt_text = """
    You are a professional cadidate's friend. 
    Objectively evaluate the candidate's suitability for the job based on the CV information below and the interview question.
    Based on the CV information below, please formulate the best answers to the interview questions.
    
    CV information:
    {context}
    
    Interview question: {question}


    Your answer should include
    1. highlight relevant experience and skills from your CV
    2. include specific achievements and examples
    3. explain what value you can bring to the company
    4. maintain a professional and confident tone
    5. Don't use "they", just use candidate's name.
    6. Answer with the numbering format.



    
    답변:
    """

    qa_prompt = PromptTemplate(
        template=prompt_text,
        input_variables=["context", "question"],
    )

    # Set up the LLM, the retriever and the retrieval chain
    chat_model = ChatOpenAI(model_name="gpt-4", openai_api_key=openai_api_key)
    cv_retriever = vector_store.as_retriever(search_kwargs={"k": 4})
    chain = RetrievalQA.from_chain_type(
        llm=chat_model,
        chain_type="stuff",
        retriever=cv_retriever,
        chain_type_kwargs={"prompt": qa_prompt},
    )

    # Run the chain on the question and hand back only the answer text
    return chain.invoke({"query": question})["result"]

# Main function
def main(pdf_path):
    """Ask for a question on stdin and answer it from the CV at *pdf_path*.

    Args:
        pdf_path: Path of the CV PDF file to read.

    Returns:
        The answer produced by ``answer_interview_question``.
    """
    # The question is requested first, before any PDF work starts.
    user_question = input("Ask for me the question e.g. Does this cadidate suitable for the job as a data engineer at our company?")

    # Pipeline: PDF text -> vector store -> answer
    cv_text = extract_text_from_pdf(pdf_path)
    store = create_vector_store(cv_text)
    return answer_interview_question(store, user_question)




In [None]:
# Usage example: run the full pipeline on a CV file and print the answer.
if __name__ == "__main__":
    pdf_path = "CV.pdf"  # path to the CV PDF file
    # main() prompts for the question on stdin and returns the answer.
    response = main(pdf_path)
    print(response)