In [5]:
from langchain.chains.summarize import load_summarize_chain
from langchain.document_loaders import PyPDFLoader
from langchain import OpenAI, PromptTemplate
import glob

In [6]:
from dotenv import load_dotenv, find_dotenv
# Find a .env file and load the variables it defines into the environment.
# Must run before the OpenAI client is created in a later cell.
# NOTE(review): presumably this is where the OpenAI API key comes from - confirm.
load_dotenv(find_dotenv())

True

In [None]:
# `os` was never imported anywhere in the notebook, so this cell raised
# NameError on a fresh kernel; import it here so the cell is self-sufficient.
import os

from langchain.document_loaders import PyPDFLoader

# Load every PDF found directly inside ./resume into one flat list of
# LangChain documents (loader.load() results for all files are concatenated).
documents = []
# os.listdir() returns names in arbitrary order; sort so the resulting
# document order is the same on every run.
for filename in sorted(os.listdir("./resume")):
    if filename.endswith(".pdf"):
        pdf_path = "./resume/" + filename
        loader = PyPDFLoader(pdf_path)
        documents.extend(loader.load())

In [7]:
# Notebook-level LLM client (sampling temperature 0.2) shared by
# summarize_pdfs_from_folder() and custom_summary(), which read it as a global.
llm = OpenAI(temperature=0.2)
def summarize_pdfs_from_folder(pdfs_folder):
    """Summarize every PDF directly inside ``pdfs_folder``.

    Each PDF is loaded and split with ``PyPDFLoader.load_and_split()`` and the
    resulting pieces are passed to a ``map_reduce`` summarize chain built on
    the notebook-level ``llm``. Each summary is printed as soon as it is
    produced.

    Args:
        pdfs_folder: Folder searched (non-recursively) for ``*.pdf`` files.

    Returns:
        list: One entry per PDF (the value returned by ``chain.run``), in
        sorted file-path order. Empty when no PDF matches.
    """
    # glob.glob() yields matches in arbitrary order; sort so the returned
    # list always lines up with the same file order.
    pdf_paths = sorted(glob.glob(pdfs_folder + "/*.pdf"))
    if not pdf_paths:
        return []

    # The chain does not depend on the file being processed: build it once
    # instead of once per PDF.
    chain = load_summarize_chain(llm, chain_type="map_reduce")

    summaries = []
    for pdf_file in pdf_paths:
        docs = PyPDFLoader(pdf_file).load_and_split()
        summary = chain.run(docs)
        print("Summary for: ", pdf_file)
        print(summary)
        print("\n")
        summaries.append(summary)

    return summaries

In [8]:
def custom_summary(pdf_folder, custom_prompt):
    """Summarize every PDF directly inside ``pdf_folder`` using a custom prompt.

    ``custom_prompt`` is prepended to a fixed template tail containing the
    ``{text}`` placeholder and a ``SUMMARY:`` cue. The resulting prompt is used
    for both the map and the combine step of a ``map_reduce`` summarize chain
    built on the notebook-level ``llm``.

    Args:
        pdf_folder: Folder searched (non-recursively) for ``*.pdf`` files.
        custom_prompt: Instruction text placed in front of the document text.
            NOTE(review): it is concatenated straight into the template, so
            literal ``{``/``}`` in it are presumably treated as template
            placeholders - confirm before passing such text.

    Returns:
        list: The chain's ``"output_text"`` for each PDF, in sorted file-path
        order. Empty when no PDF matches.
    """
    # glob.glob() yields matches in arbitrary order; sort so callers can map
    # the returned summaries back to files reliably.
    pdf_paths = sorted(glob.glob(pdf_folder + "/*.pdf"))
    if not pdf_paths:
        return []

    # Template tail kept character-for-character (including the eight-space
    # indentation) so the prompt sent to the model is unchanged.
    prompt_template = custom_prompt + "\n\n        {text}\n\n        SUMMARY:"

    # Neither the prompt nor the chain depends on the individual file, so
    # build them once rather than once per PDF.
    prompt = PromptTemplate(template=prompt_template, input_variables=["text"])
    chain = load_summarize_chain(
        llm,
        chain_type="map_reduce",
        map_prompt=prompt,
        combine_prompt=prompt,
    )

    summaries = []
    for pdf_file in pdf_paths:
        docs = PyPDFLoader(pdf_file).load_and_split()
        result = chain({"input_documents": docs}, return_only_outputs=True)
        summaries.append(result["output_text"])

    return summaries

In [10]:
# Summarize every PDF under ./resume; each summary is printed as it is produced
# and the returned list is kept for the cell that writes summaries.txt.
summaries = summarize_pdfs_from_folder("./resume")

Summary for:  ./resume\Pratishtha Singh.pdf
 Pratishtha Singh is a student at Jaypee University of Engineering and Technology in India studying Computer Science. She has experience in a variety of languages, frameworks, and tools, and has worked on several projects. She has also interned and held a position at GDSC JUET. In addition, she has provided support and resources to students in need, advocated for equal access to education and resources, and led initiatives successfully. She achieved a 13th rank in an online round and 4th rank among the top participants in a final round at a Hackathon (Codespire) in March 2023.


Summary for:  ./resume\Sandali-CV.pdf
 Sandali Gupta is a B.Tech Computer Science student with strong problem-solving and analytical skills. He is experienced in project development and has technical skills in Python, Java, CPP, HTML, CSS, Javascript, and various frameworks. He has also held positions of responsibility and volunteered for various organizations.




Retrying langchain.llms.openai.completion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-davinci-003 in organization org-nSLZbSvqQViJHZZYqUphiWkz on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..
Retrying langchain.llms.openai.completion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-davinci-003 in organization org-nSLZbSvqQViJHZZYqUphiWkz on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/acco

Summary for:  ./resume\Vani Seth CV.pdf
 Vani Seth is a 3rd-year student in Computer Science and Engineering at Jaypee University of Engineering and Technology, Guna. She has a current CGPA of 8.7 and is among the top 5 students in her class. She has experience in Machine Learning, Artificial Intelligence, Data Science, Open Source Contributor, Graphic Design, Research Assistant, and AI/ML Intern. She has participated in various hackathons and has achieved several awards. Her skills include Python, C, C++, Java, JavaScript, Bash, HTML/CSS, MySQL, SQLite, Langchain, Tensorflow, Keras, Numpy, Pandas, Matplotlib, Scikit-Learn, Pytorch, OpenCV, Kubernetes, Docker, Git, Photoshop, Linux, AWS, GCP, Windows, and she has created projects such as Class Scheduler using AI, Steg.




In [11]:
# Save all summaries into one .txt file, each followed by three newlines.
# The encoding is set explicitly: without it open() uses the platform default,
# which can raise UnicodeEncodeError on characters outside that code page
# (LLM output is not guaranteed to be ASCII).
with open("summaries.txt", "w", encoding="utf-8") as f:
    for summary in summaries:
        f.write(summary + "\n" * 3)