In [24]:
from langchain.chains.summarize import load_summarize_chain
from langchain.document_loaders import PyPDFLoader
from langchain import OpenAI, PromptTemplate
import glob
import os

In [25]:
from dotenv import load_dotenv, find_dotenv
load_dotenv(find_dotenv())

True

In [26]:
from langchain.document_loaders import PyPDFLoader

documents = []
for file in os.listdir("./resume"):
    if file.endswith(".pdf"):
        pdf_path = "./resume/" + file
        loader = PyPDFLoader(pdf_path)
        documents.extend(loader.load())

In [27]:
llm = OpenAI(temperature=0.2)
def summarize_pdfs_from_folder(pdfs_folder):
    summaries = []
    for pdf_file in glob.glob(pdfs_folder + "/*.pdf"):
        loader = PyPDFLoader(pdf_file)
        docs = loader.load_and_split()
        chain = load_summarize_chain(llm, chain_type="map_reduce")
        summary = chain.run(docs)
        print("Summary for: ", pdf_file)
        print(summary)
        print("\n")
        summaries.append(summary)
    
    return summaries

In [28]:
def custom_summary(pdf_folder, custom_prompt):
    summaries = []
    for pdf_file in glob.glob(pdf_folder + "/*.pdf"):
        loader = PyPDFLoader(pdf_file)
        docs = loader.load_and_split()
        prompt_template = custom_prompt + """

        {text}

        SUMMARY:"""
        PROMPT = PromptTemplate(template=prompt_template, input_variables=["text"])
        chain = load_summarize_chain(llm, chain_type="map_reduce", 
                                    map_prompt=PROMPT, combine_prompt=PROMPT)
        summary_output = chain({"input_documents": docs},return_only_outputs=True)["output_text"]
        summaries.append(summary_output)
        
    return summaries

In [29]:
summaries = summarize_pdfs_from_folder("./resume")

Summary for:  ./resume\Pratishtha Singh.pdf
 Pratishtha Singh is a student at Jaypee University of Engineering and Technology, studying Computer Science. She has experience with various languages and platforms, and has worked on various projects. Additionally, she has volunteered for Shiksha-Setu, provided support and resources to students in need, advocated for equal access to education and resources, and led initiatives successfully. She also conducted a donations drive and achieved a 13th rank in an online round and 4th rank among the top participants in a final round of a Hackathon (Codespire) in March 2023.




Retrying langchain.llms.openai.completion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-davinci-003 in organization org-nSLZbSvqQViJHZZYqUphiWkz on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..
Retrying langchain.llms.openai.completion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-davinci-003 in organization org-nSLZbSvqQViJHZZYqUphiWkz on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/acco

Summary for:  ./resume\Sandali-CV.pdf
 Sandali Gupta is a final year B.Tech Computer Science student at Jaypee University of Engineering and Technology. He has strong problem-solving and analytical skills, and is experienced in project development. He has technical skills in Python, Java, CPP, HTML, CSS, Javascript, and frameworks such as Streamlit, Pandas, Numpy, Keras, Scikit-Learn, Matplotlib, and Flask. He also has soft skills such as problem solving, time management, creative thinking, and communication. He has participated in hackathons and created personal projects such as a property price prediction model and a Covid-19 detection model, and has held positions of responsibility and volunteered for various causes.




Retrying langchain.llms.openai.completion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-davinci-003 in organization org-nSLZbSvqQViJHZZYqUphiWkz on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..
Retrying langchain.llms.openai.completion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-davinci-003 in organization org-nSLZbSvqQViJHZZYqUphiWkz on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/acco

Summary for:  ./resume\Vani Seth CV.pdf
 Vani Seth is a 3rd-year student in B.Tech, Computer Science and Engineering Program at Jaypee University of Engineering and Technology, Guna. She has experience in Machine Learning, Artificial Intelligence, and Data Science and has participated in various coding programs. She is proficient in many programming languages and has achieved various awards and recognitions. Her projects include Class Scheduler using AI, StegMed, and Email Spoofing Detection at the server.




In [23]:
import csv

data = []
for summary in summaries:
    lines = summary.split('\n')
    name = lines[0].split(": ")[1]
    summary_text = " ".join(lines[1:])
    data.append({"name": name, "summary": summary_text})

csv_filename = "resume_summaries.csv"

with open(csv_filename, mode='w', newline='', encoding='utf-8') as csv_file:
    fieldnames = ['name', 'summary']
    writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
    
    writer.writeheader()
    for entry in data:
        writer.writerow(entry)

print(f"CSV file '{csv_filename}' created successfully.")


IndexError: list index out of range

In [30]:
# Save all summaries into one .txt file
with open("summaries.txt", "w") as f:
    for summary in summaries:
        f.write(summary + "\n"*3)

Storing Skills

In [31]:
from langchain.indexes import VectorstoreIndexCreator
from langchain.document_loaders import PyPDFDirectoryLoader

In [32]:
loader = PyPDFDirectoryLoader("./resume/")

docs = loader.load()

# Create the vector store index
index = VectorstoreIndexCreator().from_loaders([loader])

In [33]:
import os
import re

cv_directory = "./resume/" 
cv_files = []

def remove_special_characters(text):
    return re.sub(r'[^\w\s]', '', text)

for filename in os.listdir(cv_directory):
    if filename.endswith(".pdf"):  
        pdf_name = os.path.splitext(filename)[0] 
        pdf_name = remove_special_characters(pdf_name)  
        pdf_name = pdf_name.replace("CV", "").replace("resume", "").replace("cv", "").replace("Resume", "").strip() 
        cv_files.append(pdf_name)

print("List of PDF file names:")
print(cv_files)

List of PDF file names:
['Pratishtha Singh', 'Sandali', 'Vani Seth']


In [34]:
query = "List of skills of ('Vani Seth')?"

index.query(query)

Retrying langchain.llms.openai.completion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-davinci-003 in organization org-nSLZbSvqQViJHZZYqUphiWkz on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..
Retrying langchain.llms.openai.completion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-davinci-003 in organization org-nSLZbSvqQViJHZZYqUphiWkz on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/acco

" Vani Seth's skills include Machine Learning, Artificial Intelligence, Data Science, Streamlit, Pandas, Numpy, Keras, Scikit-Learn, Matplotlib, Flask, MySql, Problem Solving, Time Management, Creative Thinking, Communication, Data Analytics, Statistics, and Content Writing."

In [35]:
query = "What is the email of ('Sandali Gupta')?"

index.query(query)

Retrying langchain.llms.openai.completion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-davinci-003 in organization org-nSLZbSvqQViJHZZYqUphiWkz on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..
Retrying langchain.llms.openai.completion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-davinci-003 in organization org-nSLZbSvqQViJHZZYqUphiWkz on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/acco

' sandaligupta1@email.com'