In [1]:
import os
import pdfplumber
import json

In [2]:
# Work from inside the resume folder so later cells can use bare file names.
# Guarded so that re-running this cell while the kernel is already inside
# "Resumes" does not raise FileNotFoundError.
if os.path.basename(os.getcwd()) != "Resumes":
    os.chdir("Resumes")

In [3]:
# Listing all resumes in the current working directory.
# Only regular files ending in ".pdf" are kept: this notebook later writes a
# "summary/" folder into the same directory, and hidden entries (for example
# ".ipynb_checkpoints") may also be present. Every name in `files` is handed
# to pdfplumber afterwards, so non-PDF entries must not end up in the list.
files = [
    entry
    for entry in os.listdir()
    if entry.lower().endswith(".pdf") and os.path.isfile(entry)
]

# Print the list of files
for file in files:
    print(file)

Amrutham_Lakhmi_Himaja.pdf
Sireesha_Resume.pdf
Prathyush_Resume.pdf
Abhinav_Gurram.pdf
Karthik _Joshi S_Resume.pdf
Karthik_Kaiplody.pdf


In [4]:
# Extract the text of every resume listed in `files`, one string per resume.
# All pages are read (not only the first) so multi-page resumes are not
# truncated; for a single-page PDF the result is the same as reading page 0.
# `extract_text() or ""` guards against pages without extractable text
# (e.g. scanned images), for which pdfplumber may return None or "".
resume_contents = []
for file in files:
    with pdfplumber.open(file) as pdf:
        page_texts = [page.extract_text() or "" for page in pdf.pages]
    resume_contents.append("\n".join(page_texts))

In [5]:
# Sanity check: preview the first 100 characters of the first extracted resume.
# Each entry of `resume_contents` is already a single string, so it is used
# directly instead of being re-joined character by character.
# NOTE: the preview shows personal contact details (phone, e-mail); clear this
# cell's output before sharing or committing the notebook.
full_text = resume_contents[0]
print(full_text[:100])

Lakshmi Himaja Amrutham
IL,USA • (312) 7742876 • alakshmihimaja@hawk.iit.edu • linkedin.com/in/laksh


## Generating summaries for each resume

In [6]:
from langchain_anthropic import ChatAnthropic
from langchain.prompts import ChatPromptTemplate
from langchain.chains import SequentialChain
from langchain.chains import SimpleSequentialChain, LLMChain, SequentialChain

In [7]:
# Using Anthropic Claude model.
# The API key is read from the environment rather than being embedded in the
# notebook, so it is never saved with the file or committed to version control.
# SECURITY: a key that was previously hardcoded here must be treated as leaked
# and revoked in the Anthropic console.
anthropic_api_key = os.environ.get("ANTHROPIC_API_KEY")
if not anthropic_api_key:
    raise RuntimeError(
        "Set the ANTHROPIC_API_KEY environment variable before running this cell."
    )

# NOTE(review): temperature=0.9 makes the generated output vary between runs;
# confirm this is intended for the JSON-structuring and summary steps below.
claude = ChatAnthropic(temperature=0.9,
                       api_key=anthropic_api_key,
                       model_name="claude-3-5-sonnet-20241022")


#### Building sequential chains

In [27]:
# Step 1 prompt: ask the model to restructure the raw resume text as JSON.
gen_json_prompt = ChatPromptTemplate.from_template(
    "Structure the following resume content in JSON format"
    "\n\n{resume_content}")
# chain 1: input= resume_content and output= resume content in structured json format
chain_one = LLMChain(llm=claude, prompt=gen_json_prompt, output_key="resume_json")

# Step 2 prompt: ask the model to summarise the JSON produced by chain 1.
# Adjacent string literals are concatenated verbatim, so the first sentence
# carries an explicit trailing space; without it the prompt would read
# "...below.Highlight all the skills...".
summary_prompt = ChatPromptTemplate.from_template(
    "Generate a 300 words summary for the resume content in JSON format below. "
    "Highlight all the skills, education, certifications if any, responsibilities and publications."
    "\n\n{resume_json}"
)
# chain 2: input= resume_json and output= resume_summary
chain_two = LLMChain(llm=claude, prompt=summary_prompt, output_key="resume_summary")

# constructing the sequential chain: chain_one's "resume_json" output feeds
# chain_two's {resume_json} placeholder; only "resume_summary" is returned as
# an output variable.
resume_summary_chain = SequentialChain(chains=[chain_one, chain_two],
                                       input_variables=["resume_content"],
                                       output_variables=["resume_summary"],
                                       verbose=False)

In [28]:
# Run the two-step chain over every extracted resume and keep only the final
# summary text of each result.
resume_summaries = []
for r_content in resume_contents:
    # `invoke` with an explicit input dict replaces the deprecated direct call
    # `resume_summary_chain(r_content)`; the key matches the chain's declared
    # input variable.
    res = resume_summary_chain.invoke({"resume_content": r_content})
    resume_summaries.append(res["resume_summary"])

In [44]:
# Storing the resume summaries for future use
SUMMARY_DIR = "summary"

# Number of leading characters dropped from each summary before it is saved.
# NOTE(review): presumably this strips a fixed-length preamble produced by the
# model; the model's wording can vary between runs, so verify that the saved
# files (and their names) start where expected.
SUMMARY_PREFIX_LEN = 42

# open(..., "w") does not create missing folders, so make sure the target exists.
os.makedirs(SUMMARY_DIR, exist_ok=True)

for r_summary in resume_summaries:
    summary_text = r_summary[SUMMARY_PREFIX_LEN:]
    # The file name is built from the first two whitespace-separated words of
    # the trimmed summary.
    name = "".join(summary_text.split()[:2])
    with open(f"{SUMMARY_DIR}/{name}_resume_summary.txt", "w", encoding="utf-8") as summary_file:
        summary_file.write(summary_text)

## Saving the resume summaries in the Weaviate vector data store

In [45]:
!pip install weaviate-client

Collecting weaviate-client
  Downloading weaviate_client-4.9.4-py3-none-any.whl.metadata (3.6 kB)
Collecting validators==0.34.0 (from weaviate-client)
  Downloading validators-0.34.0-py3-none-any.whl.metadata (3.8 kB)
Collecting authlib<1.3.2,>=1.2.1 (from weaviate-client)
  Downloading Authlib-1.3.1-py2.py3-none-any.whl.metadata (3.8 kB)
Collecting grpcio<2.0.0,>=1.57.0 (from weaviate-client)
  Downloading grpcio-1.68.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.9 kB)
Collecting grpcio-tools<2.0.0,>=1.57.0 (from weaviate-client)
  Downloading grpcio_tools-1.68.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (5.3 kB)
Collecting grpcio-health-checking<2.0.0,>=1.57.0 (from weaviate-client)
  Downloading grpcio_health_checking-1.68.0-py3-none-any.whl.metadata (1.1 kB)
Collecting protobuf<6.0dev,>=5.26.1 (from grpcio-health-checking<2.0.0,>=1.57.0->weaviate-client)
  Downloading protobuf-5.28.3-cp38-abi3-manylinux2014_x86_64.whl.metadata (5