# Semantic Search Through Resumes for HR Tech Startups Using Qdrant

## Utilise Qdrant, LangChain and OpenAI to narrow down to the top resumes for a job


In [None]:
! pip install qdrant-client langchain openai pypdf

In [2]:
import os
import getpass
from operator import itemgetter

from langchain_community.document_loaders import PyPDFDirectoryLoader
from langchain.vectorstores import Qdrant
from langchain_community.chat_models import ChatOpenAI
from langchain_community.embeddings.openai import OpenAIEmbeddings
from langchain_core.prompts import ChatPromptTemplate, PromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain.schema import format_document

from langchain_core.runnables import RunnableParallel


# Ask for the key only when it is not already present in the environment, so that
# re-running the notebook (or executing it unattended) neither overwrites an exported
# key nor blocks waiting for input. getpass keeps the typed key out of the cell output.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass.getpass("OpenAI API Key:")

In [3]:
# Read every PDF resume in the dataset folder into text documents.
# NOTE(review): the loader presumably emits one document per PDF page rather than one
# per resume, so the count printed here may be larger than the number of files - confirm.
resume_dir = "../data/INFORMATION-TECHNOLOGY"
loader = PyPDFDirectoryLoader(resume_dir)
docs = loader.load()
print(len(docs))


247


In [7]:
# Embedding model that converts resume text into vectors.
embeddings = OpenAIEmbeddings()

# Build the vector index: every loaded document is embedded and stored in a single
# Qdrant collection named "it_resumes".
qdrant_collection = Qdrant.from_documents(
    documents=docs,
    embedding=embeddings,
    collection_name="it_resumes",
    location=":memory:",  # local mode, in-memory storage only - the index is rebuilt on every run
)

# Wrap the collection in a retriever so it can be plugged into a chain.
qdrant_retriever = qdrant_collection.as_retriever()


In [98]:

# Define the first two components of the LLM chain: the prompt template and the chat model.
# The template exposes two placeholders: {resume}, which the chain fills with the retrieved
# resume documents, and {question}, which carries the recruiter's query.
# Fix: the instruction text previously read "You are be provided", a typo sent to the model verbatim.

template = """You are a helpful assistant to a recruiter at a technology firm. You will be provided the following input context \
    from a dataset of resumes of IT professionals. Answer the question based only on the context. Also provide the  \
    source documents.
{resume}

Question: {question}
"""
prompt = ChatPromptTemplate.from_template(template)

# NOTE(review): 'gpt-3.5-turbo-16k-0613' is a dated model snapshot - confirm it is still
# served by the OpenAI API before relying on this cell.
model = ChatOpenAI(temperature=0, model='gpt-3.5-turbo-16k-0613')



In [99]:
# Assemble the chain. The question string passed to `invoke` is fanned out to the two
# prompt variables: "resume" receives whatever the retriever returns for that question,
# and "question" receives the string unchanged. The filled prompt then goes to the model,
# whose reply is parsed into a plain string.
prompt_inputs = RunnableParallel(
    resume=qdrant_retriever,
    question=RunnablePassthrough(),
)
chain = prompt_inputs | prompt | model | StrOutputParser()



In [100]:
# Example query: resumes that list two specific skills.
java_html_question = "Which resumes mention both Java and HTML?"
chain.invoke(java_html_question)


"The resumes that mention both Java and HTML are the following:\n\n1. Document 0: INFORMATION TECHNOLOGY INTERN (TEST AUTOMATION ENGINEER)\n2. Document 2: Skills - Microsoft Word, Excel, Access, SQL Server, VBA, Visual Studio\n\nSource documents:\n- Document 0: '../data/INFORMATION-TECHNOLOGY/83816738.pdf'\n- Document 2: '../data/INFORMATION-TECHNOLOGY/20001721.pdf'"

In [101]:
# Example query: resumes that mention a particular vendor's technologies.
# NOTE(review): the question wording is ungrammatical ("Which resumes which ..."); it is kept
# verbatim so the recorded output below still corresponds to the exact input.
cisco_question = "Which resumes which mention working with Cisco technologies."
chain.invoke(cisco_question)

'The resumes that mention working with Cisco technologies are:\n\n1. Document 1: The first document mentions administering CISCO multi-VLAN layer 3 switched LAN/WAN, designing and administering CISCO VPN solution, and working with CISCO switches.\n\n2. Document 2: The second document mentions skills in Cisco switching and Cisco routers.\n\n3. Document 4: The fourth document mentions experience with Cisco hardware and software business solutions, Cisco command line IOS, and Cisco switches.\n\nSources:\n- Document 1: ../data/INFORMATION-TECHNOLOGY/20879311.pdf\n- Document 2: ../data/INFORMATION-TECHNOLOGY/91635250.pdf\n- Document 4: ../data/INFORMATION-TECHNOLOGY/31243710.pdf'

In [103]:
# Example query: filter by seniority and recency of experience.
director_question = "Find me some resumes who have recent experience at Director level"
chain.invoke(director_question)

'Based on the provided input context, here are some resumes of IT professionals who have recent experience at the Director level:\n\n1. Resume: INFORMATION TECHNOLOGY DIRECTOR\n   - Experience: April 1999 to January 2015\n   - Company Name: Not specified\n   - Source: ../data/INFORMATION-TECHNOLOGY/24038620.pdf\n\n2. Resume: INFORMATION TECHNOLOGY SENIOR MANAGER\n   - Experience: April 2013 to February 2015\n   - Company Name: Not specified\n   - Source: ../data/INFORMATION-TECHNOLOGY/17681064.pdf\n\nPlease note that the specific company names are not provided in the input context.'

In [111]:
# Example query: filter by industry domain.
telecom_question = "Find resumes which mention experience working in the telecom domain?"
chain.invoke(telecom_question)

'Based on the provided input context, the following resumes mention experience working in the telecom domain:\n\n1. Document: "../data/INFORMATION-TECHNOLOGY/10839851.pdf"\n   - Page Content: "Bachelor of Science, Telecommunications Management"\n   - Page Content: "Certifications: Cisco Certified Network Associate (CCNA/CCAI)"\n\n2. Document: "../data/INFORMATION-TECHNOLOGY/15297298.pdf"\n   - Page Content: "Telecommunications Management"\n   - Page Content: "telecommunication, phone, employee development, VoIP"\n\nPlease note that the mentioned resumes are based on the provided input context and there may be additional resumes in the dataset that mention experience working in the telecom domain.'