## RAG demo
### Match resume with a list of JD (from Adzuna)

In [2]:
from langchain_community.document_loaders import SeleniumURLLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_core.vectorstores import InMemoryVectorStore
from langchain_ollama import OllamaEmbeddings
from langchain_core.prompts import ChatPromptTemplate
from langchain_ollama.llms import OllamaLLM
from langchain_huggingface import HuggingFaceEmbeddings
from langchain.schema import Document
from langchain_community.document_loaders import Docx2txtLoader
from dotenv import load_dotenv
import os

import requests

### Initiate vector store model and llm model

In [3]:
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
vector_store = InMemoryVectorStore(embeddings)


  from .autonotebook import tqdm as notebook_tqdm


In [4]:
model = OllamaLLM(model="llama3.2")

### Job query - Adzuna

In [5]:
load_dotenv()  # Loads variables from .env into environment

app_id = os.getenv("app_id")
app_key = os.getenv("app_key")

In [53]:

# Define the endpoint and query parameters
url = 'http://api.adzuna.com/v1/api/jobs/gb/search/1'

params = {
    'app_id': app_id,
    'app_key': app_key,
    'results_per_page': 20,
    'what': 'data scientist, remote',
    'content-type': 'application/json'
}

# Make the GET request
response = requests.get(url, params=params)

# initiate langchain documents
documents = []

# Check if it worked
if response.status_code == 200:
    data = response.json()  # Parse JSON response
    for job in data.get('results', []):
        content = f"Title: {job.get('title')}\n\nDescription: {job.get('description')}"
        metadata = {
            "id": job.get("id"),
            "company": job.get("company", {}).get("display_name"),
            "location": job.get("location", {}).get("display_name"),
            "salary_min": job.get("salary_min"),
            "salary_max": job.get("salary_max"),
            "url": job.get("redirect_url")
        }

        documents.append(Document(page_content=content, metadata=metadata))

    # documents now contains LangChain Document objects
    print(f"Loaded {len(documents)} job listings as LangChain Documents.")
else:
    print("Error:", response.status_code, response.text)

Loaded 20 job listings as LangChain Documents.


In [54]:
documents

[Document(metadata={'id': '5141371904', 'company': 'Arthur', 'location': 'London, UK', 'salary_min': 50000, 'salary_max': 90000, 'url': 'https://www.adzuna.co.uk/jobs/land/ad/5141371904?se=1Moq3m8Z8BGheL0mXCZXLA&utm_medium=api&utm_source=ad3ef867&v=A9C7749B3BA26041D8BF40DF4888CE2A4A1853A8'}, page_content='Title: Pricing Data Scientist Remote\n\nDescription: I am working with an emerging Insurtech looking for a technical Pricing/data science professional. Seeking a proactive individual with a passion for data to join a small, high-performing team and play a key role in driving the business forward. They use an in-house pricing system combining traditional Emblem GLM models with machine learning models developed in Python.My client has an office based in London however, this role can be fully remote.Required skills: Proactive self-starter with a can…'),
 Document(metadata={'id': '5099715685', 'company': 'Vertech Group  Ltd', 'location': 'Clerkenwell, Central London', 'salary_min': 70000,

### Chunk the text: no need since it's short

In [38]:
# def split_text(documents):
#     text_splitter = RecursiveCharacterTextSplitter(
#         chunk_size=1000,
#         chunk_overlap=200,
#         add_start_index=True
#     )
#     data = text_splitter.split_documents(documents)

#     return data


### Index docs

In [50]:
vector_store.add_documents(documents=documents)

['01ed6e7e-bd4f-461d-8b3f-bfe02b0b55c9',
 'b37ea729-0263-4273-924c-c77456972acc',
 'd1e695a3-3fb8-4059-bfd8-77d90e60de82',
 'b3d00db7-52f3-4e2c-a545-64abbe4ab625',
 'bd752547-8777-48e3-b65b-4b1996e75fcf',
 'ba8fc88f-cb5e-4c01-b877-a77d4921331e',
 'f234df91-1efa-4b36-a016-9ad65df0acd2',
 '98772699-a525-45b7-a6a8-1bd244f9e163',
 'f21314a6-96de-4b2a-9ed0-65279200b69b',
 '7c3c938e-57d8-4723-9a3e-6066c7180b5d',
 '4e850d1b-6cc5-4049-9ba0-3a69d92abf42',
 '82a5397b-21ad-464c-b475-ddb540e3d095',
 'e7c6182a-ff4a-4fb2-ae44-0b91dd482e62',
 '3def8e07-9ee0-474d-98b2-da4f0a232fb5',
 'a76de745-bb3a-4bb5-bc5b-794acbdff1bc',
 '6d6fce66-99d4-4b67-a057-f7c8f5107260',
 'b8a937cb-91bc-4136-9786-a8f235e303c5',
 '21097689-043d-4a54-bf49-fa5e29848fbf',
 '9b40b079-e8a8-42a4-8485-3f96866c92d5',
 '6fb51a35-ec7a-416e-8228-72846f032dc9']

### Query and retrieval

In [40]:
template = """
You are an assistant for job matching tasks. Based on how it align with the query (user's resume), order the retrieved jobs (context) and explain how you ranked. Returned a list of job titles of top 3 matches and their link. ”
Question: {question} 
Context: {context} 
Answer:
"""

### Load and index resume

In [41]:
# load the doc
loader = Docx2txtLoader("resume.docx")

query = loader.load()

query

[Document(metadata={'source': 'resume.docx'}, page_content='YIJIN BAO\n\n\t\t+1-773-490-7200\n\n\t\tChicago, Illinois\n\n\t\tkay062@uchicago.edu, LinkedIn | GitHub\n\n\n\n\n\neducation\n\n\tHarris School of Public Policy, University of Chicago\tChicago, IL\n\n\tData Analytics Specialization of Master of Public Policy (MPP), Graduate with Honor\tJune 2024\n\nData Science: ETL pipeline, NLP, Deep Learning, Machine Learning, Computer Vision\n\nData Analytics: data collection, management, cleaning, manipulation, sanity check, analysis, visualization, GIS\n\nPublic Policy analysis: modeling and research for transportation policy, healthcare policy, urban policy\n\nBusiness data product: dashboard design, interactive chatbot, database management, automated workflows\n\n\n\n\tShanghai International Studies University\tShanghai, China\n\n\tDual Bachelor of Laws and English, Outstanding Undergraduate Thesis Honor\tJuly 2022\n\nInternational Law; International Economics Law; International Financ

In [42]:
# Assume query is a LangChain Document
if isinstance(query[0], Document):
    query_text = query[0].page_content
else:
    query_text = query[0]  # If it's already a string

### Invoke answer

In [51]:
# Retrieval top 5 jobs based on similarity search
retrieve_documents = vector_store.similarity_search(query_text, k=5)
retrieve_documents[0]

context = "\n\n".join([doc.page_content for doc in retrieve_documents])

In [52]:
retrieve_documents

[Document(id='d18eb379-aad2-42cc-8272-9180733c1b2c', metadata={'id': '5120993680', 'company': 'Searchability  Ltd', 'location': 'Cheltenham, Gloucestershire', 'salary_min': 45000, 'salary_max': 75000, 'url': 'https://www.adzuna.co.uk/jobs/details/5120993680?utm_medium=api&utm_source=ad3ef867'}, page_content='Title: Data Scientist\n\nDescription: Salary: Up to £75k DOE  Bonuses & Benefits Location: Cheltenham (3 days on-site per week) Security Clearance: Active Enhanced DV (West) Required Key Skills: Machine Learning, AWS/Azure, Python, NLP, AI Who We Are? We are seeking a highly skilled Data Scientist with enhanced DV clearance to join a globally recognised leader in technology, consulting, and engineering services. Working across high-impact public and private sector projects, this role offers excellent career progression and the oppo…'),
 Document(id='d3dfc717-1194-4ea9-b812-263290a4d647', metadata={'id': '5125861695', 'company': 'Keller Executive Search', 'location': 'Camden Town, N

In [45]:
prompt = ChatPromptTemplate.from_template(template)
chain = prompt | model
chain.invoke({"question": query_text, "context": context})

"Based on the provided user's resume, I have retrieved three job matches that align closely with their skills and experience. Here are the top 3 matches along with their descriptions and links:\n\n1. **Title: Data Scientist**\n**Description:** Salary: Up to £75k DOE Bonuses & Benefits Location: Cheltenham (3 days on-site per week) Security Clearance: Active Enhanced DV (West) Required Key Skills: Machine Learning, AWS/Azure, Python, NLP, AI\n**Link:** [https://www.relay.co.uk/](https://www.relay.co.uk/)\nThis job match aligns with the user's expertise in machine learning, data science, and programming languages like Python. The role requires a high level of security clearance, which is also a relevant skill for the user.\n\n2. **Title: Data Scientist**\n**Description:** Salary: Competitive Location: London\n**Link:** [https://www.keller.co.uk/](https://www.keller.co.uk/)\nThis job match aligns with the user's experience in data analysis and visualization, as well as their skills in mac

In [47]:
retrieve_documents

[Document(id='d18eb379-aad2-42cc-8272-9180733c1b2c', metadata={'id': '5120993680', 'company': 'Searchability  Ltd', 'location': 'Cheltenham, Gloucestershire', 'salary_min': 45000, 'salary_max': 75000, 'url': 'https://www.adzuna.co.uk/jobs/details/5120993680?utm_medium=api&utm_source=ad3ef867'}, page_content='Title: Data Scientist\n\nDescription: Salary: Up to £75k DOE  Bonuses & Benefits Location: Cheltenham (3 days on-site per week) Security Clearance: Active Enhanced DV (West) Required Key Skills: Machine Learning, AWS/Azure, Python, NLP, AI Who We Are? We are seeking a highly skilled Data Scientist with enhanced DV clearance to join a globally recognised leader in technology, consulting, and engineering services. Working across high-impact public and private sector projects, this role offers excellent career progression and the oppo…'),
 Document(id='d3dfc717-1194-4ea9-b812-263290a4d647', metadata={'id': '5125861695', 'company': 'Keller Executive Search', 'location': 'Camden Town, N

"Based on the provided context, I found a job that suits YIJIN BAO's skills and experience:\n\nTitle: Data Scientist\n\nLocation: Cheltenham (3 days on-site per week)\n\nSecurity Clearance: Active Enhanced DV (West) Required\n\nKey Skills: Machine Learning, AWS/Azure, Python, NLP, AI\n\nThe job description mentions the use of advanced statistical methods, machine learning algorithms, and data visualization techniques to tackle complex data challenges. YIJIN BAO's skills in machine learning, Python, NLP, and SQL align with the job requirements.\n\nAdditionally, the job description mentions working with large datasets, conducting analyses, and presenting findings, which are also areas where YIJIN BAO has demonstrated expertise through their experience as a Research Analysis Intern at Illinois Criminal Justice Information Authority."


---------------------------------------------------------------------------

"Based on the provided user's resume, I have retrieved three job matches that align closely with their skills and experience. Here are the top 3 matches along with their descriptions and links:\n\n1. **Title: Data Scientist**\n**Description:** Salary: Up to £75k DOE Bonuses & Benefits Location: Cheltenham (3 days on-site per week) Security Clearance: Active Enhanced DV (West) Required Key Skills: Machine Learning, AWS/Azure, Python, NLP, AI\n**Link:** [https://www.relay.co.uk/](https://www.relay.co.uk/)\nThis job match aligns with the user's expertise in machine learning, data science, and programming languages like Python. The role requires a high level of security clearance, which is also a relevant skill for the user.\n\n2. 

**Title: Data Scientist**\n**Description:** Salary: Competitive Location: London\n**Link:** [https://www.keller.co.uk/](https://www.keller.co.uk/)\nThis job match aligns with the user's experience in data analysis and visualization, as well as their skills in machine learning and programming languages like Python.\n\n3. 

**Title: Data Scientist**\n**Description:** Salary: £70k - £90k DOE Location: Cheltenham\n**Link:** [https://www.relay.co.uk/](https://www.relay.co.uk/)\nThis job match aligns with the user's expertise in data science, machine learning, and programming languages like Python. The role requires a high level of security clearance, which is also relevant for the user.\n\nI ranked these jobs based on the following factors:\n\n* Relevance: Alignment with the user's skills and experience\n* Salary: Comparable salary ranges to the user's current or expected salary\n* Location: Proximity to the user's location or willingness to relocate\n* Security Clearance: Matching security clearance requirements, which is relevant for the user's profile\n\n

Please note that these job matches are based on the provided resume and may not be exhaustive. It's essential to research each job thoroughly and tailor your application to showcase your skills and experience."