In [1]:
from langchain_community.document_loaders import UnstructuredPDFLoader
from langchain_community.embeddings import OllamaEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain.prompts import ChatPromptTemplate, PromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_ollama import OllamaEmbeddings, ChatOllama
from langchain_core.runnables import RunnablePassthrough
from langchain.retrievers.multi_query import MultiQueryRetriever
from tqdm.notebook import tqdm
import time
import itertools

In [2]:
local_path = "../pdf/Mateo Wheeler Resume v2.pdf"

if local_path:
    loader = UnstructuredPDFLoader(file_path=local_path)
    data = loader.load()
else:
    print("Upload a PDF file for processing.")

In [3]:
len(data[0].page_content)

6828

In [4]:
#Split and chunk the data
chunk_size = 150
chunk_overlap = 75

text_splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
chunks = text_splitter.split_documents(data)

# Add the chunks to vector database, which uses nomic for model embeddings
vector_db = Chroma.from_documents(
                                    documents=chunks, 
                                    embedding=OllamaEmbeddings(model="nomic-embed-text"),
                                    collection_name="local-rag"
                                )

Failed to send telemetry event ClientStartEvent: capture() takes 1 positional argument but 3 were given
Failed to send telemetry event ClientCreateCollectionEvent: capture() takes 1 positional argument but 3 were given


In [5]:
local_llm = "llama3.2"
llm = ChatOllama(model=local_llm)

# Set up a basic PromptTemplate as the backbones of the solution
QUERY_PROMPT = PromptTemplate(
    input_variables = ["question"],
        template="""As a recruiter, you need to evaluate if a candidate is a good fit for a role. 
                If you don't know the answer, just say that you don't know, don't try to make up an answer.
                Question: {question}
           """
)

retriever = MultiQueryRetriever.from_llm(vector_db.as_retriever(),llm, prompt=QUERY_PROMPT)

# use a ChatPromptTemplate to initiate a conversation, allowing the System to assume a Role
chat_template = """Answer the question based only on the following context: 
{context}
Question: {question}
"""

prompt = ChatPromptTemplate.from_template(chat_template)

chain = (
    {"context":retriever, "question":RunnablePassthrough()}
    | prompt 
    | llm 
    | StrOutputParser()
)

In [6]:
# copy company_description and job_description from LinkedIn job posts
company_description = """Since its founding in 1993, NVIDIA (NASDAQ: NVDA) has been a pioneer in accelerated computing. The company’s invention of the GPU in 1999 sparked the growth of the PC gaming market, redefined computer graphics, ignited the era of modern AI and is fueling the creation of the metaverse. NVIDIA is now a full-stack computing company with data-center-scale offerings that are reshaping industry.
"""

job_description = """Professional, long-term experience as a Data Scientist (focused on AI/ML) developing/supporting business facing projects.
8+ years of experience, including 5+ years of demonstrated ability in business-focused AI and Data Science. Job scope can be adjusted to accommodate more experienced candidates.
BS/MS/PhD or equivalent experience in Computer Science, Data Science, Electrical/Computer Engineering, Physics, Mathematics, other Engineering fields. Technical Master’s or Ph.D. with finance or business background is preferred.
Data Science project management, driving projects and coordinating across multidisciplinary teams inside the organization.
Strong technical skills, with a proven history of designing, validating, deploying, and maintaining data science models using Python, SQL, & Databricks (or similar).
Excellent communication skills, with the ability to maintain good documentation and present projects to technical and business partners.
                """

In [7]:
context = f"Role: you are a recruiter for {company_description}. The job position requirements are the following:\
            {job_description}\
        "

q = "Can you evaluate the candidate in this resume document for the role provided?"

response = chain.invoke(input={'context': context, 'question': q})
print(response)

Failed to send telemetry event CollectionQueryEvent: capture() takes 1 positional argument but 3 were given


Based on the provided context, I can evaluate the candidate's qualifications for the Data Scientist role at NVIDIA.

The candidate has a background as a Senior Data Scientist at Hellofresh from November 2021 to April 2025. In this role, they likely developed and supported business-facing projects involving AI/ML, which aligns with the job requirements.

According to the resume document, the candidate has:

* Experience in pre-trained neural networks (SBERT, transfer learning) for NLP topic modeling
* Familiarity with NLTK, sklearn, UMAP for data analysis
* Experience in project management using Agile Methodology, Atlassian Jira & Confluence, and People Management tools
* Strong technical skills in Python, SQL, and Databricks
* Excellent communication skills

The candidate also has a strong educational background, including a Bachelor's/Master's/PhD degree or equivalent experience in Computer Science, Data Science, Electrical/Computer Engineering, Physics, Mathematics, or other Engineer