In [18]:
from langchain_community.document_loaders import UnstructuredPDFLoader
from langchain_community.embeddings import OllamaEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain.prompts import ChatPromptTemplate, PromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_ollama import OllamaEmbeddings, ChatOllama
from langchain_core.runnables import RunnablePassthrough
from langchain.retrievers.multi_query import MultiQueryRetriever
from langchain_openai import ChatOpenAI
from tqdm.notebook import tqdm
import time
import itertools
from ragas import EvaluationDataset, evaluate
from ragas.run_config import RunConfig
from ragas.metrics import faithfulness, answer_relevancy, answer_correctness, context_precision, context_recall, answer_similarity
from ragas.embeddings import LangchainEmbeddingsWrapper
from ragas.llms import LangchainLLMWrapper
from datasets import Dataset
import json
import os
from dotenv import load_dotenv

In [6]:
load_dotenv()
# os.environ("OPENAI_API_KEY")

True

In [4]:
local_path = "../pdf/Mateo Wheeler Resume v2.pdf"

if local_path:
    loader = UnstructuredPDFLoader(file_path=local_path)
    data = loader.load()
else:
    print("Upload a PDF file for processing.")

In [7]:
len(data[0].page_content)

6828

In [8]:
#Split and chunk the data
chunk_size = 150
chunk_overlap = 75

text_splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
chunks = text_splitter.split_documents(data)

# Add the chunks to vector database, which uses nomic for model embeddings
vector_db = Chroma.from_documents(
                                    documents=chunks, 
                                    embedding=OllamaEmbeddings(model="nomic-embed-text"),
                                    collection_name="local-rag"
                                )

Failed to send telemetry event ClientStartEvent: capture() takes 1 positional argument but 3 were given
Failed to send telemetry event ClientCreateCollectionEvent: capture() takes 1 positional argument but 3 were given


In [9]:
local_llm = "llama3.2"
llm = ChatOllama(model=local_llm)

# Set up a basic PromptTemplate as the backbones of the solution
QUERY_PROMPT = PromptTemplate(
    input_variables = ["question"],
        template="""As a recruiter, you need to evaluate if a candidate is a good fit for a role. 
                If you don't know the answer, just say that you don't know, don't try to make up an answer.
                Question: {question}
           """
)

retriever = MultiQueryRetriever.from_llm(vector_db.as_retriever(),llm, prompt=QUERY_PROMPT)

# use a ChatPromptTemplate to initiate a conversation, allowing the System to assume a Role
chat_template = """Answer the question based only on the following context: 
{context}
Question: {question}
"""

prompt = ChatPromptTemplate.from_template(chat_template)

chain = (
    {"context":retriever, "question":RunnablePassthrough()}
    | prompt 
    | llm 
    | StrOutputParser()
)

In [10]:
# copy company_description and job_description from LinkedIn job posts
company_description = """Since its founding in 1993, NVIDIA (NASDAQ: NVDA) has been a pioneer in accelerated computing. The company’s invention of the GPU in 1999 sparked the growth of the PC gaming market, redefined computer graphics, ignited the era of modern AI and is fueling the creation of the metaverse. NVIDIA is now a full-stack computing company with data-center-scale offerings that are reshaping industry.
"""

job_description = """Professional, long-term experience as a Data Scientist (focused on AI/ML) developing/supporting business facing projects.
8+ years of experience, including 5+ years of demonstrated ability in business-focused AI and Data Science. Job scope can be adjusted to accommodate more experienced candidates.
BS/MS/PhD or equivalent experience in Computer Science, Data Science, Electrical/Computer Engineering, Physics, Mathematics, other Engineering fields. Technical Master’s or Ph.D. with finance or business background is preferred.
Data Science project management, driving projects and coordinating across multidisciplinary teams inside the organization.
Strong technical skills, with a proven history of designing, validating, deploying, and maintaining data science models using Python, SQL, & Databricks (or similar).
Excellent communication skills, with the ability to maintain good documentation and present projects to technical and business partners.
                """

In [11]:
context = f"Role: you are a recruiter for {company_description}. The job position requirements are the following:\
            {job_description}\
        "

q = "Can you evaluate the candidate in this resume document for the role provided?"

response = chain.invoke(input={'context': context, 'question': q})
print(response)

Failed to send telemetry event CollectionQueryEvent: capture() takes 1 positional argument but 3 were given


Based on the context of the resume document and the job requirements, I can provide an evaluation of the candidate's qualifications.

The candidate has approximately 8 years of experience as a Senior Data Scientist at AirDNA and Hellofresh. They have worked on various projects, including developing Quasi-Experimental Evaluation Design (QED) for customer touchpoint hypothesis testing, creating data visualization dashboards, and managing the Data Science team. The candidate has also demonstrated expertise in machine learning, business analytics, and data visualization.

The candidate's technical skills align well with the job requirements, as they have experience with Python, SQL, Databricks, Tableau, Power BI, and other relevant tools. They also have a strong background in statistics and programming languages such as PySpark, SAS, and NLTK.

The candidate's experience in business-focused AI and data science is extensive, and they have a proven track record of driving projects and coordi

#### Evaluate LLM responses using Ragas

In [12]:
# Set up a basic PromptTemplate as the backbones of the solution
QUERY_PROMPT = PromptTemplate(
    input_variables = ["question"],
        template="""As a job recruiter, you want to understand a candidate's resume, but not specific to a job position. 
                If you don't know the answer, just say that you don't know, don't try to make up an answer.
                Question: {question}
           """
)

retriever = MultiQueryRetriever.from_llm(vector_db.as_retriever(),llm, prompt=QUERY_PROMPT)

# use a ChatPromptTemplate to initiate a conversation, allowing the System to assume a Role
chat_template = """Answer the question based only on the following context: 
{context}
Question: {question}
"""

prompt = ChatPromptTemplate.from_template(chat_template)

chain = (
    {"context":retriever, "question":RunnablePassthrough()}
    | prompt 
    | llm 
    | StrOutputParser()
)

In [13]:
context0 = f"Role: you are a generic recruiter reading a candidate's resume."
q0 = "Based on the dates listed for each position, how many years of experience does this candidate have?"

response0 = chain.invoke(input={'context': context0, 'question': q0})
print(response0)

To calculate the total years of experience, let's break it down:

1. PricewaterhouseCoopers (2010-2016): 6 years
2. Master of Science in Statistics (2007-2009): 2 years

Adding both together: 6 + 2 = 8 years

So, this candidate has approximately 8 years of experience.


In [14]:
q1 = "Based on this resume, what machine libraries is the candidate experienced with?"

response1 = chain.invoke(input={'context': context0, 'question': q1})
print(response1)

The candidate is experienced with the following machine learning/libraries:

1. SBERT
2. NLTK
3. scikit-learn
4. UMAP
5. PyTorch
6. TensorFlow
7. Nixtla TimeGPT
8. Sagemaker DeepAR
9. Catboost

These libraries are used for tasks such as natural language processing, deep learning, reinforcement learning, and other machine learning-related tasks.


In [15]:
data_samples = {
    'question': [
         #q0, 
         q1
    ],
    'answer': [
         #response0.replace("\n", ""), 
         response1.replace("\n", "")

    ],
    'contexts': [
        [
            context0
        ],
        #[
        #    context0
        #]
    ],
    'ground_truth': [
        #'The candidate has a total of 18.5 years of experience.',
        'Tensorflow, PyTorch, scikit-learn, Prophet, Nixtla TimeGPT, Statsmodels, Sagemaker DeepAR, Catboost, sklearn, NLTK, Spacy, Word2Vec, SBERT/HuggingFace, OpenAI, Langchain, YOLO,  Deep learning, Reinforcement Learning, RAG Systems'
    ]
}

dataset = Dataset.from_dict(data_samples)

json_formatted_string = json.dumps(data_samples, indent=4)
print(json_formatted_string)

{
    "question": [
        "Based on this resume, what machine libraries is the candidate experienced with?"
    ],
    "answer": [
        "The candidate is experienced with the following machine learning/libraries:1. SBERT2. NLTK3. scikit-learn4. UMAP5. PyTorch6. TensorFlow7. Nixtla TimeGPT8. Sagemaker DeepAR9. CatboostThese libraries are used for tasks such as natural language processing, deep learning, reinforcement learning, and other machine learning-related tasks."
    ],
    "contexts": [
        [
            "Role: you are a generic recruiter reading a candidate's resume."
        ]
    ],
    "ground_truth": [
        "Tensorflow, PyTorch, scikit-learn, Prophet, Nixtla TimeGPT, Statsmodels, Sagemaker DeepAR, Catboost, sklearn, NLTK, Spacy, Word2Vec, SBERT/HuggingFace, OpenAI, Langchain, YOLO,  Deep learning, Reinforcement Learning, RAG Systems"
    ]
}


In [20]:
# llm_eval = ChatOllama(model=local_llm, temperature=0)

# attempt using llama embeddings as well
# llm_llama3 = ChatOllama(model="llama3.2",verbose=False,timeout=600,num_ctx=8192,disable_streaming=False, temperature=0)
# embeddings_llama3 = OllamaEmbeddings(model="llama3.2")

# evaluator_llm = LangchainLLMWrapper(llm_llama3)
# evaluator_embed = LangchainEmbeddingsWrapper(embeddings_llama3)

evaluator_llm = LangchainLLMWrapper(ChatOpenAI(model="gpt-4o"))
evaluator_embed = LangchainEmbeddingsWrapper(OllamaEmbeddings(model="nomic-embed-text"))

# Test if embeddings wrapper works
try:
    test_embedding = evaluator_embed.embed_query("test text")
    print(f"Embedding dimension: {len(test_embedding)}")
except Exception as e:
    print(f"Embeddings wrapper error: {e}")

Embedding dimension: 768


In [21]:

# Evaluate using Ragas
result = evaluate(
    dataset,
    metrics=[
        context_precision,
        faithfulness,
        answer_relevancy,
        #context_recall
        answer_correctness,
        answer_similarity,
    ],
    llm=evaluator_llm,
    embeddings=evaluator_embed,
    run_config=RunConfig(timeout=300, 
                         max_retries=3, # Reduce retries initially
                         max_wait=300, 
                         log_tenacity=True # Enable logging for debugging
    ),
    raise_exceptions=False # Don't stop on first error
)

print(result)

Evaluating:   0%|          | 0/5 [00:00<?, ?it/s]

{'context_precision': 0.0000, 'faithfulness': 0.0000, 'answer_relevancy': 0.8479, 'answer_correctness': 0.6410, 'semantic_similarity': 0.7861}
