In [1]:
import os
from langchain_huggingface import HuggingFaceEndpoint, ChatHuggingFace
from pathlib import Path 
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser 
from langchain_huggingface import HuggingFaceEmbeddings  
from langchain_community.vectorstores import Chroma  
# path configuration 

BASE_DIR = Path.cwd().parent   # project root
data_path = BASE_DIR / "data"
db_path = BASE_DIR / "vector_db" 

# 1. the brain 
os.environ["HUGGINGFACEHUB_API_TOKEN"]= "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"
base = HuggingFaceEndpoint(
    repo_id="mistralai/Mistral-7B-Instruct-v0.2", 
    temperature=0.5, 
    
)  # with 0.5 tempreture the answers will be objective 
llm = ChatHuggingFace(llm = base)   

# 2. we connect to our exsiting vector database 
embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")  
vectorstore =  Chroma(persist_directory=str(db_path), embedding_function=embeddings) 
retriver = vectorstore.as_retriever(search_kwargs = {"k": 4})  

# 3. controlled prompt 
template = """
You are a Technical Recruiter performing an evidence-based CV analysis.

You MUST follow these rules strictly:
- Use ONLY information explicitly stated in the CONTEXT.
- Do NOT infer years of experience unless a duration is explicitly written.
- Do NOT assign seniority labels (e.g., “experienced”, “senior”, “expert”) unless explicitly stated.
- If information is missing, write: “Not specified in CV”.
- Never generalize beyond projects, coursework, or student experience if the candidate is a student.

========================
CV EVIDENCE (SOURCE OF TRUTH)
========================
{context}

========================
ANALYSIS TASK
========================
Answer the user question below using ONLY verifiable evidence from the CV.

{question}

========================
RESPONSE FORMAT (STRICT)
========================

1. Candidate Profile
- Current status (e.g., Student, Graduate) — must match CV wording
- Field of study and institution
- Career objective if stated

2. Verified Technical Skills (Evidence-Based)
For each skill:
- Skill name
- Context of use 
- Duration → ONLY if explicitly stated, otherwise “Not specified in CV”

Example:
- Python — Used in academic and personal data science projects (duration not specified in CV)

3. Project & Practical Experience
- List projects explicitly mentioned
- Describe objectives and techniques used
- Avoid professional role language if not stated

4. Skill Gaps / Missing Evidence
- List relevant skills NOT found in the CV
- Do not speculate or assume future capability

5. Hiring Signal (Student / Junior Context)
Choose ONE:
- Strong Junior Profile
- Potential Junior Profile
- Not a Fit for Junior Roles 
- senior role

Justify in ONE sentence using CV evidence only.

========================
PROHIBITIONS
========================
- No invented experience
- No seniority inflation
- No assumptions about industry experience
- No extrapolation beyond CV text


"""  
prompt = ChatPromptTemplate.from_template(template)
# the modern chain 

def candidate_analyzer(candidate_id: str, question: str):
    retriever = vectorstore.as_retriever(
        search_kwargs={"k": 4, "filter": {"candidate_id": candidate_id}}
    )

    chain = (
        {"context": retriever, "question": RunnablePassthrough()}
        | prompt
        | llm
        | StrOutputParser()
    )

    return chain.invoke(question)

 


if __name__ == "__main__":
    # Example Production Queries:
    print(candidate_analyzer("juliette_daher_cv","tell me the name of the candidate and Give me a 3-sentence executive summary of juliette daher cv.")) 

""" 
other query exampless : 
# One or more candidate profiles (like the text you used)

Your system can answer:

“What are this candidate’s main strengths?”

“Is this profile suitable for a data science role?”

“What skills are missing for ML engineer?”

“Give a one-sentence executive summary”   

 """

  from .autonotebook import tqdm as notebook_tqdm
  vectorstore =  Chroma(persist_directory=str(db_path), embedding_function=embeddings)


 1. Candidate Profile
- Current status: Student
- Field of study: Data Science
- Career objective: Not specified in CV

2. Verified Technical Skills (Evidence-Based)
- Customer Service: Used in various roles, including Salesperson at The Grinder Coffee Shop and Cashier-Customer Service Associate at Gray-Mackenzie
- Teamwork: Led and organized a team of 6 members at Farmasi
- Communication: Proficient in English and French
- Computer Handling: Not specified in CV
- Telephone Handling: Not specified in CV

3. Project & Practical Experience
- Not specified in CV

4. Skill Gaps / Missing Evidence
- Programming languages: No explicit mention in CV
- Data Science tools and techniques: No evidence of use in practical experience or education

5. Hiring Signal (Student / Junior Context)
- Potential Junior Profile: Has experience in customer service and team management, currently pursuing a degree in Data Science.


' \nother query exampless : \n# One or more candidate profiles (like the text you used)\n\nYour system can answer:\n\n“What are this candidate’s main strengths?”\n\n“Is this profile suitable for a data science role?”\n\n“What skills are missing for ML engineer?”\n\n“Give a one-sentence executive summary”   \n\n '

In [8]:
# list candidate_ids stored in Chroma
items = vectorstore._collection.get(include=["metadatas"])
candidate_ids = sorted({m.get("candidate_id") for m in items["metadatas"] if m.get("candidate_id")})
print(candidate_ids)


['juliette_daher_CV', 'justin awad cv']
