**Notebook for Experimentation: resume–JD match scoring and interview-question generation**
***

In [None]:
# %pip install langchain langchain-community langchain-text-splitters langchain-chroma sentence-transformers pypdf mistralai faiss-cpu python-dotenv

In [None]:
import os
import re

from dotenv import load_dotenv
from mistralai import Mistral
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS

In [None]:
# Read the .env file, then look up the Mistral API key from the environment.
load_dotenv()
api_key = os.getenv("MISTRAL_KEY")

# Stop right away when the key is absent or empty; otherwise confirm it was found.
if api_key:
    print('Key fetched')
else:
    raise ValueError("Missing MISTRAL_KEY in .env")

In [None]:
# Name of the Mistral chat model passed to the chat-completion calls below.
MODEL = "mistral-small-latest"

# Shared Mistral client, authenticated with the API key fetched earlier.
client = Mistral(api_key=api_key)

In [None]:
# --------- Load PDFs ----------
def load_pdf(path):
    """Load the PDF at ``path`` with PyPDFLoader and return whatever its ``load()`` yields."""
    return PyPDFLoader(path).load()

In [None]:
def rag_impl(resume_docs, jd_docs, min_match=60):
    """Screen a resume against a job description and, on a pass, print interview questions.

    The complete resume and JD texts are scored first via ``get_match_percentage``.
    Only when that score reaches ``min_match`` are the documents chunked, embedded
    and indexed so that ``generate_questions`` can retrieve context from them.

    Args:
        resume_docs: Loaded resume pages; each item must expose ``page_content``.
        jd_docs: Loaded job-description pages; each item must expose ``page_content``.
        min_match: Minimum match score (in percent) required to continue to
            question generation. Defaults to 60.

    Returns:
        None. The score, the decision and any generated questions are printed.
    """
    # 1. Direct comparison on the full texts (more accurate than RAG for scoring).
    full_resume_text = "\n".join(d.page_content for d in resume_docs)
    full_jd_text = "\n".join(d.page_content for d in jd_docs)

    print("⏳ Analyzing match...")
    match_pct = get_match_percentage(full_resume_text, full_jd_text)
    print(f"\n🎯 Resume–JD Match: {match_pct}%")

    if match_pct < min_match:
        print(f"❌ Match below {min_match:g}%. Candidate rejected.")
        return

    # 2. RAG is used only for the deeper question-generation step.
    print("✅ Match confirmed. Indexing documents for questions...")
    documents = resume_docs + jd_docs
    splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)
    chunks = splitter.split_documents(documents)

    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    vectorstore = FAISS.from_documents(chunks, embeddings)
    # k=10 gives generate_questions a wider pool of chunks to pick from.
    retriever = vectorstore.as_retriever(search_kwargs={"k": 10})

    questions = generate_questions(retriever)
    print("\n--- Generated Interview Questions ---")
    print(questions)

def _parse_match_score(raw_reply):
    """Return the first number found in ``raw_reply`` clamped to 0-100, or 0.0 if there is none."""
    found = re.search(r"-?\d+(?:\.\d+)?", raw_reply)
    if found is None:
        return 0.0
    return max(0.0, min(100.0, float(found.group())))


def get_match_percentage(resume_text, jd_text):
    """Ask the chat model for a 0-100 match score between a resume and a job description.

    Args:
        resume_text: Full text of the candidate's resume.
        jd_text: Full text of the job description.

    Returns:
        The score as a float in the range 0-100. When the model's reply contains
        no number at all, 0.0 is returned so the candidate is treated as a mismatch.
    """
    # --------- Match Percentage Prompt ----------
    prompt = f"""
    You are a Senior ATS Recruiter. Your task is to perform a strict 'Phase 1' screening.

    ### CRITICAL FILTERING RULES:
    1. ROLE ALIGNMENT: Identify the target job title in the JD and the candidate's professional identity in the Resume.
       - If the candidate is a 'Graphic Designer' applying for 'Data Scientist', REJECT (Score < 10).
       - Do not allow 'transferable skills' to bypass a complete lack of core domain experience.
    2. EXPERIENCE GAP:
       - If the JD requires 5+ years and the candidate has < 2 years, REJECT (Score < 30).
    3. SKILL SYNERGY:
       - Match must-have tools (e.g., Python, AWS, Docker). If the primary stack is missing, REJECT.

    ### SCORING SYSTEM:
    - 0-30: Total Mismatch (Wrong role or zero relevant experience)
    - 31-60: Weak Match (Right domain, but missing 50% of core tools/seniority)
    - 61-85: Strong Match (Has 80% of skills and correct seniority)
    - 86-100: Perfect Match (All skills + exact industry experience)

    RESUME: {resume_text}
    JD: {jd_text}

    OUTPUT: Provide ONLY the numerical score (0-100).
    """
    response = client.chat.complete(
        model=MODEL,
        messages=[{"role": "user", "content": prompt}]
    )
    # The model is told to answer with a bare number, but replies such as "85%",
    # "**85**" or "Score: 85" still occur, so the first number in the reply is used.
    reply = response.choices[0].message.content or ""
    return _parse_match_score(str(reply))

In [None]:
# --------- Question Generation Prompt ----------
# Template for the interview-question request. The single `{context}` placeholder
# is filled with str.format() in generate_questions, so any other literal braces
# added to this text would have to be doubled (`{{` / `}}`).
QUESTION_PROMPT = """
You are a Senior Technical Interviewer. Your goal is to conduct a deep-dive technical assessment.

TASK:
Based on the provided CONTEXT (which includes both the Job Description and the Candidate's Resume), generate exactly 10 high-quality interview questions.

RULES:
- Be specific: Reference actual technologies and project names found in the context.
- No Fluff: Do not provide answers, introductions, or feedback.
- Difficulty: Senior-level. Focus on "Why" and "How" rather than "What is".

STRUCTURE:

### 1. Technical/Skill-Based Questions (5 questions)
Focus on the intersection of the JD requirements and the candidate's stated expertise. Challenge their understanding of the tools they claim to know.

### 2. Project-Based Questions (3 questions)
Select the most relevant projects from the resume. Ask about architecture, technical trade-offs, or specific challenges mentioned.

### 3. Scenario-Based Questions (2 questions)
Create hypothetical technical hurdles the candidate would face *in this specific role* based on the JD's responsibilities.

CONTEXT:
{context}
"""

In [None]:
def generate_questions(retriever, chunks_per_query=4):
    """Retrieve JD- and resume-oriented context and ask the chat model for interview questions.

    Args:
        retriever: Object with an ``invoke(query)`` method returning a list of
            items that expose ``page_content``.
        chunks_per_query: Maximum number of top results taken from each of the
            two queries. Defaults to 4.

    Returns:
        The content of the model's first reply choice.
    """
    jd_hits = retriever.invoke("core job responsibilities key requirements must-have skills technologies")
    resume_hits = retriever.invoke("candidate projects experiences achievements tools used skills demonstrated")

    # Both queries go to the same retriever, so one chunk can be returned by each of
    # them. dict.fromkeys drops the repeats while keeping first-seen order, which
    # keeps the prompt free of duplicated passages.
    selected = jd_hits[:chunks_per_query] + resume_hits[:chunks_per_query]
    passages = list(dict.fromkeys(doc.page_content for doc in selected))
    context = "\n\n".join(passages)

    response = client.chat.complete(
        model=MODEL,
        messages=[
            {"role": "user", "content": QUESTION_PROMPT.format(context=context)}
        ]
    )
    return response.choices[0].message.content

In [None]:
# --------- Pipeline ----------
if __name__ == "__main__":

    print('in main')

    # Input PDFs. Set RESUME_PDF / JD_PDF in the environment to point at other
    # files; without them the original sample paths below are used as defaults.
    resume_path = os.getenv(
        "RESUME_PDF",
        r'C:\Users\Admin\Downloads\Git_Clone\AI-Tools\AI_Interview\data\Rajat__Sharma_AI_ML.pdf',
    )
    jd_path = os.getenv(
        "JD_PDF",
        r'C:\Users\Admin\Downloads\Git_Clone\AI-Tools\AI_Interview\data\JD_ML.pdf',
    )

    # Load both PDFs.
    resume_docs = load_pdf(resume_path)
    jd_docs = load_pdf(jd_path)

    print('Documents fetched!')

    # Announce the work before it starts rather than after it has finished.
    print("\nProcessing...\n")

    # Score the match and, if it passes, generate interview questions.
    rag_impl(resume_docs, jd_docs)