**Notebook for Experimentation!!**
***

In [None]:
# pip install python-dotenv mistralai langchain langchain-community langchain-text-splitters langchain-chroma sentence-transformers pypdf faiss-cpu

In [None]:
import os
from dotenv import load_dotenv
from mistralai import Mistral
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS

In [None]:
# Read the local .env file so MISTRAL_KEY can be looked up in the environment.
load_dotenv()

# The Mistral client cannot be created without a key, so fail fast when it is absent.
api_key = os.environ.get("MISTRAL_KEY")
if not api_key:
    raise ValueError("Missing MISTRAL_KEY in .env")

print('Key fetched')

In [None]:
# Chat model name passed to every completion request in this notebook.
MODEL = "mistral-small-latest"

# Shared Mistral API client, authenticated with the key read from the environment.
client = Mistral(api_key=api_key)

In [None]:
# --------- Load PDFs ----------
def load_pdf(path):
    """Load the PDF at ``path`` with ``PyPDFLoader`` and return the loaded documents."""
    return PyPDFLoader(path).load()

In [None]:
def rag_impl(resume_docs, jd_docs, min_match=60):
    """Score a resume against a job description and print interview questions.

    The match score is computed from the full text of both documents. Only when
    the score reaches ``min_match`` are the documents chunked, embedded and
    indexed in FAISS so that retrieved chunks can feed question generation.

    Args:
        resume_docs: Loaded resume documents; each item must expose
            ``page_content``.
        jd_docs: Loaded job-description documents of the same kind.
        min_match: Minimum match percentage required to continue to question
            generation. Defaults to 60.

    Returns:
        None. The score, the rejection notice and the questions are printed.
    """
    # 1. Direct comparison for the match score (author's note: more accurate
    #    than scoring on retrieved chunks).
    full_resume_text = "\n".join(d.page_content for d in resume_docs)
    full_jd_text = "\n".join(d.page_content for d in jd_docs)

    print("⏳ Analyzing match...")
    match_pct = get_match_percentage(full_resume_text, full_jd_text)
    print(f"\n🎯 Resume–JD Match: {match_pct}%")

    if match_pct < min_match:
        print(f"❌ Match below {min_match}%. Candidate rejected.")
        return

    # 2. RAG only for deep question generation.
    print("✅ Match confirmed. Indexing documents for questions...")
    documents = resume_docs + jd_docs
    splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)
    chunks = splitter.split_documents(documents)

    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    vectorstore = FAISS.from_documents(chunks, embeddings)
    # k=10 retrieves more chunks so the question prompt gets broader context.
    retriever = vectorstore.as_retriever(search_kwargs={"k": 10})

    questions = generate_questions(retriever)
    print("\n--- Generated Interview Questions ---")
    print(questions)

def get_match_percentage(resume_text, jd_text):
    """Ask the chat model for a 0-100 resume/job-description match score.

    Args:
        resume_text: Full plain text of the resume.
        jd_text: Full plain text of the job description.

    Returns:
        The first number found in the model's reply as a float, capped at
        100.0. Returns 0.0 (after printing a warning) when the reply contains
        no number at all.
    """
    import re  # local import so this function does not depend on another cell

    prompt = f"""
    You are a professional ATS (Applicant Tracking System).
    
    TASK:
    1. Analyze the RESUME and JOB DESCRIPTION provided below.
    2. Calculate a match percentage (0-100) based on:
       - Technical Skills & Tools
       - Years of Experience
       - Project Relevance
    
    RESUME:
    ---
    {resume_text}
    ---

    JOB DESCRIPTION:
    ---
    {jd_text}
    ---

    OUTPUT INSTRUCTIONS:
    Output ONLY the numerical value between 0 and 100. Do not include words or symbols.
    """
    response = client.chat.complete(
        model=MODEL,
        messages=[{"role": "user", "content": prompt}]
    )

    reply = response.choices[0].message.content
    # Models do not always obey "number only": accept replies such as "85%",
    # "Score: 85" or "85/100" by taking the first plain decimal number.
    # Matching digits only also keeps "nan"/"inf" from being parsed as a score.
    found = re.search(r"\d+(?:\.\d+)?", reply) if isinstance(reply, str) else None
    if found is None:
        # Keep the 0.0 fallback, but say so instead of rejecting silently.
        print(f"Could not parse a match percentage from model reply: {reply!r}")
        return 0.0

    # Cap at 100 so an out-of-range reply cannot exceed the documented scale.
    return min(float(found.group()), 100.0)

In [None]:
# --------- Match Percentage Prompt ----------
# Template asking the model to act as an ATS and output a 0-100 match score.
# ``{context}`` is a placeholder for the combined resume + job description text
# (presumably filled with ``str.format``, as is done for QUESTION_PROMPT).
# NOTE(review): nothing in the visible code references MATCH_PROMPT;
# get_match_percentage builds its own inline prompt. Confirm whether this
# constant is still needed.
MATCH_PROMPT = """
You are an ATS system.

Given the CONTEXT below (resume + job description):
1. Calculate percentage match between resume and JD.
2. Consider skills, experience, tools, projects.
3. Output ONLY a number between 0 and 100.

CONTEXT:
{context}
"""

In [None]:
# --------- Question Generation Prompt ----------
# Template for the interviewer prompt; ``{context}`` is filled with the
# retrieved resume / job-description text via ``str.format``.
# The four requested question groups are numbered 1-4.
QUESTION_PROMPT = """
You are a technical interviewer.

Using the CONTEXT:
- Job description requirements
- Skills mentioned in resume
- Projects done by candidate

Generate:
1. 5 technical questions on the job description
2. 3 project-based questions on the projects done by candidate
3. 2 skill-based questions on the skills mentioned in resume
4. 2 scenario-based questions based on the job description

CONTEXT:
{context}
"""

In [None]:
def generate_questions(retriever):
    """Build the interviewer prompt from retrieved chunks and return the model's reply.

    ``retriever`` is invoked with a fixed search string. The ``page_content`` of
    the returned documents is joined with newlines and substituted into
    ``QUESTION_PROMPT``. The content of the first completion choice is returned.
    """
    retrieved = retriever.invoke("skills projects requirements")
    prompt = QUESTION_PROMPT.format(
        context="\n".join(doc.page_content for doc in retrieved)
    )

    completion = client.chat.complete(
        model=MODEL,
        messages=[{"role": "user", "content": prompt}],
    )
    return completion.choices[0].message.content

In [None]:
# --------- Pipeline ----------
# Entry point: load the two PDFs, then run the match + question pipeline.
if __name__ == "__main__":

    print('in main')

    # Hard-coded sample inputs; the trailing input() calls are the interactive
    # alternative for choosing files at run time.
    resume_path = r'C:\Users\Admin\Downloads\Git_Clone\AI-Tools\AI_Interview\data\Rajat__Sharma_AI_ML.pdf' # input("Enter resume PDF path (e.g. Rajat__Sharma_AI_ML.pdf): ").strip()
    jd_path = r'C:\Users\Admin\Downloads\Git_Clone\AI-Tools\AI_Interview\data\JD_ML.pdf' # input("Enter Job Description PDF path: ").strip()

    # Loading pdf files
    resume_docs = load_pdf(resume_path)
    jd_docs = load_pdf(jd_path)

    print('Documents fetched!')

    # Announce the slow step before it starts; printing this after rag_impl
    # returned made the progress message appear once the work was already done.
    print("\nProcessing...\n")

    # Score the resume against the JD and, on a sufficient match, print questions.
    rag_impl(resume_docs, jd_docs)