# In [1]:  (notebook cell prompt left over from the export)
import os
import json
from pathlib import Path
from PyPDF2 import PdfReader
from langchain.chat_models import ChatOpenAI
from langchain.prompts import PromptTemplate
import pandas as pd

# === Configuration ===
# PDF resume that is compared against every job description.
resume_path = Path("resume_folder/resume.pdf")
# Folder scanned for job descriptions; only "*.txt" files are picked up.
jd_folder = Path("InputJD_Folder")
# Destination of the CSV report (one row per job description).
output_csv = Path("jd_matching_results.csv")

# === Load Resume Text ===
def extract_text_from_pdf(pdf_path):
    """Return the text of every page in the PDF, joined by single spaces.

    Pages for which ``extract_text()`` yields a falsy value contribute an
    empty string, so they still leave a separator in the result.
    """
    page_texts = []
    for page in PdfReader(str(pdf_path)).pages:
        extracted = page.extract_text()
        page_texts.append(extracted if extracted else "")
    return " ".join(page_texts)

# === Chunking ===
def chunk_text(text, max_tokens=3000):
    """Split *text* into sentence-aligned chunks of bounded size.

    Sentences are detected by splitting on ``". "``. Consecutive sentences are
    packed into a chunk (separated by one space) as long as the chunk stays
    within the size budget.

    Args:
        text: The text to split. Empty or ``None`` input yields ``[]``.
        max_tokens: Maximum chunk length. NOTE: despite the name (kept for
            backward compatibility) this is measured in *characters*, not
            model tokens.

    Returns:
        A list of non-empty chunk strings. A single sentence longer than
        ``max_tokens`` is emitted as its own (oversized) chunk rather than
        being cut mid-sentence.
    """
    if not text:
        return []

    pieces = text.split('. ')
    # split() removed the ". " delimiter from every piece except the last one,
    # so restore the period only there; the final piece keeps whatever
    # punctuation the source text already had (avoids "foo..").
    sentences = [p.strip() + "." for p in pieces[:-1] if p.strip()]
    tail = pieces[-1].strip()
    if tail:
        sentences.append(tail)

    chunks = []
    current = []
    current_len = 0
    for sentence in sentences:
        # +1 accounts for the joining space when the chunk is not empty.
        extra = len(sentence) + (1 if current else 0)
        # Only flush when there is something to flush; this prevents an empty
        # chunk when the very first sentence already exceeds the budget.
        if current and current_len + extra > max_tokens:
            chunks.append(" ".join(current))
            current, current_len = [], 0
            extra = len(sentence)
        current.append(sentence)
        current_len += extra

    if current:
        chunks.append(" ".join(current))
    return chunks

# === Match Resume Against JD ===
def _coerce_percentage(value):
    """Convert a model-supplied score such as 85, "85" or "85%" to a float in 0-100."""
    try:
        score = float(str(value).strip().rstrip("%"))
    except ValueError:
        return 0.0
    if score != score:  # NaN never equals itself
        return 0.0
    return max(0.0, min(100.0, score))


def _parse_match_response(raw_response, jd_name):
    """Parse the model's final reply into a result dict, or return a zero-score fallback row."""
    fallback = {
        "JD Name": jd_name,
        "Match Percentage": 0,
        "Key Matching Skills": [],
        "Resume Fit Summary": "⚠️ Could not parse response properly."
    }
    reply = str(raw_response)
    # Models often wrap the JSON in Markdown fences or add a sentence around
    # it, so parse only the outermost {...} span.
    start, end = reply.find("{"), reply.rfind("}")
    if start == -1 or end <= start:
        return fallback
    try:
        parsed = json.loads(reply[start:end + 1])
    except ValueError:  # json.JSONDecodeError is a ValueError subclass
        return fallback

    # Guarantee the columns the CSV report relies on, and make the score
    # numeric so that rows can be sorted by it.
    parsed.setdefault("JD Name", jd_name)
    parsed.setdefault("Key Matching Skills", [])
    parsed.setdefault("Resume Fit Summary", "")
    parsed["Match Percentage"] = _coerce_percentage(parsed.get("Match Percentage"))
    return parsed


def match_resume_with_jd(resume_text, jd_text, jd_name):
    """Score how well a resume matches one job description using an LLM.

    The resume is split into chunks, each chunk is summarized against the job
    description, and the summaries are then merged in a final request that
    asks the model for a JSON verdict.

    Args:
        resume_text: Full text of the resume.
        jd_text: Full text of the job description.
        jd_name: Label for the job description; echoed in the result.

    Returns:
        A dict with the keys "JD Name", "Match Percentage" (numeric, 0-100),
        "Key Matching Skills" and "Resume Fit Summary". If the model's reply
        cannot be parsed as JSON, a fallback dict with a score of 0 is
        returned instead.

    Raises:
        RuntimeError: If the ``OPENAI_API_KEY`` environment variable is unset.
    """
    # SECURITY: the API key must never be committed to source. It is read from
    # the environment; any key that was previously embedded in this file
    # should be considered leaked and revoked.
    api_key = os.environ.get("OPENAI_API_KEY")
    if not api_key:
        raise RuntimeError(
            "OPENAI_API_KEY environment variable is not set; "
            "cannot contact the OpenAI API."
        )

    chunks = chunk_text(resume_text)

    # Step 1: Summarize each resume chunk
    llm = ChatOpenAI(
        temperature=0.7,
        max_tokens=1000,
        model="gpt-3.5-turbo",
        openai_api_key=api_key,
    )

    summary_prompt = PromptTemplate(
        input_variables=["text", "jd"],
        template="""
You are a smart AI assistant helping a student find a job. Summarize how well the resume content below matches the job description.

Resume:
\"\"\"
{text}
\"\"\"

Job Description:
\"\"\"
{jd}
\"\"\"

List relevant skills, strengths, and matching points. Keep it brief but meaningful.
"""
    )

    summaries = [
        llm.predict(summary_prompt.format(text=chunk, jd=jd_text))
        for chunk in chunks
    ]

    # Step 2: Final match score and comments
    # Separate the per-chunk summaries so they do not run into each other.
    joined_summaries = "\n\n".join(summaries)
    # json.dumps escapes quotes/backslashes in the name so the JSON skeleton
    # shown to the model stays valid.
    jd_name_json = json.dumps(jd_name)
    merge_prompt = f"""
You're an AI career assistant. Based on the summaries of the student's resume chunks below:

{joined_summaries}

And this job description:

{jd_text}

Evaluate and return a structured JSON with:
{{
  "JD Name": {jd_name_json},
  "Match Percentage": "<Number between 0-100>",
  "Key Matching Skills": [],
  "Resume Fit Summary": "<Short summary why this job is a good/bad fit>"
}}
Return only valid JSON.
"""

    final_response = llm.predict(merge_prompt)
    return _parse_match_response(final_response, jd_name)

# === Main Execution ===
# Abort with a non-zero exit status when required inputs are missing.
# `raise SystemExit` is used instead of `exit()`, which is a convenience
# injected by the `site` module and would report success (status 0).
if not resume_path.exists():
    print("❌ Resume not found.")
    raise SystemExit(1)

resume_text = extract_text_from_pdf(resume_path)
# Sorted so that job descriptions are processed in a reproducible order.
jd_files = sorted(jd_folder.glob("*.txt"))

if not jd_files:
    print("❌ No job descriptions found.")
    raise SystemExit(1)

results = []

print("🔍 Matching resume with job descriptions...")
for jd_file in jd_files:
    jd_text = jd_file.read_text(encoding="utf-8")
    jd_name = jd_file.stem
    print(f"➡️ Checking against JD: {jd_name}")
    match_result = match_resume_with_jd(resume_text, jd_text, jd_name)
    results.append(match_result)

# Save to CSV
df = pd.DataFrame(results)
if "Match Percentage" not in df.columns:
    df["Match Percentage"] = 0
# The model may return the score as a string ("85" / "85%") while the
# fallback row uses an int; normalize to numbers so the sort is numeric
# rather than lexicographic (or a TypeError on mixed types).
df["Match Percentage"] = pd.to_numeric(
    df["Match Percentage"].astype(str).str.rstrip("%"), errors="coerce"
).fillna(0)
df = df.sort_values(by="Match Percentage", ascending=False)
df.to_csv(output_csv, index=False)
print(f"✅ Matching completed. Results saved to {output_csv.resolve()}")


# --- Captured cell output (notebook export) ---
# 🔍 Matching resume with job descriptions...
# ➡️ Checking against JD: dataanalyst
#
#   warn_deprecated(
#   warn_deprecated(
#
# ➡️ Checking against JD: softwaretester
# ➡️ Checking against JD: webdeveloper
# ✅ Matching completed. Results saved to C:\Users\Vilas\document scanner\stage4\jd_matching_results.csv
