In [5]:
# Importing main.py functions
from main import (
    setup_environment,
    process_resume,
    split_resume_into_sections,
    process_job_posting,
    extract_keywords,
    calculate_keyword_match,
    filter_relevant_keywords,
    enhance_section
)

from parsing_module import extract_headers_with_pdfplumber

setup_environment()


✅ Loaded NEW llm_api with OpenAI SDK v1.x syntax
✅ Environment variables loaded successfully.


In [2]:
resume_file = "docs/sample_resume.pdf"
job_input = """
[Big Data Tools Developer

We build, improve, and maintain one of the highest scaling platforms in the world. Our amazing team of Engineers work on next generation Big Data Platforms that transform how users connect with each other every single day. Yahoo's Big Data Platform drives some of the most demanding applications in the industry. The system handles billions of requests a day and runs on some of the largest Hadoop clusters ever built! 50,000 nodes strong and several multi-thousand node clusters bring scalable computing to a whole new level. We work on problems that cover a wide spectrum - from web services to operating systems and networking layers. Our biggest challenges ahead are designing efficient cloud native big data platforms.

Responsibilities:

Job Monitoring: Overseeing the execution of various data jobs, ensuring they adhere to SLAs and do not encounter issues.
Data Orchestration: Utilizing tools like Airflow to manage the scheduling, execution, and monitoring of data workflows across cloud platforms such as AWS and GCP.
Query Execution and Optimization: Designing and optimizing queries to run efficiently on platforms such as BigQuery, Hive, Pig, and Spark, ensuring high performance and scalability.
Integration and Support: Collaborating with different teams to integrate data flows, provide support for query executions, and handle credentials for secure data operations.
Feature Development: Implementing new features to support advanced query capabilities, including federated queries and lineage tracking.

Required Skills and Qualifications:

Educational Background: A Bachelor's or Master’s degree in Computer Science or equivalent work experience.
Programming Languages: Proficiency in Python is essential for scripting and workflow management; experience with Java and C++ is preferred for backend data operations.
Data Management: Knowledge of data structures, algorithms, and database management systems like SQL, HBase, and BigQuery.
Cloud Technologies: Experience with cloud services, especially AWS (EMR, Glue, S3) and GCP (Dataproc, BigQuery).
Agile Methodology: Comfortable working in an Agile environment with regular sprints, planning, and retrospectives.
System Design: Ability to design large-scale, distributed systems that are highly available and resilient.
OS: Some experience working with Linux/Unix operating systems

Preferred Qualifications:

Experience with development and deployment on public cloud platforms such as AWS, GCP, Azure, or others
Experiencing developing containerized applications and working with container orchestration services
Experience with Apache Hadoop, Presto, Hive, Oozie, Pig, Storm, Spark, Jupyter
Understanding of data structures & algorithms
Knowledge of JVM internals and its performance tuning
Excellent debugging/testing skills, and excellent analytical and problem solving skills
Experience with continuous integration tools such as Jenkins and Hudson
Strong verbal and written communication skills to collaborate effectively with cross-functional teams.]
"""

# Extract text from resume and job posting
resume_text = process_resume(resume_file)
job_text = process_job_posting(job_input)

# Parse sections from resume
sections = split_resume_into_sections(resume_text, pdf_path=resume_file)

# DEBUG: Print pdfplumber headers directly
pdf_headers = extract_headers_with_pdfplumber(resume_file)
print("\n[DEBUG] Headers from pdfplumber:", pdf_headers)


# Extract overall keywords
resume_keywords = extract_keywords(resume_text)
job_keywords = extract_keywords(job_text)

print("✅ Resume and job posting processed successfully.")
print(f"Found Resume Sections: {list(sections.keys())}")




[HEADER CANDIDATE] 'Neal Iyer'
[HEADER CANDIDATE] 'Professional Summary'
[NORMALIZED] 'Professional Summary' → 'summary'
[HEADER CANDIDATE] 'Professional Skills'
[NORMALIZED] 'Professional Skills' → 'skills'
[HEADER CANDIDATE] 'Workforce Planning'
[HEADER CANDIDATE] 'Optimization'
[HEADER CANDIDATE] 'Communication'
[HEADER CANDIDATE] 'Proactive Problem-Solving
Experience'
[HEADER CANDIDATE] 'Budget Analyst'
[HEADER CANDIDATE] 'Budget Analyst'
[HEADER CANDIDATE] 'Cresa
Accounting Analyst'
[HEADER CANDIDATE] 'FrontStream
Fund Accountant'
[HEADER CANDIDATE] 'Education and Certifications'
[normalize_section_name] GPT returned: 'education' for 'Education and Certifications'
[NORMALIZED] 'Education and Certifications' → 'education'
[HEADER CANDIDATE] 'Flatiron Data Science Bootcamp'
[HEADER CANDIDATE] 'HackerRank Intermediate SQL Certification'
[HEADER CANDIDATE] 'Projects'
[FORCED-NORMALIZED] 'Projects' → 'projects' (from font size match)
[INJECTED] Forcing section 'experience' from pdfplum

In [1]:
# === Mock GPT Function ===
def mock_gpt_enhancement(section_name, section_text, job_keywords):
    """
    Simulates GPT enhancement by inserting keywords and tagging improvement areas.
    Replace with real OpenAI API call later.
    """
    print(f"\n--- Enhancing: {section_name.upper()} ---")
    print("Relevant Keywords:", ", ".join(job_keywords))

    # Simulated enhancement (stub)
    enhanced = f"[Improved {section_name} section with keywords: {', '.join(job_keywords)}]\n\n{section_text}"
    return enhanced


# === Sample Data (Replace with your parsed_sections object) ===
parsed_sections = {
    "summary": """
Federal Data Analyst with 4+ years in human capital analytics, data-driven decision-making, and workforce planning. 
Skilled in SQL, Python, Tableau, and Power BI, with a track record of optimizing HR processes through data analysis, 
reporting automation, and visualization.
""".strip(),

    "skills": """
● Data Analytics & Visualization: SQL · Python · Tableau · Power BI · Advanced Excel · Advanced MS Office Suite  
● Human Capital & Workforce Analytics: HR Metrics · Hiring Pipeline Analytics · Employee Retention Analysis
""".strip()
}

# === Sample Keywords ===
job_keywords = [
    "SQL", "Python", "workforce analytics", "reporting automation", 
    "budget forecasting", "Power BI", "stakeholder communication"
]

# === Extract Text from Parsed Sections ===
summary_text = parsed_sections.get("summary", "")
skills_text = parsed_sections.get("skills", "")

# === Enhance Sections ===
enhanced_summary = mock_gpt_enhancement("summary", summary_text, job_keywords)
enhanced_skills = mock_gpt_enhancement("skills", skills_text, job_keywords)

# === Print Results ===
print("\n=== ENHANCED SUMMARY ===\n")
print(enhanced_summary)

print("\n=== ENHANCED SKILLS ===\n")
print(enhanced_skills)



--- Enhancing: SUMMARY ---
Relevant Keywords: SQL, Python, workforce analytics, reporting automation, budget forecasting, Power BI, stakeholder communication

--- Enhancing: SKILLS ---
Relevant Keywords: SQL, Python, workforce analytics, reporting automation, budget forecasting, Power BI, stakeholder communication

=== ENHANCED SUMMARY ===

[Improved summary section with keywords: SQL, Python, workforce analytics, reporting automation, budget forecasting, Power BI, stakeholder communication]

Federal Data Analyst with 4+ years in human capital analytics, data-driven decision-making, and workforce planning. 
Skilled in SQL, Python, Tableau, and Power BI, with a track record of optimizing HR processes through data analysis, 
reporting automation, and visualization.

=== ENHANCED SKILLS ===

[Improved skills section with keywords: SQL, Python, workforce analytics, reporting automation, budget forecasting, Power BI, stakeholder communication]

● Data Analytics & Visualization: SQL · Pytho

In [6]:
import os
from dotenv import load_dotenv
from openai import OpenAI

load_dotenv()
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

# === Step 1: Define GPT-enhancement function ===
def enhance_section_with_gpt(section_name, section_text, job_keywords, client=None, model="gpt-4"):
    """
    Uses GPT to rewrite a resume section (summary or skills).
    """
    if client is None:
        raise ValueError("OpenAI client must be provided.")

    prompt = (
        "You are an expert resume writer.\n\n"
        "Improve the following resume section for clarity, tone, and professionalism.\n"
        "Preserve any bullet formatting if it exists.\n\n"
        f"Integrate the following job-relevant keywords naturally and only where appropriate:\n"
        f"{', '.join(job_keywords)}\n\n"
        f"Section to improve: {section_name.upper()}\n\n"
        f"```\n{section_text}\n```\n\n"
        "Respond only with the improved section text."
    )

    response = client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": prompt}],
        temperature=0.7
    )

    return response.choices[0].message.content.strip()


# === Step 2: Sample data ===
summary_text = """
Federal Data Analyst with 4+ years in human capital analytics, data-driven decision-making, and workforce planning. 
Skilled in SQL, Python, Tableau, and Power BI, with a track record of optimizing HR processes through data analysis, 
reporting automation, and visualization.
""".strip()

skills_text = """
● Data Analytics & Visualization: SQL · Python · Tableau · Power BI · Advanced Excel · Advanced MS Office Suite  
● Human Capital & Workforce Analytics: HR Metrics · Hiring Pipeline Analytics · Employee Retention Analysis
""".strip()

job_keywords = [
    "SQL", "Python", "workforce analytics", "reporting automation", 
    "budget forecasting", "Power BI", "stakeholder communication"
]


# === Step 3: Enhance sections using GPT ===
real_summary = enhance_section_with_gpt("summary", summary_text, job_keywords, client=client)
real_skills = enhance_section_with_gpt("skills", skills_text, job_keywords, client=client)

# === Step 4: Output results ===
print("\n=== REAL ENHANCED SUMMARY ===\n")
print(real_summary)

print("\n=== REAL ENHANCED SKILLS ===\n")
print(real_skills)



=== REAL ENHANCED SUMMARY ===

SUMMARY

Highly accomplished Federal Data Analyst offering over four years of experience in the realm of human capital analytics, data-driven decision-making, and strategic workforce planning. Proficient in leveraging SQL and Python in conjunction with visualization tools like Tableau and Power BI to enhance workforce analytics. Demonstrated expertise in reporting automation, budget forecasting, and stakeholder communication, leading to optimized HR processes and informed decision-making.

=== REAL ENHANCED SKILLS ===

● Data Analytics & Visualization: Proficient in SQL and Python for data analysis and manipulation. Expertise in creating interactive dashboards and visualizations using Tableau and Power BI. Highly skilled in Advanced Excel and MS Office Suite.
● Workforce Analytics: Adept at utilizing HR metrics in workforce analytics to build comprehensive hiring pipeline analytics and employee retention analysis. Skilled in reporting automation, budget 