In [1]:
# ✅ Load matcher functions
from keyword_matcher import extract_keywords, compute_keyword_match, filter_relevant_keywords

# ✅ Load parsing functions
from parsing_module import extract_text_pdfplumber, split_resume_into_sections

# ✅ Load and parse full resume
resume_text = extract_text_pdfplumber("docs/sample_resume.pdf")
sections = split_resume_into_sections(resume_text)
full_resume_text = "\n".join(sections.values())

# ✅ Sample job description (paste your real one if needed)
job_posting = """
People Data Analyst:

As a People Data Analyst, you will play a critical role in CAVA’s People & Culture function, specifically within the Total Rewards team. This role is responsible for producing regular reporting and data analysis, key in supporting strategic decision-making across People & Culture, while also collaborating with cross-functional teams across the organization. Your insights will help drive equitable, data-driven, and competitive strategies to attract and retain top talent. This role will provide data backed recommendations to help drive workforce strategy and support industry best practices.



What You’ll Do: 

Develop Periodic Reporting: Assist in creating and enhancing daily reporting to support various business areas.
Audit our People Data: Transform raw data so that it can be used in creating reports, maintaining reports, and summarizing results associated with our audits.
Interpret our People Data: Identify trends in data, comparing to both internal and external benchmarks.
Collaborate with Talent Acquisition & HR: Partner with recruiting and HR teams to merge data from outside of our HRIS systems to provide new insights and assist in creating new metrics.
Assist in Cross-Functional Analysis: Develop expertise in the organization’s people data and how it can be leveraged across other areas of the organization.
Maintain People Analytics Dashboards: Assist in dashboard maintenance and development.
Measure and Track Impact: Identify dates associated with initiatives to inform the business of the results of initiatives.


The Qualifications:

Bachelor’s degree in Human Resources, Business, Finance, Statistics, or a related field.
2+ years of experience in HR analytics, or data analysis in a corporate environment.
Proficiency in Excel (advanced), HRIS systems, and data visualization tools (e.g., Tableau, Power BI).
Experience working with SQL, Python, or another programming language preferred.
"""

# ✅ Extract keywords
raw_keywords = extract_keywords(job_posting)
print("🔎 Raw extracted keywords:")
from pprint import pprint
pprint(raw_keywords)

# ✅ Filter with GPT
filtered_keywords = filter_relevant_keywords(raw_keywords, model="gpt-4")
print("\n🧠 GPT-filtered relevant keywords:")
pprint(filtered_keywords)

# ✅ Run match scoring
match_result = compute_keyword_match(full_resume_text, job_posting)
print("\n📊 Keyword Match Result:")
pprint(match_result)



🔎 Raw extracted keywords:
{'acquisition',
 'acros',
 'advanced',
 'also',
 'analysi',
 'analyst',
 'analytic',
 'another',
 'area',
 'associated',
 'attract',
 'audit',
 'bachelor',
 'backed',
 'benchmark',
 'best',
 'both',
 'busines',
 'cava',
 'collaborate',
 'collaborating',
 'comparing',
 'competitive',
 'corporate',
 'creating',
 'critical',
 'cross-functional',
 'culture',
 'daily',
 'dashboard',
 'data',
 'data-driven',
 'date',
 'decision-making',
 'degree',
 'develop',
 'development',
 'drive',
 'enhancing',
 'environment',
 'equitable',
 'excel',
 'experience',
 'expertise',
 'external',
 'field',
 'finance',
 'function',
 'help',
 'hris',
 'human',
 'identify',
 'impact',
 'industry',
 'inform',
 'initiative',
 'insight',
 'internal',
 'interpret',
 'language',
 'leveraged',
 'maintain',
 'maintaining',
 'maintenance',
 'measure',
 'merge',
 'metric',
 'organization',
 'other',
 'outside',
 'partner',
 'people',
 'periodic',
 'play',
 'power',
 'practice',
 'preferred',
 'p

In [2]:
from keyword_classifier import classify_keywords

# 🔍 Classify the GPT-filtered relevant keywords
classified_keywords = classify_keywords(filtered_keywords)

# 🖨️ Display categorized output
for category, items in classified_keywords.items():
    print(f"\n{category.upper()} ({len(items)}):")
    for item in items:
        print(f" - {item}")

[GPT fallback] 'acquisition' → domain_knowledge
[GPT fallback] 'analytic' → domain_knowledge
[GPT fallback] 'benchmark' → domain_knowledge
[GPT fallback] 'collaborate' → soft_skill
[GPT fallback] 'competitive' → soft_skill
[GPT fallback] 'corporate' → domain_knowledge
[GPT fallback] 'cross-functional' → soft_skill
[GPT fallback] 'decision-making' → soft_skill
[GPT fallback] 'develop' → soft_skill
[GPT fallback] 'expertise' → domain_knowledge
[GPT fallback] 'maintain' → soft_skill
[GPT fallback] 'maintenance' → domain_knowledge
[GPT fallback] 'metric' → domain_knowledge
[GPT fallback] 'report' → domain_knowledge
[GPT fallback] 'statistic' → domain_knowledge
[GPT fallback] 'strategic' → soft_skill
[GPT fallback] 'strategy' → soft_skill
[GPT fallback] 'support' → soft_skill
[GPT fallback] 'system' → tool_platform
[GPT fallback] 'talent' → soft_skill
[GPT fallback] 'tool' → tool_platform
[GPT fallback] 'workforce' → domain_knowledge

TOOL_PLATFORM (7):
 - dashboard
 - excel
 - hris
 - pyth

In [3]:
from keyword_scorer import score_keywords

# 🟢 Input: classified JD keywords (by category) and flat matched list from resume
classified = classify_keywords(filtered_keywords)
matched = match_result["matched_keywords"]

# 🧠 Run keyword scoring
score_output = score_keywords(classified, matched)

# 📊 Print results
print("\n🎯 Final Scoring Breakdown:")
for category, score in score_output.items():
    label = "⭐ FINAL SCORE" if category == "final_score" else f"• {category}"
    print(f"{label:25}: {score}")


[GPT fallback] 'acquisition' → domain_knowledge
[GPT fallback] 'analytic' → domain_knowledge
[GPT fallback] 'benchmark' → domain_knowledge
[GPT fallback] 'collaborate' → soft_skill
[GPT fallback] 'competitive' → soft_skill
[GPT fallback] 'corporate' → domain_knowledge
[GPT fallback] 'cross-functional' → soft_skill
[GPT fallback] 'decision-making' → soft_skill
[GPT fallback] 'develop' → soft_skill
[GPT fallback] 'expertise' → domain_knowledge
[GPT fallback] 'maintain' → soft_skill
[GPT fallback] 'maintenance' → domain_knowledge
[GPT fallback] 'metric' → domain_knowledge
[GPT fallback] 'report' → domain_knowledge
[GPT fallback] 'statistic' → domain_knowledge
[GPT fallback] 'strategic' → soft_skill
[GPT fallback] 'strategy' → soft_skill
[GPT fallback] 'support' → soft_skill
[GPT fallback] 'system' → tool_platform
[GPT fallback] 'talent' → soft_skill
[GPT fallback] 'tool' → tool_platform
[GPT fallback] 'workforce' → domain_knowledge

📊 Detailed Scoring Breakdown:

TOOL_PLATFORM
  Matched: 

In [4]:
from keyword_scorer import get_matched_keywords_by_category

matched_by_category = get_matched_keywords_by_category(classified, matched)

print("\n📌 Matched Resume Keywords per Category:")
for cat, words in matched_by_category.items():
    print(f"\n{cat.upper()} ({len(words)}):")
    for word in words:
        print(f" - {word}")



📌 Matched Resume Keywords per Category:

TOOL_PLATFORM (5):
 - dashboard
 - excel
 - python
 - system
 - tableau

CERTIFICATION_LICENSE (1):
 - bachelor

DOMAIN_KNOWLEDGE (13):
 - analyst
 - analytic
 - data
 - data-driven
 - development
 - expertise
 - finance
 - metric
 - report
 - reporting
 - statistic
 - visualization
 - workforce

SOFT_SKILL (5):
 - cross-functional
 - decision-making
 - develop
 - strategic
 - support

OTHER (0):
