
# MajorMatch: Testing & Validation Notebook

This notebook runs mock profiles through the full pipeline to validate recommendation quality and allow quick rule tuning.


In [None]:

# Ensure dependencies are available.
# Probe for scikit-learn and install it only when the import fails. Nothing in
# this notebook uses `sklearn` directly — presumably `major_matcher` depends on
# it (TODO confirm).
try:
    import sklearn  # noqa: F401
except ImportError:
    # `%pip` is an IPython magic that installs into the running kernel's
    # environment; `-q` asks pip for quiet output.
    # NOTE(review): the scikit-learn version is not pinned.
    %pip install -q scikit-learn


In [None]:

import pandas as pd
from major_matcher import (
    apply_rules,
    compute_similarity_scores,
    generate_recommendation_report,
    load_context,
    load_majors_data,
    vectorize_majors,
    vectorize_user_profile,
)

# Input locations handed to the loaders in the "Load data" cell.
# NOTE(review): both are relative paths, so they presumably resolve against the
# kernel's working directory — confirm the notebook is launched from the right folder.
MAJORS_PATH = "majors.json"  # passed to load_majors_data
CONTEXT_PATH = "academics_and_skills_info"  # passed to load_context; no file extension is given



## Load data


In [None]:

# Load the majors data and the supporting context from the configured paths.
majors_df = load_majors_data(MAJORS_PATH)
context = load_context(CONTEXT_PATH)
# Bare last expression: the notebook renders `context` as this cell's output.
context



## Define mock user profiles
Feel free to modify these profiles or add new ones to explore how rules change outcomes.


In [None]:

def _mock_profile(name, grades, career_aspiration, skills, hobbies):
    """Build one mock user profile dict.

    All five fields are required, so a newly added profile cannot silently
    omit a key. Keys are inserted in the order: name, grades,
    career_aspiration, skills, hobbies.
    """
    return {
        "name": name,
        "grades": grades,
        "career_aspiration": career_aspiration,
        "skills": skills,
        "hobbies": hobbies,
    }


# Three contrasting personas used to exercise the pipeline.
mock_profiles = [
    _mock_profile(
        name="STEM-strong coder",
        grades="Overall 92% | Math 95 | Physics 90",
        career_aspiration="software engineer in cybersecurity",
        skills=[
            "Problem Solving",
            "Digital Literacy / Technology Skills",
            "Critical Thinking",
        ],
        hobbies=[
            "Video Games",
            "Following Technology (smartphones, AI, gadgets)",
        ],
    ),
    _mock_profile(
        name="Health-focused volunteer",
        grades="Overall 78% | Biology 82 | Chemistry 76",
        career_aspiration="clinical pharmacist",
        skills=[
            "Organization",
            "Practical Lab Skills",
            "Responsibility & Independence",
        ],
        hobbies=[
            "Volunteering and Community Service",
            "Reading",
        ],
    ),
    _mock_profile(
        name="Creative marketer",
        grades="Overall 83% | English 88",
        career_aspiration="digital marketing specialist",
        skills=[
            "Communication",
            "Creativity",
            "Decision-Making",
        ],
        hobbies=[
            "Social Media Content Creation",
            "Photography",
            "Entrepreneurship and Small Online Businesses",
        ],
    ),
]
# Display the profiles as the cell output.
mock_profiles



## Optional: adjust rule parameters
You can tweak `TOP_N`, the number of majors to consider before rules are applied; it is passed to `apply_rules` as `top_n` in the evaluation loop below.


In [None]:

# Passed to apply_rules as `top_n` in the evaluation loop.
TOP_N = 10



## Run evaluations


In [None]:

# Vectorize the majors once, outside the loop; every profile is scored against
# the same vectorizer and matrix.
vectorization = vectorize_majors(majors_df)

# Per-profile outputs keyed by profile name, so any profile (not only the last
# one processed) can be inspected after the loop. Profiles that share a name
# overwrite each other here.
evaluation_results = {}

for profile in mock_profiles:
    print("=" * 80)
    print(f"Profile: {profile['name']}")

    # Pipeline: profile -> vector -> similarity ranking -> rule-adjusted ranking.
    user_vec = vectorize_user_profile(profile, vectorization["vectorizer"])
    ranked = compute_similarity_scores(user_vec, vectorization["matrix"], majors_df)
    adjusted = apply_rules(ranked, profile, majors_df, top_n=TOP_N)

    # NOTE(review): top_major is None when `adjusted` is empty — confirm that
    # generate_recommendation_report accepts None for its first argument.
    top_major = adjusted[0] if adjusted else None
    report_text, report_data = generate_recommendation_report(top_major, profile, adjusted)

    # Keep every intermediate result; previously report_data and the rankings
    # of all but the last profile were lost when the loop moved on.
    evaluation_results[profile["name"]] = {
        "ranked": ranked,
        "adjusted": adjusted,
        "top_major": top_major,
        "report_text": report_text,
        "report_data": report_data,
    }

    print(report_text)
    print()



## Inspect the top rule-adjusted results for the last profile (optional)


In [None]:

# `adjusted` is the apply_rules output left over from the final iteration of
# the evaluation loop, so the "Run evaluations" cell must be run first.
adjusted[:5]
