
# MajorMatch: Main Recommendation Notebook

This notebook loads the majors dataset and contextual information, collects user input, and produces rule-adjusted recommendations using TF-IDF + cosine similarity.


In [None]:

# Install lightweight dependencies (safe in Colab)
# The import is only a probe: the install below runs solely when scikit-learn
# cannot be imported, so environments that already provide it are untouched.
# NOTE(review): no version is pinned, so the installed release may differ
# between runs.
try:
    import sklearn  # noqa: F401
except ImportError:
    # %pip (not !pip) so the package lands in the running kernel's environment.
    %pip install -q scikit-learn


In [None]:

import pandas as pd
from major_matcher import (
    apply_rules,
    collect_user_input,
    compute_similarity_scores,
    generate_recommendation_report,
    load_context,
    load_majors_data,
    vectorize_majors,
    vectorize_user_profile,
)

# Input locations consumed by the loader helpers. Both are relative paths, so
# they are resolved against the kernel's current working directory.
# MAJORS_PATH is handed to load_majors_data.
MAJORS_PATH = "majors.json"
# CONTEXT_PATH is handed to load_context.
# NOTE(review): no file extension here — confirm whether this names a file or
# a directory.
CONTEXT_PATH = "academics_and_skills_info"



## Load datasets
The helper functions keep the parsing and cleaning logic outside the notebook so that cells stay focused on orchestration.


In [None]:

# Load both inputs through the major_matcher helpers.
majors_df = load_majors_data(MAJORS_PATH)
context = load_context(CONTEXT_PATH)

# Sanity summary of what was loaded. A missing "skills" or "hobbies" key is
# reported as zero entries rather than raising.
print(f"Loaded {len(majors_df)} majors")
skills_found = context.get("skills", [])
print(f"Context skills detected: {len(skills_found)}")
hobbies_found = context.get("hobbies", [])
print(f"Context hobbies detected: {len(hobbies_found)}")



## Preview context information
Review the parsed grade-scale explanation and, if needed, edit the skill and hobby lists shown below before collecting user input.


In [None]:

# Bare last expression: the notebook renders the object returned by
# load_context so it can be inspected before user input is collected.
context



## Collect user input
Provide grades and career aspirations, then choose skills and hobbies from the parsed context lists. You can always add custom entries.


In [None]:

# Choices offered to the user come from the parsed context; an absent key
# falls back to an empty list of options.
skill_options = context.get("skills", [])
hobby_options = context.get("hobbies", [])

user_profile = collect_user_input(skills=skill_options, hobbies=hobby_options)

# Echo the collected profile as the cell output for a quick visual check.
user_profile



## Vectorize majors and user profile
TF-IDF is applied over combined descriptive keywords for each major, then cosine similarity ranks the matches.


In [None]:

# vectorize_majors returns a mapping; its "vectorizer" entry is reused for the
# user profile and its "matrix" entry is what the profile is scored against.
vectorization = vectorize_majors(majors_df)

fitted_vectorizer = vectorization["vectorizer"]
user_vec = vectorize_user_profile(user_profile, fitted_vectorizer)

majors_matrix = vectorization["matrix"]
ranked = compute_similarity_scores(user_vec, majors_matrix, majors_df)

# Show the three leading entries before any rule adjustments are applied.
print("Top 3 (pre-rules):")
for entry in ranked[:3]:
    major_name, score = entry["major_name"], entry["score"]
    print(f"- {major_name}: {score:.3f}")



## Apply rule-based refinements
Simple heuristics adjust rankings using grade thresholds, career alignment, and skill overlaps.


In [None]:

# Re-rank with the rule helper, keeping at most the ten best entries.
adjusted = apply_rules(ranked, user_profile, majors_df, top_n=10)

# The first adjusted entry is the headline pick; when nothing came back the
# report helper is given None instead.
if adjusted:
    top_major = adjusted[0]
else:
    top_major = None

report_text, report_data = generate_recommendation_report(
    top_major,
    user_profile,
    adjusted,
)
print(report_text)



## Inspect the full ranked list (optional)
Use this cell to review how majors are ordered after rule adjustments.


In [None]:

# Bare last expression: renders the rule-adjusted results returned by
# apply_rules (requested with top_n=10) for manual inspection.
adjusted
