In [1]:
import pandas as pd
import ipywidgets as widgets
from IPython.display import display, clear_output
import numpy as np

# -------------------------
# Load all three outputs
# -------------------------
# Relative paths of the three recommender outputs.
base_file = "../ml_engine/data/processed/matches.csv"
gap_file = "../ml_engine/data/processed/matches_with_gaps.csv"
weighted_file = "../ml_engine/data/processed/matches_weighted.csv"

# Read each CSV into its own DataFrame, in the same order as the paths above.
matches_base, matches_gap, matches_weighted = (
    pd.read_csv(csv_path)
    for csv_path in (base_file, gap_file, weighted_file)
)

# Lift pandas' column-width limit so long cell values are shown in full.
pd.set_option("display.max_colwidth", None)

# -------------------------
# Helpers
# -------------------------
def score_label(score: float):
    """Map a match score to its emoji-tagged strength label.

    Scores of at least 0.85 are labelled strong, scores of at least 0.70
    are labelled good, and everything else is labelled weak.
    """
    # Tiers are checked from the highest cutoff downwards; the first hit wins.
    tiers = (
        (0.85, "🔥 Strong Match"),
        (0.70, "👍 Good Match"),
    )
    for cutoff, label in tiers:
        if score >= cutoff:
            return label
    return "⚠️ Weak Match"

def normalize_scores(scores):
    """Min-max normalize scores into the range [0, 1].

    Args:
        scores: Any iterable of numeric scores (list, tuple, generator,
            NumPy array, pandas Series, ...).

    Returns:
        A new list with one value per input score. An empty input gives an
        empty list. When every score is identical there is no spread to
        scale by, so each score maps to 0.5.
    """
    # Materialize once: a one-shot iterator would be exhausted by the first
    # pass, and array-likes do not have an unambiguous truth value.
    values = list(scores)
    if not values:
        return []

    low, high = min(values), max(values)
    if high == low:
        return [0.5 for _ in values]

    span = high - low
    return [(value - low) / span for value in values]

# -------------------------
# Baseline recs
# -------------------------
def get_baseline_recs(student_id: str, top_n: int = 5):
    """Format the baseline recommendations stored for one student.

    Looks up the student's row in ``matches_base``, parses its
    ``top_matches`` cell into (job id, score) pairs, and renders the first
    ``top_n`` pairs as display strings. When the student has no row, a
    single-item list holding a warning message is returned instead.
    """
    hits = matches_base[matches_base["student_id"] == student_id]
    if hits.empty:
        return ["⚠️ No baseline recs found"]

    # NOTE(review): eval() executes whatever text is stored in the CSV cell.
    # That is only acceptable while the file comes from a trusted pipeline;
    # confirm the stored format before swapping in a safer parser.
    pairs = eval(hits.iloc[0]["top_matches"])[:top_n]

    lines = []
    for job_id, match_score in pairs:
        lines.append(
            f"Internship {job_id} (score: {match_score:.2f}) {score_label(match_score)}"
        )
    return lines

# -------------------------
# Gap-analysis recs
# -------------------------
def get_gap_recs(student_id: str, top_n: int = 5):
    """Format the gap-analysis recommendations stored for one student.

    Takes the first ``top_n`` rows of ``matches_gap`` belonging to the
    student and renders each as a display string that either confirms all
    required skills are present or lists the missing skills with their
    percentage. When the student has no rows, a single-item list holding a
    warning message is returned instead.
    """
    subset = matches_gap[matches_gap["student_id"] == student_id].head(top_n)
    if subset.empty:
        return ["⚠️ No gap recs found"]

    def describe(entry):
        # Render one row of the gap table as a single display line.
        job_id = entry["job_id"]
        match_score = entry["score"]
        gaps = str(entry["missing_skills"])
        detail = (
            "✅ All required skills present"
            if entry["missing_count"] == 0
            else f"❌ Missing: {gaps} ({entry['missing_pct']}%)"
        )
        return f"Internship {job_id} (score: {match_score:.2f}) | {detail}"

    return [describe(entry) for _, entry in subset.iterrows()]

# -------------------------
# Weighted recs (normalized)
# -------------------------
def get_weighted_recs(student_id: str, top_n: int = 5):
    """Format the weighted recommendations stored for one student.

    Parses the student's ``top_matches`` cell from ``matches_weighted``,
    keeps the first ``top_n`` (job id, score) pairs, and rescales just those
    scores with ``normalize_scores`` before labelling them, so the displayed
    values are relative to the slice being shown. When the student has no
    row, a single-item list holding a warning message is returned instead.
    """
    hits = matches_weighted[matches_weighted["student_id"] == student_id]
    if hits.empty:
        return ["⚠️ No weighted recs found"]

    # NOTE(review): eval() executes whatever text is stored in the CSV cell.
    # That is only acceptable while the file comes from a trusted pipeline;
    # confirm the stored format before swapping in a safer parser.
    pairs = eval(hits.iloc[0]["top_matches"])[:top_n]
    rescaled = normalize_scores([raw for _, raw in pairs])

    return [
        f"Internship {job_id} (weighted score: {scaled:.2f}) {score_label(scaled)}"
        for (job_id, _), scaled in zip(pairs, rescaled)
    ]

# -------------------------
# Interactive widget
# -------------------------
# Distinct student ids taken from the baseline file; these fill the dropdown.
student_ids = matches_base["student_id"].unique().tolist()

# Student picker. Preselect the first id, but fall back to no selection when
# the baseline file has no rows: indexing an empty list raises IndexError.
dropdown_student = widgets.Dropdown(
    options=student_ids,
    description="🎓 Student:",
    value=student_ids[0] if student_ids else None,
    style={'description_width': 'initial'},
    layout=widgets.Layout(width="300px")
)

# How many recommendations to show per section (1-10, starting at 3).
# continuous_update=False defers the change event until the drag finishes.
slider_topn = widgets.IntSlider(
    value=3,
    min=1,
    max=10,
    step=1,
    description="Top N:",
    continuous_update=False,
    style={'description_width': 'initial'},
    layout=widgets.Layout(width="400px")
)

# Output widget that the change handler prints the recommendations into.
output_area = widgets.Output()

def on_change(change):
    """Redraw all three recommendation lists for the current selection.

    Serves as the observer callback for both controls. The ``change``
    payload is ignored; the current widget values are read directly, which
    is why the function can also be called with ``None``.
    """
    with output_area:
        clear_output()
        sid = dropdown_student.value
        top_n = slider_topn.value
        rule = "=" * 60

        # Heading plus the function that produces the lines shown under it.
        sections = (
            (f"✅ Baseline Recommendations for {sid}:", get_baseline_recs),
            ("\n🎯 Gap-Analysis Recommendations:", get_gap_recs),
            ("\n⚡ Weighted Recommendations:", get_weighted_recs),
        )

        print(rule)
        for heading, fetch in sections:
            print(heading)
            for rec in fetch(sid, top_n):
                print("  -", rec)
        print(rule)

# Both controls share one handler, fired whenever their value changes.
for control in (dropdown_student, slider_topn):
    control.observe(on_change, names="value")

display(dropdown_student, slider_topn, output_area)

# Render once up front so the output is populated before any interaction.
on_change(None)


Dropdown(description='🎓 Student:', layout=Layout(width='300px'), options=('S00001', 'S00002', 'S00003', 'S0000…

IntSlider(value=3, continuous_update=False, description='Top N:', layout=Layout(width='400px'), max=10, min=1,…

Output()