In [16]:
import pandas as pd
import numpy as np

In [17]:
# Load the flattened program/exercise table; judging by the preview, each row
# pairs one program with one of its exercises (program columns repeat per row).
df = pd.read_csv('program_exercise.csv')
# Preview the first rows to confirm the expected columns were read.
df.head()

Unnamed: 0,program_id,program_title,program_description,program_goal,program_level,program_time_minutes,program_equipment,program_bodyparts,exercise_id,exercise_title,exercise_bodypart,exercise_equipment,exercise_level,exercise_rating
0,0,(MASS MONSTER) High Intensity 4 Day Upper Lowe...,Build tones of muscular with this high intensi...,"['Muscle & Sculpting', 'Bodyweight Fitness']",Intermediate,90.0,Full Gym,"Abdominals, Biceps, Calves, Chest, Glutes, Ham...",8,Barbell roll-out,Abdominals,Barbell,Intermediate,8.9
1,0,(MASS MONSTER) High Intensity 4 Day Upper Lowe...,Build tones of muscular with this high intensi...,"['Muscle & Sculpting', 'Bodyweight Fitness']",Intermediate,90.0,Full Gym,"Abdominals, Biceps, Calves, Chest, Glutes, Ham...",9,Barbell Ab Rollout - On Knees,Abdominals,Barbell,Intermediate,8.9
2,0,(MASS MONSTER) High Intensity 4 Day Upper Lowe...,Build tones of muscular with this high intensi...,"['Muscle & Sculpting', 'Bodyweight Fitness']",Intermediate,90.0,Full Gym,"Abdominals, Biceps, Calves, Chest, Glutes, Ham...",10,Decline bar press sit-up,Abdominals,Barbell,Intermediate,8.5
3,0,(MASS MONSTER) High Intensity 4 Day Upper Lowe...,Build tones of muscular with this high intensi...,"['Muscle & Sculpting', 'Bodyweight Fitness']",Intermediate,90.0,Full Gym,"Abdominals, Biceps, Calves, Chest, Glutes, Ham...",13,Seated bar twist,Abdominals,Barbell,Intermediate,4.7
4,0,(MASS MONSTER) High Intensity 4 Day Upper Lowe...,Build tones of muscular with this high intensi...,"['Muscle & Sculpting', 'Bodyweight Fitness']",Intermediate,90.0,Full Gym,"Abdominals, Biceps, Calves, Chest, Glutes, Ham...",21,Kettlebell Windmill,Abdominals,Kettlebells,Intermediate,7.7


#### Build program–exercise matrix (manual CF foundation)
- User-item interaction matrix

In [18]:
# Keep only the columns used downstream (this drops the unnamed CSV index column)
cols = [
    'program_id', 'program_title', 'program_description', 'program_goal',
    'program_level', 'program_time_minutes', 'program_equipment', 'program_bodyparts',
    'exercise_id', 'exercise_title', 'exercise_bodypart', 'exercise_equipment',
    'exercise_level', 'exercise_rating'
]
df = df[cols].copy()

# Coerce ratings to numbers. Missing or unparseable ratings become 0.0 here and
# are stored in R as an implicit 1.0 below ("in the program, but unrated").
df['exercise_rating'] = pd.to_numeric(df['exercise_rating'], errors='coerce').fillna(0.0)

# Contiguous integer indices for programs (rows of R) and exercises (columns of R),
# assigned in order of first appearance in df.
prog_ids = df['program_id'].unique()
ex_ids = df['exercise_id'].unique()

prog_id_to_idx = {pid: i for i, pid in enumerate(prog_ids)}
idx_to_prog_id = {i: pid for pid, i in prog_id_to_idx.items()}
ex_id_to_idx = {eid: j for j, eid in enumerate(ex_ids)}
idx_to_ex_id = {j: eid for eid, j in ex_id_to_idx.items()}

n_programs = len(prog_ids)
n_exercises = len(ex_ids)
print('Programs:', n_programs, 'Exercises:', n_exercises)

# Build dense rating matrix R (program x exercise)
# R[p, e] = rating of exercise e in program p
#           1.0 if the exercise is in the program but has no positive rating
#           0.0 if the exercise is not in the program
R = np.zeros((n_programs, n_exercises), dtype=np.float32)

# Vectorized fill instead of a Python-level iterrows() loop. If the same
# (program, exercise) pair occurs more than once, NumPy keeps the last value
# assigned, which matches what the row-by-row loop produced.
row_idx = df['program_id'].map(prog_id_to_idx).to_numpy()
col_idx = df['exercise_id'].map(ex_id_to_idx).to_numpy()
ratings = df['exercise_rating'].to_numpy()
R[row_idx, col_idx] = np.where(ratings > 0, ratings, 1.0)

R.shape


Programs: 1028 Exercises: 779


(1028, 779)

In [19]:
# Sanity check: show the top-left 10x10 corner of R
# (first 10 program rows x first 10 exercise columns).
R[:10, :10]  

array([[8.9, 8.9, 8.5, 4.7, 7.7, 9.3, 8.6, 9.5, 9.3, 9.2],
       [8.9, 8.9, 8.5, 4.7, 7.7, 9.3, 8.6, 9.5, 9.3, 9.2],
       [0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. ],
       [0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. ],
       [0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. ],
       [0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. ],
       [0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. ],
       [0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. ],
       [0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. ],
       [0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. ]], dtype=float32)

#### Recommend programs from a set of liked exercises (manual CF)

In [20]:
# Exercise metadata: distinct (id, title, body part, equipment, level) rows.
# Used for title -> id lookups and as the base table for the exercise filters.
exercise_meta = df[['exercise_id', 'exercise_title', 'exercise_bodypart',
                    'exercise_equipment', 'exercise_level']].drop_duplicates()

# Title -> exercise_id lookup. If one title maps to several ids, the first
# occurrence wins so that every title resolves to exactly one id.
exercise_title_to_id = (
    exercise_meta
    .drop_duplicates(subset=['exercise_title'])
    .set_index('exercise_title')['exercise_id']
    .to_dict()
)

# Program metadata for readable output.
# De-duplicate on program_id (first occurrence wins) rather than on all columns:
# the recommender left-merges scores onto this table by program_id, so a program
# whose metadata differs between rows would otherwise be listed more than once.
program_meta = (
    df[['program_id', 'program_title', 'program_goal', 'program_description',
        'program_level', 'program_time_minutes',
        'program_equipment', 'program_bodyparts']]
    .drop_duplicates(subset=['program_id'])
)

def recommend_programs_from_exercises_manual_cf(
    liked_exercise_titles,
    disliked_exercise_titles=None,
    top_k=10,
    filter_equipment=None,
    filter_level=None,
    min_time=None,
    max_time=None,
    dislike_penalty=1.0
):
    """
    Memory-based CF (manual) to recommend programs from liked exercises.

    Steps:
    1. Map liked/disliked titles to exercise ids (unknown titles are reported
       with a [WARN] line and skipped).
    2. Build a 0/1 user preference vector over exercises from the liked ids.
    3. Score each program by dot(R[p, :], user_pref), i.e. the sum of the
       program's stored ratings for the liked exercises.
    4. Subtract `dislike_penalty` once per distinct disliked exercise that the
       program contains.
    5. Attach program metadata, apply the optional filters, drop programs whose
       score is not positive, and return the best `top_k`.

    Parameters:
        liked_exercise_titles: iterable of exercise titles the user likes.
        disliked_exercise_titles: optional iterable of titles the user dislikes.
        top_k: maximum number of rows to return; None returns every match.
        filter_equipment: keep only programs whose program_equipment equals this.
        filter_level: keep only programs whose program_level equals this.
        min_time / max_time: inclusive bounds on program_time_minutes.
        dislike_penalty: score deducted per disliked exercise found in a program.

    Returns:
        DataFrame with 'program_id', 'score' and the program_meta columns,
        sorted by score (highest first). An empty DataFrame without columns is
        returned when none of the liked titles is known.

    Relies on the notebook-level objects R, n_exercises, n_programs,
    ex_id_to_idx, idx_to_prog_id, exercise_title_to_id and program_meta.
    """
    disliked_exercise_titles = disliked_exercise_titles or []

    def _titles_to_ids(titles, label):
        # Resolve titles through the lookup table, warning about unknown ones.
        found = []
        for title in titles:
            if title in exercise_title_to_id:
                found.append(exercise_title_to_id[title])
            else:
                print(f"[WARN] {label} exercise not found: {title}")
        return found

    def _ids_to_columns(exercise_ids):
        # Distinct column indices of R for the given ids, so an exercise that is
        # selected more than once is only counted once.
        columns = sorted({ex_id_to_idx[eid] for eid in exercise_ids if eid in ex_id_to_idx})
        return np.array(columns, dtype=np.intp)

    # 1) Map titles -> exercise_ids
    liked_ids = _titles_to_ids(liked_exercise_titles, 'Liked')
    if not liked_ids:
        print('No liked exercises found in dataset.')
        return pd.DataFrame()

    disliked_ids = _titles_to_ids(disliked_exercise_titles, 'Disliked')

    # 2) User preference vector: 1 for liked exercises, 0 otherwise
    u = np.zeros(n_exercises, dtype=np.float32)
    u[_ids_to_columns(liked_ids)] = 1.0

    # 3) Score each program; R is (n_programs, n_exercises) so scores is (n_programs,)
    scores = R @ u

    # 4) Penalize programs containing disliked exercises
    disliked_cols = _ids_to_columns(disliked_ids)
    if disliked_cols.size:
        # Number of distinct disliked exercises present in each program
        hits = (R[:, disliked_cols] > 0).sum(axis=1).astype(np.float32)
        scores = scores - np.float32(dislike_penalty) * hits

    # 5) Attach metadata to the scores
    prog_ids_arr = np.array([idx_to_prog_id[i] for i in range(n_programs)])
    score_df = pd.DataFrame({'program_id': prog_ids_arr, 'score': scores})
    recs = score_df.merge(program_meta, on='program_id', how='left')

    # Apply filters
    if filter_equipment is not None:
        recs = recs[recs['program_equipment'] == filter_equipment]

    if filter_level is not None:
        recs = recs[recs['program_level'] == filter_level]

    if min_time is not None:
        recs = recs[recs['program_time_minutes'] >= min_time]

    if max_time is not None:
        recs = recs[recs['program_time_minutes'] <= max_time]

    # Keep positive scores only: drops programs with no liked exercise and
    # programs whose penalties cancel out their liked-exercise score.
    recs = recs[recs['score'] > 0]

    # Stable sort so that tied programs come back in a reproducible order
    recs = recs.sort_values('score', ascending=False, kind='mergesort')
    if top_k is not None:
        recs = recs.head(top_k)
    return recs.reset_index(drop=True)

#### Create an input form for liked and disliked exercises
- The form narrows the exercise list by body part, experience level, and equipment
- It is interactive: the user picks liked and disliked exercises, and those selections are passed to the CF model

In [21]:
# List the distinct exercise levels present in the data.
df['exercise_level'].unique()

array(['Intermediate', 'Beginner', 'Expert'], dtype=object)

In [22]:
# List the distinct body parts that exercises are tagged with.
df['exercise_bodypart'].unique()

array(['Abdominals', 'Biceps', 'Calves', 'Chest', 'Glutes', 'Hamstrings',
       'Lats', 'Lower Back', 'Middle Back', 'Quadriceps', 'Shoulders',
       'Triceps', 'Forearms', 'Neck', 'Traps', 'Adductors'], dtype=object)

In [23]:
# List the distinct equipment labels; the output shows a literal 'None' label among them.
df['exercise_equipment'].unique()

array(['Barbell', 'Kettlebells', 'Dumbbell', 'Other', 'Cable', 'Machine',
       'Body Only', 'Medicine Ball', 'None', 'Exercise Ball',
       'E-Z Curl Bar', 'Bands', 'Foam Roll'], dtype=object)

In [24]:
# List the distinct program_goal values; each one is a string that looks like a
# Python list of goal labels, so it has to be parsed before labels can be compared.
df['program_goal'].unique()

array(["['Muscle & Sculpting', 'Bodyweight Fitness']",
       "['Athletics', 'Powerlifting', 'Powerbuilding']",
       "['Powerbuilding', 'Powerlifting', 'Bodybuilding', 'Muscle & Sculpting']",
       "['Bodybuilding', 'Muscle & Sculpting']", "['Bodybuilding']",
       "['Muscle & Sculpting']",
       "['Bodybuilding', 'Athletics', 'Muscle & Sculpting']",
       "['Powerlifting', 'Athletics']", "['Athletics', 'Powerbuilding']",
       "['Powerlifting', 'Bodyweight Fitness']",
       "['Athletics', 'Bodybuilding', 'Muscle & Sculpting']",
       "['Bodybuilding', 'Powerbuilding']",
       "['Bodybuilding', 'Athletics']",
       "['Athletics', 'Bodyweight Fitness']",
       "['Muscle & Sculpting', 'Bodybuilding']", "['Athletics']",
       "['Bodybuilding', 'Powerbuilding', 'Muscle & Sculpting']",
       "['Powerbuilding', 'Bodybuilding', 'Athletics']",
       "['Athletics', 'Bodybuilding', 'Powerlifting', 'Powerbuilding', 'Bodyweight Fitness', 'Olympic Weightlifting']",
       "['Powerlif

In [27]:
import ipywidgets as widgets
from IPython.display import display, clear_output
import ast

# Flatten the list-like goal strings (e.g. "['A', 'B']") into individual labels
raw_goals = df['program_goal'].dropna().unique().tolist()
flat_goals = []
for goal_entry in raw_goals:
    try:
        goal_value = goal_entry if not isinstance(goal_entry, str) else ast.literal_eval(goal_entry)
    except Exception:
        # Not a parseable literal: keep the raw text as a single label
        flat_goals.append(str(goal_entry))
        continue
    if isinstance(goal_value, (list, tuple)):
        flat_goals.extend(goal_value)
    else:
        flat_goals.append(str(goal_value))

# De-duplicated, alphabetically ordered labels for the goal dropdown
unique_goals = sorted(set(flat_goals))

def _make_filter_dropdown(description, options):
    """Single-choice filter that starts on the explicit 'All' entry."""
    return widgets.Dropdown(
        options=options,
        value='All',
        description=description,
        style={'description_width': '80px'},
        layout=widgets.Layout(width='220px')
    )


def _make_multi_select(description, options, value, **layout_kwargs):
    """Multi-choice list box; layout_kwargs are forwarded to widgets.Layout."""
    return widgets.SelectMultiple(
        options=options,
        value=value,
        description=description,
        style={'description_width': '80px'},
        layout=widgets.Layout(**layout_kwargs)
    )


# High-level filters; every one offers an explicit 'All' choice
level_dropdown = _make_filter_dropdown('Level:', ['All', 'Intermediate', 'Beginner', 'Expert'])
goal_dropdown = _make_filter_dropdown('Goal:', ['All'] + unique_goals)

bodypart_select = _make_multi_select(
    'Body parts:',
    ['All'] + sorted(df['exercise_bodypart'].dropna().unique().tolist()),
    ('All',),
    width='300px', height='140px'
)
equipment_select = _make_multi_select(
    'Equipment:',
    ['All'] + sorted(df['exercise_equipment'].dropna().unique().tolist()),
    ('All',),
    width='300px', height='140px'
)

# Switch between one global liked/disliked pair and one pair per body part
per_bodypart_tabs_toggle = widgets.Checkbox(
    value=False,
    description='Separate tabs per body part',
    indent=False
)

# Global liked / disliked lists; they start empty and are filled by the handlers
liked_select = _make_multi_select(
    'Liked:', [], (),
    width='300px', height='180px', border='2px solid green'
)
disliked_select = _make_multi_select(
    'Disliked:', [], (),
    width='300px', height='180px', border='2px solid red'
)

# Holds one page per body part when the per-body-part mode is active
bodypart_tabs = widgets.Tab(children=[])

# Action buttons
filter_button = widgets.Button(description='Filter Exercises', button_style='info')
run_button = widgets.Button(
    description='Recommend Programs',
    button_style='success',
    layout=widgets.Layout(width='200px', height='28px')
)

# Output areas for the filter summary and the recommendation table
exercise_list_output = widgets.Output()
recommendation_output = widgets.Output()


def _filter_exercises_dataframe():
    """Return the rows of exercise_meta that pass the level, body-part and equipment widgets."""
    filtered = exercise_meta.copy()

    # Single-choice level filter; 'All' means no restriction
    selected_level = level_dropdown.value
    if selected_level != 'All':
        filtered = filtered[filtered['exercise_level'] == selected_level]

    # Multi-choice filters share one rule: an empty selection or a selection of
    # exactly 'All' leaves the column unrestricted.
    multi_choice_filters = (
        ('exercise_bodypart', bodypart_select),
        ('exercise_equipment', equipment_select),
    )
    for column, selector in multi_choice_filters:
        chosen = list(selector.value)
        if not chosen or chosen == ['All']:
            continue
        filtered = filtered[filtered[column].isin(chosen)]

    return filtered

def _collect_liked_disliked_from_tabs():
    """Gather the liked and disliked titles selected on every body-part tab.

    Returns a (liked, disliked) pair of flat lists in tab order. Tab pages that
    are not a two-child HBox are ignored.
    """
    liked_titles, disliked_titles = [], []
    for page in bodypart_tabs.children:
        if not isinstance(page, widgets.HBox) or len(page.children) != 2:
            continue
        liked_widget, disliked_widget = page.children
        liked_titles += list(liked_widget.value)
        disliked_titles += list(disliked_widget.value)
    return liked_titles, disliked_titles

def update_global_summary(*args):
    """Mirror the per-tab selections into the global liked/disliked lists.

    Registered as an observer on the tab widgets, so the change event arrives in
    *args and is ignored. Does nothing unless the per-body-part mode is on.
    """
    if per_bodypart_tabs_toggle.value:
        liked_titles, disliked_titles = _collect_liked_disliked_from_tabs()

        # The global widgets list the distinct titles picked so far, alphabetically
        liked_select.options = sorted(set(liked_titles))
        disliked_select.options = sorted(set(disliked_titles))

def filter_exercises(_):
    """Button handler: apply the filters and refill the liked/disliked pickers.

    Fills the two global lists by default, or builds one tab per body part when
    the 'Separate tabs per body part' checkbox is ticked.
    """
    matches = _filter_exercises_dataframe()

    # Report how many exercises survived the filters
    with exercise_list_output:
        clear_output()
        if matches.empty:
            print("No exercises match the selected filters.")
        else:
            print(f"Found {matches['exercise_title'].nunique()} matching exercises.")

    if not per_bodypart_tabs_toggle.value:
        # Global mode: one flat, sorted title list and no tabs
        all_titles = sorted(matches['exercise_title'].unique().tolist())
        liked_select.options = all_titles
        disliked_select.options = all_titles
        bodypart_tabs.children = []
        return

    def _tab_selector(description, border, options):
        # One list box for a tab page; changes are pushed to the global summary
        selector = widgets.SelectMultiple(
            options=options,
            value=(),
            description=description,
            style={'description_width': '70px'},
            layout=widgets.Layout(width='450px', height='200px', border=border)
        )
        selector.observe(update_global_summary, names='value')
        return selector

    # Separate tab per body part found in the filtered set
    tab_names = sorted(matches['exercise_bodypart'].dropna().unique().tolist())
    tab_pages = []
    for name in tab_names:
        on_this_part = matches['exercise_bodypart'] == name
        page_titles = sorted(matches.loc[on_this_part, 'exercise_title'].unique().tolist())
        tab_pages.append(widgets.HBox([
            _tab_selector('Liked:', '2px solid green', page_titles),
            _tab_selector('Disliked:', '2px solid red', page_titles),
        ]))

    bodypart_tabs.children = tab_pages
    for position, name in enumerate(tab_names):
        bodypart_tabs.set_title(position, name)

    # The global lists start empty in this mode; they only show tab selections
    liked_select.options = []
    disliked_select.options = []


def run_recommender(_):
    """Button handler: read the form, run the recommender and display the result.

    Liked/disliked titles come from the body-part tabs when that mode is active
    and tabs exist, otherwise from the global lists. The Level dropdown is passed
    on as the program-level filter and the Goal dropdown is applied to the
    program_goal column; 'All' disables either filter. At most 10 programs are
    shown.
    """
    top_k = 10

    # Collect liked/disliked depending on UI mode
    if per_bodypart_tabs_toggle.value and bodypart_tabs.children:
        liked, disliked = _collect_liked_disliked_from_tabs()
    else:
        liked = list(liked_select.value)
        disliked = list(disliked_select.value)

    with recommendation_output:
        clear_output()
        if not liked:
            print("Please select at least one liked exercise.")
            return

        # Level/program filters: treat 'All' as no filter
        filter_level = None if level_dropdown.value == 'All' else level_dropdown.value
        selected_goal = None if goal_dropdown.value == 'All' else goal_dropdown.value

        # The goal filter runs on the recommender's output. Truncating to top_k
        # before that filter would drop matching programs ranked below top_k, so
        # with a goal selected we ask for every candidate (the merged result has
        # at most one row per program_meta row) and truncate afterwards.
        candidate_k = top_k if selected_goal is None else len(program_meta)

        recs = recommend_programs_from_exercises_manual_cf(
            liked_exercise_titles=liked,
            disliked_exercise_titles=disliked,
            top_k=candidate_k,
            filter_equipment=None,  # keep as-is for now
            filter_level=filter_level,
            min_time=None,
            max_time=None,
        )

        # Apply goal filter: check membership in the program_goal list string
        if selected_goal is not None and not recs.empty:
            def _has_goal(val):
                # Best effort: values that cannot be parsed never match
                try:
                    parsed = ast.literal_eval(val) if isinstance(val, str) else val
                    if isinstance(parsed, (list, tuple)):
                        return selected_goal in parsed
                    return selected_goal == str(parsed)
                except Exception:
                    return False
            matches_goal = recs['program_goal'].apply(_has_goal)
            recs = recs[matches_goal].head(top_k).reset_index(drop=True)

        if recs.empty:
            print("No matching programs found for the selected preferences.")
        else:
            display(recs)


# Connect each button to its handler
filter_button.on_click(filter_exercises)
run_button.on_click(run_recommender)

# Layout: filter row, button row, status text, instructions, pickers, results
general_filters_column = widgets.VBox([level_dropdown, goal_dropdown, per_bodypart_tabs_toggle])
bodypart_column = widgets.VBox([bodypart_select])
equipment_column = widgets.VBox([equipment_select])
filters_box = widgets.HBox([general_filters_column, bodypart_column, equipment_column])

lists_box = widgets.HBox([liked_select, disliked_select])

buttons_box = widgets.HBox([filter_button, run_button])

instruction_labels = [
    widgets.Label(text)
    for text in (
        "Select liked and disliked exercises from the lists below (or in each body-part tab), then click 'Recommend Programs'.",
        "Please use ctrl+click (or cmd+click on Mac) to select multiple exercises!",
    )
]

ui = widgets.VBox([
    filters_box,
    buttons_box,
    exercise_list_output,
    *instruction_labels,
    lists_box,
    bodypart_tabs,
    recommendation_output,
])

# Last expression of the cell, so the assembled form is rendered
ui

VBox(children=(HBox(children=(VBox(children=(Dropdown(description='Level:', layout=Layout(width='220px'), opti…