In [1]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel, cosine_similarity

In [41]:
df = pd.read_csv("AugmentedExercises.csv")
df = df.sort_values("Rating", ascending=False)
df.head()

Unnamed: 0,Title,Desc,Type,BodyPart,Equipment,Level,Rating,Movement,MovementType,Intensity,InjuryRisk,SecondaryMuscles
142,Single-Leg Press,The single-leg leg press is an exercise target...,Strength,Quadriceps,Machine,Intermediate,9.6,legs,Isolation,Low,High,
4,Landmine twist,The landmine twist is a rotational abdominal m...,Strength,Abdominals,Other,Intermediate,9.5,core,Compound,Low,Low,Obliques
76,Palms-down wrist curl over bench,The palms-down wrist curl over bench is an exe...,Strength,Forearms,Barbell,Intermediate,9.5,Other,Isolation,High,Low,
97,Weighted pull-up,The weighted pull-up is a more advanced variat...,Strength,Lats,Other,Intermediate,9.5,pull,Isolation,Low,Low,
162,Dumbbell front raise to lateral raise,The dumbbell front raise to lateral raise is a...,Strength,Shoulders,Dumbbell,Intermediate,9.5,push,Isolation,Medium,Low,


In [25]:
# Split SecondaryMuscles into individual terms
df['SecondaryMuscles'] = df['SecondaryMuscles'].fillna('').str.split(', ')

# Create binary matrices for categorical features
bodypart_dummies = pd.get_dummies(df['BodyPart'], prefix='BodyPart')
movement_dummies = pd.get_dummies(df['Movement'], prefix='Movement')
equipment_dummies = pd.get_dummies(df['Equipment'], prefix='Equipment')
level_dummies = pd.get_dummies(df['Level'], prefix='Level')

# Explode and encode SecondaryMuscles
secondary_exp = df['SecondaryMuscles'].explode()
secondary_dummies = pd.get_dummies(secondary_exp, prefix='Secondary').groupby(level=0).max()

# Combine all categorical features
categorical_matrix = pd.concat([
    bodypart_dummies, 
    movement_dummies, 
    equipment_dummies,
    level_dummies,
    secondary_dummies
], axis=1).fillna(0)

In [27]:
categorical_sim = cosine_similarity(categorical_matrix)

In [37]:
tfidf = TfidfVectorizer(stop_words='english')
desc_matrix = tfidf.fit_transform(df['Desc'])
text_sim = linear_kernel(desc_matrix, desc_matrix)
combined_sim = 0.5 * categorical_sim + 0.5 * text_sim

In [31]:
indices = pd.Series(df.index, index=df['Title']).drop_duplicates()

In [33]:
def get_recommendations(title, similarity_matrix=combined_sim, num_recommend=10):
    idx = indices[title]
    sim_scores = list(enumerate(similarity_matrix[idx]))
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
    top_indices = [i[0] for i in sim_scores[1:num_recommend+1]]
    return df.iloc[top_indices]

In [43]:
get_recommendations('Dumbbell front raise to lateral raise', num_recommend = 10)

Unnamed: 0,Title,Desc,Type,BodyPart,Equipment,Level,Rating,Movement,MovementType,Intensity,InjuryRisk,SecondaryMuscles
93,Stiff-Legged Dumbbell Deadlift,The dumbbell stiff-legged deadlift targets the...,Strength,Hamstrings,Dumbbell,Intermediate,8.8,legs,Isolation,Medium,Low,
91,Kettlebell pass-through lunge,The kettlebell pass-through lunge is a dynamic...,Strength,Hamstrings,Kettlebells,Intermediate,8.7,legs,Isolation,High,Low,
88,Barbell Deadlift,The barbell deadlift is a compound exercise us...,Strength,Hamstrings,Barbell,Intermediate,9.4,legs,Isolation,High,Low,
94,Lying Leg Curls,The lying leg curl is a popular machine-based ...,Strength,Hamstrings,Machine,Beginner,8.9,legs,Isolation,Low,Low,
90,Power clean,The power clean is a full-body movement in whi...,Strength,Hamstrings,Barbell,Intermediate,8.7,legs,Isolation,High,Low,
87,Step-up with knee raise,The step-up with knee raise is a lower-body ex...,Strength,Glutes,Body Only,Intermediate,8.7,legs,Isolation,Low,Low,
142,Single-Leg Press,The single-leg leg press is an exercise target...,Strength,Quadriceps,Machine,Intermediate,9.6,legs,Isolation,Low,High,
86,Single-leg glute bridge,The single-leg glute bridge is a lower-body ex...,Strength,Glutes,Body Only,Intermediate,8.8,legs,Isolation,Low,Low,
144,Leg Press,The leg press is a machine exercise targeting ...,Strength,Quadriceps,Machine,Intermediate,8.8,legs,Isolation,Low,High,
92,Romanian Deadlift With Dumbbells,The dumbbell stiff-legged deadlift targets the...,Strength,Hamstrings,Dumbbell,Beginner,9.4,legs,Isolation,Medium,Low,


In [9]:
df[df["Rating"]==df["Rating"].min()]

Unnamed: 0,Title,Desc,Type,BodyPart,Equipment,Level,Rating,Movement,MovementType,Intensity,InjuryRisk,SecondaryMuscles
45,Standing concentration curl,The standing concentration curl is a variation...,Strength,Biceps,Dumbbell,Beginner,8.7,pull,Isolation,Medium,Low,[]
148,Smith machine back squat,The Smith machine back squat is a machine-base...,Strength,Quadriceps,Machine,Intermediate,8.7,legs,Isolation,Low,Low,[]
138,Dumbbell reverse lunge,The dumbbell reverse lunge is a popular lower-...,Strength,Quadriceps,Dumbbell,Intermediate,8.7,legs,Compound,Medium,Low,"[Glutes, Hamstrings]"
42,Dumbbell Alternate Bicep Curl,The alternating biceps curl is a single-joint ...,Strength,Biceps,Dumbbell,Intermediate,8.7,pull,Isolation,Medium,Low,[]
43,Single-arm dumbbell preacher curl,The single-arm dumbbell preacher curl is an ex...,Strength,Biceps,Dumbbell,Intermediate,8.7,pull,Isolation,Medium,Low,[]
44,Alternate Incline Dumbbell Curl,The alternating incline dumbbell biceps curl i...,Strength,Biceps,Dumbbell,Intermediate,8.7,pull,Isolation,Medium,Low,[]
90,Power clean,The power clean is a full-body movement in whi...,Strength,Hamstrings,Barbell,Intermediate,8.7,legs,Isolation,High,Low,[]
134,Goblet Squat,The goblet squat is a popular lower-body exerc...,Strength,Quadriceps,Kettlebells,Intermediate,8.7,legs,Isolation,High,Low,[]
52,Seated Calf Raise,The machine seated calf raise is an exercise t...,Strength,Calves,Machine,Intermediate,8.7,legs,Isolation,Low,Low,[]
53,Calf Press On The Leg Press Machine,The leg press calf raise is an exercise that u...,Strength,Calves,Machine,Intermediate,8.7,legs,Isolation,Low,Low,[]
