In [141]:
import pandas as pd
import matplotlib.pyplot as plt

In [142]:
# Importing the dataset
df = pd.read_csv("megaGymDataset.csv")
df

Unnamed: 0.1,Unnamed: 0,Title,Desc,Type,BodyPart,Equipment,Level,Rating,RatingDesc
0,0,Partner plank band row,The partner plank band row is an abdominal exe...,Strength,Abdominals,Bands,Intermediate,0.0,
1,1,Banded crunch isometric hold,The banded crunch isometric hold is an exercis...,Strength,Abdominals,Bands,Intermediate,,
2,2,FYR Banded Plank Jack,The banded plank jack is a variation on the pl...,Strength,Abdominals,Bands,Intermediate,,
3,3,Banded crunch,The banded crunch is an exercise targeting the...,Strength,Abdominals,Bands,Intermediate,,
4,4,Crunch,The crunch is a popular core exercise targetin...,Strength,Abdominals,Bands,Intermediate,,
...,...,...,...,...,...,...,...,...,...
2913,2913,EZ-bar skullcrusher-,The EZ-bar skullcrusher is a popular exercise ...,Strength,Triceps,E-Z Curl Bar,Intermediate,8.1,Average
2914,2914,Lying Close-Grip Barbell Triceps Press To Chin,,Strength,Triceps,E-Z Curl Bar,Beginner,8.1,Average
2915,2915,EZ-Bar Skullcrusher - Gethin Variation,The EZ-bar skullcrusher is a popular exercise ...,Strength,Triceps,E-Z Curl Bar,Intermediate,,
2916,2916,TBS Skullcrusher,The EZ-bar skullcrusher is a popular exercise ...,Strength,Triceps,E-Z Curl Bar,Intermediate,,


In [143]:
#Cheking if there is any NULL or missing values
df.isna().sum()

Unnamed: 0       0
Title            0
Desc          1550
Type             0
BodyPart         0
Equipment        0
Level            0
Rating        1887
RatingDesc    2056
dtype: int64

In [144]:
# Removing columns with lots of nonvalues
df = df.drop('Rating', axis=1)
df = df.drop('RatingDesc', axis=1)
# Removing all rows containing nonvalues in description
df = df[df['Desc'].notna()]
# Removing ID column
df.pop(df.columns[0])



0          0
1          1
2          2
3          3
4          4
        ... 
2911    2911
2912    2912
2913    2913
2915    2915
2916    2916
Name: Unnamed: 0, Length: 1368, dtype: int64

In [145]:
# Checking datatypes
df.dtypes

Title        object
Desc         object
Type         object
BodyPart     object
Equipment    object
Level        object
dtype: object

In [146]:
# Merging columns for cosign similarity and dropping excess columns
df["Merged"] = df["Type"].astype(str) + '|' + \
  df["BodyPart"].astype(str) + '|' + df["Equipment"].astype(str) + '|' + \
  df["Level"]

df = df.drop('Type', axis=1)
df = df.drop('BodyPart', axis=1)
df = df.drop('Equipment', axis=1)
df = df.drop('Level', axis=1)

In [147]:
# The merged columns
df["Merged"]

0           Strength|Abdominals|Bands|Intermediate
1           Strength|Abdominals|Bands|Intermediate
2           Strength|Abdominals|Bands|Intermediate
3           Strength|Abdominals|Bands|Intermediate
4           Strength|Abdominals|Bands|Intermediate
                           ...                    
2911    Strength|Triceps|E-Z Curl Bar|Intermediate
2912    Strength|Triceps|E-Z Curl Bar|Intermediate
2913    Strength|Triceps|E-Z Curl Bar|Intermediate
2915    Strength|Triceps|E-Z Curl Bar|Intermediate
2916    Strength|Triceps|E-Z Curl Bar|Intermediate
Name: Merged, Length: 1368, dtype: object

In [148]:
# Converting values of the merged column into vectors

from sklearn.feature_extraction.text import CountVectorizer
count = CountVectorizer()
count_matrix = count.fit_transform(df.loc[:,"Merged"])

liste = count_matrix.toarray()

In [149]:
# Cosine similarity
from sklearn.metrics.pairwise import cosine_similarity
sim_matrix = cosine_similarity(count_matrix, count_matrix)

In [150]:
#sim_matrix

In [151]:
# Resetting the index to avoid indexing errors and NAN values in recommender
# This makes the previous indexes invalid
# "drop" avoids adding the old index as a column
df = df.reset_index(drop = False)

In [152]:
def recommender(data_frame, exercise_id, sim_matrix):
    sim_df = pd.DataFrame(sim_matrix[exercise_id],
                         columns=["similarity"])
    exercise_titles = data_frame.loc[:, "Title"]
    exercise_rec = pd.concat([sim_df, exercise_titles], axis = 1)

    exercise_rec = exercise_rec.sort_values(by=["similarity"], ascending = False)

    return exercise_rec.iloc[1:20,:]

In [153]:
# Prints the row of the given Title to find the index
print(df[df["Title"] == "Bench press"])

     index        Title                                               Desc  \
454    942  Bench press  The bench press is a popular barbell exercise ...   

                                  Merged  
454  Strength|Chest|Barbell|Intermediate  


In [154]:
# Exercises similar to bench press
recommender(df, 454, sim_matrix)

Unnamed: 0,similarity,Title
459,1.0,King Maker Barbell Bench Press
445,1.0,Barbell Bench Press - Medium Grip
446,1.0,Decline barbell bench press
448,1.0,Barbell Incline Bench Press Medium-Grip
452,1.0,Paul Carter Incline Bench Press
453,1.0,Incline bench press
455,1.0,TBS Close-Grip Bench Press
456,1.0,AM Flat Bench Barbell Press
457,1.0,UP Bench Press
458,1.0,UP Close-Grip Bench Press


In [155]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel

In [156]:
tfidf = TfidfVectorizer(stop_words="english")
overview_matrix = tfidf.fit_transform(df["Desc"])
overview_matrix.shape

(1368, 1918)

In [157]:
similarity_matrix = linear_kernel(overview_matrix, overview_matrix)
print(similarity_matrix[0:5,0:5])

[[1.         0.15336265 0.1591873  0.16544597 0.12342733]
 [0.15336265 1.         0.13065926 0.9269652  0.21069478]
 [0.1591873  0.13065926 1.         0.14095379 0.0042808 ]
 [0.16544597 0.9269652  0.14095379 1.         0.22729524]
 [0.12342733 0.21069478 0.0042808  0.22729524 1.        ]]


In [158]:
mapping = pd.Series(df.index, index = df["Desc"])
mapping

Desc
The partner plank band row is an abdominal exercise where two partners perform single-arm planks while pulling on the opposite ends of an exercise band. This technique can be done for time or reps in any ab-focused workout.                                                                                                                                                0
The banded crunch isometric hold is an exercise targeting the abdominal muscles, particularly the rectus abdominis or "six-pack" muscles. The band adds resistance and continuous tension to this popular exercise.                                                                                                                                                            1
The banded plank jack is a variation on the plank that involves moving the legs in and out for repetitions. Having a band around the thighs forces the lower body to work harder, particularly the hips and glutes. The plank jack is commonly performed as part 

In [159]:
def recommender_by_desc(exercise_input):
    exercise_index = mapping[exercise_input]
    similarity_score = list(enumerate(similarity_matrix[exercise_index]))
    similarity_score = sorted(similarity_score, key=lambda x: x[1], reverse=True)
    similarity_score = similarity_score[0:5]

    exercise_indices = [i[0] for i in similarity_score]
    return df["Desc"].iloc[exercise_indices]

In [160]:
recommender_by_desc(df["Desc"][0])

0      The partner plank band row is an abdominal exe...
795    The partner side plank band row is an abdomina...
332    The standing band biceps curl is an arm-focuse...
844    The band seated row is a band alternative to t...
182    The plank up-down is a variation of the plank ...
Name: Desc, dtype: object

In [161]:
df_users = pd.read_csv('user_exercise_ratings.csv')

df_users.head()

Unnamed: 0,userId,title,rating
0,1,AM Decline Abs,4.6
1,1,Skip,4.3
2,1,Child's pose,2.5
3,1,Barbell glute bridge,4.6
4,1,Tire flip,3.7
