In [1]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel

DATAPATH = "../input/megaGymDataset_original.csv"

In [2]:
dataset = pd.read_csv(DATAPATH)
features = ['Type', 'BodyPart', 'Equipment', 'Level']
dataset.Rating = dataset.Rating.fillna(-1).astype(float)
dataset.Equipment = dataset.Equipment.fillna("Body Only")
display(dataset.info())
dataset.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2918 entries, 0 to 2917
Data columns (total 8 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   ID         2918 non-null   int64  
 1   Title      2918 non-null   object 
 2   Desc       2918 non-null   object 
 3   Type       2918 non-null   object 
 4   BodyPart   2918 non-null   object 
 5   Equipment  2918 non-null   object 
 6   Level      2918 non-null   object 
 7   Rating     2918 non-null   float64
dtypes: float64(1), int64(1), object(6)
memory usage: 182.5+ KB


None

Unnamed: 0,ID,Title,Desc,Type,BodyPart,Equipment,Level,Rating
0,0,Partner plank band row,The partner plank band row is an abdominal exe...,Strength,Abdominals,Bands,Intermediate,0.0
1,1,Banded crunch isometric hold,The banded crunch isometric hold is an exercis...,Strength,Abdominals,Bands,Intermediate,-1.0
2,2,FYR Banded Plank Jack,The banded plank jack is a variation on the pl...,Strength,Abdominals,Bands,Intermediate,-1.0
3,3,Banded crunch,The banded crunch is an exercise targeting the...,Strength,Abdominals,Bands,Intermediate,-1.0
4,4,Crunch,The crunch is a popular core exercise targetin...,Strength,Abdominals,Bands,Intermediate,-1.0


In [3]:
# Function to recommend top 10 exercises based on similarity scores
def recommend_exercises(user_input):
    features = dataset['Type'] + ' ' + dataset['BodyPart'] + ' ' + dataset['Equipment'] + ' ' + dataset['Level']
    
    tfidf = TfidfVectorizer(stop_words='english')
    tfidf_matrix = tfidf.fit_transform(features)

    cosine_sim = linear_kernel(tfidf_matrix, tfidf_matrix)
    
    indices = dataset.index[
        (dataset['Type'] == user_input[0]) &
        (dataset['BodyPart'] == user_input[1]) &
        (dataset['Equipment'].str.contains(user_input[2], case=False)) &
        (dataset['Level'] == user_input[3])
    ].tolist()

    exercise_scores = list(enumerate(cosine_sim[indices[0]]))
    exercise_scores = sorted(exercise_scores, key=lambda x: x[1], reverse=True)
    exercise_scores = exercise_scores[:10]  # Exclude the exercise itself and select top 10
    exercise_indices = [i[0] for i in exercise_scores]
    
    return dataset.loc[exercise_indices]

In [4]:
user_input = ["Strength", "Chest", "Body Only", "Intermediate"]
recommend_exercises(user_input)

Unnamed: 0,ID,Title,Desc,Type,BodyPart,Equipment,Level,Rating
960,960,Pushups,The push-up is a popular bodyweight exercise t...,Strength,Chest,Body Only,Intermediate,9.2
961,961,Incline Push-Up,The hands-elevated push-up is a variation on t...,Strength,Chest,Body Only,Intermediate,8.8
963,963,Decline Push-Up,The feet-elevated push-up is a variation on th...,Strength,Chest,Body Only,Intermediate,8.6
965,965,Close push-up to wide push-up,The close push-up to wide push-up combines two...,Strength,Chest,Body Only,Intermediate,8.5
967,967,Push-Ups With Feet On An Exercise Ball,Push-up variation performed with feet elevated...,Strength,Chest,Body Only,Intermediate,8.5
968,968,Feet-elevated push-up,The feet-elevated push-up is a variation on th...,Strength,Chest,Body Only,Intermediate,8.5
973,973,Sphinx push-up,The sphinx push-up is a difficult variation of...,Strength,Chest,Body Only,Intermediate,8.0
977,977,Close-grip hands-elevated push-up,The close-grip hands-elevated push-up is a var...,Strength,Chest,Body Only,Intermediate,7.4
978,978,Suspended push-up,The suspended push-up is a bodyweight pushing ...,Strength,Chest,Body Only,Intermediate,7.3
979,979,Push Up to Side Plank,The push-up to side plank is an upper-body and...,Strength,Chest,Body Only,Intermediate,7.1


In [5]:
import time
start = time.time()

# Iterate over rows with missing ratings
display("Assign similar ratings... ")
progress = 1/20
for index, row in dataset[dataset.Rating == -1].iterrows():
    similar_ratings = recommend_exercises(row[features].to_list()).Rating
    dataset.loc[index, "Rating"] = similar_ratings[similar_ratings != -1].mean()
    if index/(len(dataset)-1) >= progress:
        print(index, int(progress*100), "%... ")
        progress += 1/20

dataset.Rating = dataset.Rating.fillna(0)

end = time.time()
time_spent = end - start
minutes = int(time_spent // 60)
seconds = int(time_spent % 60)
print(f"Time spent: {minutes} minutes and {seconds} seconds")

'Assign similar ratings... '

146 5 %... 
292 10 %... 
438 15 %... 
584 20 %... 
730 25 %... 
877 30 %... 
1021 35 %... 
1169 40 %... 
1316 44 %... 
1459 49 %... 
1605 54 %... 
1755 60 %... 
1897 65 %... 
2042 70 %... 
2188 75 %... 
2334 80 %... 
2480 85 %... 
2626 90 %... 
2786 95 %... 
Time spent: 4 minutes and 29 seconds


In [6]:
dataset.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2918 entries, 0 to 2917
Data columns (total 8 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   ID         2918 non-null   int64  
 1   Title      2918 non-null   object 
 2   Desc       2918 non-null   object 
 3   Type       2918 non-null   object 
 4   BodyPart   2918 non-null   object 
 5   Equipment  2918 non-null   object 
 6   Level      2918 non-null   object 
 7   Rating     2918 non-null   float64
dtypes: float64(1), int64(1), object(6)
memory usage: 182.5+ KB


In [7]:
dataset.to_csv("../input/megaGymDataset.csv", index=False)