In [None]:
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
# Load the exercise dataset; the unnamed first CSV column is renamed to 'index'.
column_renames = {'Unnamed: 0': 'index'}
df = pd.read_csv("megaGymDataset.csv").rename(columns=column_renames)
df

In [None]:
# Count the missing (NaN/None) values in every column
missing_per_column = df.isna().sum()
missing_per_column

In [None]:
# DATA ANALYSIS

# Several exercises share the same title: keep only the last row of each
# title, then show how often every remaining title occurs.
df = df.drop_duplicates(subset=['Title'], keep='last')
title_counts = df['Title'].value_counts()
title_counts

In [None]:
# Number of exercises per level, as a horizontal bar chart
level_counts = df['Level'].value_counts()
level_counts.plot(kind='barh')

In [None]:
# Number of exercises per type, as a horizontal bar chart
type_counts = df['Type'].value_counts()
type_counts.plot(kind='barh')

In [None]:
# Number of exercises per body part, as a horizontal bar chart
bodypart_counts = df['BodyPart'].value_counts()
bodypart_counts.plot(kind='barh')

In [None]:
# The ten exercises with the highest rating
ratingSorted = df.sort_values(by='Rating', ascending=False).head(10)
ratingSorted

In [None]:
# Show the "Bench press" row(s), overwrite their rating with 10,
# and show the row(s) again to confirm the change.
is_bench_press = df["Title"] == "Bench press"
print(df[is_bench_press])
df.loc[is_bench_press, 'Rating'] = 10
print(df[is_bench_press])

In [None]:
# Histogram of the "Rating" column
df_sorted = df.sort_values(by="Rating")
rating_values = df_sorted["Rating"]

fig, ax = plt.subplots(figsize=(10, 6))  # Adjust the figure size as needed
ax.hist(rating_values, bins=20, edgecolor="k", alpha=0.7)

ax.set_title("Distribution of Ratings")
ax.set_xlabel("Rating")
ax.set_ylabel("Frequency")
ax.grid(False)
# Clamp the x axis to the observed rating range and start the y axis at zero
ax.set_xlim(rating_values.min(), rating_values.max())
ax.set_ylim(0, ax.get_ylim()[1])
plt.show()

In [None]:
# deepcopy and np are not needed by this cell any more, but later cells rely
# on these imports being in scope, so they stay.
from copy import deepcopy

import numpy as np
from sklearn.model_selection import GridSearchCV
from sklearn.neighbors import KNeighborsRegressor

# Impute missing ratings: train a k-nearest-neighbours regressor on the rated
# exercises and predict a rating for every exercise whose rating is NaN or 0.

# Categorical columns used as features. The row-id column 'index' is left out
# on purpose: it says nothing about the exercise, and its large numeric range
# would dominate the distance between rows.
feature_columns = ["Type", "BodyPart", "Equipment", "Level"]

# Turn strings into integer category codes ONCE over the whole frame, so a
# category gets the same code in the training rows and in the rows that are
# predicted (factorize encodes missing values as -1).
encoded = pd.DataFrame(
    {column: pd.factorize(df[column])[0] for column in feature_columns},
    index=df.index,
)

# Rows with a usable rating are the training set; everything else is imputed.
has_rating = df["Rating"].notna() & (df["Rating"] != 0)
x_train = encoded[has_rating]
# Values to be predicted, used for training and testing
y_train = df.loc[has_rating, "Rating"]

# Grid search for the best hyper-parameters. A regression metric is required
# here: 'accuracy' is only defined for classifiers.
param_grid = {
    'n_neighbors': [3, 5, 7, 9, 11, 13, 15, 17],
    'p': [1, 2]
}
grid_search = GridSearchCV(
    estimator=KNeighborsRegressor(),
    param_grid=param_grid,
    scoring='neg_mean_squared_error',
    cv=5,
)
grid_search.fit(x_train, y_train)
params = grid_search.best_params_

# Training on all rated rows with the best parameters
knn = KNeighborsRegressor(n_neighbors=params['n_neighbors'], p=params["p"])
knn.fit(x_train, y_train)

# All rows of the dataframe that have no rating (NaN or 0)
x_missing = encoded[~has_rating]

# Number of missing ratings before imputation
print("Nonvalues rating before:", df["Rating"].isna().sum())

# Predict a rating for every unrated row in one call; the guard keeps the cell
# re-runnable once nothing is left to impute (predict rejects empty input).
if not x_missing.empty:
    df.loc[x_missing.index, "Rating"] = knn.predict(x_missing).round(decimals=1)

print("Nonvalues rating after", df["Rating"].isna().sum())

filtered_df = df[df["Rating"] == 0]
print(len(filtered_df))

In [None]:
# Histogram of the "Rating" column after the missing ratings were filled in
df_sorted = df.sort_values(by="Rating")
sorted_ratings = df_sorted["Rating"]

figure, axes = plt.subplots(figsize=(10, 6))  # Adjust the figure size as needed
axes.hist(sorted_ratings, bins=20, edgecolor="k", alpha=0.7)

axes.set_title("Distribution of Ratings")
axes.set_xlabel("Rating")
axes.set_ylabel("Frequency")
axes.grid(False)
# Clamp the x axis to the observed rating range and start the y axis at zero
axes.set_xlim(sorted_ratings.min(), sorted_ratings.max())
axes.set_ylim(0, axes.get_ylim()[1])
plt.show()

In [None]:
# Remove the columns that are not used by the recommenders: the 'RatingDesc'
# text and the row-id column 'index'. Both are dropped by name, so the result
# does not depend on which column happens to be first.
df = df.drop(columns=['index', 'RatingDesc'])

# Remove all rows without a description. The explicit copy makes df an
# independent frame, so later column assignments do not act on a slice.
df = df[df['Desc'].notna()].copy()

# Dataset after preprocessing
clean_df = deepcopy(df)


In [None]:
# Data type of every remaining column
column_dtypes = df.dtypes
column_dtypes

In [None]:
# Merge the categorical columns into one '|'-separated text column for the
# cosine-similarity recommender, then drop the source columns.
# Every part is cast to str (including 'Level'), so a missing value becomes
# the text "nan" instead of turning the whole merged value into NaN.
category_columns = ["Type", "BodyPart", "Equipment", "Level"]
df["Merged"] = (
    df["Type"].astype(str) + '|'
    + df["BodyPart"].astype(str) + '|'
    + df["Equipment"].astype(str) + '|'
    + df["Level"].astype(str)
)

df = df.drop(columns=category_columns)

In [None]:
# Display the merged category text of every exercise
df.loc[:, "Merged"]

In [None]:
# Turn the merged category text of every exercise into a token-count vector
from sklearn.feature_extraction.text import CountVectorizer

count = CountVectorizer()
count_matrix = count.fit_transform(df["Merged"])

# Dense array copy of the count matrix
liste = count_matrix.toarray()

In [None]:
# Pairwise cosine similarity between the category vectors of all exercises
from sklearn.metrics.pairwise import cosine_similarity

# With a single argument the matrix is compared against itself
sim_matrix = cosine_similarity(count_matrix)

In [None]:
#sim_matrix

In [None]:
# Reset the index to 0..n-1 so that row labels match the row positions of the
# similarity matrices (avoids indexing errors and NaN values in the recommender).
# This makes the previous index labels invalid.
# drop=True discards the old index instead of inserting it as a new column,
# which also keeps this cell safe to re-run.
df = df.reset_index(drop=True)

In [None]:
def recommender(data_frame, exercise_id, sim_matrix):
    """Pair every exercise title with its similarity to one exercise.

    Parameters
    ----------
    data_frame : pandas.DataFrame
        Frame with a "Title" column; row i must correspond to row/column i of
        ``sim_matrix``.
    exercise_id : int
        Row position in ``sim_matrix`` of the exercise to compare against.
    sim_matrix : array-like
        Square similarity matrix indexed by row position.

    Returns
    -------
    pandas.DataFrame
        Columns "Similarity" and "Title", indexed like ``data_frame``.

    Raises
    ------
    ValueError
        If the similarity row and ``data_frame`` differ in length.
    """
    # Attach the similarity row to the frame's own index: the scores are
    # matched to titles by position, so a frame whose index is not 0..n-1
    # no longer produces misaligned, NaN-padded rows.
    similarity = pd.Series(
        sim_matrix[exercise_id], index=data_frame.index, name="Similarity"
    )
    return pd.concat([similarity, data_frame["Title"]], axis=1)

In [None]:
# Look up the "Bench press" row(s) and keep their index labels
# (this cell stores the result; it does not display anything).
row = df.loc[df["Title"] == "Bench press"]
index = row.index

In [None]:
# Exercises similar to bench press (category-based similarity).
# NOTE(review): 1115 is presumably the row of "Bench press" found in the
# previous cell — confirm it still matches after any preprocessing change.
bench_press_row = 1115
df_by_cat = recommender(df, bench_press_row, sim_matrix)

In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel

In [None]:
# TF-IDF encoding of the exercise descriptions, ignoring English stop words
descriptions = df["Desc"]
tfidf = TfidfVectorizer(stop_words="english")
overview_matrix = tfidf.fit_transform(descriptions)
overview_matrix.shape

In [None]:
# Pairwise dot products between the TF-IDF vectors of all descriptions;
# with a single argument the matrix is compared against itself.
similarity_matrix = linear_kernel(overview_matrix)
# Peek at the top-left 5x5 corner
print(similarity_matrix[:5, :5])

In [None]:
# Lookup table: description text -> row label of that exercise
row_labels = df.index
mapping = pd.Series(data=row_labels, index=df["Desc"])
mapping

In [None]:

def recommender_by_desc(exercise_input, df, similarity_matrix, mapping):
    """Pair every exercise title with its description similarity to one exercise.

    Parameters
    ----------
    exercise_input : str
        Description text of the exercise to compare against; must be a key
        of ``mapping``.
    df : pandas.DataFrame
        Frame with a "Title" column; row i must correspond to row/column i of
        ``similarity_matrix``.
    similarity_matrix : array-like
        Square similarity matrix indexed by row position.
    mapping : pandas.Series
        Maps a description text to the row position of its exercise.

    Returns
    -------
    pandas.DataFrame
        Columns "Title" and "Similarity", indexed like ``df``.

    Raises
    ------
    KeyError
        If ``exercise_input`` is not in ``mapping``.
    """
    exercise_index = mapping[exercise_input]
    # A description shared by several exercises yields a Series of rows; use
    # the first one. Taking it with .iloc is explicitly positional, and
    # testing for a Series (rather than for np.int64) keeps scalar lookups
    # working whatever integer type the mapping holds.
    if isinstance(exercise_index, pd.Series):
        exercise_index = exercise_index.iloc[0]

    scores = np.asarray(similarity_matrix[exercise_index])
    titles = df["Title"].iloc[: len(scores)]
    return pd.DataFrame({"Title": titles, "Similarity": scores}, index=titles.index)


In [None]:
# Description-based similarities for the exercise stored under label 1115
query_description = df["Desc"][1115]
df_by_desc = recommender_by_desc(query_description, df, similarity_matrix, mapping)

In [None]:
# Average the category-based and description-based similarity of every
# exercise, rank by the combined score and show the ten best matches.
combined_similarity = (df_by_cat["Similarity"] + df_by_desc["Similarity"]) / 2
merged_df = (
    df_by_cat
    .assign(Similarity=combined_similarity)
    .sort_values(by="Similarity", ascending=False)
)
merged_df.head(10)

In [None]:
# Start again from an independent copy of the preprocessed dataset
df = clean_df.copy(deep=True)

def knowledge_based_rec(dataframe, type=None, bodypart=None, equipment=None, level=None):
    """Return the ten best-rated exercises that match the given criteria.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Exercises with the columns "Title", "Rating", "Type", "BodyPart",
        "Equipment" and "Level".
    type, bodypart, equipment, level : str, optional
        Exact value the matching column must have. A criterion that is
        ``None`` or an empty string is not applied.

    Returns
    -------
    pandas.DataFrame
        Columns "Title" and "Rating" of at most ten matching rows, ordered by
        descending rating.
    """
    criteria = {
        "Type": type,
        "BodyPart": bodypart,
        "Equipment": equipment,
        "Level": level,
    }
    for column, wanted in criteria.items():
        if wanted:
            # Build the mask from the frame being filtered (not from a global
            # frame), so it always lines up with the rows that are left.
            dataframe = dataframe[dataframe[column] == wanted]

    recommendations = dataframe[["Title", "Rating"]]
    return recommendations.sort_values(by="Rating", ascending=False).iloc[:10]

# Example query: chest exercises done with bands; the empty strings leave
# type and level unfiltered.
chest_band_recommendations = knowledge_based_rec(
    df, type="", bodypart="Chest", equipment="Bands", level=""
)
print(chest_band_recommendations)

In [None]:
import pandas as pd
import tkinter as tk
from tkinter import ttk

# Work on an independent copy of the preprocessed exercise data
df = clean_df.copy(deep=True)

# Main window of the tkinter GUI
root = tk.Tk()
root.wm_title("Gym Exercise Recommender")

# Button callback: filter the exercises and list the matches
def recommend_exercises():
    """Fill the listbox with the exercises matching all three combobox selections."""
    wanted_bodypart = bodypart_var.get()
    wanted_level = level_var.get()
    wanted_type = type_var.get()

    matches = (
        (df['BodyPart'] == wanted_bodypart)
        & (df['Level'] == wanted_level)
        & (df['Type'] == wanted_type)
    )
    matching_titles = df.loc[matches, 'Title']

    # Remove the previous results before inserting the new ones
    exercise_list.delete(0, tk.END)

    for position, title in enumerate(matching_titles, start=1):
        exercise_list.insert(tk.END, f'{position}. {title}')

# Build the widgets: one labelled combobox per filter, offering the distinct
# values of the matching column.

# Body-part selector
bodypart_label = ttk.Label(root, text="Select Body Part:")
bodypart_label.pack()
my_bps = list(df["BodyPart"].unique())
bodypart_var = ttk.Combobox(root, values=my_bps)
bodypart_var.pack()

# Level selector
level_label = ttk.Label(root, text="Select Level:")
level_label.pack()
my_levels = list(df["Level"].unique())
level_var = ttk.Combobox(root, values=my_levels)
level_var.pack()

# Type selector
type_label = ttk.Label(root, text="Select Type:")
type_label.pack()
my_types = list(df["Type"].unique())
type_var = ttk.Combobox(root, values=my_types)
type_var.pack()

# Button that runs the filter
recommend_button = ttk.Button(root, text="Recommend Exercises", command=recommend_exercises)
recommend_button.pack()

# Listbox for the results; packing it and starting the event loop are
# currently disabled.
exercise_list = tk.Listbox(root)
#exercise_list.pack()

#root.mainloop()

In [None]:
# Collaborative filtering — disabled draft.
# Everything below sits inside a triple-quoted string literal, so none of it
# is executed; the cell only evaluates to that string. This draft round-trips
# the melted ratings through "ratings2.csv" and factorizes the exercise names.
"""
from surprise import KNNBasic
from surprise import Dataset
from surprise import Reader

ratings_df = pd.read_csv("user_ratings.csv")
ratings = ratings_df.melt(id_vars=['Users'], var_name='Exercise', value_name='Rating')
ratings.to_csv("ratings2.csv")

ratings = pd.read_csv("ratings2.csv")
ratings = ratings.drop("Unnamed: 0", axis=1)
ratings['Exercise'] = pd.factorize(ratings['Exercise'])[0]

# Use ratings DataFrame for pivoting
ratings_full = ratings.pivot(index="Users", columns="Exercise", values="Rating")

# Now you can use ratings_full for further analysis
ratings_full"""

In [None]:
# Collaborative filtering
from surprise import KNNBasic
from surprise import Dataset
from surprise import Reader

# Read the user ratings (wide format: a 'Users' column plus one column per exercise)
ratings_df = pd.read_csv("user_ratings.csv")

# Convert to long format: one (user, exercise, rating) row per cell
ratings = ratings_df.melt(id_vars=['Users'], var_name='Exercise', value_name='Rating')

# User x exercise matrix, kept for inspection
ratings_full = ratings.pivot(index="Users", columns="Exercise", values="Rating")

# Cells without a rating come out of melt() as NaN. Drop them so they are not
# handed to Surprise as real ratings, which would also make the density below
# report 100 %.
# NOTE(review): assumes unrated cells are empty in user_ratings.csv — confirm.
ratings = ratings.dropna(subset=['Rating'])

# Training
reader = Reader(rating_scale=(1,10))
data = Dataset.load_from_df(ratings[["Users", "Exercise", "Rating"]], reader)
trainset = data.build_full_trainset()

# Variables
user_rating = trainset.ur
item_rating = trainset.ir
num_users = trainset.n_users
num_items = trainset.n_items
num_ratings = trainset.n_ratings

print("Users", num_users)
print("Items", num_items)
print("Ratings", num_ratings)
# Share of the user x item grid that actually holds a rating
density = num_ratings / (num_users * num_items)
density = np.round(density, decimals=2)
sparsity = 1 - density
print("Density:", density*100, "%")
print("Sparsity:", sparsity*100, "%")

# Similarity options. The key must be spelled 'user_based' (underscore);
# 'user based' is not an option name Surprise reads.
# NOTE(review): per the Surprise docs 'shrinkage' only affects the
# 'pearson_baseline' measure — confirm whether it is wanted here.
sim_options = {"name": "pearson",
               "user_based": True,
               "shrinkage": 0}

rec = KNNBasic(sim_options=sim_options)
rec.fit(trainset)

# User-user similarity matrix computed by fit()
user_sim_matrix = rec.sim
user_sim_matrix
