In [85]:
# Import the standard-library, third-party, and Recommenders dependencies used by this notebook
import sys

import itertools
import logging
import os

import numpy as np
import pandas as pd
import papermill as pm


from recommenders.datasets.python_splitters import python_stratified_split
from recommenders.models.sar import SAR
from recommenders.evaluation.python_evaluation import map_at_k, ndcg_at_k, precision_at_k, recall_at_k


# Report the Python interpreter and pandas versions in use for this run.
print("System version: {}".format(sys.version))
print("Pandas version: {}".format(pd.__version__))

System version: 3.11.0 (main, Oct 24 2022, 18:26:48) [MSC v.1933 64 bit (AMD64)]
Pandas version: 1.5.3


In [86]:
# Number of recommendations requested per user (passed as top_k to model.recommend_k_items).
TOP_K = 10
# Dataset-size label. NOTE(review): not referenced in the cells visible here — confirm it is still needed.
DATASIZE = '3k'


In [87]:
# Load the user rating data from the CSV that sits next to the notebook.
user_rating_data = pd.read_csv('./user_rating_data.csv')

# Relabel the columns positionally. pandas raises ValueError here if the file
# does not contain exactly five columns.
user_rating_data.columns = ['user_id', 'age', 'gender', 'exercise', 'rating']
# Store ratings as 32-bit floats.
user_rating_data['rating'] = user_rating_data['rating'].astype(np.float32)

# Preview the frame. This must be the last expression of the cell to be
# rendered; placed mid-cell, its result was silently discarded.
user_rating_data.head()


In [88]:


def generator_rating_scaling(rating):
    """Yield one value: ``rating`` divided by 5, or the literal ``0`` for a zero rating.

    The function is a one-shot generator, so callers obtain the scaled value
    with ``next(generator_rating_scaling(rating))``.

    Args:
        rating: Numeric rating. Ratings between 0 and 5 are mapped onto 0-1.

    Yields:
        ``0`` when ``rating == 0``, otherwise ``rating / 5``.

    Note:
        NaN does not compare equal to 0, so a NaN rating takes the division
        branch and is yielded as NaN.
    """
    yield 0 if rating == 0 else rating / 5


In [89]:
# Rescale every rating by pulling the single value yielded by generator_rating_scaling.
# NOTE(review): this overwrites the 'rating' column in place, so running the cell
# a second time divides the already-scaled values by 5 again.
user_rating_data['rating'] = user_rating_data['rating'].apply(lambda x: next(generator_rating_scaling(x)))
# Preview the scaled ratings.
user_rating_data.head()

Unnamed: 0,user_id,age,gender,exercise,rating
0,98407769-e971-4124-a031-0ddb72780126,37,False,Holman Heavy Double Split Pike,0.0
1,98fb9f73-5471-4b2b-a63f-3af732bb0304,57,False,Bodyweight Squat - Gethin Variation,0.0
2,56275ce6-70d1-4d0a-a69e-5515543ebff4,29,True,Cable Overhead Triceps Extension - Gethin Vari...,0.0
3,62febe82-9255-49a1-a1c5-5d0935979b18,23,False,Bench Press With Short Bands,0.2
4,39996226-f3a1-46f2-a891-0e4a47e397cd,48,True,Banded compass jump squat,0.6


In [90]:
# Split the ratings into train and test partitions with a fixed seed.
# NOTE(review): ratio=0.75 presumably puts 75% of each user's rows in `train` — confirm
# against the python_stratified_split documentation.
train, test = python_stratified_split(
    user_rating_data,
    ratio=0.75,
    col_user="user_id",
    col_item="exercise",
    seed=42,
)

In [91]:


# Column-name mapping that is unpacked as keyword arguments when the SAR model
# is constructed: which columns hold the user, the item and the rating, and
# what the output score column is called.
header = dict(
    col_user="user_id",
    col_item="exercise",
    col_rating="rating",
    col_prediction="prediction",
)



In [92]:


# Configure root logging at DEBUG level (so DEBUG and above are emitted), with
# each record prefixed by a timestamp and a left-aligned, 8-character level name.
logging.basicConfig(
    format='%(asctime)s %(levelname)-8s %(message)s',
    level=logging.DEBUG,
)

# Build the SAR recommender with Jaccard item similarity and the time-decay
# formula switched off; the column names come from `header`.
# NOTE(review): time_decay_coefficient and time_now presumably have no effect
# while timedecay_formula=False — confirm against the SAR documentation.
model = SAR(
    **header,
    similarity_type="jaccard",
    timedecay_formula=False,
    time_decay_coefficient=30,
    time_now=None,
)




In [93]:
# Fit the SAR model on the training split.
model.fit(train)

2023-03-01 02:25:31,144 INFO     Collecting user affinity matrix
2023-03-01 02:25:31,145 INFO     Creating index columns
2023-03-01 02:25:31,147 INFO     Building user affinity sparse matrix
2023-03-01 02:25:31,148 INFO     Calculating item co-occurrence
2023-03-01 02:25:31,150 INFO     Calculating item similarity
2023-03-01 02:25:31,151 INFO     Using jaccard based similarity
2023-03-01 02:25:31,189 INFO     Done training


In [94]:
# Request TOP_K recommendations for the test frame twice: first with
# remove_seen=False, then with remove_seen=True.
# NOTE(review): remove_seen=True presumably drops items each user already has
# in the training data — confirm against the SAR documentation.
top_k_seen = model.recommend_k_items(test, top_k=TOP_K, remove_seen=False)
top_k_not_seen = model.recommend_k_items(test, top_k=TOP_K, remove_seen=True)

2023-03-01 02:25:31,634 INFO     Calculating recommendation scores
2023-03-01 02:25:31,640 INFO     Calculating recommendation scores
2023-03-01 02:25:31,643 INFO     Removing seen items
  return self._with_data(self.data * other)


In [95]:
# Preview the recommendations produced with remove_seen=False.
top_k_seen.head()

Unnamed: 0,user_id,exercise,prediction
0,00c795d5-5c6e-4e44-a803-c5f4bbacd7d0,FYR2 Squat Jump,6.0
1,00c795d5-5c6e-4e44-a803-c5f4bbacd7d0,EZ-bar skullcrusher-,6.0
2,00c795d5-5c6e-4e44-a803-c5f4bbacd7d0,Feet-elevated crunch,6.0
3,00c795d5-5c6e-4e44-a803-c5f4bbacd7d0,Head-on-bench dumbbell rear delt raise,6.0
4,00c795d5-5c6e-4e44-a803-c5f4bbacd7d0,On-Your-Back Quad Stretch,6.0


In [96]:
# Preview the recommendations produced with remove_seen=True.
top_k_not_seen.head()

Unnamed: 0,user_id,exercise,prediction
60,18f1f629-ed5b-4d9d-ae1f-c60982f40d1f,UP Seated Dumbbell Overhead Press,0.566667
70,1a5be596-f03b-4357-bc43-dcf5542c4eb2,HM Right Calf-Spike Sumo Pulse,0.5
90,1dbfabf4-9ca7-41db-8540-a01f2d6e0311,Holman Daisy Cutter,0.333333
120,24d24c6f-6093-47d2-873f-4dbb4a9b0e5d,Holman Weighted Killer Abs with Overhead Press,0.666667
121,24d24c6f-6093-47d2-873f-4dbb4a9b0e5d,Sled Overhead Triceps Extension,0.666667
