In [1]:
import pandas as pd
import numpy as np
#from keras.layers import Input, Embedding, Flatten, Dot
#from keras.models import Model
#import redis
import fakeredis

In [2]:
# create a fake Redis database
fake_redis = fakeredis.FakeStrictRedis()

In [3]:
from datetime import date, timedelta, datetime

def calculate_age(birth_date):
    today = date.today()
    age = today.year - birth_date.year

    # Check if birthday has not occurred yet this year
    if today.month < birth_date.month or (today.month == birth_date.month and today.day < birth_date.day):
        age -= 1

    return age

In [4]:
import random
def generate_false_true(prob_false=0.8):
    if random.random() < prob_false:
        return False
    else:
        return True

In [5]:
from faker import Faker
from gender_guesser.detector import Detector

# Create an instance of the Faker class
fake = Faker()
# Create an instance of the Detector class
detector = Detector()

# Define the fields for your fake data
fields = ['name', 'birthdate', 'gender', 'location']

# Generate fake data for a specific number of records
num_users = 1000
fake_data = []
for id in range(num_users):

    record = {'id': id, 'name': fake.name(), 'birthdate': fake.date_of_birth(minimum_age=14, maximum_age=90) , 'city': fake.city(), 'trainer': generate_false_true() }
    record['gender'] = detector.get_gender(record['name'].split(' ')[0])
    record['age'] = calculate_age(record['birthdate'])
    # If name is not clear if it is feminine or masculin, then set it to 'Prefer not to say'.
    if record['gender'] != 'male' and record['gender'] != 'female':
        record['gender'] = 'Prefer not to say'
    fake_data.append(record)

# Print the generated fake data
"""
for record in fake_data:
    print(record)
"""

'\nfor record in fake_data:\n    print(record)\n'

In [6]:
users = pd.DataFrame(fake_data)
users.head()

Unnamed: 0,id,name,birthdate,city,trainer,gender,age
0,0,Kiara Weiss,1939-10-31,Port Paulberg,False,female,83
1,1,Jeffrey Schultz,1955-10-30,West Michael,False,male,67
2,2,David Martinez,2005-02-07,Nunezchester,False,male,18
3,3,Megan Mercer,1996-06-03,East Nathan,False,female,26
4,4,Justin Mcdonald,1954-01-30,West David,False,male,69


In [7]:
trainers = users[users['trainer'] == True]
trainers_ids = trainers['id'].values

In [8]:
# Create a custom provider for fitness disciplines
class FitnessProvider:
    def __init__(self, faker):
        self.faker = faker

    def fitness_discipline(self):
        disciplines = [
            'Yoga',
            'Pilates',
            'CrossFit',
            'Zumba',
            'Kickboxing',
            'Spinning',
            'Barre',
            'HIIT',
            'Aerobics',
            'Boxing',
            'Personalized',
            'Body Combat',
            'Body Pump',
            'GAP',
            'Total Body Conditioning',
            'ABS',
            'Stretching'
        ]
        return self.faker.random_element(disciplines)
    def level(self):
        levels = ['begginer', 'intermediate', 'advanced', 'all levels']
        return self.faker.random_element(levels)

# Create an instance of the Faker class
fake = Faker()

# Add the custom provider to the Faker instance
fake.add_provider(FitnessProvider)

In [9]:
def approximate_datetime(dt):
    # Calculate the number of minutes past the hour
    minutes_past_hour = dt.minute + dt.second / 60

    # Determine the rounding factor based on the number of minutes past the hour
    rounding_factor = 30 if minutes_past_hour >= 30 else 0

    # Calculate the rounded datetime
    rounded_dt = dt.replace(minute=0, second=0) + timedelta(minutes=rounding_factor)

    return rounded_dt

In [10]:
import numpy as np
# Generate fake data for a specific number of records
num_classes = 1000
classes = []
mean = 15
var = 3
today = datetime.now()
approx_today =  approximate_datetime(today)
for id in range(num_classes):
    price = np.random.normal(mean, var, 1)[0]
    price = round(price,2)
    random_days = random.randint(1, 365)
    # Generate a random number of 30-minute intervals
    random_intervals = random.randint(0, 48)  # 48 intervals in a day (24 hours * 2 intervals per hour) 
    record = {'id': id, 'category': fake.fitness_discipline(), 'creator': random.choice(trainers_ids) ,
               'level':fake.level(), 'maxUsers': random.randint(1, 50), 'price': price, 'datetime': approx_today+ timedelta(days=random_days, minutes=30 * random_intervals) }
    record['title'] = 'A ' + record['category'] + ' class'

    classes.append(record)

In [11]:
classes_df = pd.DataFrame(classes)
classes_df.head()

Unnamed: 0,id,category,creator,level,maxUsers,price,datetime,title
0,0,Body Pump,597,all levels,9,18.88,2023-07-16 00:30:00.702515,A Body Pump class
1,1,Kickboxing,888,intermediate,3,13.59,2024-01-03 22:30:00.702515,A Kickboxing class
2,2,Zumba,592,intermediate,8,13.9,2023-12-25 17:00:00.702515,A Zumba class
3,3,Barre,719,all levels,45,16.36,2023-11-10 23:30:00.702515,A Barre class
4,4,HIIT,173,begginer,31,19.7,2023-09-04 22:30:00.702515,A HIIT class


In [12]:
data = []
for user_id in range(num_users):  
    for _ in range(num_classes):  
        course_id = fake.random_int(min=0, max=num_classes-1)  # Generate a random course ID
        is_nan = random.random()
        # Create sparsity
        if is_nan <= 0.95:
            rating = np.nan
        else:
            rating = random.randint(1,5) # Generate a random rating

        data.append({'User ID': user_id, 'Course ID': course_id, 'Rating': rating})

In [13]:
# Create DataFrame with generated data
df = pd.DataFrame(data)
# Create ratings matrix with sparsity
ratings_matrix = df.pivot_table(values='Rating', index='User ID', columns='Course ID')
ratings_matrix

Course ID,0,1,2,3,4,5,6,7,8,9,...,990,991,992,993,994,995,996,997,998,999
User ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,,,1.0,,,1.0,,,,,...,,,,3.0,,,,5.0,,
1,,,2.0,,,3.0,,,4.0,,...,,,1.0,,,,,,,
2,,,,,,,,,,,...,,,5.0,,,,,,,
3,,,,,,,,,,,...,,,,,,,,,,
4,,,,,2.0,,,2.0,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,,,,,,,,,,,...,,,,,,,,,,
996,,,,,,,,,,,...,,,,,,,,,,
997,,,,,,,,,,,...,,,,,,,,,,
998,,,,5.0,,,,,,,...,,,,,,,,,,


In [14]:
item_similarity = ratings_matrix.corr()
item_similarity.head(5)

Course ID,0,1,2,3,4,5,6,7,8,9,...,990,991,992,993,994,995,996,997,998,999
Course ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,1.0,0.447214,0.174078,,0.866025,,-0.555556,,-1.0,,...,,0.243432,,,-1.0,,0.188982,-0.32969,,
1,0.447214,1.0,,1.0,-0.301511,,,0.5,-0.5,,...,,,-1.0,0.693375,,1.0,,,-0.654654,-1.0
2,0.174078,,1.0,1.0,0.240523,0.177705,0.130189,0.866025,1.0,,...,-1.0,-1.0,0.485662,0.866025,-1.0,-1.0,,-0.414039,,0.0
3,,1.0,1.0,1.0,,-1.0,-0.283144,0.50565,-1.0,0.944911,...,,,-0.5,-0.316228,1.0,1.0,-0.547723,,,
4,0.866025,-0.301511,0.240523,,1.0,-0.560612,,,-0.838525,0.354167,...,,-1.0,,,,0.301511,-0.920737,0.107443,,-0.176777


In [15]:
# Gets a list of rated classes for a user_id
def get_rated_classes(user_id, ratings_matrix):
    return list(ratings_matrix.loc[user_id].dropna().index)
    
# Gets the category of a class by class_id
def get_category(class_id, classes):
    return classes[classes['id'] == class_id].category.iloc[0]

# Gets the title of a class by class_id
def get_title(class_id, classes):
    return classes[classes['id'] == class_id].title.iloc[0]

# Gets the rating a user_id has given to a class_id
def get_rating(user_id, class_id, ratings_matrix):
    return ratings_matrix[class_id][user_id]

# Print rated classes
def print_rated_classes(user_id, rating_matrix, classes):
    for class_id in get_rated_classes(user_id, rating_matrix):
        print("%d %.1f %s " %
          (class_id, get_rating(user_id, class_id, rating_matrix), get_title(class_id, classes)))

In [16]:
def get_classes_relevance(user_id, ratings_matrix, item_similarity_matrix):

    # Create an empty series
    classes_relevance = pd.Series()

    # Iterate through the classes the user has rated
    for class_rated in get_rated_classes(user_id, ratings_matrix):

        # Obtain the rating given
        rating_given = get_rating(user_id, class_rated, ratings_matrix)

        # Obtain the vector containing the similarities of class_rated
        # with all other class in item_similarity_matrix
        similarities = item_similarity_matrix[class_rated]

        # Multiply this vector by the given rating
        weighted_similarities = similarities * rating_given

        # Append these terms to classes_relevance
        classes_relevance = classes_relevance.append(weighted_similarities)

    # Compute the sum for each class
    classes_relevance = classes_relevance.groupby(classes_relevance.index).sum()

    # Convert to a dataframe
    classes_relevance_df = pd.DataFrame(classes_relevance, columns=['relevance'])
    classes_relevance_df['class_id'] = classes_relevance_df.index
    

    return classes_relevance_df

In [17]:
user_id =  222
classes_relevance = get_classes_relevance(user_id,ratings_matrix,item_similarity)
classes_relevance = classes_relevance.sort_values("relevance", ascending=False).head(10)
classes_relevance

  classes_relevance = pd.Series()
  classes_relevance = classes_relevance.append(weighted_similarities)
  classes_relevance = classes_relevance.append(weighted_similarities)
  classes_relevance = classes_relevance.append(weighted_similarities)
  classes_relevance = classes_relevance.append(weighted_similarities)
  classes_relevance = classes_relevance.append(weighted_similarities)
  classes_relevance = classes_relevance.append(weighted_similarities)
  classes_relevance = classes_relevance.append(weighted_similarities)
  classes_relevance = classes_relevance.append(weighted_similarities)
  classes_relevance = classes_relevance.append(weighted_similarities)
  classes_relevance = classes_relevance.append(weighted_similarities)
  classes_relevance = classes_relevance.append(weighted_similarities)
  classes_relevance = classes_relevance.append(weighted_similarities)
  classes_relevance = classes_relevance.append(weighted_similarities)
  classes_relevance = classes_relevance.append(weighted_

Unnamed: 0,relevance,class_id
279,55.411658,279
462,45.027276,462
963,39.6925,963
514,37.498763,514
828,36.424575,828
998,36.31731,998
216,35.993361,216
58,35.732383,58
163,35.641525,163
141,34.473036,141


Now we need to remove already rated classes

In [20]:
def get_recommended_classes(user_id, ratings_matrix, item_similarity_matrix, classes_df):
    classes_relevance = get_classes_relevance(user_id, ratings_matrix, item_similarity_matrix)
    classes_relevance = classes_relevance.set_index(classes_relevance["class_id"].to_numpy())
    classes_relevance = classes_relevance.sort_values("relevance", ascending=False)
    rated_classes = get_rated_classes(user_id, ratings_matrix)
    recommended_classes = classes_relevance.drop(rated_classes)

    recommended_classes = pd.merge(recommended_classes, classes_df, left_on='class_id', right_on='id', how='left')
    recommended_classes =  recommended_classes.drop(['datetime'], axis = 1)
    return recommended_classes

In [21]:
get_recommended_classes(user_id, ratings_matrix, item_similarity, classes_df)

  classes_relevance = pd.Series()
  classes_relevance = classes_relevance.append(weighted_similarities)
  classes_relevance = classes_relevance.append(weighted_similarities)
  classes_relevance = classes_relevance.append(weighted_similarities)
  classes_relevance = classes_relevance.append(weighted_similarities)
  classes_relevance = classes_relevance.append(weighted_similarities)
  classes_relevance = classes_relevance.append(weighted_similarities)
  classes_relevance = classes_relevance.append(weighted_similarities)
  classes_relevance = classes_relevance.append(weighted_similarities)
  classes_relevance = classes_relevance.append(weighted_similarities)
  classes_relevance = classes_relevance.append(weighted_similarities)
  classes_relevance = classes_relevance.append(weighted_similarities)
  classes_relevance = classes_relevance.append(weighted_similarities)
  classes_relevance = classes_relevance.append(weighted_similarities)
  classes_relevance = classes_relevance.append(weighted_

Unnamed: 0,relevance,class_id,id,category,creator,level,maxUsers,price,title
0,39.692500,963,963,HIIT,856,all levels,1,15.33,A HIIT class
1,35.993361,216,216,Spinning,569,advanced,6,20.35,A Spinning class
2,35.732383,58,58,Pilates,791,begginer,36,18.53,A Pilates class
3,34.473036,141,141,Personalized,847,advanced,10,15.11,A Personalized class
4,34.435185,165,165,ABS,350,advanced,30,16.61,A ABS class
...,...,...,...,...,...,...,...,...,...
945,-35.268154,556,556,GAP,336,intermediate,3,11.86,A GAP class
946,-37.980876,308,308,Body Combat,791,intermediate,41,12.73,A Body Combat class
947,-40.856690,823,823,Pilates,919,intermediate,38,17.40,A Pilates class
948,-41.755303,276,276,Kickboxing,92,all levels,28,19.11,A Kickboxing class
