In [2]:
import pandas as pd
import numpy as np
#from keras.layers import Input, Embedding, Flatten, Dot
#from keras.models import Model
#import redis
import fakeredis

In [3]:
# create a fake Redis database
fake_redis = fakeredis.FakeStrictRedis()

In [4]:
from datetime import date, timedelta, datetime

def calculate_age(birth_date):
    today = date.today()
    age = today.year - birth_date.year

    # Check if birthday has not occurred yet this year
    if today.month < birth_date.month or (today.month == birth_date.month and today.day < birth_date.day):
        age -= 1

    return age

In [5]:
import random
def generate_false_true(prob_false=0.8):
    if random.random() < prob_false:
        return False
    else:
        return True

In [6]:
from faker import Faker
from gender_guesser.detector import Detector

# Create an instance of the Faker class
fake = Faker()
# Create an instance of the Detector class
detector = Detector()

# Define the fields for your fake data
fields = ['name', 'birthdate', 'gender', 'location']

# Generate fake data for a specific number of records
num_users = 1000
fake_data = []
for id in range(num_users):

    record = {'id': id, 'name': fake.name(), 'birthdate': fake.date_of_birth(minimum_age=14, maximum_age=90) , 'city': fake.city(), 'trainer': generate_false_true() }
    record['gender'] = detector.get_gender(record['name'].split(' ')[0])
    record['age'] = calculate_age(record['birthdate'])
    # If name is not clear if it is feminine or masculin, then set it to 'Prefer not to say'.
    if record['gender'] != 'male' and record['gender'] != 'female':
        record['gender'] = 'Prefer not to say'
    fake_data.append(record)

# Print the generated fake data
"""
for record in fake_data:
    print(record)
"""

'\nfor record in fake_data:\n    print(record)\n'

In [7]:
users = pd.DataFrame(fake_data)
users.head()

Unnamed: 0,id,name,birthdate,city,trainer,gender,age
0,0,Diana Carter,1932-07-02,South Isabel,True,female,90
1,1,Paige Washington,1991-11-22,Port Matthew,False,Prefer not to say,31
2,2,Melissa Fox,1934-12-25,Garciaville,False,female,88
3,3,Amy Rogers,1932-08-13,New Johnhaven,False,female,90
4,4,Matthew Franklin,1981-08-29,Berryborough,False,male,41


In [8]:
trainers = users[users['trainer'] == True]
trainers_ids = trainers['id'].values

In [9]:
# Create a custom provider for fitness disciplines
class FitnessProvider:
    def __init__(self, faker):
        self.faker = faker

    def fitness_discipline(self):
        disciplines = [
            'Yoga',
            'Pilates',
            'CrossFit',
            'Zumba',
            'Kickboxing',
            'Spinning',
            'Barre',
            'HIIT',
            'Aerobics',
            'Boxing',
            'Personalized',
            'Body Combat',
            'Body Pump',
            'GAP',
            'Total Body Conditioning',
            'ABS',
            'Stretching',
            'Other'
        ]
        return self.faker.random_element(disciplines)
    def level(self):
        levels = ['begginer', 'intermediate', 'advanced', 'all levels']
        return self.faker.random_element(levels)

# Create an instance of the Faker class
fake = Faker()

# Add the custom provider to the Faker instance
fake.add_provider(FitnessProvider)

In [10]:
def approximate_datetime(dt):
    # Calculate the number of minutes past the hour
    minutes_past_hour = dt.minute + dt.second / 60

    # Determine the rounding factor based on the number of minutes past the hour
    rounding_factor = 30 if minutes_past_hour >= 30 else 0

    # Calculate the rounded datetime
    rounded_dt = dt.replace(minute=0, second=0) + timedelta(minutes=rounding_factor)

    return rounded_dt

In [11]:
possible_durations_h = [0,1,2]
possible_durations_min = [i for i in range(0,60,5)]


In [12]:
import numpy as np
# Generate fake data for a specific number of records
num_classes = 1000
classes = []
mean = 15
var = 3
today = datetime.now()
approx_today =  approximate_datetime(today)
for id in range(num_classes):
    price = np.random.normal(mean, var, 1)[0]
    price = round(price,2)
    random_days = random.randint(1, 365)
    # Generate a random number of 30-minute intervals
    random_intervals = random.randint(0, 48)  # 48 intervals in a day (24 hours * 2 intervals per hour) 
    record = {'id': id, 'category': fake.fitness_discipline(), 'creator': random.choice(trainers_ids) ,
               'level':fake.level(), 'maxUsers': random.randint(1, 50), 'price': price, 'datetime': approx_today+ timedelta(days=random_days, minutes=30 * random_intervals)}
    record['title'] = 'A ' + record['category'] + ' class'
    duration_h = random.choice(possible_durations_h)
    duration = str(duration_h) + ' h '
    if(duration_h != 2):
        duration_min = random.choice(possible_durations_min)
        if(duration_min != 0):
            duration +=  str(duration_min) + ' min'
    record['duration'] = duration


    classes.append(record)

In [13]:
classes_df = pd.DataFrame(classes)
classes_df.head()

Unnamed: 0,id,category,creator,level,maxUsers,price,datetime,title,duration
0,0,Personalized,165,begginer,10,17.57,2023-11-27 02:30:00.675411,A Personalized class,1 h 10 min
1,1,Body Pump,644,all levels,41,16.9,2023-07-24 00:00:00.675411,A Body Pump class,0 h 40 min
2,2,Body Pump,834,begginer,23,12.21,2023-09-17 18:00:00.675411,A Body Pump class,2 h
3,3,Pilates,249,all levels,33,12.8,2023-11-27 20:30:00.675411,A Pilates class,2 h
4,4,Personalized,491,begginer,46,21.43,2024-01-19 09:30:00.675411,A Personalized class,0 h 55 min


In [14]:
data = []
for user_id in range(num_users):  
    for _ in range(num_classes):  
        course_id = fake.random_int(min=0, max=num_classes-1)  # Generate a random course ID
        is_nan = random.random()
        # Create sparsity
        if is_nan <= 0.95:
            rating = np.nan
        else:
            rating = random.randint(1,5) # Generate a random rating

        data.append({'User ID': user_id, 'Course ID': course_id, 'Rating': rating})

In [15]:
# Create DataFrame with generated data
df = pd.DataFrame(data)
# Create ratings matrix with sparsity
ratings_matrix = df.pivot_table(values='Rating', index='User ID', columns='Course ID')
ratings_matrix

Course ID,0,1,2,3,4,5,6,7,8,9,...,990,991,992,993,994,995,996,997,998,999
User ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,,,,,,,,,,,...,,,,,,,4.0,,,
1,,,,,,,,,,,...,,,,,,,,,,
2,,,,,,,,,,3.0,...,,,,4.0,,,,,,
3,,,,,4.0,,,,,,...,,,,,,,,,,
4,,,,,,,,,2.0,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,,,,,,,,,,,...,,,,,,,,,,
996,,,,,,,,,,,...,,,,,,,,,,
997,,,,,,,,,,,...,,,,,,,,,,
998,,,,,,,,,,,...,,,,,,,,,,


In [16]:
item_similarity = ratings_matrix.corr()
item_similarity.head(5)

Course ID,0,1,2,3,4,5,6,7,8,9,...,990,991,992,993,994,995,996,997,998,999
Course ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,1.0,-0.866025,,0.174078,0.4,1.0,,-0.333333,-1.0,,...,,,-0.57735,0.327327,-0.632456,,,,0.12666,
1,-0.866025,1.0,0.5,-1.0,-1.0,,,-0.654654,,,...,0.068041,,0.981981,-0.534522,-0.555556,-0.960769,0.243332,,,-0.875755
2,,0.5,1.0,-0.755929,,0.316228,-0.5,1.0,,,...,,,,-1.0,-1.0,1.0,1.0,,-0.944911,1.0
3,0.174078,-1.0,-0.755929,1.0,,-0.333333,0.654654,,0.142857,0.174078,...,,0.944911,-1.0,-0.5,-0.081349,,0.404226,,-1.0,0.628971
4,0.4,-1.0,,,1.0,1.0,,,,-0.27735,...,0.485662,,-1.0,,0.5,-1.0,0.301511,,,-0.5


In [17]:
# Gets a list of rated classes for a user_id
def get_rated_classes(user_id, ratings_matrix):
    return list(ratings_matrix.loc[user_id].dropna().index)
    
# Gets the category of a class by class_id
def get_category(class_id, classes):
    return classes[classes['id'] == class_id].category.iloc[0]

# Gets the title of a class by class_id
def get_title(class_id, classes):
    return classes[classes['id'] == class_id].title.iloc[0]

# Gets the rating a user_id has given to a class_id
def get_rating(user_id, class_id, ratings_matrix):
    return ratings_matrix[class_id][user_id]

# Print rated classes
def print_rated_classes(user_id, rating_matrix, classes):
    for class_id in get_rated_classes(user_id, rating_matrix):
        print("%d %.1f %s " %
          (class_id, get_rating(user_id, class_id, rating_matrix), get_title(class_id, classes)))

In [18]:
def get_classes_relevance(user_id, ratings_matrix, item_similarity_matrix):

    # Create an empty series
    classes_relevance = pd.Series()

    # Iterate through the classes the user has rated
    for class_rated in get_rated_classes(user_id, ratings_matrix):

        # Obtain the rating given
        rating_given = get_rating(user_id, class_rated, ratings_matrix)

        # Obtain the vector containing the similarities of class_rated
        # with all other class in item_similarity_matrix
        similarities = item_similarity_matrix[class_rated]

        # Multiply this vector by the given rating
        weighted_similarities = similarities * rating_given

        # Append these terms to classes_relevance
        classes_relevance = classes_relevance.append(weighted_similarities)

    # Compute the sum for each class
    classes_relevance = classes_relevance.groupby(classes_relevance.index).sum()

    # Convert to a dataframe
    classes_relevance_df = pd.DataFrame(classes_relevance, columns=['relevance'])
    classes_relevance_df['class_id'] = classes_relevance_df.index
    

    return classes_relevance_df

In [19]:
user_id =  222
classes_relevance = get_classes_relevance(user_id,ratings_matrix,item_similarity)
classes_relevance = classes_relevance.sort_values("relevance", ascending=False).head(10)
classes_relevance

  classes_relevance = pd.Series()
  classes_relevance = classes_relevance.append(weighted_similarities)
  classes_relevance = classes_relevance.append(weighted_similarities)
  classes_relevance = classes_relevance.append(weighted_similarities)
  classes_relevance = classes_relevance.append(weighted_similarities)
  classes_relevance = classes_relevance.append(weighted_similarities)
  classes_relevance = classes_relevance.append(weighted_similarities)
  classes_relevance = classes_relevance.append(weighted_similarities)
  classes_relevance = classes_relevance.append(weighted_similarities)
  classes_relevance = classes_relevance.append(weighted_similarities)
  classes_relevance = classes_relevance.append(weighted_similarities)
  classes_relevance = classes_relevance.append(weighted_similarities)
  classes_relevance = classes_relevance.append(weighted_similarities)
  classes_relevance = classes_relevance.append(weighted_similarities)
  classes_relevance = classes_relevance.append(weighted_

Unnamed: 0,relevance,class_id
304,36.213818,304
460,36.053851,460
657,34.054418,657
995,31.795062,995
237,31.168861,237
494,29.08866,494
333,28.301011,333
921,25.755316,921
318,25.581188,318
285,25.430177,285


Now we need to remove already rated classes

In [20]:
def get_recommended_classes(user_id, ratings_matrix, item_similarity_matrix, classes_df):
    classes_relevance = get_classes_relevance(user_id, ratings_matrix, item_similarity_matrix)
    classes_relevance = classes_relevance.set_index(classes_relevance["class_id"].to_numpy())
    classes_relevance = classes_relevance.sort_values("relevance", ascending=False)
    rated_classes = get_rated_classes(user_id, ratings_matrix)
    recommended_classes = classes_relevance.drop(rated_classes)

    recommended_classes = pd.merge(recommended_classes, classes_df, left_on='class_id', right_on='id', how='left')
    recommended_classes =  recommended_classes.drop(['datetime'], axis = 1)
    return recommended_classes

In [21]:
get_recommended_classes(user_id, ratings_matrix, item_similarity, classes_df)

  classes_relevance = pd.Series()
  classes_relevance = classes_relevance.append(weighted_similarities)
  classes_relevance = classes_relevance.append(weighted_similarities)
  classes_relevance = classes_relevance.append(weighted_similarities)
  classes_relevance = classes_relevance.append(weighted_similarities)
  classes_relevance = classes_relevance.append(weighted_similarities)
  classes_relevance = classes_relevance.append(weighted_similarities)
  classes_relevance = classes_relevance.append(weighted_similarities)
  classes_relevance = classes_relevance.append(weighted_similarities)
  classes_relevance = classes_relevance.append(weighted_similarities)
  classes_relevance = classes_relevance.append(weighted_similarities)
  classes_relevance = classes_relevance.append(weighted_similarities)
  classes_relevance = classes_relevance.append(weighted_similarities)
  classes_relevance = classes_relevance.append(weighted_similarities)
  classes_relevance = classes_relevance.append(weighted_

Unnamed: 0,relevance,class_id,id,category,creator,level,maxUsers,price,title,duration
0,36.213818,304,304,Stretching,285,begginer,41,11.29,A Stretching class,2 h
1,34.054418,657,657,Other,805,all levels,33,12.35,A Other class,2 h
2,31.795062,995,995,Body Combat,219,all levels,7,13.61,A Body Combat class,0 h 45 min
3,31.168861,237,237,Body Combat,389,advanced,47,11.92,A Body Combat class,2 h
4,29.088660,494,494,Total Body Conditioning,640,advanced,14,15.47,A Total Body Conditioning class,2 h
...,...,...,...,...,...,...,...,...,...,...
954,-31.931325,838,838,Boxing,445,begginer,8,11.85,A Boxing class,2 h
955,-32.271522,247,247,Spinning,402,intermediate,29,14.54,A Spinning class,2 h
956,-33.845270,8,8,CrossFit,850,intermediate,19,16.94,A CrossFit class,2 h
957,-34.546427,884,884,Personalized,605,begginer,41,21.19,A Personalized class,0 h 5 min
