In [1]:
import pandas as pd
import numpy as np
#from keras.layers import Input, Embedding, Flatten, Dot
#from keras.models import Model
#import redis
import fakeredis

In [2]:
# create a fake Redis database
fake_redis = fakeredis.FakeStrictRedis()

In [3]:
from datetime import date, timedelta, datetime

def calculate_age(birth_date):
    today = date.today()
    age = today.year - birth_date.year

    # Check if birthday has not occurred yet this year
    if today.month < birth_date.month or (today.month == birth_date.month and today.day < birth_date.day):
        age -= 1

    return age

In [4]:
import random
def generate_false_true(prob_false=0.8):
    if random.random() < prob_false:
        return False
    else:
        return True

In [5]:
from faker import Faker
from gender_guesser.detector import Detector

# Create an instance of the Faker class
fake = Faker()
# Create an instance of the Detector class
detector = Detector()

# Define the fields for your fake data
fields = ['name', 'birthdate', 'gender', 'location']

# Generate fake data for a specific number of records
num_users = 1000
fake_data = []
for id in range(num_users):

    record = {'id': id, 'name': fake.name(), 'birthdate': fake.date_of_birth(minimum_age=14, maximum_age=90) , 'city': fake.city(), 'trainer': generate_false_true() }
    record['gender'] = detector.get_gender(record['name'].split(' ')[0])
    record['age'] = calculate_age(record['birthdate'])
    # If name is not clear if it is feminine or masculin, then set it to 'Prefer not to say'.
    if record['gender'] != 'male' and record['gender'] != 'female':
        record['gender'] = 'Prefer not to say'
    fake_data.append(record)

# Print the generated fake data
"""
for record in fake_data:
    print(record)
"""

'\nfor record in fake_data:\n    print(record)\n'

In [6]:
users = pd.DataFrame(fake_data)
users.head()

Unnamed: 0,id,name,birthdate,city,trainer,gender,age
0,0,Travis Mccoy,1962-04-15,West Robinville,False,male,61
1,1,Frank Young,1983-05-01,Port Judithport,False,male,40
2,2,William Wagner,1963-11-10,Morrishaven,False,male,59
3,3,Brandon Lee,1959-01-30,New Richardmouth,False,male,64
4,4,Lynn Patterson,1960-03-07,New Cheryl,False,Prefer not to say,63


In [7]:
trainers = users[users['trainer'] == True]
trainers_ids = trainers['id'].values

In [8]:
# Create a custom provider for fitness disciplines
class FitnessProvider:
    def __init__(self, faker):
        self.faker = faker

    def fitness_discipline(self):
        disciplines = [
            'Yoga',
            'Pilates',
            'CrossFit',
            'Zumba',
            'Kickboxing',
            'Spinning',
            'Barre',
            'HIIT',
            'Aerobics',
            'Boxing',
            'Personalized',
            'Body Combat',
            'Body Pump',
            'GAP',
            'Total Body Conditioning',
            'ABS',
            'Stretching',
            'Other'
        ]
        return self.faker.random_element(disciplines)
    def level(self):
        levels = ['begginer', 'intermediate', 'advanced', 'all levels']
        return self.faker.random_element(levels)

# Create an instance of the Faker class
fake = Faker()

# Add the custom provider to the Faker instance
fake.add_provider(FitnessProvider)

In [9]:
def approximate_datetime(dt):
    # Calculate the number of minutes past the hour
    minutes_past_hour = dt.minute + dt.second / 60

    # Determine the rounding factor based on the number of minutes past the hour
    rounding_factor = 30 if minutes_past_hour >= 30 else 0

    # Calculate the rounded datetime
    rounded_dt = dt.replace(minute=0, second=0) + timedelta(minutes=rounding_factor)

    return rounded_dt

In [10]:
possible_durations_h = [0,1,2]
possible_durations_min = [i for i in range(0,60,5)]


In [11]:
import numpy as np
# Generate fake data for a specific number of records
num_classes = 1000
classes = []
mean = 15
var = 3
today = datetime.now()
approx_today =  approximate_datetime(today)
for id in range(num_classes):
    price = np.random.normal(mean, var, 1)[0]
    price = round(price,2)
    random_days = random.randint(1, 365)
    # Generate a random number of 30-minute intervals
    random_intervals = random.randint(0, 48)  # 48 intervals in a day (24 hours * 2 intervals per hour) 
    record = {'id': id, 'category': fake.fitness_discipline(), 'creator': random.choice(trainers_ids) ,
               'level':fake.level(), 'maxUsers': random.randint(1, 50), 'price': price, 'datetime': approx_today+ timedelta(days=random_days, minutes=30 * random_intervals)}
    record['title'] = 'A ' + record['category'] + ' class'
    duration_h = random.choice(possible_durations_h)
    duration = str(duration_h) + ' h '
    if(duration_h != 2):
        duration_min = random.choice(possible_durations_min)
        if(duration_min != 0):
            duration +=  str(duration_min) + ' min'
    record['duration'] = duration


    classes.append(record)

In [12]:
classes_df = pd.DataFrame(classes)
classes_df.head()

Unnamed: 0,id,category,creator,level,maxUsers,price,datetime,title,duration
0,0,ABS,965,begginer,16,11.64,2024-04-23 23:00:00.455218,A ABS class,0 h 40 min
1,1,Kickboxing,89,all levels,10,9.6,2023-11-10 00:30:00.455218,A Kickboxing class,0 h 45 min
2,2,Total Body Conditioning,739,intermediate,42,11.58,2024-01-22 05:30:00.455218,A Total Body Conditioning class,2 h
3,3,Barre,180,all levels,36,17.02,2024-01-16 21:00:00.455218,A Barre class,0 h 5 min
4,4,Body Combat,933,all levels,49,18.94,2024-02-23 11:30:00.455218,A Body Combat class,1 h 45 min


In [13]:
data = []
for user_id in range(num_users):  
    for _ in range(num_classes):  
        course_id = fake.random_int(min=0, max=num_classes-1)  # Generate a random course ID
        is_nan = random.random()
        # Create sparsity
        if is_nan <= 0.95:
            rating = np.nan
        else:
            rating = random.randint(1,5) # Generate a random rating

        data.append({'User ID': user_id, 'Course ID': course_id, 'Rating': rating})

In [14]:
# Create DataFrame with generated data
df = pd.DataFrame(data)
# Create ratings matrix with sparsity
ratings_matrix = df.pivot_table(values='Rating', index='User ID', columns='Course ID')
ratings_matrix

Course ID,0,1,2,3,4,5,6,7,8,9,...,990,991,992,993,994,995,996,997,998,999
User ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,,,,,,,,,5.0,,...,,,,,,,,1.0,,
1,,,,,,,,,,,...,,,,,,,3.0,,,
2,,,,,,,,,,,...,,,,,,,,5.0,,
3,,2.0,,,,,,,,,...,,,,,,,,,,
4,,,,,,,,,,,...,3.0,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,,,,,,,,,,4.0,...,,,,,,,,,,
996,,,,,,,,,,,...,,,,,,,,,,
997,,,,,,,,,,,...,,,,,,,,,,
998,,,,,,,,,,,...,,,4.0,,4.0,,,,,


In [15]:
item_similarity = ratings_matrix.corr()
item_similarity.head(5)

Course ID,0,1,2,3,4,5,6,7,8,9,...,990,991,992,993,994,995,996,997,998,999
Course ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,1.0,0.866025,0.174078,0.774597,,,,,,,...,,,1.0,-0.5,0.960769,,-0.544331,,,
1,0.866025,1.0,,,,-1.0,-0.27735,-1.0,-0.555501,-0.27735,...,,,0.492366,,,,-1.0,,0.970725,
2,0.174078,,1.0,,,,1.0,-0.174078,-0.090909,0.469697,...,-0.755929,,0.333333,0.473684,-0.258199,1.0,,0.419314,-0.188982,0.204837
3,0.774597,,,1.0,1.0,,,-1.0,,-0.981981,...,,,,-1.0,-1.0,1.0,,-0.5,1.0,0.755929
4,,,,1.0,1.0,,0.5,0.188982,0.188982,0.246183,...,,-1.0,-0.944911,,,,0.891042,,,


In [16]:
# Gets a list of rated classes for a user_id
def get_rated_classes(user_id, ratings_matrix):
    return list(ratings_matrix.loc[user_id].dropna().index)
    
# Gets the category of a class by class_id
def get_category(class_id, classes):
    return classes[classes['id'] == class_id].category.iloc[0]

# Gets the title of a class by class_id
def get_title(class_id, classes):
    return classes[classes['id'] == class_id].title.iloc[0]

# Gets the rating a user_id has given to a class_id
def get_rating(user_id, class_id, ratings_matrix):
    return ratings_matrix[class_id][user_id]

# Print rated classes
def print_rated_classes(user_id, rating_matrix, classes):
    for class_id in get_rated_classes(user_id, rating_matrix):
        print("%d %.1f %s " %
          (class_id, get_rating(user_id, class_id, rating_matrix), get_title(class_id, classes)))

In [24]:
def get_item_similarity(ratings_matrix):
    return ratings_matrix.corr()

In [22]:
def get_classes_relevance(user_id, ratings_matrix):
    
    # computes correlation between all combinations of items
    item_similarity_matrix = get_item_similarity(ratings_matrix)
    # Create an empty series
    classes_relevance = pd.Series()

    # Iterate through the classes the user has rated
    for class_rated in get_rated_classes(user_id, ratings_matrix):

        # Obtain the rating given
        rating_given = get_rating(user_id, class_rated, ratings_matrix)

        # Obtain the vector containing the similarities of class_rated
        # with all other class in item_similarity_matrix
        similarities = item_similarity_matrix[class_rated]

        # Multiply this vector by the given rating
        weighted_similarities = similarities * rating_given

        # Append these terms to classes_relevance
        classes_relevance = classes_relevance.append(weighted_similarities)

    # Compute the sum for each class
    classes_relevance = classes_relevance.groupby(classes_relevance.index).sum()

    # Convert to a dataframe
    classes_relevance_df = pd.DataFrame(classes_relevance, columns=['relevance'])
    classes_relevance_df['class_id'] = classes_relevance_df.index
    

    return classes_relevance_df

In [18]:
user_id =  222
classes_relevance = get_classes_relevance(user_id,ratings_matrix,item_similarity)
classes_relevance = classes_relevance.sort_values("relevance", ascending=False).head(10)
classes_relevance

  classes_relevance = pd.Series()
  classes_relevance = classes_relevance.append(weighted_similarities)
  classes_relevance = classes_relevance.append(weighted_similarities)
  classes_relevance = classes_relevance.append(weighted_similarities)
  classes_relevance = classes_relevance.append(weighted_similarities)
  classes_relevance = classes_relevance.append(weighted_similarities)
  classes_relevance = classes_relevance.append(weighted_similarities)
  classes_relevance = classes_relevance.append(weighted_similarities)
  classes_relevance = classes_relevance.append(weighted_similarities)
  classes_relevance = classes_relevance.append(weighted_similarities)
  classes_relevance = classes_relevance.append(weighted_similarities)
  classes_relevance = classes_relevance.append(weighted_similarities)
  classes_relevance = classes_relevance.append(weighted_similarities)
  classes_relevance = classes_relevance.append(weighted_similarities)
  classes_relevance = classes_relevance.append(weighted_

Unnamed: 0,relevance,class_id
557,48.587267,557
590,47.361568,590
616,44.717647,616
687,43.918167,687
674,43.845632,674
80,43.488018,80
387,42.421487,387
327,41.992201,327
398,41.467595,398
512,40.449866,512


Now we need to remove already rated classes

In [19]:
def get_recommended_classes(user_id, ratings_matrix, classes_df):
    classes_relevance = get_classes_relevance(user_id, ratings_matrix)
    classes_relevance = classes_relevance.set_index(classes_relevance["class_id"].to_numpy())
    classes_relevance = classes_relevance.sort_values("relevance", ascending=False)
    rated_classes = get_rated_classes(user_id, ratings_matrix)
    recommended_classes = classes_relevance.drop(rated_classes)


    recommended_classes['class_id'] = recommended_classes['class_id'].astype(int)
    classes_df['id'] = classes_df['id'].astype(int)

    recommended_classes = pd.merge(recommended_classes, classes_df, left_on='class_id', right_on='id', how='left')

    recommended_classes =  recommended_classes.drop(['id'], axis = 1)
    return recommended_classes

In [25]:
get_recommended_classes(user_id, ratings_matrix, classes_df)

  classes_relevance = pd.Series()
  classes_relevance = classes_relevance.append(weighted_similarities)
  classes_relevance = classes_relevance.append(weighted_similarities)
  classes_relevance = classes_relevance.append(weighted_similarities)
  classes_relevance = classes_relevance.append(weighted_similarities)
  classes_relevance = classes_relevance.append(weighted_similarities)
  classes_relevance = classes_relevance.append(weighted_similarities)
  classes_relevance = classes_relevance.append(weighted_similarities)
  classes_relevance = classes_relevance.append(weighted_similarities)
  classes_relevance = classes_relevance.append(weighted_similarities)
  classes_relevance = classes_relevance.append(weighted_similarities)
  classes_relevance = classes_relevance.append(weighted_similarities)
  classes_relevance = classes_relevance.append(weighted_similarities)
  classes_relevance = classes_relevance.append(weighted_similarities)
  classes_relevance = classes_relevance.append(weighted_

Unnamed: 0,relevance,class_id,category,creator,level,maxUsers,price,datetime,title,duration
0,43.918167,687,Body Combat,684,advanced,29,14.81,2023-08-30 10:00:00.455218,A Body Combat class,1 h 5 min
1,43.845632,674,Total Body Conditioning,296,all levels,41,13.17,2024-05-22 04:00:00.455218,A Total Body Conditioning class,2 h
2,42.421487,387,GAP,332,begginer,19,17.38,2023-12-29 07:00:00.455218,A GAP class,0 h 30 min
3,41.467595,398,Barre,715,begginer,33,15.35,2023-10-04 05:00:00.455218,A Barre class,0 h 10 min
4,40.449866,512,Aerobics,355,begginer,39,11.02,2023-10-02 03:30:00.455218,A Aerobics class,2 h
...,...,...,...,...,...,...,...,...,...,...
944,-31.699722,666,Total Body Conditioning,89,begginer,32,9.61,2023-07-20 12:00:00.455218,A Total Body Conditioning class,0 h 55 min
945,-31.708224,967,Pilates,625,advanced,47,17.82,2024-03-19 20:30:00.455218,A Pilates class,2 h
946,-31.752181,476,Total Body Conditioning,23,all levels,33,19.38,2023-08-07 02:30:00.455218,A Total Body Conditioning class,0 h 10 min
947,-35.630784,611,Body Pump,587,all levels,24,17.20,2023-09-23 20:30:00.455218,A Body Pump class,0 h 35 min


In [26]:
def get_classes_from_trainer(trainer_id, classes_df):
    return classes_df[classes_df['creator'] == trainer_id]

def get_earliest_class(classes_df):
    #earliest_entry_index = classes_df['datetime'].astype(int).idxmin()
    earliest_entry_index = pd.to_datetime(classes_df['datetime']).idxmin()
    return classes_df.loc[earliest_entry_index]

def get_earliest_classes_from_trainer_and_category(trainer_id, classes_df, category):
    trainer_classes = get_classes_from_trainer(trainer_id, classes_df)
    category_matching_classes = trainer_classes[trainer_classes['category'] == category]
    return get_earliest_class(category_matching_classes)

    

def get_similar_classes(classes, recommendations):
    for index, entry in recommendations.iterrows():
        print("INDEX ", entry['class_id'])
        trainer_id = entry['creator']
        category = entry['category']
        recommended_class = get_earliest_classes_from_trainer_and_category(trainer_id, classes, category)
        print(recommended_class)
        
        if index > 10:
            return None


In [27]:
def from_csv_to_df(filename):
    #path = 'recommendation system\\fake_data\\' + filename
    path = 'fake_data\\' + filename
    return pd.read_csv(path + '.csv')

In [28]:
users = from_csv_to_df('users')
classes = from_csv_to_df('classes')
ratings_matrix = from_csv_to_df('ratings_matrix')
user_id = 100
recommendations = get_recommended_classes(user_id, ratings_matrix, classes)
print(" RECOMMENDATIONS ")
print(recommendations.head(10)['class_id'])
print(recommendations[recommendations['class_id']== 911])
print("--------------------------------------")
get_similar_classes(classes, recommendations)

  classes_relevance = pd.Series()
  classes_relevance = classes_relevance.append(weighted_similarities)
  classes_relevance = classes_relevance.append(weighted_similarities)
  classes_relevance = classes_relevance.append(weighted_similarities)
  classes_relevance = classes_relevance.append(weighted_similarities)
  classes_relevance = classes_relevance.append(weighted_similarities)
  classes_relevance = classes_relevance.append(weighted_similarities)
  classes_relevance = classes_relevance.append(weighted_similarities)
  classes_relevance = classes_relevance.append(weighted_similarities)
  classes_relevance = classes_relevance.append(weighted_similarities)
  classes_relevance = classes_relevance.append(weighted_similarities)
  classes_relevance = classes_relevance.append(weighted_similarities)
  classes_relevance = classes_relevance.append(weighted_similarities)
  classes_relevance = classes_relevance.append(weighted_similarities)
  classes_relevance = classes_relevance.append(weighted_

 RECOMMENDATIONS 
0    662
1    120
2    392
3    789
4    614
5    173
6    999
7    595
8    712
9    193
Name: class_id, dtype: int32
    relevance  class_id category  creator     level  maxUsers  price  \
11  23.894569       911    Zumba      821  begginer        39  12.61   

                      datetime          title    duration  
11  2023-08-14 06:30:00.242262  A Zumba class  0 h 15 min  
--------------------------------------
INDEX  662
id                                 662
category                   Body Combat
creator                            854
level                         advanced
maxUsers                            32
price                            13.78
datetime    2023-07-18 14:00:00.242262
title              A Body Combat class
duration                    0 h 55 min
Name: 662, dtype: object
INDEX  120
id                                 120
category                          Yoga
creator                            384
level                       all levels
maxUs