In [4]:
import pandas as pd
import numpy as np
from keras.layers import Input, Embedding, Flatten, Dot
from keras.models import Model
import redis
import fakeredis

In [5]:
# create a fake Redis database
fake_redis = fakeredis.FakeStrictRedis()

In [6]:
from datetime import date, timedelta, datetime

def calculate_age(birth_date):
    today = date.today()
    age = today.year - birth_date.year

    # Check if birthday has not occurred yet this year
    if today.month < birth_date.month or (today.month == birth_date.month and today.day < birth_date.day):
        age -= 1

    return age

In [7]:
import random
def generate_false_true(prob_false=0.8):
    if random.random() < prob_false:
        return False
    else:
        return True

In [8]:
from faker import Faker
from gender_guesser.detector import Detector

# Create an instance of the Faker class
fake = Faker()
# Create an instance of the Detector class
detector = Detector()

# Define the fields for your fake data
fields = ['name', 'birthdate', 'gender', 'location']

# Generate fake data for a specific number of records
num_users = 1000
fake_data = []
for id in range(num_users):

    record = {'id': id, 'name': fake.name(), 'birthdate': fake.date_of_birth(minimum_age=14, maximum_age=90) , 'city': fake.city(), 'trainer': generate_false_true() }
    record['gender'] = detector.get_gender(record['name'].split(' ')[0])
    record['age'] = calculate_age(record['birthdate'])
    # If name is not clear if it is feminine or masculin, then set it to 'Prefer not to say'.
    if record['gender'] != 'male' and record['gender'] != 'female':
        record['gender'] = 'Prefer not to say'
    fake_data.append(record)

# Print the generated fake data
"""
for record in fake_data:
    print(record)
"""

'\nfor record in fake_data:\n    print(record)\n'

In [9]:
users = pd.DataFrame(fake_data)
users.head()

Unnamed: 0,id,name,birthdate,city,trainer,gender,age
0,0,Scott Graham,1947-03-07,Jefferyview,False,male,76
1,1,Nicholas Velez,1992-11-07,Jameston,False,male,30
2,2,Jessica James,1947-01-18,Port Kaitlyn,False,female,76
3,3,Tamara Garrett,1999-07-19,North Bethany,False,female,23
4,4,Valerie Berry,2001-05-13,Port Rickey,True,female,22


In [10]:
trainers = users[users['trainer'] == True]
trainers_ids = trainers['id'].values

In [11]:
# Create a custom provider for fitness disciplines
class FitnessProvider:
    def __init__(self, faker):
        self.faker = faker

    def fitness_discipline(self):
        disciplines = [
            'Yoga',
            'Pilates',
            'CrossFit',
            'Zumba',
            'Kickboxing',
            'Spinning',
            'Barre',
            'HIIT',
            'Aerobics',
            'Boxing',
            'Personalized',
            'Body Combat',
            'Body Pump',
            'GAP',
            'Total Body Conditioning',
            'ABS',
            'Stretching'
        ]
        return self.faker.random_element(disciplines)
    def level(self):
        levels = ['begginer', 'intermediate', 'advanced', 'all levels']
        return self.faker.random_element(levels)

# Create an instance of the Faker class
fake = Faker()

# Add the custom provider to the Faker instance
fake.add_provider(FitnessProvider)

In [12]:
def approximate_datetime(dt):
    # Calculate the number of minutes past the hour
    minutes_past_hour = dt.minute + dt.second / 60

    # Determine the rounding factor based on the number of minutes past the hour
    rounding_factor = 30 if minutes_past_hour >= 30 else 0

    # Calculate the rounded datetime
    rounded_dt = dt.replace(minute=0, second=0) + timedelta(minutes=rounding_factor)

    return rounded_dt

In [13]:
import numpy as np
# Generate fake data for a specific number of records
num_classes = 1000
courses = []
mean = 15
var = 3
today = datetime.now()
approx_today =  approximate_datetime(today)
for id in range(num_classes):
    price = np.random.normal(mean, var, 1)[0]
    price = round(price,2)
    random_days = random.randint(1, 365)
    # Generate a random number of 30-minute intervals
    random_intervals = random.randint(0, 48)  # 48 intervals in a day (24 hours * 2 intervals per hour) 
    record = {'id': id, 'title': fake.fitness_discipline(), 'creator': random.choice(trainers_ids) ,
               'level':fake.level(), 'maxUsers': random.randint(1, 50), 'price': price, 'datetime': approx_today+ timedelta(days=random_days, minutes=30 * random_intervals) }

    courses.append(record)

In [14]:
data = []
for user_id in range(num_users):  
    for _ in range(num_classes):  
        course_id = fake.random_int(min=0, max=num_classes-1)  # Generate a random course ID
        is_nan = random.random()
        # Create sparsity
        if is_nan <= 0.95:
            rating = np.nan
        else:
            rating = random.randint(1,5) # Generate a random rating

        data.append({'User ID': user_id, 'Course ID': course_id, 'Rating': rating})

In [15]:
# Create DataFrame with generated data
df = pd.DataFrame(data)
# Create ratings matrix with sparsity
ratings_matrix = df.pivot_table(values='Rating', index='User ID', columns='Course ID')
ratings_matrix

Course ID,0,1,2,3,4,5,6,7,8,9,...,990,991,992,993,994,995,996,997,998,999
User ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,,,,,,,,,,,...,,,,,,,1.0,,1.0,
1,,,,,,,,,,,...,,,,,,,,,,
2,,,,,,,,,,,...,,,,,,,1.0,,,
3,5.0,,,,3.0,,,,,,...,,,,2.0,,,,,,
4,,3.0,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,,,,,,,,,,2.0,...,,,,,,,,,,
996,,,,,,,,,,1.0,...,,,,,2.0,,,,,
997,,,,,,,,,,,...,,,,,,,,,,
998,,,,,,,,3.0,,,...,,,,,,,,,3.0,


In [16]:
item_similarity = ratings_matrix.corr()
item_similarity.head(5)

Course ID,0,1,2,3,4,5,6,7,8,9,...,990,991,992,993,994,995,996,997,998,999
Course ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,1.0,,,,-0.944911,0.169031,,-0.57735,0.654654,1.0,...,,1.0,,,,0.430331,-1.0,1.0,,-0.6882472
1,,1.0,-0.5,0.642857,0.0,0.846802,-0.662266,1.0,-0.13484,,...,-0.09759,,,0.5,,0.866025,,1.0,0.944911,-9.930137e-17
2,,-0.5,1.0,0.0,0.0,-0.300123,-0.507093,1.0,0.5,0.67082,...,-1.0,-1.0,-0.174078,,0.632456,-0.522233,-0.866025,,,
3,,0.642857,0.0,1.0,,0.944911,,,,,...,-1.0,-1.0,,,0.5,-0.720577,0.866025,,-0.720577,
4,-0.944911,0.0,0.0,,1.0,0.720577,-0.842701,-0.29277,,,...,0.970725,,-0.693375,,,0.944911,0.866025,,,


In [17]:
# Gets a list of rated classes for a user_id
def get_rated_classes(user_id, ratings_matrix):
    return list(ratings_matrix.loc[user_id].dropna().index)
    
# Gets the title of a class by class_id
def get_title(class_id, classes):
    return classes[classes['id'] == class_id].title.iloc[0]

# Gets the rating a user_id has given to a class_id
def get_rating(user_id, class_id, ratings_matrix):
    return ratings_matrix[class_id][user_id]

# Print rated classes
def print_rated_classes(user_id, rating_matrix, classes):
    for class_id in get_rated_classes(user_id, rating_matrix):
        print("%d %.1f %s " %
          (class_id, get_rating(user_id, class_id, rating_matrix), get_title(class_id, classes)))

In [22]:
def get_classes_relevance(user_id, ratings_matrix, item_similarity_matrix):

    # Create an empty series
    classes_relevance = pd.Series()

    # Iterate through the classes the user has rated
    for class_rated in get_rated_classes(user_id, ratings_matrix):

        # Obtain the rating given
        rating_given = get_rating(user_id, class_rated, ratings_matrix)

        # Obtain the vector containing the similarities of class_rated
        # with all other class in item_similarity_matrix
        similarities = item_similarity_matrix[class_rated]

        # Multiply this vector by the given rating
        weighted_similarities = similarities * rating_given

        # Append these terms to classes_relevance
        classes_relevance = classes_relevance.append(weighted_similarities)

    # Compute the sum for each class
    classes_relevance = classes_relevance.groupby(classes_relevance.index).sum()

    # Convert to a dataframe
    classes_relevance_df = pd.DataFrame(classes_relevance, columns=['relevance'])
    classes_relevance_df['class_id'] = classes_relevance_df.index

    return classes_relevance_df

In [24]:
user_id =  222
classes_relevance = get_classes_relevance(user_id,ratings_matrix,item_similarity)
classes_relevance = classes_relevance.sort_values("relevance", ascending=False).head(10)
classes_relevance

  classes_relevance = pd.Series()
  classes_relevance = classes_relevance.append(weighted_similarities)
  classes_relevance = classes_relevance.append(weighted_similarities)
  classes_relevance = classes_relevance.append(weighted_similarities)
  classes_relevance = classes_relevance.append(weighted_similarities)
  classes_relevance = classes_relevance.append(weighted_similarities)
  classes_relevance = classes_relevance.append(weighted_similarities)
  classes_relevance = classes_relevance.append(weighted_similarities)
  classes_relevance = classes_relevance.append(weighted_similarities)
  classes_relevance = classes_relevance.append(weighted_similarities)
  classes_relevance = classes_relevance.append(weighted_similarities)
  classes_relevance = classes_relevance.append(weighted_similarities)
  classes_relevance = classes_relevance.append(weighted_similarities)
  classes_relevance = classes_relevance.append(weighted_similarities)
  classes_relevance = classes_relevance.append(weighted_

Unnamed: 0,relevance,class_id
461,40.675166,461
740,37.426712,740
639,33.781087,639
295,33.612583,295
910,32.967418,910
92,32.710444,92
499,31.442015,499
209,31.375061,209
498,31.348262,498
293,31.094023,293
