In [60]:
import pandas as pd
import numpy as np
from keras.layers import Input, Embedding, Flatten, Dot
from keras.models import Model
import redis
import fakeredis

In [61]:
# create a fake Redis database
fake_redis = fakeredis.FakeStrictRedis()

In [62]:
from datetime import date, timedelta, datetime

def calculate_age(birth_date):
    today = date.today()
    age = today.year - birth_date.year

    # Check if birthday has not occurred yet this year
    if today.month < birth_date.month or (today.month == birth_date.month and today.day < birth_date.day):
        age -= 1

    return age

In [63]:
import random
def generate_false_true(prob_false=0.8):
    if random.random() < prob_false:
        return False
    else:
        return True

In [64]:
from faker import Faker
from gender_guesser.detector import Detector

# Create an instance of the Faker class
fake = Faker()
# Create an instance of the Detector class
detector = Detector()

# Define the fields for your fake data
fields = ['name', 'birthdate', 'gender', 'location']

# Generate fake data for a specific number of records
num_users = 10
fake_data = []
for id in range(num_users):

    record = {'id': id, 'name': fake.name(), 'birthdate': fake.date_of_birth(minimum_age=14, maximum_age=90) , 'city': fake.city(), 'trainer': generate_false_true() }
    record['gender'] = detector.get_gender(record['name'].split(' ')[0])
    record['age'] = calculate_age(record['birthdate'])
    # If name is not clear if it is feminine or masculin, then set it to 'Prefer not to say'.
    if record['gender'] != 'male' and record['gender'] != 'female':
        record['gender'] = 'Prefer not to say'
    fake_data.append(record)

# Print the generated fake data
for record in fake_data:
    print(record)

{'id': 0, 'name': 'Christopher Mathews', 'birthdate': datetime.date(2001, 1, 10), 'city': 'Josephberg', 'trainer': False, 'gender': 'male', 'age': 22}
{'id': 1, 'name': 'Tiffany Kline', 'birthdate': datetime.date(1985, 9, 27), 'city': 'Ritterburgh', 'trainer': False, 'gender': 'female', 'age': 37}
{'id': 2, 'name': 'John Henry', 'birthdate': datetime.date(1940, 7, 15), 'city': 'South Johnburgh', 'trainer': False, 'gender': 'male', 'age': 82}
{'id': 3, 'name': 'Harold Gomez', 'birthdate': datetime.date(1952, 3, 20), 'city': 'West Toni', 'trainer': False, 'gender': 'male', 'age': 71}
{'id': 4, 'name': 'Jennifer Copeland', 'birthdate': datetime.date(1971, 1, 21), 'city': 'Lake Marymouth', 'trainer': False, 'gender': 'female', 'age': 52}
{'id': 5, 'name': 'Jessica Davis', 'birthdate': datetime.date(1985, 2, 4), 'city': 'Hannahburgh', 'trainer': False, 'gender': 'female', 'age': 38}
{'id': 6, 'name': 'Jessica Collins', 'birthdate': datetime.date(1983, 3, 30), 'city': 'Port Davidtown', 'trai

In [65]:
users = pd.DataFrame(fake_data)
users

Unnamed: 0,id,name,birthdate,city,trainer,gender,age
0,0,Christopher Mathews,2001-01-10,Josephberg,False,male,22
1,1,Tiffany Kline,1985-09-27,Ritterburgh,False,female,37
2,2,John Henry,1940-07-15,South Johnburgh,False,male,82
3,3,Harold Gomez,1952-03-20,West Toni,False,male,71
4,4,Jennifer Copeland,1971-01-21,Lake Marymouth,False,female,52
5,5,Jessica Davis,1985-02-04,Hannahburgh,False,female,38
6,6,Jessica Collins,1983-03-30,Port Davidtown,False,female,40
7,7,Amy Hanson,1988-04-28,Port Ronaldbury,False,female,35
8,8,Allison Robinson,1988-04-03,Garrisonstad,True,female,35
9,9,Timothy Leon,1976-04-28,East Matthew,False,male,47


In [66]:
trainers = users[users['trainer'] == True]
trainers_ids = trainers['id'].values

In [67]:
# Create a custom provider for fitness disciplines
class FitnessProvider:
    def __init__(self, faker):
        self.faker = faker

    def fitness_discipline(self):
        disciplines = [
            'Yoga',
            'Pilates',
            'CrossFit',
            'Zumba',
            'Kickboxing',
            'Spinning',
            'Barre',
            'HIIT',
            'Aerobics',
            'Boxing',
            'Personalized',
            'Body Combat',
            'Body Pump',
            'GAP',
            'Total Body Conditioning',
            'ABS',
            'Stretching'
        ]
        return self.faker.random_element(disciplines)
    def level(self):
        levels = ['begginer', 'intermediate', 'advanced', 'all levels']
        return self.faker.random_element(levels)

# Create an instance of the Faker class
fake = Faker()

# Add the custom provider to the Faker instance
fake.add_provider(FitnessProvider)

In [68]:
def approximate_datetime(dt):
    # Calculate the number of minutes past the hour
    minutes_past_hour = dt.minute + dt.second / 60

    # Determine the rounding factor based on the number of minutes past the hour
    rounding_factor = 30 if minutes_past_hour >= 30 else 0

    # Calculate the rounded datetime
    rounded_dt = dt.replace(minute=0, second=0) + timedelta(minutes=rounding_factor)

    return rounded_dt

In [69]:
import numpy as np
# Generate fake data for a specific number of records
num_classes = 10
courses = []
mean = 15
var = 3
today = datetime.now()
approx_today =  approximate_datetime(today)
for id in range(num_classes):
    price = np.random.normal(mean, var, 1)[0]
    price = round(price,2)
    random_days = random.randint(1, 365)
    # Generate a random number of 30-minute intervals
    random_intervals = random.randint(0, 48)  # 48 intervals in a day (24 hours * 2 intervals per hour) 
    record = {'id': id, 'title': fake.fitness_discipline(), 'creator': random.choice(trainers_ids) ,
               'level':fake.level(), 'maxUsers': random.randint(1, 50), 'price': price, 'datetime': approx_today+ timedelta(days=random_days, minutes=30 * random_intervals) }
    print(record)
    print(record['datetime'].time())
    courses.append(record)

{'id': 0, 'title': 'CrossFit', 'creator': 8, 'level': 'all levels', 'maxUsers': 22, 'price': 16.72, 'datetime': datetime.datetime(2023, 5, 30, 22, 0, 0, 261276)}
22:00:00.261276
{'id': 1, 'title': 'Kickboxing', 'creator': 8, 'level': 'intermediate', 'maxUsers': 12, 'price': 19.14, 'datetime': datetime.datetime(2023, 12, 30, 12, 0, 0, 261276)}
12:00:00.261276
{'id': 2, 'title': 'CrossFit', 'creator': 8, 'level': 'all levels', 'maxUsers': 10, 'price': 10.43, 'datetime': datetime.datetime(2023, 9, 29, 18, 30, 0, 261276)}
18:30:00.261276
{'id': 3, 'title': 'Zumba', 'creator': 8, 'level': 'all levels', 'maxUsers': 2, 'price': 16.39, 'datetime': datetime.datetime(2023, 6, 21, 11, 0, 0, 261276)}
11:00:00.261276
{'id': 4, 'title': 'Spinning', 'creator': 8, 'level': 'begginer', 'maxUsers': 12, 'price': 11.02, 'datetime': datetime.datetime(2023, 8, 24, 13, 0, 0, 261276)}
13:00:00.261276
{'id': 5, 'title': 'Zumba', 'creator': 8, 'level': 'begginer', 'maxUsers': 46, 'price': 20.95, 'datetime': dat

In [74]:
data = []
for user_id in range(num_users):  
    for _ in range(num_classes):  
        course_id = fake.random_int(min=0, max=num_classes-1)  # Generate a random course ID
        is_nan = random.random()
        # Create sparsity
        if is_nan <= 0.95:
            rating = np.nan
        else:
            rating = random.randint(1,5) # Generate a random rating

        data.append({'User ID': user_id, 'Course ID': course_id, 'Rating': rating})

In [75]:
# Create DataFrame with generated data
df = pd.DataFrame(data)
# Create ratings matrix with sparsity
ratings_matrix = df.pivot_table(values='Rating', index='User ID', columns='Course ID')
ratings_matrix

Course ID,0,1,2,3,4,5,6,7,8,9
User ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
0,,4.0,,,,,,5.0,1.0,
1,,,1.0,5.0,,,,,,
2,3.0,2.0,,,,1.0,,3.0,4.0,
3,4.0,1.0,,,2.0,,,4.0,3.0,1.0
4,,,2.0,,3.0,,,2.0,,
5,,,,,,,,,,1.0
6,,4.0,,,1.0,,,1.0,,
7,,,,,4.0,,3.0,,,
8,1.0,,,1.0,,,,2.0,,
