# Syntetic Data Creation

## Imports

In [1]:
from langchain.prompts import FewShotPromptTemplate, PromptTemplate
from langchain_experimental.tabular_synthetic_data.openai import (
    OPENAI_TEMPLATE,
    create_openai_data_generator,
)
from langchain_experimental.tabular_synthetic_data.prompts import (
    SYNTHETIC_FEW_SHOT_PREFIX,
    SYNTHETIC_FEW_SHOT_SUFFIX,
)
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain.schema import StrOutputParser
from langchain_core.documents import Document
from langchain_pinecone import PineconeVectorStore
from pinecone import Index, Pinecone

import pickle
from pydantic import BaseModel
import numpy as np
import sqlite3
from dotenv import load_dotenv
import os

## Database

In [10]:
# Create the database
conn = sqlite3.connect('BeAlive.db')

In [11]:
c = conn.cursor()

# Connect to the database
# Create the users table
c.execute('''CREATE TABLE users
             (user_id INTEGER PRIMARY KEY,
              username TEXT UNIQUE NOT NULL CHECK(LENGTH(username) <= 10),
              password TEXT NOT NULL CHECK(LENGTH(password) <= 10),
              name TEXT,
              birthday DATE,
              email TEXT UNIQUE NOT NULL,
              phone_number TEXT,
              location TEXT,
              interests TEXT,
              cumulative_rating REAL DEFAULT 3)''')

# phone number as text to include + in country code
# TEXT type handles all characters

# Create the activities table
c.execute('''CREATE TABLE activities
             (activity_id INTEGER,
              host_id INTEGER NOT NULL,
              activity_name TEXT NOT NULL,
              activity_description TEXT NOT NULL,
              location TEXT NOT NULL,
              number_participants INTEGER Default 0,
              max_participants INTEGER,
              city TEXT NOT NULL,
              date_begin DATETIME NOT NULL,
              date_finish DATETIME NOT NULL,
              activity_state TEXT CHECK (activity_state IN ('open', 'full', 'finished')) Default 'open',
              cumulative_rating REAL DEFAULT 3,
              pinecone_id INTEGER,
              PRIMARY KEY (activity_id),
              FOREIGN KEY (host_id) REFERENCES users(user_id) ON DELETE CASCADE)''')

# Create the reservations table
c.execute('''CREATE TABLE reservations
             (activity_id INTEGER,
              host_id INTEGER NOT NULL,
              user_id INTEGER NOT NULL,
              message TEXT,
              state TEXT CHECK (state IN ('confirmed', 'pending')) Default 'pending',
              PRIMARY KEY (activity_id, host_id, user_id),
              FOREIGN KEY (activity_id) REFERENCES activities(activity_id) ON DELETE CASCADE,
              FOREIGN KEY (host_id) REFERENCES activities(host_id) ON DELETE CASCADE,
              FOREIGN KEY (user_id) REFERENCES users(user_id) ON DELETE CASCADE)''')

# Create the review_user table
c.execute('''CREATE TABLE review_user
             (host_id INTEGER,
              activity_id INTEGER,
              user_id INTEGER,
              review TEXT,
              rating INTEGER CHECK (rating >=1 AND rating <= 5) Default 4,
              PRIMARY KEY (host_id, activity_id, user_id),
              FOREIGN KEY (host_id) REFERENCES reservations(host_id),
              FOREIGN KEY (activity_id) REFERENCES reservations(activity_id) ON DELETE CASCADE,
              FOREIGN KEY (user_id) REFERENCES reservations(user_id) ON DELETE CASCADE)''')

# Create the review_activity table
c.execute('''CREATE TABLE review_activity
             (activity_id INTEGER,
              user_id INTEGER,
              review TEXT,
              rating INTEGER CHECK (rating >=1 AND rating <= 5) Default 4,
              PRIMARY KEY (activity_id, user_id),
              FOREIGN KEY (activity_id) REFERENCES reservations(activity_id) ON DELETE CASCADE,
              FOREIGN KEY (user_id) REFERENCES reservations(user_id) ON DELETE CASCADE)''')


conn.commit()
conn.close()

## Syntetic Data

### API

In [2]:
# Load the OpenAI API key from the .env file
load_dotenv()
api_key = os.getenv("OPENAI_API_KEY")
if api_key is None:
    raise ValueError("The OPENAI_API_KEY environment variable is not set.")

#if "OPENAI_API_KEY" not in os.environ:
#    os.environ["OPENAI_API_KEY"] = api_key

### Pydantic Class for Tables

In [3]:
from pydantic import BaseModel, constr
from typing import Literal, Optional
from datetime import date, datetime

# User Model
class User(BaseModel):
    """
    Represents the input model for creating a new user.
    Attributes:
    ----------
        user_id : int
            The id of the new user.
        username : str
            The username of the user.
        password : str
            The password of the user.
        name : str
            The name of the user.
        birthday : date
            The birthday of the user.
        email : str
            The email of the user.
        phone_number : str
            The phone nmber of the user.
        location : str
            The location of the user.
        interests : str
            The interests the user has.
        cumulative_rating : float
            The user's cumulative rating.
    """
    user_id: int
    username: constr(max_length=10)
    password: constr(max_length=10)
    name: str
    birthday: date
    email: str
    phone_number: str
    location: str
    interests: str
    cumulative_rating: float = 0.0

# Activity Model
class Activity(BaseModel):
    """
    Represents the input model for creating a new activity.
    Attributes:
    ----------
        activity_id : int
            The activity id of the new activity.
        host_id : int
            The id of the activity host.
        activity_name : str
            The name of the activity.
        activity_description : str
            The description of the activity.
        location : str
            The location of the activity.
        number_participants : int
            The number of participants an activity has.
        max_participants : int
            The maximum number of participants an activity can have.
        city : str
            The city where the activity will happen.
        date_begin : datetime
            The date when the activity starts.
        date_finish : datetime
            The date when the activity ends.            
        activity_state : Literal['open', 'full', 'finished']
            The state of the activity (open, full or finished).
        cumulative_rating : float
            The user's cumulative rating.
        pinecone_id : int 
            An ID used for referencing in Pinecone.
    """
    activity_id: int
    host_id: int
    activity_name: str
    activity_description: str
    location: str
    number_participants: int
    max_participants: int
    city: str
    date_begin: datetime
    date_finish: datetime
    activity_state: Literal['open', 'full', 'finished']
    cumulative_rating: float = 0.0
    pinecone_id: Optional[int]

# Reservation Model
class Reservation(BaseModel):
    """
    Represents the input model for creating a reservation.
    Attributes:
    ----------
        activity_id : int
            The ID of the activity to make a reservation for.
        host_id : int
            The ID of the host of the activity.
        user_id : int
            The ID of the user making the reservation.
        message : str
            A message from the user to the host.
        state : Literal['confirmed', 'pending']
            The of the reservation (confirmed or pending).
    """
    activity_id: int
    host_id: int
    user_id: int
    message: str
    state: Literal['confirmed', 'pending']

# ReviewUser Model
class ReviewUser(BaseModel):
    """
    Represents the input model for reviewing a user.
    Attributes:
    ----------
        host_id : int
            The ID of the host making the review.
        activity_id : int
            The ID of the activity.
        user_id : int
            The ID of the user that will receive a review.
        review : str
            Feedback from the host.
        rating : int
            The rating the host gives to the user.
    """
    host_id: int
    activity_id: int
    user_id: int
    review: str
    rating: int

# ReviewActivity Model
class ReviewActivity(BaseModel):
    """
    Represents the input model for reviewing an activity.
    Attributes:
    ----------
        activity_id : int
            The ID of the activity.
        user_id : int
            The ID of the user that will review an activity.
        review : str
            Feedback from the user.
        rating : int
            The rating the user gives to the activity.
    """
    activity_id: int
    user_id: int
    review: str
    rating: int

### Syntectic data using Langchain and OpenAI

#### Examples

In [4]:
users_examples = [
    {
        "example": """User ID: 1, Username: jdoe, Password: jdoe, Name: John Doe, 
        Birthday: 1990-05-15, Email: jdoe@example.com, Phone Number: +1234567890, 
        Location: New York, Interests: hiking,reading, Cumulative Rating: 4.5"""
    },
    {
        "example": """User ID: 2, Username: asmith, Password: asmith, Name: Alice Smith, 
        Birthday: 1985-08-20, Email: asmith@example.com, Phone Number: +1987654321, 
        Location: San Francisco, Interests: cooking,traveling, Cumulative Rating: 4.7"""
    },
    {
        "example": """User ID: 3, Username: bjones, Password: bjones, Name: Bob Jones, 
        Birthday: 1992-02-10, Email: bjones@example.com, Phone Number: +1112223333, 
        Location: Chicago, Interests: gaming,cycling, Cumulative Rating: 4.2"""
    },
]

activities_examples = [
    {
        "example": """Activity ID: 1, Host ID: 1, Activity Name: Yoga Class, 
        Activity Description: A relaxing yoga session in the park, for people that like sunrises., 
        Location: Central Park, Number Participants: 10, Max Participants: 20, 
        City: New York, Date Begin: 2024-12-01 08:00:00, Date Finish: 2024-12-01 09:30:00, 
        Activity State: open, Cumulative Rating: 4.8, Pinecone ID: 101"""
    },
    {
        "example": """Activity ID: 2, Host ID: 2, Activity Name: Cooking Workshop, 
        Activity Description: Learn to make gourmet meals, for people that likes to cook and socialize., 
        Location: Culinary Arts Center, Number Participants: 15, Max Participants: 15, 
        City: San Francisco, Date Begin: 2024-12-05 15:00:00, Date Finish: 2024-12-05 18:00:00, 
        Activity State: full, Cumulative Rating: 4.9, Pinecone ID: 102"""
    },
    {
        "example": """Activity ID: 3, Host ID: 3, Activity Name: Cycling Tour, 
        Activity Description: Explore the city on two wheels, for people between 20-40 years old., 
        Location: Millennium Park, Number Participants: 8, Max Participants: 10, 
        City: Chicago, Date Begin: 2021-12-10 09:00:00, Date Finish: 2021-12-10 12:00:00, 
        Activity State: finished, Cumulative Rating: 4.5, Pinecone ID: 103"""
    },
]

reservations_examples = [
    {
        "example": """Activity ID: 1, Host ID: 1, User ID: 2, 
        Message: Looking forward to this yoga session., State: confirmed"""
    },
    {
        "example": """Activity ID: 2, Host ID: 2, User ID: 3, 
        Message: Excited to learn new recipes!, State: confirmed"""
    },
    {
        "example": """Activity ID: 3, Host ID: 3, User ID: 1, 
        Message: Can’t wait for the cycling tour., State: pending"""
    },
]

review_user_examples = [
    {
        "example": """Host ID: 1, Activity ID: 1, User ID: 2, 
        Review: Very bad user, i wont invite him againg, Rating: 1"""
    },
    {
        "example": """Host ID: 2, Activity ID: 2, User ID: 3, 
        Review: He dont talked much., Rating: 3"""
    },
    {
        "example": """Host ID: 3, Activity ID: 3, User ID: 1, 
        Review: Bob was very friendly., Rating: 5"""
    },
]

review_activity_examples = [
    {
        "example": """Activity ID: 1, User ID: 2, 
        Review: The yoga class was very relaxing and enjoyable., Rating: 5"""
    },
    {
        "example": """Activity ID: 2, User ID: 3, 
        Review: Was good, but can improve., Rating: 3"""
    },
    {
        "example": """Activity ID: 3, User ID: 1, 
        Review: I wont repeat that., Rating: 1"""
    },
]

#### Templates

In [5]:
OPENAI_TEMPLATE = PromptTemplate(input_variables=["example"], template="{example}")

prompt_template_users = FewShotPromptTemplate(
    prefix=SYNTHETIC_FEW_SHOT_PREFIX,
    examples=users_examples,
    suffix=SYNTHETIC_FEW_SHOT_SUFFIX,
    input_variables=["subject", "extra"],
    example_prompt=OPENAI_TEMPLATE,
)

prompt_template_activities = FewShotPromptTemplate(
    prefix=SYNTHETIC_FEW_SHOT_PREFIX,
    examples=activities_examples,
    suffix=SYNTHETIC_FEW_SHOT_SUFFIX,
    input_variables=["subject", "extra"],
    example_prompt=OPENAI_TEMPLATE,
)

prompt_template_reservations = FewShotPromptTemplate(
    prefix=SYNTHETIC_FEW_SHOT_PREFIX,
    examples=reservations_examples,
    suffix=SYNTHETIC_FEW_SHOT_SUFFIX,
    input_variables=["subject", "extra"],
    example_prompt=OPENAI_TEMPLATE,
)

prompt_template_review_users= FewShotPromptTemplate(
    prefix=SYNTHETIC_FEW_SHOT_PREFIX,
    examples=review_user_examples,
    suffix=SYNTHETIC_FEW_SHOT_SUFFIX,
    input_variables=["subject", "extra"],
    example_prompt=OPENAI_TEMPLATE,
)

prompt_template_review_activities = FewShotPromptTemplate(
    prefix=SYNTHETIC_FEW_SHOT_PREFIX,
    examples=review_activity_examples,
    suffix=SYNTHETIC_FEW_SHOT_SUFFIX,
    input_variables=["subject", "extra"],
    example_prompt=OPENAI_TEMPLATE,
)

In [6]:
SYNTHETIC_FEW_SHOT_PREFIX

'This is a test about generating synthetic data about {subject}. Examples below:'

In [7]:
SYNTHETIC_FEW_SHOT_SUFFIX

'Now you generate synthetic data about {subject}. Make sure to {extra}:'

#### LLM for Generating Data

In [13]:
Llm = ChatOpenAI(api_key=api_key, model="gpt-4o-mini", temperature=1)


#### Data Generators

In [14]:
synthetic_data_generator_user = create_openai_data_generator(
    output_schema=User,
    llm=Llm,
    prompt=prompt_template_users,
)

synthetic_data_generator_activities_finished = create_openai_data_generator(
    output_schema=Activity,
    llm=Llm,
    prompt=prompt_template_activities,
)

synthetic_data_generator_activities_ongoing = create_openai_data_generator(
    output_schema=Activity,
    llm=Llm,
    prompt=prompt_template_activities,
)

synthetic_data_generator_reservations_A_finished = create_openai_data_generator(
    output_schema=Reservation,
    llm=Llm,
    prompt=prompt_template_reservations,
)

synthetic_data_generator_reservations_A_ongoing = create_openai_data_generator(
    output_schema=Reservation,
    llm=Llm,
    prompt=prompt_template_reservations,
)

synthetic_data_generator_review_users = create_openai_data_generator(
    output_schema=ReviewUser,
    llm=Llm,
    prompt=prompt_template_review_users,
)

synthetic_data_generator_review_activities = create_openai_data_generator(
    output_schema=ReviewActivity,
    llm=Llm,
    prompt=prompt_template_review_activities,
)

#### Creating Data

In [10]:
print(synthetic_data_generator_user.template.prefix)

This is a test about generating synthetic data about {subject}. Examples below:


In [11]:
print(synthetic_data_generator_user.template.suffix)

Now you generate synthetic data about {subject}. Make sure to {extra}:


In [16]:
predefined_usernames = ['Finn', 'Amelia', 'Aria', 'Sophia', 'Dylan', 'Luna', 'Ines', 'Eli', 'Grace', 
 'James', 'Henry', 'Scarlett', 'Lucas', 'Zoe', 'Hunter', 'Olivia', 'Nora', 'Ethan', 
 'Jack', 'Lila', 'Mason', 'Ella', 'Aiden', 'Noah', 'Levi', 'Elijah', 'Ruby', 
 'Alex', 'Ellie', 'Owen', 'Liam', 'Isaac', 'Logan', 'Ava', 'Eva', 'Adam', 
 'Mila', 'Lily', 'Anna', 'Isabella', 'Ryan', 'Chloe', 'Layla', 'Leah', 
 'Luke', 'Sara', 'Luis', 'Dinis', 'Rafa', 'Tiago']

print(len(set(predefined_usernames)))

for usernames in predefined_usernames:
    synthetic_results_user = synthetic_data_generator_user.generate(
                                subject="Users profile",
                                extra=f"""Username the name with less than 10 characters
                                            you associte with: {usernames}
                                            Password should equal the Username,
                                            Location needs to be chosen randomly from 
                                            (Lisbon, New York, Porto, San Francisco, Miami).""",
                                runs = 1)

synthetic_results_user

50


[User(user_id=4, username='finn', password='finn', name='Finn', birthday=datetime.date(1995, 4, 25), email='finn@example.com', phone_number='+1234567891', location='Miami', interests='surfing,photography', cumulative_rating=0.0),
 User(user_id=5, username='amelia', password='amelia', name='Amelia', birthday=datetime.date(1990, 6, 15), email='amelia@example.com', phone_number='+1234567892', location='New York', interests='reading,piano', cumulative_rating=0.0),
 User(user_id=6, username='aria', password='aria', name='Aria', birthday=datetime.date(1995, 8, 20), email='aria@example.com', phone_number='+1234567893', location='San Francisco', interests='painting,yoga', cumulative_rating=0.0),
 User(user_id=7, username='sophia', password='sophia', name='Sophia', birthday=datetime.date(1992, 2, 14), email='sophia@example.com', phone_number='+1234567894', location='Lisbon', interests='cooking,dancing', cumulative_rating=0.0),
 User(user_id=8, username='dylan', password='dylan', name='Dylan', b

In [17]:
# 25
predefined_activities = ["Go hiking", "Have a picnic", "Play board games", 
                         "Host a movie marathon", "Go bowling", "Have a beach day", 
                         "Visit an escape room", "Go rock climbing", "Do a scavenger hunt", 
                         "Play laser tag", "Go to a concert", "Have a barbecue", 
                         "Visit a theme park", "Do karaoke", "Go ice skating", 
                         "Have a paint-and-sip night", "Play mini golf", "Go camping", 
                         "Try an art class", "Go to a comedy show", "Visit a museum", 
                         "Do a cooking class", "Play soccer or football", 
                         "Have a trivia night", "Go kayaking or canoeing", 
                         "Try paddleboarding", "Have a game night with video games", 
                         "Go to a trampoline park", "Visit a botanical garden", 
                         "Have a photography day", "Go wine tasting", 
                         "Visit a local farmer's market", "Try an escape room challenge", 
                         "Go horseback riding", "Have a potluck dinner", 
                         "Go on a road trip", "Visit a zoo or aquarium", 
                         "Do a yoga or fitness class", "Try a dance class", 
                         "Have a book club meeting", "Play frisbee or ultimate frisbee", 
                         "Go on a bike ride", "Visit a local brewery", "Go stargazing", 
                         "Try a ziplining adventure", "Do a DIY craft project", 
                         "Have a breakfast or brunch outing", "Do volunteer work together", 
                         "Visit a historical site", "Explore a new city or neighborhood"]

print(len(set(predefined_activities)))

for activity_ongoing in predefined_activities[:25]:
    synthetic_results_activities_ongoing = synthetic_data_generator_activities_ongoing.generate(
                                            subject="Activities/Events description",
                                            extra="""Date Finish is after 2024-12-31, 
                                                    Activity description includes people interests (hobbies, age group, personality),
                                                    Max Participants is between 2 to 5 people,
                                                    Number Participants needs to be between 0 and the Max Participants,
                                                    City needs to be choosen from 
                                                    (Lisbon, New York, Porto, San Francisco, Miami),
                                                    Activity State cannot be finished
                                                    Pinecone ID needs to be 0""",
                                            runs=1,
)

synthetic_results_activities_ongoing

50


[Activity(activity_id=4, host_id=4, activity_name='Art Workshop', activity_description='Unleash your creativity in a small group of aspiring artists, perfect for those who love painting and expressing themselves.', location='Art Studio', number_participants=2, max_participants=4, city='Lisbon', date_begin=datetime.datetime(2025, 1, 5, 10, 0), date_finish=datetime.datetime(2025, 1, 5, 12, 0), activity_state='open', cumulative_rating=0.0, pinecone_id=0),
 Activity(activity_id=5, host_id=5, activity_name='Meditation Retreat', activity_description='Reconnect with yourself in a serene environment, ideal for individuals seeking peace and mindfulness, ages 25-50.', location='Serenity Center', number_participants=1, max_participants=5, city='Miami', date_begin=datetime.datetime(2025, 2, 15, 8, 0), date_finish=datetime.datetime(2025, 2, 15, 17, 0), activity_state='open', cumulative_rating=0.0, pinecone_id=0),
 Activity(activity_id=6, host_id=6, activity_name='Photography Walk', activity_descrip

In [18]:
# 25
for activity_ongoing in predefined_activities[25:]:
    synthetic_results_activities_finished = synthetic_data_generator_activities_finished.generate(
                                                    subject="Activities/Events description",
                                                    extra="""Date Finish is before 2024-11-31,
                                                            Max Participants needs to be 2,
                                                            Number Participants needs to be 2,
                                                            Activity Status needs to be finished,
                                                            Pinecone ID needs to be Null""",
                                                    runs=1,
)

synthetic_results_activities_finished

[Activity(activity_id=29, host_id=29, activity_name='Sunset Yoga Retreat', activity_description='Unwind and rejuvenate at our Sunset Yoga Retreat designed for relaxation seekers aged 20-60. Set against the backdrop of breathtaking sunsets, this event invites participants to practice yoga, meditation, and mindfulness together. Suitable for all levels, this retreat focuses on holistic well-being and allows you to connect with a supportive community while enjoying the serene ambiance. Bring your mat and enjoy a peaceful evening of reflection and tranquility!', location='Cliffside Park', number_participants=2, max_participants=2, city='Santa Monica', date_begin=datetime.datetime(2024, 8, 15, 18, 0), date_finish=datetime.datetime(2024, 8, 15, 20, 0), activity_state='finished', cumulative_rating=0.0, pinecone_id=None),
 Activity(activity_id=30, host_id=30, activity_name='Moonlight Stargazing Night', activity_description='Join astronomy enthusiasts aged 15-50 for a magical Moonlight Stargazin

In [19]:
# 50
synthetic_results_reservations_Activity_Ended = synthetic_data_generator_reservations_A_finished.generate(
    subject="Reservation Log",
    extra="""Message needs to be a request for someone that 
             will reject or accept the reservation, the Message can show 
             motivation and peoples interests,
             State needs to be confirmed""",
    runs=50,
)

synthetic_results_reservations_Activity_Ended

[Reservation(activity_id=4, host_id=4, user_id=5, message='I’m really excited for this painting class, can you confirm my spot?', state='confirmed'),
 Reservation(activity_id=5, host_id=2, user_id=4, message='I’m looking forward to the pottery workshop! Please let me know if my spot is secured!', state='confirmed'),
 Reservation(activity_id=6, host_id=1, user_id=3, message="I can't wait to join the cooking class! Please confirm my reservation, I'd love to learn new recipes!", state='confirmed'),
 Reservation(activity_id=7, host_id=3, user_id=8, message='I am so eager to join the photography workshop! It would mean a lot if you could secure my spot so I can expand my skills in capturing beautiful moments!', state='confirmed'),
 Reservation(activity_id=8, host_id=2, user_id=5, message='I’m really excited about the upcoming yoga retreat! I’ve been looking forward to deepening my practice. Please confirm my spot when you can!', state='confirmed'),
 Reservation(activity_id=9, host_id=4, use

In [20]:
# 25
synthetic_results_reservations_Activity_ongoin = synthetic_data_generator_reservations_A_ongoing.generate(
    subject="Reservation Log",
    extra="""Message needs to be a request for someone that 
             will reject or accept the reservation, the Message can show 
             motivation and peoples interests""",
    runs=25,
)

synthetic_results_reservations_Activity_ongoin

[Reservation(activity_id=54, host_id=31, user_id=45, message='I am thrilled to submit my reservation request for the upcoming dance workshop! Dancing has been a lifelong passion of mine, and I am eager to learn new styles and techniques from talented instructors. I hope to connect with others who share the same enthusiasm for dance. Please review my application and confirm my reservation, as I am excited to immerse myself in this vibrant community and elevate my skills!', state='pending'),
 Reservation(activity_id=55, host_id=32, user_id=46, message='I am very excited to request my reservation for the upcoming photography workshop! I have been passionate about photography for years, and this workshop seems like the perfect opportunity to enhance my skills, learn from experienced photographers, and meet others who share my love for capturing moments. I am particularly interested in portrait photography and would love to gain insights on how to effectively engage with subjects. Please co

In [15]:
# 50
synthetic_results_review_users = synthetic_data_generator_review_users.generate(
    subject="Review of users",
    extra="""Review neest to be about the user behavior, 
             Rating needs to be between 1 and 5 (can be 1 and 5),
             Rating needs to be related with the review """,
    runs=50,
)

synthetic_results_review_users

[ReviewUser(host_id=4, activity_id=2, user_id=7, review='Liam was quiet during the activity and did not participate much, which made it hard for the group to engage fully.', rating=2),
 ReviewUser(host_id=2, activity_id=3, user_id=8, review='Emily was very enthusiastic and actively engaged with everyone, contributing to a lively atmosphere that made the activity enjoyable.', rating=5),
 ReviewUser(host_id=1, activity_id=4, user_id=10, review='John was very helpful during the activity, offering assistance and ensuring that everyone understood the tasks at hand. His positive attitude greatly enhanced the experience.', rating=5),
 ReviewUser(host_id=3, activity_id=1, user_id=5, review='Sarah was disengaged and often distracted during the activity, which took away from the overall experience for the rest of the group.', rating=1),
 ReviewUser(host_id=4, activity_id=2, user_id=6, review='Michael was incredibly organized and punctual, ensuring that everything ran smoothly. His attention to d

In [22]:
# 50
synthetic_results_review_activities = synthetic_data_generator_review_activities.generate(
    subject="Review of events/activities",
    extra="""Review needs to be about the activity or behaviour of host,
             Rating needs to be between 1 and 5 (can be 1 and 5),
             Rating needs to be realted with the review""",
    runs=50,
)

synthetic_results_review_activities

[ReviewActivity(activity_id=4, user_id=5, review='The cooking workshop was well-organized and informative, I learned a lot!', rating=5),
 ReviewActivity(activity_id=1, user_id=2, review='The host was engaging and the activities were fun, but some aspects felt rushed.', rating=4),
 ReviewActivity(activity_id=2, user_id=3, review='The event was poorly planned, and the host seemed unprepared, which made it frustrating to attend.', rating=2),
 ReviewActivity(activity_id=3, user_id=7, review='The yoga session was relaxing and the instructor was very calming, but the studio was quite crowded.', rating=4),
 ReviewActivity(activity_id=4, user_id=5, review='The cooking class was informative and fun, but the kitchen was a bit disorganized, which slowed things down.', rating=3),
 ReviewActivity(activity_id=1, user_id=8, review='The concert was amazing, with great energy from the performers, although the sound quality was not the best.', rating=4),
 ReviewActivity(activity_id=2, user_id=10, review

In [None]:
# backup
with open("data_generated_NO_FK.pkl", "wb") as file:
    pickle.dump([synthetic_results_user,
                 synthetic_results_activities_ongoing,
                 synthetic_results_activities_finished, 
                 synthetic_results_reservations_Activity_Ended,
                 synthetic_results_reservations_Activity_ongoin,
                 synthetic_results_review_users,
                 synthetic_results_review_activities], file)

#### Ensuring Foreing Keys

In [14]:
# Importing variables from a pickle file
with open("data_generated_NO_FK.pkl", "rb") as file:
    synthetic_results_user, synthetic_results_activities_ongoing,\
    synthetic_results_activities_finished, synthetic_results_reservations_Activity_Ended,\
    synthetic_results_reservations_Activity_ongoin, synthetic_results_review_users,\
     synthetic_results_review_activities = pickle.load(file)


In [15]:
User_IDs = list(range(1,51))

Activity_IDs = list(range(1,51))

We will put the ids to create an order:
+ User ID from 1-50.
+ Activity ID from 1-50.
+ The users from 1-25 will have activities (2 each).
+ The users from 1-13 will have finished activities (2 each, and 1 with 1).
+ The users from 14-25 will have ongoing activities (2 each and 1 with 3).
+ The Activities from 1-25 will be finished.
+ The activities from 26-50 will be ongoing (on RAG).
+ The users from 26-50 will have reservations in activities (2 reservations each activity finished).
+ The users form 1-12 and 26-50 will have reservations in the ongoing activities.
+ Activity from 26-39 will have no reservations.
+ Activity from 40-49 will have 1 confirmed reservations and 1 pending (20 in total).
+ Activity 50 will have Max_participants - 1 confirmed reservations and 1 pending, activity 50 has 5 as max number of participants (5 in total).
+ Users 26 to 41 will be the ones making the reservations for the ongoing activities.

In [16]:
for i, user_data in enumerate(synthetic_results_user):
    user_data.user_id = User_IDs[i]
    user_data.cumulative_rating = np.random.randint(1,6)

In [17]:
synthetic_results_user

[User(user_id=1, username='finn', password='finn', name='Finn', birthday=datetime.date(1995, 4, 25), email='finn@example.com', phone_number='+1234567891', location='Miami', interests='surfing,photography', cumulative_rating=3),
 User(user_id=2, username='amelia', password='amelia', name='Amelia', birthday=datetime.date(1990, 6, 15), email='amelia@example.com', phone_number='+1234567892', location='New York', interests='reading,piano', cumulative_rating=3),
 User(user_id=3, username='aria', password='aria', name='Aria', birthday=datetime.date(1995, 8, 20), email='aria@example.com', phone_number='+1234567893', location='San Francisco', interests='painting,yoga', cumulative_rating=2),
 User(user_id=4, username='sophia', password='sophia', name='Sophia', birthday=datetime.date(1992, 2, 14), email='sophia@example.com', phone_number='+1234567894', location='Lisbon', interests='cooking,dancing', cumulative_rating=3),
 User(user_id=5, username='dylan', password='dylan', name='Dylan', birthday=

In [18]:
user_index = 0

for i, activity_data in enumerate(synthetic_results_activities_finished):
    
    activity_data.activity_id = Activity_IDs[i]
    activity_data.host_id = User_IDs[user_index]
    activity_data.number_participants = 2
    activity_data.max_participants = 2
    activity_data.activity_state = 'finished'
    activity_data.cumulative_rating = np.random.randint(1,6)

    if i%2==1:
        user_index += 1

In [19]:
synthetic_results_activities_finished

[Activity(activity_id=1, host_id=1, activity_name='Sunset Yoga Retreat', activity_description='Unwind and rejuvenate at our Sunset Yoga Retreat designed for relaxation seekers aged 20-60. Set against the backdrop of breathtaking sunsets, this event invites participants to practice yoga, meditation, and mindfulness together. Suitable for all levels, this retreat focuses on holistic well-being and allows you to connect with a supportive community while enjoying the serene ambiance. Bring your mat and enjoy a peaceful evening of reflection and tranquility!', location='Cliffside Park', number_participants=2, max_participants=2, city='Santa Monica', date_begin=datetime.datetime(2024, 8, 15, 18, 0), date_finish=datetime.datetime(2024, 8, 15, 20, 0), activity_state='finished', cumulative_rating=4, pinecone_id=None),
 Activity(activity_id=2, host_id=1, activity_name='Moonlight Stargazing Night', activity_description='Join astronomy enthusiasts aged 15-50 for a magical Moonlight Stargazing Nigh

In [20]:
start_index = 25
user_index = 13

for i, activity_data in enumerate(synthetic_results_activities_ongoing):
    
    activity_data.activity_id = Activity_IDs[start_index+i]
    activity_data.host_id = User_IDs[user_index]
    activity_data.pinecone_id = Activity_IDs[start_index+i]

    # max number from 2-5 since creating data only got 4 and 5
    activity_data.max_participants = np.random.randint(2,6)

    if activity_data.activity_id < 41:
        activity_data.number_participants = 0

    elif activity_data.activity_id < 50:
        activity_data.number_participants = 1
    
    else:
        # activity 50
        activity_data.max_participants = 5
        activity_data.number_participants = activity_data.max_participants - 1

    activity_data.activity_state = 'open'

    if i%2==1 and user_index < 24 :
        user_index += 1

In [21]:
synthetic_results_activities_ongoing

[Activity(activity_id=26, host_id=14, activity_name='Art Workshop', activity_description='Unleash your creativity in a small group of aspiring artists, perfect for those who love painting and expressing themselves.', location='Art Studio', number_participants=0, max_participants=3, city='Lisbon', date_begin=datetime.datetime(2025, 1, 5, 10, 0), date_finish=datetime.datetime(2025, 1, 5, 12, 0), activity_state='open', cumulative_rating=0.0, pinecone_id=26),
 Activity(activity_id=27, host_id=14, activity_name='Meditation Retreat', activity_description='Reconnect with yourself in a serene environment, ideal for individuals seeking peace and mindfulness, ages 25-50.', location='Serenity Center', number_participants=0, max_participants=5, city='Miami', date_begin=datetime.datetime(2025, 2, 15, 8, 0), date_finish=datetime.datetime(2025, 2, 15, 17, 0), activity_state='open', cumulative_rating=0.0, pinecone_id=27),
 Activity(activity_id=28, host_id=15, activity_name='Photography Walk', activity

In [22]:
start_i = 25

user_index = 25
previous_user_val = 26

host_index = 0
counter_host_i = 0

activity_index = 0


for i, reservation_activity in enumerate(synthetic_results_reservations_Activity_Ended):

    reservation_activity.activity_id = Activity_IDs[activity_index]
    reservation_activity.host_id = User_IDs[host_index]
    reservation_activity.state = 'confirmed'
        
    if i == 49:
        reservation_activity.user_id = User_IDs[start_i]
        break
    else:
        reservation_activity.user_id = User_IDs[user_index]

    if i%2==1:
        activity_index += 1
    
    counter_host_i += 1
    if counter_host_i == 4:
        host_index += 1
        counter_host_i = 0 
        
    if previous_user_val == User_IDs[user_index]:
        user_index += 1
    else:
        previous_user_val = User_IDs[user_index]

In [23]:
# the message can have no sence redarding the acivity
# is generated data, and we are using it to tets

synthetic_results_reservations_Activity_Ended

[Reservation(activity_id=1, host_id=1, user_id=26, message='I’m really excited for this painting class, can you confirm my spot?', state='confirmed'),
 Reservation(activity_id=1, host_id=1, user_id=27, message='I’m looking forward to the pottery workshop! Please let me know if my spot is secured!', state='confirmed'),
 Reservation(activity_id=2, host_id=1, user_id=27, message="I can't wait to join the cooking class! Please confirm my reservation, I'd love to learn new recipes!", state='confirmed'),
 Reservation(activity_id=2, host_id=1, user_id=28, message='I am so eager to join the photography workshop! It would mean a lot if you could secure my spot so I can expand my skills in capturing beautiful moments!', state='confirmed'),
 Reservation(activity_id=3, host_id=2, user_id=28, message='I’m really excited about the upcoming yoga retreat! I’ve been looking forward to deepening my practice. Please confirm my spot when you can!', state='confirmed'),
 Reservation(activity_id=3, host_id=2

In [24]:
start_i = 25

user_index = 25
previous_user_val = 26

host_index = 0
counter_host_i = 0

activity_index = 0

for i, review_users in enumerate(synthetic_results_review_users):

    review_users.activity_id = Activity_IDs[activity_index]
    review_users.host_id = User_IDs[host_index]
        
    if i == 49:
        review_users.user_id = User_IDs[start_i]
        break
    else:
        review_users.user_id = User_IDs[user_index]

    if i%2==1:
        activity_index += 1
    
    counter_host_i += 1
    if counter_host_i == 4:
        host_index += 1
        counter_host_i = 0 
        
    if previous_user_val == User_IDs[user_index]:
        user_index += 1
    else:
        previous_user_val = User_IDs[user_index]

In [25]:
# we know names of the messages donest macth with the name of the person, 
# but we dont care since the user dont see their review, the activity already ended
# and is generated data

synthetic_results_review_users

[ReviewUser(host_id=1, activity_id=1, user_id=26, review='Liam was quiet during the activity and did not participate much, which made it hard for the group to engage fully.', rating=2),
 ReviewUser(host_id=1, activity_id=1, user_id=27, review='Emily was very enthusiastic and actively engaged with everyone, contributing to a lively atmosphere that made the activity enjoyable.', rating=5),
 ReviewUser(host_id=1, activity_id=2, user_id=27, review='John was very helpful during the activity, offering assistance and ensuring that everyone understood the tasks at hand. His positive attitude greatly enhanced the experience.', rating=5),
 ReviewUser(host_id=1, activity_id=2, user_id=28, review='Sarah was disengaged and often distracted during the activity, which took away from the overall experience for the rest of the group.', rating=1),
 ReviewUser(host_id=2, activity_id=3, user_id=28, review='Michael was incredibly organized and punctual, ensuring that everything ran smoothly. His attention 

In [26]:
start_i = 25

user_index = 25
previous_user_val = 26

activity_index = 0

for i, review_activities in enumerate(synthetic_results_review_activities):

    review_activities.activity_id = Activity_IDs[activity_index]
        
    if i == 49:
        review_activities.user_id = User_IDs[start_i]
        break

    else:
        review_activities.user_id = User_IDs[user_index]

    if i%2==1:
        activity_index += 1
        
    if previous_user_val == User_IDs[user_index]:
        user_index += 1
    else:
        previous_user_val = User_IDs[user_index]

In [27]:
# we know names of the messages doenst macth with the activity, 
# but we dont care since the data is generated data, and 
# we only will use it to test it.

synthetic_results_review_activities

[ReviewActivity(activity_id=1, user_id=26, review='The cooking workshop was well-organized and informative, I learned a lot!', rating=5),
 ReviewActivity(activity_id=1, user_id=27, review='The host was engaging and the activities were fun, but some aspects felt rushed.', rating=4),
 ReviewActivity(activity_id=2, user_id=27, review='The event was poorly planned, and the host seemed unprepared, which made it frustrating to attend.', rating=2),
 ReviewActivity(activity_id=2, user_id=28, review='The yoga session was relaxing and the instructor was very calming, but the studio was quite crowded.', rating=4),
 ReviewActivity(activity_id=3, user_id=28, review='The cooking class was informative and fun, but the kitchen was a bit disorganized, which slowed things down.', rating=3),
 ReviewActivity(activity_id=3, user_id=29, review='The concert was amazing, with great energy from the performers, although the sound quality was not the best.', rating=4),
 ReviewActivity(activity_id=4, user_id=29, 

In [28]:
start_i = 40

user_index = 25
previous_user_val = 26

activity_index = 39
counter_activity_50 = 0

state_count_2 = 0
state_count_5 = 0

host_index = int(Activity_IDs[activity_index]/2)


for i, reservation_activity in enumerate(synthetic_results_reservations_Activity_ongoin):

    reservation_activity.activity_id = Activity_IDs[activity_index]
    reservation_activity.host_id = User_IDs[host_index]
    reservation_activity.user_id = User_IDs[user_index]

    if activity_index < 49:

        if state_count_2 < 1:
            reservation_activity.state = 'confirmed'
        
        else:
            reservation_activity.state = 'pending'

        if state_count_2 ==1:
            state_count_2 = 0
        
        else:
            state_count_2 += 1
    
    else:
        if state_count_5 < 4:
            reservation_activity.state = 'confirmed'
        
        else:
            reservation_activity.state = 'pending'

        state_count_5 += 1


    if i%2==1 and activity_index < 49:
        activity_index += 1
        
    else: 
        # activity 50 has 5 slots
        counter_activity_50 +=1


    # stops updating the host index since host 25 has the activities 48, 49 and 50
    if activity_index < 49:
        host_index = int(Activity_IDs[activity_index]/2)


    # update every 2 people
    if previous_user_val == User_IDs[user_index] and activity_index < 49:
        user_index += 1

    elif activity_index < 49:
        previous_user_val = User_IDs[user_index]

    # update user in each interation in the last activity
    else:
        user_index += 1

In [29]:
# the message can have no sence redarding the acivity
# is generated data, and we are using it to tets

synthetic_results_reservations_Activity_ongoin

[Reservation(activity_id=40, host_id=21, user_id=26, message='I am thrilled to submit my reservation request for the upcoming dance workshop! Dancing has been a lifelong passion of mine, and I am eager to learn new styles and techniques from talented instructors. I hope to connect with others who share the same enthusiasm for dance. Please review my application and confirm my reservation, as I am excited to immerse myself in this vibrant community and elevate my skills!', state='confirmed'),
 Reservation(activity_id=40, host_id=21, user_id=27, message='I am very excited to request my reservation for the upcoming photography workshop! I have been passionate about photography for years, and this workshop seems like the perfect opportunity to enhance my skills, learn from experienced photographers, and meet others who share my love for capturing moments. I am particularly interested in portrait photography and would love to gain insights on how to effectively engage with subjects. Please 

In [41]:
#Backup
with open("data_generated_Clean.pkl", "wb") as file:
    pickle.dump([synthetic_results_user,
                 synthetic_results_activities_ongoing,
                 synthetic_results_activities_finished, 
                 synthetic_results_reservations_Activity_Ended,
                 synthetic_results_reservations_Activity_ongoin,
                 synthetic_results_review_users,
                 synthetic_results_review_activities], file)

#### Pinecone of Open Activities

In [5]:
# Importing variables from a pickle file
with open("data_generated_Clean.pkl", "rb") as file:
    synthetic_results_user, synthetic_results_activities_ongoing,\
    synthetic_results_activities_finished, synthetic_results_reservations_Activity_Ended,\
    synthetic_results_reservations_Activity_ongoin, synthetic_results_review_users,\
     synthetic_results_review_activities = pickle.load(file)

In [32]:
synthetic_results_activities_ongoing

[Activity(activity_id=26, host_id=14, activity_name='Art Workshop', activity_description='Unleash your creativity in a small group of aspiring artists, perfect for those who love painting and expressing themselves.', location='Art Studio', number_participants=0, max_participants=3, city='Lisbon', date_begin=datetime.datetime(2025, 1, 5, 10, 0), date_finish=datetime.datetime(2025, 1, 5, 12, 0), activity_state='open', cumulative_rating=0.0, pinecone_id=26),
 Activity(activity_id=27, host_id=14, activity_name='Meditation Retreat', activity_description='Reconnect with yourself in a serene environment, ideal for individuals seeking peace and mindfulness, ages 25-50.', location='Serenity Center', number_participants=0, max_participants=5, city='Miami', date_begin=datetime.datetime(2025, 2, 15, 8, 0), date_finish=datetime.datetime(2025, 2, 15, 17, 0), activity_state='open', cumulative_rating=0.0, pinecone_id=27),
 Activity(activity_id=28, host_id=15, activity_name='Photography Walk', activity

In [11]:


# OpenAI Chat Model
llm = ChatOpenAI(model="gpt-4", temperature=0)

# Prompt Template for the Activity
prompt_template = PromptTemplate(
    input_variables=["activity"],
    template="""Transform the following structured activity data into a human-readable text
                using less than 500 characters:

Activity Data:
{activity}

Human-Readable Text:"""
)

# Define the Runnable Chain
chain = (prompt_template | llm | StrOutputParser())

# Convert the Activity object into a readable string
def format_activity(activity: Activity) -> str:
    return (
        f"Activity Name: {activity.activity_name}, Activity Description: {activity.activity_description}"
    )

In [12]:
# testing

# Format the activity
formatted_activity = format_activity(synthetic_results_activities_ongoing[0])

# Run the chain
result = chain.invoke({"activity": formatted_activity})

# Print the result
print(result)

The Art Workshop is an activity designed to unleash your creativity. It's perfect for those who love painting and expressing themselves. You'll be in a small group of aspiring artists, allowing for a more intimate and focused environment.


In [13]:
print(len(result))

238


In [15]:
result

"The Art Workshop is an activity designed to unleash your creativity. It's perfect for those who love painting and expressing themselves. You'll be in a small group of aspiring artists, allowing for a more intimate and focused environment."

In [16]:
text_for_embedings = []

for activity in synthetic_results_activities_ongoing:
    text_for_embedings.append((activity.activity_id, chain.invoke({"activity": format_activity(activity)})))

In [18]:
text_for_embedings

[(26,
  'Join the Art Workshop where you can unleash your creativity alongside a small group of aspiring artists. This activity is perfect for those who love painting and expressing themselves.'),
 (27,
  "The Meditation Retreat is a perfect opportunity to reconnect with yourself in a peaceful and serene environment. It's an ideal activity for individuals aged 25-50 who are seeking peace and mindfulness."),
 (28,
  "Join the Photography Walk, a fun-filled activity designed for photography enthusiasts aged 18-35. During this leisurely walk, you'll have the opportunity to capture the city's beauty through your lens. It's a perfect event for those who love exploring and expressing their creativity."),
 (29,
  'The Creative Writing Workshop is an immersive experience into the world of storytelling and poetry. It provides a supportive environment perfect for aspiring writers aged 20-40 who are keen on expressing themselves and honing their writing skills.'),
 (30,
  'Enjoy a Wine Tasting Ex

In [19]:
#Backup
with open("text_for _embedings_and _ID.pkl", "wb") as file:
    pickle.dump(text_for_embedings, file)

In [4]:
with open("text_for _embedings_and _ID.pkl", "rb") as file:
    text_for_embedings = pickle.load(file)

In [5]:
text_for_embedings

[(26,
  'Join the Art Workshop where you can unleash your creativity alongside a small group of aspiring artists. This activity is perfect for those who love painting and expressing themselves.'),
 (27,
  "The Meditation Retreat is a perfect opportunity to reconnect with yourself in a peaceful and serene environment. It's an ideal activity for individuals aged 25-50 who are seeking peace and mindfulness."),
 (28,
  "Join the Photography Walk, a fun-filled activity designed for photography enthusiasts aged 18-35. During this leisurely walk, you'll have the opportunity to capture the city's beauty through your lens. It's a perfect event for those who love exploring and expressing their creativity."),
 (29,
  'The Creative Writing Workshop is an immersive experience into the world of storytelling and poetry. It provides a supportive environment perfect for aspiring writers aged 20-40 who are keen on expressing themselves and honing their writing skills.'),
 (30,
  'Enjoy a Wine Tasting Ex

In [20]:
embeddings_model = OpenAIEmbeddings(model="text-embedding-3-small")

embeddings = embeddings_model.embed_documents(
    [activity[1] for activity in text_for_embedings]
)

len(embeddings), len(embeddings[0])

(25, 1536)

In [21]:
# we will have 25 vectors with 1536 dimentions
embeddings

[[-0.00895447377115488,
  0.005891998298466206,
  0.013783981092274189,
  -0.013715799897909164,
  -0.02195437066257,
  -0.0016633389750495553,
  0.02879522368311882,
  0.06240859255194664,
  -0.04195421189069748,
  0.0007137727225199342,
  0.011943086050450802,
  -0.00827266089618206,
  -0.010704459622502327,
  -0.05159049853682518,
  -0.00814766250550747,
  0.0582268089056015,
  -0.03147702291607857,
  0.015352150425314903,
  0.0580904446542263,
  0.008164707571268082,
  -0.007988572120666504,
  0.01544305868446827,
  -0.0639994889497757,
  0.03134065866470337,
  0.020238474011421204,
  -0.028817951679229736,
  -0.024863436818122864,
  -0.023340722545981407,
  0.00514768622815609,
  0.025181617587804794,
  -0.008215843699872494,
  -0.02240891195833683,
  0.020693017169833183,
  0.010380598716437817,
  -0.052545033395290375,
  0.016727138310670853,
  0.038272421807050705,
  -0.00778402853757143,
  -0.025090709328651428,
  -0.03622698411345482,
  0.03654516115784645,
  0.00983514823019

In [22]:
# Initialize a connection to Pinecone
pc = Pinecone()

In [23]:
pc.list_indexes()

{'indexes': [{'deletion_protection': 'disabled',
              'dimension': 1536,
              'host': 'activities-mnbjbch.svc.aped-4627-b74a.pinecone.io',
              'metric': 'cosine',
              'name': 'activities',
              'spec': {'serverless': {'cloud': 'aws', 'region': 'us-east-1'}},
              'status': {'ready': True, 'state': 'Ready'}}]}

In [24]:
index: Index = pc.Index("activities")

In [25]:
# Initialize a Pinecone vector store with OpenAI embeddings
vector_store = PineconeVectorStore(
    index=index, embedding=OpenAIEmbeddings(model="text-embedding-3-small")
)

In [26]:
ids = [str(i) for i in list(zip(*text_for_embedings))[0]]
ids 

['26',
 '27',
 '28',
 '29',
 '30',
 '31',
 '32',
 '33',
 '34',
 '35',
 '36',
 '37',
 '38',
 '39',
 '40',
 '41',
 '42',
 '43',
 '44',
 '45',
 '46',
 '47',
 '48',
 '49',
 '50']

In [27]:
documents_activities = []

for i, text in enumerate(list(zip(*text_for_embedings))[1]):
    documents_activities.append(Document(page_content=text, metadata={"pinecone_id": ids[i]}))

documents_activities 

[Document(metadata={'pinecone_id': '26'}, page_content='Join the Art Workshop where you can unleash your creativity alongside a small group of aspiring artists. This activity is perfect for those who love painting and expressing themselves.'),
 Document(metadata={'pinecone_id': '27'}, page_content="The Meditation Retreat is a perfect opportunity to reconnect with yourself in a peaceful and serene environment. It's an ideal activity for individuals aged 25-50 who are seeking peace and mindfulness."),
 Document(metadata={'pinecone_id': '28'}, page_content="Join the Photography Walk, a fun-filled activity designed for photography enthusiasts aged 18-35. During this leisurely walk, you'll have the opportunity to capture the city's beauty through your lens. It's a perfect event for those who love exploring and expressing their creativity."),
 Document(metadata={'pinecone_id': '29'}, page_content='The Creative Writing Workshop is an immersive experience into the world of storytelling and poe

In [28]:
# Add the documents and their embeddings to the vector store
vector_store.add_documents(documents=documents_activities, ids=ids)

['26',
 '27',
 '28',
 '29',
 '30',
 '31',
 '32',
 '33',
 '34',
 '35',
 '36',
 '37',
 '38',
 '39',
 '40',
 '41',
 '42',
 '43',
 '44',
 '45',
 '46',
 '47',
 '48',
 '49',
 '50']

#### Inserting into the Database

In [14]:
# Importing variables from a pickle file
with open("data_generated_Clean.pkl", "rb") as file:
    synthetic_results_user, synthetic_results_activities_ongoing,\
    synthetic_results_activities_finished, synthetic_results_reservations_Activity_Ended,\
    synthetic_results_reservations_Activity_ongoin, synthetic_results_review_users,\
     synthetic_results_review_activities = pickle.load(file)

In [15]:
# sqllite

conn = sqlite3.connect("BeAlive.db")
c = conn.cursor()
# Insert data into users

# Insert data into users
for user_data in synthetic_results_user:
    c.execute('''INSERT INTO users (user_id, username, password, name, birthday, email, phone_number, location, interests, cumulative_rating)
                 VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)''',
              (user_data.user_id, user_data.username, user_data.password, user_data.name, user_data.birthday,
               user_data.email, user_data.phone_number, user_data.location, user_data.interests, user_data.cumulative_rating))

# Insert data into activities (finished)
for activity_data in synthetic_results_activities_finished:
    c.execute('''INSERT INTO activities (activity_id, host_id, activity_name, activity_description, location, number_participants, max_participants, city, date_begin, date_finish, activity_state, cumulative_rating, pinecone_id)
                 VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)''',
              (activity_data.activity_id, activity_data.host_id, activity_data.activity_name, activity_data.activity_description,
               activity_data.location, activity_data.number_participants, activity_data.max_participants, activity_data.city,
               activity_data.date_begin, activity_data.date_finish, activity_data.activity_state, activity_data.cumulative_rating, activity_data.pinecone_id))

# Insert data into activities (ongoing)
for activity_data in synthetic_results_activities_ongoing:
    c.execute('''INSERT INTO activities (activity_id, host_id, activity_name, activity_description, location, number_participants, max_participants, city, date_begin, date_finish, activity_state, cumulative_rating, pinecone_id)
                 VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)''',
              (activity_data.activity_id, activity_data.host_id, activity_data.activity_name, activity_data.activity_description,
               activity_data.location, activity_data.number_participants, activity_data.max_participants, activity_data.city,
               activity_data.date_begin, activity_data.date_finish, activity_data.activity_state, activity_data.cumulative_rating, activity_data.pinecone_id))

# Insert data into reservations (activity ended)
for reservation_data in synthetic_results_reservations_Activity_Ended:
    c.execute('''INSERT INTO reservations (activity_id, host_id, user_id, message, state)
                 VALUES (?, ?, ?, ?, ?)''',
              (reservation_data.activity_id, reservation_data.host_id, reservation_data.user_id, reservation_data.message, reservation_data.state))

# Insert data into reservations (activity ongoing)
for reservation_data in synthetic_results_reservations_Activity_ongoin:
    c.execute('''INSERT INTO reservations (activity_id, host_id, user_id, message, state)
                 VALUES (?, ?, ?, ?, ?)''',
              (reservation_data.activity_id, reservation_data.host_id, reservation_data.user_id, reservation_data.message, reservation_data.state))

# Insert data into review_user
for review_user_data in synthetic_results_review_users:
    c.execute('''INSERT INTO review_user (host_id, activity_id, user_id, review, rating)
                 VALUES (?, ?, ?, ?, ?)''',
              (review_user_data.host_id, review_user_data.activity_id, review_user_data.user_id, review_user_data.review, review_user_data.rating))

# Insert data into review_activity
for review_activity_data in synthetic_results_review_activities:
    c.execute('''INSERT INTO review_activity (activity_id, user_id, review, rating)
                 VALUES (?, ?, ?, ?)''',
              (review_activity_data.activity_id, review_activity_data.user_id, review_activity_data.review, review_activity_data.rating))


# Commit changes and close the connection
conn.commit()
conn.close()

  c.execute('''INSERT INTO users (user_id, username, password, name, birthday, email, phone_number, location, interests, cumulative_rating)
  c.execute('''INSERT INTO activities (activity_id, host_id, activity_name, activity_description, location, number_participants, max_participants, city, date_begin, date_finish, activity_state, cumulative_rating, pinecone_id)
  c.execute('''INSERT INTO activities (activity_id, host_id, activity_name, activity_description, location, number_participants, max_participants, city, date_begin, date_finish, activity_state, cumulative_rating, pinecone_id)


In [16]:
# Connect to the SQLite database
conn = sqlite3.connect("BeAlive.db")
c = conn.cursor()

# Query to fetch all rows from the `users` table
c.execute("SELECT * FROM activities")
rows = c.fetchall()

# Display the rows
for row in rows:
    print(row)

# Close the connection
conn.close()

(1, 1, 'Sunset Yoga Retreat', 'Unwind and rejuvenate at our Sunset Yoga Retreat designed for relaxation seekers aged 20-60. Set against the backdrop of breathtaking sunsets, this event invites participants to practice yoga, meditation, and mindfulness together. Suitable for all levels, this retreat focuses on holistic well-being and allows you to connect with a supportive community while enjoying the serene ambiance. Bring your mat and enjoy a peaceful evening of reflection and tranquility!', 'Cliffside Park', 2, 2, 'Santa Monica', '2024-08-15 18:00:00', '2024-08-15 20:00:00', 'finished', 4.0, None)
(2, 1, 'Moonlight Stargazing Night', 'Join astronomy enthusiasts aged 15-50 for a magical Moonlight Stargazing Night! Set in a serene park away from city lights, this event offers a chance to marvel at the stars and learn about constellations and celestial events. Participants will have the opportunity to use telescopes and share their love for the night sky with fellow stargazers. Bring a 