In [19]:
import random
from pathlib import Path

import numpy as np
import pandas as pd
from faker import Faker


In [21]:
import pandas as pd
import numpy as np
from faker import Faker
import random

fake = Faker()

# Constants
NUM_USERS = 50          # Number of unique users
INVITES_PER_USER = 100  # Number of invites per user
TOTAL_RECORDS = NUM_USERS * INVITES_PER_USER
SEED = 42               # Fixed seed so repeated runs draw the same random values

# Seed every random source this notebook draws from (the stdlib `random` module,
# NumPy's legacy global generator and Faker's shared generator) so that
# Restart & Run All regenerates the same users and invites.
# Note: meeting start times are drawn relative to "now", so those still shift
# with the date on which the notebook is executed.
random.seed(SEED)
np.random.seed(SEED)
Faker.seed(SEED)

# --- Step 1: Generate a list of users ---
# One record per user: a sequential 1-based id, a fake name, and a fake address
# whose newlines are replaced by ", " so it fits on a single line.
users = [
    {
        "user_id": user_number,
        "user_name": fake.name(),
        "office_location": fake.address().replace("\n", ", "),
    }
    for user_number in range(1, NUM_USERS + 1)
]

# --- Step 2: Define meeting title and location generation functions ---
# Base titles; one of these is picked for every generated meeting.
meeting_titles = [
    "Client Meeting",
    "Team Standup",
    "Sales Call",
    "Strategy Session",
    "Project Kickoff",
    "Quarterly Review",
    "Budget Discussion",
    "Training Session",
    "One-on-One",
    "Product Demo",
    "Performance Review",
    "Board Meeting",
    "Town Hall",
    "HR Interview",
    "All-Hands Meeting",
    "Client Demo",
    "Marketing Sync",
    "Technical Review",
    "Operations Update",
]

# Suffixes appended to a base title; the empty string means "no modifier".
modifiers = [
    "",
    " - Follow-Up",
    " - Q3 Planning",
    " - with Marketing",
    " - Urgent",
    " - Remote Session",
    " - In-Person",
    " - Strategy Focus",
    " - Recurring",
    " - Special Session",
]

def generate_meeting_title():
    """Return a random meeting title: one base title followed by one modifier suffix.

    The modifier may be the empty string, in which case the base title is
    returned unchanged.
    """
    # The base title is drawn first and the modifier second (operands are
    # evaluated left to right).
    return random.choice(meeting_titles) + random.choice(modifiers)

def random_location_and_attendance(user_office_location):
    """
    Randomly pick a meeting format and derive its location and attendance fields.

    A single draw selects the format: below 0.4 is virtual, from 0.4 up to 0.8
    is in-person, and anything else is hybrid.

    Parameters:
        user_office_location (str): The office location for the invitee.

    Returns:
        tuple: (location, attendance_choice, meeting_url) where
            location (str): "Online", a platform name with its URL, a physical
                address, or a physical address combined with platform details.
            attendance_choice (str): "attended_in_person" or "attended_online".
            meeting_url (str or None): The generated URL for virtual and hybrid
                meetings; None for in-person meetings.
    """
    platform_names = ("Zoom", "Microsoft Teams", "Google Meet", "Webex")
    format_roll = random.random()

    # Virtual meeting: always attended online.
    if format_roll < 0.4:
        platform_name = random.choice(platform_names)
        join_url = fake.url()
        # Half of the time the location is just "Online"; otherwise it names the platform and URL.
        show_only_online = random.random() < 0.5
        location = "Online" if show_only_online else f"{platform_name} Meeting - {join_url}"
        return location, "attended_online", join_url

    # In-person and hybrid meetings both begin by choosing a physical venue:
    # the invitee's own office half of the time, a freshly generated address otherwise.
    if random.random() < 0.5:
        venue = user_office_location
    else:
        venue = fake.address().replace("\n", ", ")

    # In-person meeting: no URL is generated.
    if format_roll < 0.8:
        return venue, "attended_in_person", None

    # Hybrid meeting: a URL always exists, but only half of the time is it shown in the location.
    platform_name = random.choice(platform_names)
    join_url = fake.url()
    if random.random() < 0.5:
        location = f"{venue} | {platform_name} Meeting: {join_url}"
    else:
        location = venue

    # Hybrid attendees are weighted 70/30 towards attending in person.
    attended = random.choices(
        ["attended_in_person", "attended_online"], weights=[0.7, 0.3]
    )[0]
    return location, attended, join_url

# --- Step 3: Generate meeting invites for each user ---
# Meeting length bounds in minutes; both ends are inclusive.
MIN_DURATION_MINUTES = 30
MAX_DURATION_MINUTES = 120

data = []

for user in users:
    user_id = user["user_id"]
    user_office_location = user["office_location"]

    for i in range(INVITES_PER_USER):
        # Create a unique meeting_id per user (e.g., "23-45" for the 45th meeting of user 23)
        meeting_id = f"{user_id}-{i+1}"
        title = generate_meeting_title()

        # Generate a start time randomly over the past 3 years (approximately 1095 days)
        start_time = fake.date_time_between(start_date="-1095d", end_date="now")
        # Meeting duration between 30 minutes and 2 hours.
        # np.random.randint excludes its upper bound, so add 1 to make a full
        # 2-hour meeting possible.
        duration = np.random.randint(MIN_DURATION_MINUTES, MAX_DURATION_MINUTES + 1)
        end_time = start_time + pd.Timedelta(minutes=duration)

        location, attendance_choice, meeting_url = random_location_and_attendance(user_office_location)
        organizer = fake.name()

        # Free-text description; when the meeting has a URL, half of the time
        # the join link is appended to it.
        description = fake.text(max_nb_chars=200)
        if meeting_url and random.random() < 0.5:
            description += f" Please join via {meeting_url}."

        # Simulate if the person is optional (about 30% chance).
        optional = random.choices([True, False], weights=[0.3, 0.7])[0]

        data.append({
            "meeting_id": meeting_id,
            "user_id": user_id,
            "office_location": user_office_location,
            "title": title,
            "start_time": start_time,
            "end_time": end_time,
            "location": location,
            "organizer": organizer,
            "description": description,
            "attendance_choice": attendance_choice,
            "optional": optional
        })

# Create DataFrame and save to CSV
df = pd.DataFrame(data)
df.to_csv("meeting_invites.csv", index=False)

# Preview the dataset
print(df.head())


  meeting_id  user_id                                   office_location  \
0        1-1        1  68081 David Pines Apt. 695, Port Brian, SD 75606   
1        1-2        1  68081 David Pines Apt. 695, Port Brian, SD 75606   
2        1-3        1  68081 David Pines Apt. 695, Port Brian, SD 75606   
3        1-4        1  68081 David Pines Apt. 695, Port Brian, SD 75606   
4        1-5        1  68081 David Pines Apt. 695, Port Brian, SD 75606   

                                 title          start_time  \
0  Operations Update - Special Session 2022-10-11 08:02:06   
1                       Client Meeting 2024-07-08 00:53:22   
2           Town Hall - Strategy Focus 2023-02-19 06:23:52   
3        Operations Update - Follow-Up 2022-06-21 13:14:05   
4         Strategy Session - Recurring 2024-07-11 14:14:05   

             end_time                                           location  \
0 2022-10-11 09:16:06  540 Mark Mews Suite 624, East Maryhaven, MA 98424   
1 2024-07-08 02:00:22   

In [22]:
# Save DataFrame to a CSV file
# DataFrame.to_csv does not create missing directories, so make sure the
# target folder exists before writing (a fresh checkout may not have it).
output_path = Path("readmefolder") / "meeting_invites.csv"
output_path.parent.mkdir(parents=True, exist_ok=True)
df.to_csv(output_path, index=False)