In [5]:
import pandas as pd
import random
from faker import Faker

# Shared Faker instance; generate_email_data() uses it for the synthetic
# addresses, timestamps and meeting-link suffixes.
fake = Faker()

# How many synthetic emails a default run produces
num_emails = 3000

# Base join-URL for every supported meeting platform; a random suffix is
# appended to it when a link is generated
meeting_links = {
    'Zoom': 'https://zoom.us/j/',
    'Microsoft Teams': 'https://teams.microsoft.com/l/meetup-join/',
    'Google Meet': 'https://meet.google.com/',
    'WebEx': 'https://webex.com/meet/',
}

# Platforms for structured meetings, in the same order as the keys above
platforms = list(meeting_links)

# Subject lines for formal (structured) invitations
subjects_structured = [
    'Formal Meeting Invitation',
    'Scheduled Team Meeting',
    'Project Discussion Invite',
]

# Subject lines for casual (unstructured) requests
subjects_unstructured = [
    'Quick Catch-up',
    'Sync Up',
    'Informal Chat',
    'Let’s Discuss',
]

# Body templates for structured (formal) meeting invitations.
# Each template contains exactly one "{link}" placeholder that is filled in
# with str.format(link=...). Every template is listed once so that
# random.choice() picks each of them with equal probability.
structured_bodies = [
    "Please join the scheduled meeting by following this link: {link}. We will be discussing project updates and next steps.",
    "You are invited to a planned meeting concerning our ongoing projects. Details can be accessed here: {link}. Looking forward to your valuable inputs.",
    "This is a reminder to attend the upcoming meeting that is scheduled at the below link: {link}. Please be prepared to review the agenda attached in previous correspondence.",
    "Kindly follow this link {link} to join the meeting where we will finalize the details discussed in our last session.",
    "The team meeting will take place using the following link: {link}. Please ensure to go through the listed topics to be covered.",
    "Join our weekly check-in using this link {link}. We will discuss progress and adjustments needed going forward.",
    "To participate in the upcoming policy review meeting, please use this link {link}. Your feedback is valuable.",
    "Please join the budget alignment session at this link {link}. It's crucial for all department heads to attend.",
    "Access the quarterly forecast meeting through this link {link}. We will be discussing budget allocations and future projections.",
    "You are required to attend the risk assessment meeting at {link}. Please prepare all relevant reports beforehand.",
    "Join us for a detailed discussion on the upcoming project initiatives. Here's your link: {link}. Please review the attached agenda.",
    "We are looking forward to your participation in tomorrow's strategy session. Connect with us at {link} for pivotal discussions.",
    "Please click this link {link} to join the webinar where key project milestones will be reviewed. Ensure your participation.",
    "The monthly department meeting will occur via {link}. We'll cover the quarterly earnings and departmental shifts.",
    "This is your invitation to our next project review meeting. The details are available at {link}. Your input is crucial.",
    "We'll be expecting you at the strategy planning session. Here's your joining link: {link}. The session's focus will be on upcoming market trends.",
    "Reminder: The project closure meeting is set for this week. Join using this link {link} to participate in the final discussions.",
    "You're invited to attend the technical review meeting at {link}. Please be prepared to discuss the issues listed in the prior memo.",
    "Please attend the planning workshop by accessing this link {link}. It’s vital that you’re present as we will finalize the project scopes.",
    "The link for today’s training session is {link}. Please ensure to log in on time as we have a full schedule.",
    "The client presentation will take place via this link {link}. Ensure that all project updates are ready to be shared.",
    "This link {link} will direct you to our next compliance training session. Attendance is mandatory for all team members.",
    "Here is the link for the stakeholder's meeting: {link}. Please confirm your availability by replying to this email.",
    "Your presence is requested at the innovation brainstorming session here: {link}. Bring your ideas and insights.",
    "For the upcoming review of operational efficiency, please join us via this link: {link}. Your input will be crucial.",
]

# Body texts for unstructured (informal) requests: used verbatim, with no
# meeting link and no placeholder to fill in.
unstructured_bodies = [
    "Hey, are you free to catch up about the project at around 3 PM today? Let me know!",
    "Can we have a quick call sometime this afternoon to discuss the new updates?",
    "Just touching base to see if you can meet tomorrow at 10 AM to finalize the details?",
    "Could we set up a time today to chat about the upcoming deadlines? I’m thinking around 2 PM?",
    "I’m available later today for that discussion we postponed. Is 4 PM good for you?",
    "Let’s sync up about the new team assignments. How does your schedule look for a quick call today?",
    "We need to align on the strategy adjustments. Are you available for a brief discussion this afternoon?",
    "Could you spare a few minutes today around noon to talk about the client feedback?",
    "It’s time we caught up on the project status. Do you have a moment to discuss it today?",
    "I have some ideas about our next steps in the project. Can we discuss them over a quick call today?",
    "Let’s discuss the tasks for next week. What time suits you best for a short meeting today?",
    "We should review the recent changes in the project scope. Can you meet at 1 PM today?",
    "I need to go over some issues that have come up. Are you free to talk at 3 PM?",
    "Hey, let's catch up briefly today. How does your afternoon look for a quick chat?",
    "It’s crucial we discuss our approach for the new market segment. Can you talk at 11 AM?",
    "We need to finalize our presentation details. Can we talk about it today at 5 PM?",
    "I have updates from today's session. Can we review them together? What time are you free?",
    "Let’s set a time to discuss how we’re handling the workflow changes. Are you free later today?",
    "I’d like to get your insights on our current milestones. Can we connect anytime today?",
]

def _structured_email() -> dict:
    """Return one formal-invitation record with a platform and a meeting link."""
    platform = random.choice(platforms)
    # Each '?' in the pattern is replaced by Faker with a random letter,
    # giving a short fake meeting id appended to the platform's base URL.
    meeting_link = meeting_links[platform] + fake.lexify('????')
    body_text = random.choice(structured_bodies).format(link=meeting_link)
    # NOTE: 'Meeting Date' / 'Meeting Time' are drawn independently of
    # 'Sent Time', so a meeting may appear to precede the email announcing it.
    return {
        'From': fake.email(),
        'To': fake.email(),
        'Subject': random.choice(subjects_structured),
        'Sent Time': fake.date_time_this_month(),
        'Meeting Date': fake.date_this_month(),
        'Meeting Time': fake.time_object(),
        'Platform': platform,
        'Body': body_text
    }


def _unstructured_email() -> dict:
    """Return one informal-request record; meeting fields are the string 'N/A'."""
    body_text = random.choice(unstructured_bodies)
    return {
        'From': fake.email(),
        'To': fake.email(),
        'Subject': random.choice(subjects_unstructured),
        'Sent Time': fake.date_time_this_month(),
        'Meeting Date': 'N/A',
        'Meeting Time': 'N/A',
        'Platform': 'N/A',
        'Body': body_text
    }


def generate_email_data(count=None):
    """Generate a list of synthetic email records.

    Each record is chosen with equal probability to be either a structured
    invitation (formal subject, platform, meeting link embedded in the body,
    meeting date and time) or an unstructured request (casual subject and
    body, with 'N/A' in the meeting columns).

    Args:
        count: Number of records to generate. When omitted (``None``) the
            module-level ``num_emails`` is used, which is the original
            behaviour. A value of 0 or less yields an empty list.

    Returns:
        A list of dicts, all with the keys 'From', 'To', 'Subject',
        'Sent Time', 'Meeting Date', 'Meeting Time', 'Platform' and 'Body'.
    """
    if count is None:
        # Looked up at call time so that reassigning num_emails is honoured.
        count = num_emails

    data = []
    for _ in range(count):
        # Coin flip between the two record shapes.
        if random.choice([True, False]):
            data.append(_structured_email())
        else:
            data.append(_unstructured_email())
    return data

# Generate the records and load them into a table
records = generate_email_data()
df = pd.DataFrame(records)

# Write the table to disk, leaving out the row-index column
df.to_csv('varied_email_dataset.csv', index=False)

# Confirm completion to the user
print("Varied email dataset generated and saved to 'varied_email_dataset.csv'.")


Varied email dataset generated and saved to 'varied_email_dataset.csv'.
