# In [None]:
import pandas as pd
import random

# Positive-class templates (label 1): generate_synthetic_message() draws from
# this list when it is called with label == 1.
# NOTE(review): these sentences are intentionally biased/offensive; judging by
# the output file name they appear to exist only as training examples for a
# detection model -- confirm before reusing them anywhere else.
racial_templates = [
    "Black people are always late.",
    "Asians are good at math.",
    "Mexicans are lazy.",
    "That neighborhood is full of Asians.",
    "I don't trust Black people.",
    "Mexicans are taking our jobs.",
    "He looks like a terrorist.",
    "She must be from a poor background because she's Black.",
    "I bet he speaks Spanish because he's Mexican.",
    "All Asians know martial arts, right?",
    "They might not understand our company's values because they're Black.",
    "I'm concerned about how they'd handle our corporate events since they're Asian.",
    "We need someone who can represent us at high-level meetings, not someone like them.",
    "Their style of dress isn't quite what we're looking for; too Mexican.",
    "I'm not sure they'd be comfortable in our neighborhood full of white people.",
    "We need someone who can navigate our social circles, not Asians.",
    "They might struggle with our company's social activities; they're Black.",
    "I'm concerned about potential cultural misunderstandings with Mexicans.",
    "We typically look for candidates from certain backgrounds, not Asians.",
    "They might not have the right network for business development; they're Mexican.",
    "I'm not sure they'd fit in with our executive team; too Black.",
    "We need someone who understands our unwritten rules, not someone like them.",
    "Their hair/appearance might be too ethnic for our clients.",
    "I'm concerned about how they'd handle international assignments; they're Asian.",
    "We need someone who can adapt to our way of doing things; not Mexicans.",
    "They might not understand the nuances of our industry; they're Black.",
    "I'm not sure they have the right 'look' for this position; too Mexican.",
    "We typically promote people who remind us of ourselves, not Asians.",
    "They might struggle with our company's social expectations; they're Black.",
    "I'm concerned about how they'd handle high-pressure situations; too Mexican.",
    "We need someone who can navigate complex social dynamics, not Asians.",
    "Their background might not align with our target market; they're Black.",
    "I'm not sure they have the right educational pedigree; too Mexican.",
    "We typically look for candidates from certain neighborhoods, not Asians.",
    "They might not understand our company's unspoken norms; they're Black.",
    "I'm concerned about potential conflicts with our current team; they're Mexican.",
    "We need someone who can represent us at industry events, not Asians.",
    "Their communication style might not resonate with our clients; too Black.",
    "I'm not sure they have the right cultural fit for our team; they're Mexican.",
    "We typically hire people who share our background and experiences, not Asians.",
    "They don't really fit our company culture; too Black.",
    "We need someone who can relate to our core demographic, not Mexicans.",
    "Their name is too difficult to pronounce; they're Asian.",
    "They might not be comfortable in our fast-paced environment; too Black.",
    "I'm not sure they'd get along with our current team; they're Mexican.",
    "We need someone with a more professional appearance, not Asians.",
    "Their communication skills aren't up to our standards; too Black.",
    "They might not understand our target market; they're Mexican.",
    "We're looking for someone who 'looks the part'; not Asians.",
    "They lack the polish we're looking for; too Black.",
    "I'm concerned about potential language barriers with Mexicans.",
    "They might not fit in with our corporate image; they're Asian.",
    "We need someone our clients can easily relate to, not Black people.",
    "Their background is quite different from what we usually see; too Mexican.",
    "I'm not sure they'd be comfortable in our office culture; they're Asian.",
    "We typically hire from certain universities, not those like Black people.",
    "They might struggle with our work hours due to their background; too Mexican.",
    "I'm not sure they have the right connections for this role; they're Asian.",
    "Their accent might be an issue for client-facing roles; they're Black.",
    "All Asians are good at math, right?"
]

# Negative-class templates: generate_synthetic_message() draws from this list
# for any label other than 1 (the generation loop below uses 0).
# All entries are neutral workplace/scheduling sentences.
non_racial_templates = [
    "The project is due next week.",
    "I will attend the meeting.",
    "Let's have a team lunch tomorrow.",
    "The budget has been approved.",
    "We need to improve our performance.",
    "Can you send me the report?",
    "Our company is growing rapidly.",
    "She has excellent presentation skills.",
    "He is always on time for meetings.",
    "They work well under pressure.",
    "Don't forget to send the report.",
    "Lunch at the new restaurant.",
    "Dinner with clients at 7 PM tonight.",
    "Meeting at 10 AM tomorrow.",
    "Reminder: Parent-teacher meeting tomorrow at 2 PM.",
    "Reminder: Doctor's appointment tomorrow at 9 AM.",
    "Please update your contact information.",
    "The marketing team is developing a new advertising campaign.",
    "The human resources department is conducting employee performance reviews.",
    "The IT department is working on resolving technical issues.",
    "The sales team is attending a trade show next week.",
    "The customer service department is handling a high volume of calls.",
    "The legal department is reviewing the company's privacy policy.",
    "The facilities team is coordinating the office renovation project.",
    "The research and development team is testing a new product prototype.",
    "The quality control department is inspecting the latest batch of products.",
    "The logistics team is optimizing the supply chain process.",
    "The public relations department is preparing a press release.",
    "The human resources department is organizing a team-building event.",
    "The marketing team is analyzing customer feedback.",
    "The sales team is conducting product demonstrations for potential clients.",
    "The IT department is implementing new cybersecurity measures.",
    "The customer service department is launching a new chat support feature.",
    "The legal department is filing a patent application.",
    "The facilities team is upgrading the office security system.",
    "The research and development team is collaborating with a university on a research project.",
    "The quality control department is conducting supplier audits.",
    "The logistics team is negotiating better shipping rates with carriers.",
    "The public relations department is managing a social media crisis.",
    "The human resources department is implementing a new employee training program.",
    "The marketing team is planning a product launch event.",
    "The sales team is developing a new sales strategy.",
    "The IT department is migrating the company's data to the cloud.",
    "The customer service department is conducting a customer satisfaction survey.",
    "The legal department is handling a contract dispute.",
    "The facilities team is implementing a new recycling program.",
    "The research and development team is exploring new technologies.",
    "The quality control department is developing new testing procedures.",
    "The logistics team is expanding the company's distribution network.",
    "The public relations department is organizing a charity event.",
    "The human resources department is conducting a salary benchmarking study.",
    "The marketing team is developing a new brand identity.",
    "The sales team is training new sales representatives.",
    "The IT department is developing a new mobile app.",
    "The customer service department is implementing a new customer loyalty program.",
    "The legal department is updating the company's terms and conditions.",
    "The facilities team is conducting a safety audit.",
    "The research and development team is filing a new patent.",
    "The quality control department is implementing a new quality management system.",
    "The logistics team is piloting a new inventory management system."
]

# Function to generate synthetic messages
def generate_synthetic_message(label):
    """Return one randomly chosen template sentence for the given class.

    Args:
        label: Class indicator. A value equal to ``1`` selects from
            ``racial_templates``; any other value selects from
            ``non_racial_templates``.

    Returns:
        A single template string picked with ``random.choice``.
    """
    pool = racial_templates if label == 1 else non_racial_templates
    return random.choice(pool)

# Generate synthetic dataset
#
# Rows are unique [message, label] pairs, so the dataset can never hold more
# rows than there are distinct templates. The earlier rejection-sampling loop
# kept drawing until it had `num_rows` unique rows, which never terminates
# when `num_rows` exceeds that limit (and rescanned the whole list per draw).
num_rows = 10000

# Every distinct row that can exist. dict.fromkeys drops any repeated template
# within a list while keeping the original order.
unique_rows = [[message, 1] for message in dict.fromkeys(racial_templates)]
unique_rows += [[message, 0] for message in dict.fromkeys(non_racial_templates)]

# Cap the request at what is actually available and say so, rather than
# looping forever or silently returning fewer rows than asked for.
row_count = min(num_rows, len(unique_rows))
if row_count < num_rows:
    print(
        f"Requested {num_rows} unique rows but only {len(unique_rows)} "
        f"distinct template/label pairs exist; generating {row_count} rows."
    )

# Sampling without replacement gives a random order with no duplicates and is
# guaranteed to finish.
data_synthetic = random.sample(unique_rows, row_count)

# Create DataFrame (rows are already unique by construction)
df_synthetic = pd.DataFrame(data_synthetic, columns=["message", "label"])

# Save the synthetic dataset to a CSV file
# NOTE(review): the leading "/" targets the filesystem root, which is often
# not writable outside hosted notebook environments -- confirm the intended
# output location.
output_file_path = '/Synthetic_Racial_Comments_Detection_Data.csv'
df_synthetic.to_csv(output_file_path, index=False)

# Display first few rows of the generated dataset
df_synthetic.head()
