In [1]:
import csv
import random
from datetime import datetime, timedelta

In [2]:
# Define 50 templates for No Hate/Toxic sentences (mix of neutral and action-based)
# Each entry is a str.format pattern containing a `{person}` placeholder that
# is filled with an entry from the `person` list when comments are generated.
# NOTE(review): contractions appear without apostrophes ("Its", "Thats",
# "Youre", "Youve") -- confirm whether this is deliberate text normalization
# before "fixing" it, since these strings are emitted verbatim into the CSV.
no_hate_toxic_templates = [
    "I really appreciate your perspective on this, {person}.",
    "{person}, that was a very insightful comment, thank you!",
    "You made a great point, {person}. This is definitely something to think about.",
    "{person}, I agree with you completely, well said!",
    "Thank you for sharing your thoughts, {person}. Its refreshing to hear something positive.",
    "{person}, I think your suggestion is really constructive and helpful.",
    "Thats a very interesting idea, {person}. I think we should explore it further.",
    "I appreciate the way you articulated that, {person}. Very thoughtful!",
    "Youre spot on, {person}, that was very well put.",
    "Its always nice to see a comment like yours, {person}. Great job!",
    "{person}, your reasoning here is very solid. I think you are onto something!",
    "What you said makes a lot of sense, {person}. Thanks for contributing.",
    "{person}, your comment really resonates with me. Excellent work!",
    "Great input, {person}. Its people like you who make the discussion better.",
    "Your positivity is infectious, {person}. Thanks for sharing that!",
    "Very insightful, {person}. I think we could all benefit from your viewpoint.",
    "Thanks for that, {person}. Its important to have a balanced discussion.",
    "That was a very thoughtful comment, {person}. Well done!",
    "{person}, I think youre absolutely right. Thanks for bringing this up!",
    "Such a constructive contribution, {person}. Keep it up!",
    "Youre absolutely right, {person}, its good to see this perspective.",
    "{person}, that was a well-articulated point. Keep contributing!",
    "I love seeing comments like yours, {person}. It really elevates the discussion.",
    "This is a great take, {person}. Youve made it a lot clearer for everyone.",
    "Thanks for keeping things civil, {person}. Your comment is appreciated.",
    "This was well thought out, {person}. Youre making a positive difference.",
    "{person}, you make this community a better place with your input.",
    "Great thoughts, {person}. This is the kind of conversation we need.",
    "{person}, youve really shed some light on this issue. Keep up the good work!",
    "That was a very detailed and helpful comment, {person}. Thank you!",
    "{person}, this is the kind of input that brings real value to discussions.",
    "We need more people like you, {person}. Thanks for being thoughtful.",
    "Keep up the great work, {person}. Youre making a real difference!",
    "{person}, your efforts are really paying off. Kudos to you!",
    "Thank you for taking the time to help out, {person}. Its really appreciated.",
    "{person}, youre doing an amazing job! Please continue contributing.",
    "Youre a star, {person}. Your actions are positively impacting the community.",
    "{person}, your dedication is truly admirable. Great job!",
    "Its clear how much effort youre putting in, {person}. Keep it up!",
    "Youre really setting an example here, {person}. Awesome work!",
    "Thank you for leading by example, {person}. Keep inspiring others!",
    "{person}, your contributions are valuable to everyone here. Well done!",
    "Your calm approach is refreshing, {person}. Keep contributing!",
    "{person}, its always a pleasure to see thoughtful contributions like yours.",
    "We need more constructive discussions like this, {person}. Youre doing great.",
    "{person}, your well-reasoned comment stands out. Thanks for sharing.",
    "That was very kind of you, {person}, to offer such insight.",
    "You really helped clear things up, {person}. Thank you!",
    "{person}, you always bring something constructive to the conversation. Thanks!",
    "Your contributions never go unnoticed, {person}. Thanks for that!",
]

# Define potential persons (10 persons)
# Candidate values for the `{person}` placeholder of the comment templates;
# one is picked at random per generated comment.
# NOTE(review): entries such as "you" and "everyone" read awkwardly in some
# templates (e.g. "you, I agree with you completely, well said!") -- confirm
# that this is acceptable for the generated data.
person = [
    "you",
    "this user",
    "the commenter",
    "everyone",
    "the author",
    "the poster",
    "this participant",
    "the reader",
    "the speaker",
    "this member",
]

In [3]:
# Generate a random 7-character comment ID (the generator itself does not enforce uniqueness)


def generate_unique_comment_id():
    """Return a random comment id of 7 lowercase alphanumeric characters.

    Characters are drawn independently and with replacement from ``a-z0-9``.
    Despite the name, no collision check is made against ids returned by
    earlier calls.
    """
    alphabet = "abcdefghijklmnopqrstuvwxyz0123456789"
    drawn = random.choices(alphabet, k=7)
    return "".join(drawn)

In [4]:
# Generate a random link ID ("t3_" followed by 7 lowercase alphanumeric characters; uniqueness is not enforced)


def generate_unique_link_id():
    """Return a link id: the prefix ``t3_`` plus 7 random characters.

    The suffix characters are drawn independently and with replacement from
    ``a-z0-9``. Despite the name, no collision check is made against ids
    returned by earlier calls.
    """
    alphabet = "abcdefghijklmnopqrstuvwxyz0123456789"
    suffix = "".join(random.choices(alphabet, k=7))
    return "t3_" + suffix

In [5]:
# Generate a random timestamp (date and time) in 2023


def generate_random_timestamp(year=2023):
    """Return a random moment within ``year`` as a formatted string.

    A day offset from January 1st (up to and including December 31st) is
    drawn first, followed by an hour, a minute and a second, so any second
    of the year can be produced.

    Args:
        year: Calendar year the timestamp falls in.

    Returns:
        The timestamp formatted as ``MM/DD/YYYY HH:MM:SS AM/PM``
        (12-hour clock, ``"%m/%d/%Y %I:%M:%S %p"``).
    """
    first_day = datetime(year, 1, 1)
    last_day = datetime(year, 12, 31)
    max_day_offset = (last_day - first_day).days

    # Draw order (day, hour, minute, second) is kept fixed so that a seeded
    # RNG yields the same timestamp sequence.
    day_offset = random.randint(0, max_day_offset)
    hour = random.randint(0, 23)
    minute = random.randint(0, 59)
    second = random.randint(0, 59)

    moment = first_day + timedelta(
        days=day_offset, hours=hour, minutes=minute, seconds=second
    )
    return moment.strftime("%m/%d/%Y %I:%M:%S %p")

In [6]:
# Generate a random 6-letter username (lowercase letters only)


def generate_username():
    """Return a random username made of 6 lowercase ASCII letters.

    Letters are drawn independently and with replacement, so repeated
    letters (and repeated usernames across calls) are possible.
    """
    letters = random.choices("abcdefghijklmnopqrstuvwxyz", k=6)
    return "".join(letters)

In [7]:
# Generate 5000 No Hate/Toxic comments with 50 templates and 10 persons


def generate_no_hate_toxic_comments(count=5000):
    """Build a list of synthetic "No Hate/Toxic" comment texts.

    For every comment a template is picked at random from
    ``no_hate_toxic_templates`` and its ``{person}`` placeholder is filled
    with a random entry of the ``person`` list. Picks are independent, so
    duplicate texts are expected.

    Args:
        count: Number of comments to generate. Defaults to 5000, the size
            this notebook originally hard-coded; a value of 0 or less
            yields an empty list.

    Returns:
        A list of ``count`` formatted comment strings.
    """
    return [
        # Template first, then person: keeps the RNG draw order stable.
        random.choice(no_hate_toxic_templates).format(person=random.choice(person))
        for _ in range(count)
    ]

In [8]:
# CSV file columns
# Column names in output order; passed to csv.DictWriter as `fieldnames`.
# Every key of a row dict must be listed here: by default DictWriter raises
# ValueError for keys that are not in `fieldnames`.
csv_columns = [
    "text",
    "timestamp",
    "username",
    "link",
    "link_id",
    "parent_id",
    "id",
    "subreddit_id",
    "moderation",
    "year",
    "concatenated_count",
    "complete_thread",
    "gold_label",
    "generated_data",
]

In [9]:
# Constant values
# Values that are identical for every generated row.
subreddit_id = "t5_2qh8c"
# Kept as a string that looks like a Python dict repr, not as a dict object;
# it is written to the CSV verbatim.
moderation = "{'controversiality': 0, 'collapsed_reason_code': None, 'collapsed': False, 'collapsed_reason': None}"
# Written verbatim to the `year` column (a string, not an int).
year = "2023"
concatenated_count = 1
complete_thread = True
# Label assigned to every row of this file.
gold_label = "No Hate/Toxic"
# NOTE(review): presumably flags the rows as synthetic -- confirm with the
# consumers of the CSV.
generated_data = True

In [10]:
# Prepare rows for CSV
# Seed the global RNG so that re-running this cell (or Restart & Run All)
# regenerates exactly the same dataset.
RANDOM_SEED = 42
random.seed(RANDOM_SEED)

csv_data = []
no_hate_toxic_data = generate_no_hate_toxic_comments()

# The id generators draw characters at random and do not check for
# collisions, so remember the comment ids already issued and redraw on a
# duplicate to keep the `id` column unique.
seen_comment_ids = set()

# Iterate over the generated texts directly so the number of rows always
# follows the number of comments produced instead of a separate constant.
for text in no_hate_toxic_data:
    # Use the `year` column value for the timestamp too, so the two cannot
    # drift apart if the constant is changed.
    timestamp = generate_random_timestamp(int(year))
    username = generate_username()
    link_id = generate_unique_link_id()
    parent_id = link_id  # Same within each row

    comment_id = generate_unique_comment_id()
    while comment_id in seen_comment_ids:
        comment_id = generate_unique_comment_id()
    seen_comment_ids.add(comment_id)

    # NOTE(review): the post id embedded in the path ("13c3mt8") is fixed and
    # does not correspond to the random `link_id` -- confirm this is intended.
    link = f"/r/wholesome/comments/13c3mt8/Positive_discussion_on_topic/{comment_id}/"

    # Create a row for the CSV; keys must match `csv_columns`.
    row = {
        "text": text,
        "timestamp": timestamp,
        "username": username,
        "link": link,
        "link_id": link_id,
        "parent_id": parent_id,
        "id": comment_id,
        "subreddit_id": subreddit_id,
        "moderation": moderation,
        "year": year,
        "concatenated_count": concatenated_count,
        "complete_thread": complete_thread,
        "gold_label": gold_label,
        "generated_data": generated_data,
    }

    csv_data.append(row)

In [None]:
# Writing the data to a CSV file
csv_file = "no_hate_toxic_comments.csv"

# newline="" leaves line-ending handling to the csv module; the explicit
# UTF-8 encoding keeps the output independent of the platform default.
with open(csv_file, "w", encoding="utf-8", newline="") as file:
    writer = csv.DictWriter(file, csv_columns)
    # Header line first, then one line per prepared row.
    writer.writeheader()
    writer.writerows(csv_data)

print(f"CSV file '{csv_file}' generated successfully.")

CSV file 'no_hate_toxic_comments.csv' generated successfully.
