In [None]:
pip install faker

Collecting faker
  Downloading faker-37.1.0-py3-none-any.whl.metadata (15 kB)
Downloading faker-37.1.0-py3-none-any.whl (1.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.9/1.9 MB[0m [31m20.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: faker
Successfully installed faker-37.1.0


In [None]:
import pandas as pd
import numpy as np
from faker import Faker
import random
import csv
from datetime import datetime
import os

# Config
# NUM_ROWS:    total number of ticket rows the generation loop iterates over.
# CHUNK_SIZE:  number of rows generated and written per batch.
# OUTPUT_FILE: path of the CSV file the tickets are written to.
NUM_ROWS = 1_000_000
CHUNK_SIZE = 100_000
OUTPUT_FILE = "synthetic_support_tickets_1M.csv"

# Initialize Faker (used by generate_ticket for customer names and purchase dates)
fake = Faker()

# Retail categories and real-world-like products.
# Maps category name -> list of product names. The category names must match
# the top-level keys of the description table in generate_ticket_description.
CATEGORIES = {
    "Mobile Accessories": [
        "Amazon Brand - Solimo Designer 3D Printed Case for iPhone 13",
        "Samsung Galaxy S21 Tempered Glass Screen Protector",
        "Anker USB-C Fast Charger 20W",
        "OnePlus Nord Silicone Protective Back Cover",
        "Spigen Ultra Hybrid Case for Pixel 6"
    ],
    "Home Decor": [
        "Stone & Beam Contemporary Doily Wool Farmhouse Rug",
        "IKEA LACK Coffee Table",
        "Wayfair Sheer Curtain Panels Set",
        "Philips Hue Smart Table Lamp",
        "Urban Outfitters Tufted Floor Cushion"
    ],
    "Electronics": [
        "JBL Flip 5 Bluetooth Speaker",
        "Sony WH-1000XM4 Wireless Headphones",
        "Mi Power Bank 20000mAh",
        "Fire TV Stick 4K",
        "Fitbit Charge 5 Fitness Tracker"
    ]
}

# Value pools sampled by generate_ticket.
# ticket_types must match the inner keys of the description table in
# generate_ticket_description; statuses and channels are sampled uniformly.
ticket_types = ["Complaint", "Query", "Feedback", "Issue"]
ticket_statuses = ["Open", "Closed", "Resolved", "Escalated"]
ticket_channels = ["Email", "Phone", "Chat", "Web"]
# Order matters: generate_ticket pairs these positionally with weights [0.5, 0.3, 0.2].
priorities = ["Low", "Medium", "High"]

# Description templates keyed by category, then by ticket type. They are built
# once here (instead of inside the function) so that a call formats only the
# single template it returns rather than all twelve.
_DESCRIPTION_TEMPLATES = {
    "Mobile Accessories": {
        "Complaint": "The {product} doesn't fit properly and the cutouts are off.",
        "Query": "Is the {product} compatible with my device model?",
        "Feedback": "The {product} design is nice, but I expected more durability.",
        "Issue": "The {product} started to peel within a few days of use.",
    },
    "Home Decor": {
        "Complaint": "My {product} arrived with stains and poor stitching quality.",
        "Query": "Can you confirm if the {product} is machine washable?",
        "Feedback": "The {product} is beautiful and soft. Matches my interior perfectly!",
        "Issue": "The {product} sheds fibers excessively even after vacuuming.",
    },
    "Electronics": {
        "Complaint": "The {product} stopped working after just one week of use.",
        "Query": "Does the {product} support fast charging or Bluetooth 5.0?",
        "Feedback": "Loved the sound and battery life of the {product}.",
        "Issue": "My {product} gets unusually hot while in use. Is this expected?",
    },
}


# Generate descriptive, category-specific issue
def generate_ticket_description(product, category, ticket_type):
    """Return the canned description sentence for a ticket.

    Args:
        product: Product name inserted into the sentence.
        category: One of "Mobile Accessories", "Home Decor", "Electronics".
        ticket_type: One of "Complaint", "Query", "Feedback", "Issue".

    Returns:
        The description string for the (category, ticket_type) pair with
        ``product`` substituted in.

    Raises:
        KeyError: If ``category`` or ``ticket_type`` is not a known key.
    """
    template = _DESCRIPTION_TEMPLATES[category][ticket_type]
    # Braces inside the product value are inserted verbatim; only the
    # template's own {product} placeholder is interpreted.
    return template.format(product=product)

# Generate a single support ticket
def generate_ticket(row_id):
    """Build one synthetic support-ticket record.

    Category, product, ticket type, customer attributes, status, channel,
    priority, timings and satisfaction are all sampled randomly; the
    description comes from generate_ticket_description.

    Args:
        row_id: Zero-based row index; the ticket ID is "TCKT" followed by
            ``1000000 + row_id``.

    Returns:
        A dict with one entry per CSV column, in column order.
    """
    category = random.choice(list(CATEGORIES))
    product = random.choice(CATEGORIES[category])
    ticket_type = random.choice(ticket_types)

    name = fake.name()
    # Faker can return names with punctuated titles or suffixes (e.g.
    # "Dr. Jane Doe", "John Smith Jr."). Replacing spaces with dots alone
    # would give "dr..jane.doe" / "john.smith.jr." — consecutive or trailing
    # dots, which are not valid in an email local part. Drop the periods and
    # join the remaining words with single dots instead.
    email_local = ".".join(name.lower().replace(".", "").split())
    email = f"{email_local}@example.com"
    age = random.randint(18, 65)
    gender = random.choice(["Male", "Female", "Non-binary"])
    purchase_date = fake.date_between(start_date='-1y', end_date='today')

    status = random.choice(ticket_statuses)
    channel = random.choice(ticket_channels)
    # Weights are positional: they pair with the order of `priorities`.
    priority = random.choices(priorities, weights=[0.5, 0.3, 0.2])[0]
    first_response = random.randint(10, 2880)  # minutes
    resolution_time = random.randint(1, 168)  # hours
    # Ratings skew positive: 5 and 4 account for 70% of the weight.
    satisfaction = random.choices([5, 4, 3, 2, 1], weights=[0.4, 0.3, 0.2, 0.05, 0.05])[0]

    subject = ticket_type + " regarding " + product
    description = generate_ticket_description(product, category, ticket_type)
    # NOTE(review): the resolution text depends only on ticket_type, so
    # tickets whose status is "Open" or "Escalated" also carry a resolution.
    # Confirm this is intended for the downstream use of the dataset.
    resolution = (
        "Replacement issued and return initiated." if ticket_type == "Complaint"
        else "Customer query answered with additional product care tips."
    )

    return {
        "Ticket ID": f"TCKT{1000000 + row_id}",
        "Customer Name": name,
        "Customer Email": email,
        "Customer Age": age,
        "Customer Gender": gender,
        "Product Purchased": product,
        "Date of Purchase": purchase_date,
        "Ticket Type": ticket_type,
        "Ticket Subject": subject,
        "Ticket Description": description,
        "Ticket Status": status,
        "Resolution": resolution,
        "Ticket Priority": priority,
        "Ticket Channel": channel,
        "First Response Time": f"{first_response} minutes",
        "Time to Resolution": f"{resolution_time} hours",
        "Customer Satisfaction Rating": satisfaction
    }

# CSV column order. This must list exactly the keys returned by
# generate_ticket; DictWriter raises ValueError on an unexpected key, so a
# mismatch fails loudly instead of producing rows that disagree with the header.
fieldnames = [
    "Ticket ID", "Customer Name", "Customer Email", "Customer Age", "Customer Gender",
    "Product Purchased", "Date of Purchase", "Ticket Type", "Ticket Subject",
    "Ticket Description", "Ticket Status", "Resolution", "Ticket Priority",
    "Ticket Channel", "First Response Time", "Time to Resolution",
    "Customer Satisfaction Rating"
]

# Write the header and every chunk through a single file handle and a single
# writer, rather than reopening the file in append mode for each chunk.
with open(OUTPUT_FILE, mode="w", newline="", encoding="utf-8") as file:
    writer = csv.DictWriter(file, fieldnames=fieldnames)
    writer.writeheader()

    # Generate and write in chunks
    for start in range(0, NUM_ROWS, CHUNK_SIZE):
        # Clamp the last chunk so exactly NUM_ROWS rows are written even when
        # NUM_ROWS is not a multiple of CHUNK_SIZE.
        end = min(start + CHUNK_SIZE, NUM_ROWS)
        print(f"Generating rows {start} to {end - 1}")
        # Stream rows straight into the writer; no need to hold a whole
        # chunk of dicts in memory first.
        writer.writerows(generate_ticket(i) for i in range(start, end))

# Report the configured row count instead of a hard-coded "1M".
print(f"\n✅ Completed: {OUTPUT_FILE} generated with {NUM_ROWS:,} rows.")


Generating rows 0 to 99999
Generating rows 100000 to 199999
Generating rows 200000 to 299999
Generating rows 300000 to 399999
Generating rows 400000 to 499999
Generating rows 500000 to 599999
Generating rows 600000 to 699999
Generating rows 700000 to 799999
Generating rows 800000 to 899999
Generating rows 900000 to 999999

✅ Completed: synthetic_support_tickets_1M.csv generated with 1M rows.


Python code to generate the return/refund policy table (one row per product)

In [None]:
import pandas as pd

# Recreate data after code execution environment was reset.
# Maps category name -> list of product names; the contents mirror the
# CATEGORIES table used by the ticket generator cell.
product_catalog = {
    "Mobile Accessories": [
        "Amazon Brand - Solimo Designer 3D Printed Case for iPhone 13",
        "Samsung Galaxy S21 Tempered Glass Screen Protector",
        "Anker USB-C Fast Charger 20W",
        "OnePlus Nord Silicone Protective Back Cover",
        "Spigen Ultra Hybrid Case for Pixel 6"
    ],
    "Home Decor": [
        "Stone & Beam Contemporary Doily Wool Farmhouse Rug",
        "IKEA LACK Coffee Table",
        "Wayfair Sheer Curtain Panels Set",
        "Philips Hue Smart Table Lamp",
        "Urban Outfitters Tufted Floor Cushion"
    ],
    "Electronics": [
        "JBL Flip 5 Bluetooth Speaker",
        "Sony WH-1000XM4 Wireless Headphones",
        "Mi Power Bank 20000mAh",
        "Fire TV Stick 4K",
        "Fitbit Charge 5 Fitness Tracker"
    ]
}

# Return/refund policy per category. Every key of product_catalog needs an
# entry here, because the policy is looked up by category name and applied to
# each product in that category.
# return_window_days is an int; the remaining fields are free-text strings
# (e.g. restocking_fee is "No" or a percentage such as "10%").
category_policy_map = {
    "Mobile Accessories": {
        "return_window_days": 15,
        "refund_allowed": "Yes",
        "replacement_allowed": "Yes",
        "restocking_fee": "No",
        "description": "Must be returned in original condition with packaging."
    },
    "Home Decor": {
        "return_window_days": 30,
        "refund_allowed": "Yes",
        "replacement_allowed": "Yes",
        "restocking_fee": "10%",
        "description": "Return accepted only if unused and undamaged."
    },
    "Electronics": {
        "return_window_days": 10,
        "refund_allowed": "Yes",
        "replacement_allowed": "Yes (limited)",
        "restocking_fee": "No",
        "description": "Refund only on manufacturing defects, no cosmetic damage."
    }
}

from pathlib import Path

# Build one policy row per product; each product inherits the policy of the
# category it is listed under.
policy_data = []

for category, products in product_catalog.items():
    policy = category_policy_map[category]
    policy_data.extend(
        {
            "product_name": product,
            "category": category,
            "return_window_days": policy["return_window_days"],
            "refund_allowed": policy["refund_allowed"],
            "replacement_allowed": policy["replacement_allowed"],
            "restocking_fee": policy["restocking_fee"],
            "return_policy_description": policy["description"]
        }
        for product in products
    )

policy_df = pd.DataFrame(policy_data)

# Save to CSV
output_path = "/mnt/data/return_refund_policy.csv"
# to_csv does not create missing directories, so make sure the target
# directory exists before writing; otherwise the save fails on any machine
# where /mnt/data has not been created yet.
Path(output_path).parent.mkdir(parents=True, exist_ok=True)
policy_df.to_csv(output_path, index=False)

# Last expression of the cell: echoes the saved path in the notebook output.
output_path
