In [1]:
import random
import csv
import faker
from datetime import datetime, timedelta

# Create instance of Faker to generate random data.
# This single module-level generator is shared by the functions below;
# generate_order_data() calls fake.uuid4() on it to produce each Order ID.
# NOTE(review): it is not seeded in this cell, so IDs presumably differ
# between runs -- seed it if reproducible output is needed.
fake = faker.Faker()

# Define possible values for categorical columns.
# Each list is a pool from which generate_order_data() draws one value per
# row with random.choice, so every entry is equally likely.
product_categories = [
    'Electronics',
    'Clothing',
    'Food',
    'Furniture',
    'Toys',
]
regions = [
    'North',
    'South',
    'East',
    'West',
]
payment_methods = [
    'Credit Card',
    'PayPal',
    'Cash',
]
order_statuses = [
    'Completed',
    'Pending',
    'Shipped',
]
age_groups = [
    '18-24',
    '25-34',
    '35-44',
    '45-54',
    '55+',
]
shipping_methods = [
    'Standard',
    'Express',
    'Same Day',
]
product_types = [
    'Laptop',
    'T-shirt',
    'Food Item',
    'Chair',
    'Smartphone',
]
discount_applied = [
    'Yes',
    'No',
]
sales_channels = [
    'Online',
    'In-store',
]

# Function to generate random numerical values
def generate_numerical_data():
    """Draw the five numeric fields of one synthetic order.

    Returns:
        list: ``[order_total, quantity, shipping_cost, discount_value,
        processing_time]`` where

        * order_total     -- float in [10, 500] USD, rounded to 2 decimals
        * quantity        -- int in [1, 10] items
        * shipping_cost   -- float in [5, 50] USD, rounded to 2 decimals
        * discount_value  -- float in [0, 50] USD, rounded to 2 decimals
        * processing_time -- int in [1, 48] hours
    """
    def random_amount(low, high):
        # Monetary amounts are kept to cent precision.
        return round(random.uniform(low, high), 2)

    # The elements of a list literal are evaluated left to right, so the
    # random draws happen in exactly the order the fields are listed.
    return [
        random_amount(10, 500),   # order total
        random.randint(1, 10),    # quantity
        random_amount(5, 50),     # shipping cost
        random_amount(0, 50),     # discount value
        random.randint(1, 48),    # processing time
    ]

# Function to generate random order dates
def generate_random_date(start_date, end_date):
    """Return a random day between two datetimes, formatted as 'YYYY-MM-DD'.

    A whole number of days between 0 and ``(end_date - start_date).days``
    (both inclusive) is added to ``start_date``; any time-of-day component is
    dropped by the formatting.

    Args:
        start_date: Earliest possible date (a ``datetime``).
        end_date: Latest possible date (a ``datetime``); must not be earlier
            than ``start_date``.

    Returns:
        str: The chosen date formatted with ``"%Y-%m-%d"``.

    Raises:
        ValueError: If ``end_date`` is earlier than ``start_date``.
    """
    span_days = (end_date - start_date).days
    if span_days < 0:
        # random.randint would also raise ValueError here, but with an
        # "empty range" message that does not point at the real problem.
        raise ValueError(
            f"end_date ({end_date}) must not be earlier than "
            f"start_date ({start_date})"
        )
    # randint is inclusive on both ends, so both boundary days can be drawn.
    day_offset = random.randint(0, span_days)
    return (start_date + timedelta(days=day_offset)).strftime("%Y-%m-%d")

# Function to create the data for one CSV
def generate_order_data(num_rows=500, start_date="2024-11-01", end_date="2024-11-05"):
    """Build ``num_rows`` synthetic order rows.

    Each row is a list of 16 values laid out in the same order as the
    module-level ``columns`` header: a UUID order ID, nine categorical values
    drawn uniformly from the module-level pools, a random order date, and the
    five numeric fields returned by ``generate_numerical_data()``.

    Note: the numeric discount value is drawn independently of the
    'Yes'/'No' discount flag, so the two columns are not correlated.

    Args:
        num_rows: Number of rows to generate.
        start_date: Earliest order date, as a ``"YYYY-MM-DD"`` string.
        end_date: Latest order date, as a ``"YYYY-MM-DD"`` string.

    Returns:
        list[list]: One inner list per generated order.
    """
    date_format = "%Y-%m-%d"
    first_day = datetime.strptime(start_date, date_format)
    last_day = datetime.strptime(end_date, date_format)

    # Listed in column order; one value is picked from each pool per row.
    categorical_pools = (
        product_categories,
        regions,
        payment_methods,
        order_statuses,
        age_groups,
        shipping_methods,
        product_types,
        discount_applied,
        sales_channels,
    )

    def build_row():
        # Order of the draws: ID, categorical picks, date, numeric fields.
        leading = [fake.uuid4()]
        leading.extend(random.choice(pool) for pool in categorical_pools)
        leading.append(generate_random_date(first_day, last_day))
        return leading + generate_numerical_data()

    return [build_row() for _ in range(num_rows)]

# Columns for the CSV file.
# The order matches the rows built by generate_order_data():
# the ID, nine categorical fields, the order date, then five numeric fields.
columns = [
    'Order ID',
    # Categorical fields
    'Product Category',
    'Customer Region',
    'Payment Method',
    'Order Status',
    'Customer Age Group',
    'Shipping Method',
    'Product Type',
    'Discount Applied',
    'Sales Channel',
    'Order Date',
    # Numeric fields
    'Order Total ($)',
    'Quantity Ordered',
    'Shipping Cost ($)',
    'Discount Value ($)',
    'Order Processing Time (hrs)',
]

# Function to write data to CSV
def write_csv(day_num, num_rows=500):
    """Generate a batch of synthetic orders and write it to a per-day CSV file.

    The file is named ``order_data_day_<day_num>.csv`` (relative to the
    current working directory) and is opened in write mode, so an existing
    file with that name is replaced. It contains the ``columns`` header row
    followed by ``num_rows`` data rows.

    Note: ``day_num`` only determines the file name. The rows come from
    ``generate_order_data(num_rows)``, which uses its own default date range,
    so the order dates in the file are not tied to ``day_num``.

    Args:
        day_num: Value interpolated into the output file name.
        num_rows: Number of data rows to generate and write.
    """
    filename = f"order_data_day_{day_num}.csv"
    data = generate_order_data(num_rows)
    # newline='' leaves line-ending handling to the csv module; the explicit
    # encoding keeps the output independent of the platform's locale default.
    with open(filename, mode='w', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerow(columns)  # Writing the header
        writer.writerows(data)    # Writing the data

# Generate 5 CSV files for 5 days (day numbers 1 through 5).
# Each call uses write_csv's default of 500 rows and writes
# order_data_day_<day>.csv; a progress line is printed after each file.
for day in range(1, 6):
    write_csv(day)
    print(f"CSV file for Day {day} generated.")


CSV file for Day 1 generated.
CSV file for Day 2 generated.
CSV file for Day 3 generated.
CSV file for Day 4 generated.
CSV file for Day 5 generated.
