In [1]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta

In [2]:
# Settings that drive the synthetic-data generation below
np.random.seed(42)  # seed NumPy's global RNG so the random draws are repeatable
number_records = 1_000  # number of rows to generate
# Date window handed to the order-date generator
start_date = datetime(year=2022, month=1, day=1)
end_date = datetime(year=2023, month=12, day=31)

In [4]:
def random_date(start, end):
    """Return ``start`` shifted by a random whole number of days, up to ``end``.

    The offset is drawn uniformly from ``0 .. (end - start).days`` inclusive,
    so both ``start`` and ``end`` (when it is a whole number of days after
    ``start``) can be returned. The time-of-day of ``start`` is preserved.

    Args:
        start: Lower bound of the window (``datetime``).
        end: Upper bound of the window (``datetime``); must not precede ``start``.

    Returns:
        A ``datetime`` between ``start`` and ``end`` inclusive.

    Raises:
        ValueError: Propagated from ``np.random.randint`` when ``end`` is
            earlier than ``start``.
    """
    span_days = (end - start).days
    # randint's upper bound is exclusive; +1 makes `end` reachable and lets
    # start == end return `start` instead of raising "low >= high".
    offset = int(np.random.randint(0, span_days + 1))
    return start + timedelta(days=offset)

# Catalogue for the generator: each product category maps to the product
# names that may be drawn ("items") and the (low, high) bounds used when
# drawing a unit price ("price_range").
# Keep the key order stable: the category list built from these keys is what
# gets sampled, so reordering changes the seeded output.
product_mapping = {
    "Electronics": {
        "items": ["Phone", "Laptop", "Headphones"],
        "price_range": (100, 999),
    },
    "Apparel": {
        "items": ["Shirt", "Shoes", "Jacket"],
        "price_range": (10, 250),
    },
    "Furniture": {
        "items": ["Sofa", "Table", "Chair"],
        "price_range": (50, 999),
    },
    "Books": {
        "items": ["Mystery", "Romance", "Science Fiction"],
        "price_range": (10, 150),
    },
    "Toys": {
        "items": ["Car", "Doll", "Puzzle"],
        "price_range": (5, 99),
    },
}

def assign_product_and_price(category):
    """Pick a random product and a random unit price for ``category``.

    Looks ``category`` up in ``product_mapping``, draws one entry of its
    "items" list with ``np.random.choice``, then draws a price with
    ``np.random.uniform`` between the two "price_range" bounds and rounds it
    to 2 decimal places.

    Returns:
        A ``(product, price)`` tuple.

    Raises:
        KeyError: If ``category`` is not a key of ``product_mapping``.
    """
    spec = product_mapping[category]
    # Draw the product first, then the price, so RNG consumption order is fixed.
    chosen_item = np.random.choice(spec["items"])
    low, high = spec["price_range"]
    unit_price = round(np.random.uniform(low, high), 2)
    return chosen_item, unit_price

# Dummy data generation
# NOTE: the entries are evaluated top to bottom and all draw from the global
# NumPy RNG, so reordering them changes the dataset produced for a given seed.
data = {
    "Order ID": [f"ORD-{str(i).zfill(5)}" for i in range(1, number_records + 1)],
    "Order Date": [random_date(start_date, end_date) for _ in range(number_records)],
    "Customer ID": [f"CUST-{np.random.randint(100, 200)}" for _ in range(number_records)],
    "Product Category": np.random.choice(list(product_mapping.keys()), number_records),
    "Quantity Sold": np.random.randint(1, 10, number_records),
    "Region": np.random.choice(["North", "South", "East", "West"], number_records)
}

# Create DataFrame
df = pd.DataFrame(data)

# Assign Product Name and Unit Price based on Product Category.
# Draw one (product, price) pair per row in row order, then build both columns
# in a single DataFrame construction instead of expanding every tuple through
# `.apply(pd.Series)`; this is faster, gives "Unit Price" a numeric dtype, and
# also works when there are zero rows.
product_price_pairs = [
    assign_product_and_price(category) for category in df["Product Category"]
]
df[["Product Name", "Unit Price"]] = pd.DataFrame(
    product_price_pairs,
    columns=["Product Name", "Unit Price"],
    index=df.index,
)

# Calculate Total Sales (rounded to cents so float artifacts such as
# 299.97000000000003 do not end up in the CSV)
df["Total Sales"] = (df["Quantity Sold"] * df["Unit Price"]).round(2)

# Save the dataset to CSV in Colab's file system
file_path = "Customer_Sales_Dataset.csv"
df.to_csv(file_path, index=False)

# Provide a download link when running inside Google Colab; elsewhere the
# CSV has already been written, so just report where it is.
try:
    from google.colab import files
except ImportError:
    print(f"Saved {file_path}; google.colab is not available, skipping browser download.")
else:
    files.download(file_path)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>