In [0]:
import csv
import uuid
import random
from datetime import datetime

def generate_product_csv(path="/dataingestion/product", num_records=5):
    """
    Generate a CSV file of dummy product data under the "/dbfs" local mount.

    Each call writes a new file named ``data_<uuid4 hex>.csv`` inside the
    target directory, so repeated calls never overwrite one another.

    Columns: product_id, product_name, category, price, created_at.

    Args:
        path: Target directory, given as an absolute path starting with "/".
            The file itself is written to "/dbfs" + path.
        num_records: Number of data rows written after the header row.

    Returns:
        None. The location of the generated file is printed.

    Raises:
        OSError: If the target directory cannot be created or the file
            cannot be opened for writing.
    """
    # Function-scope import so this block does not depend on edits elsewhere.
    import os

    local_dir = f"/dbfs{path}"

    # Best effort: ask the notebook-provided `dbutils` helper to create the
    # directory. A failure here (including `dbutils` not being defined) is
    # reported but not fatal, because the directory is ensured again below.
    try:
        dbutils.fs.mkdirs(path)
    except Exception as e:
        print(f"Error creating directory: {e}")

    # Guarantee the directory exists through the same mount that open() uses;
    # otherwise a failed mkdirs above would only surface later as a confusing
    # FileNotFoundError when opening the file.
    os.makedirs(local_dir, exist_ok=True)

    categories = ["Electronics", "Clothing", "Books", "Home", "Toys"]

    # A random hex UUID in the name makes every generated file unique.
    filename = f"{local_dir}/data_{uuid.uuid4().hex}.csv"

    # newline="" lets the csv module control line endings itself.
    with open(filename, mode="w", newline="", encoding="utf-8") as file:
        writer = csv.writer(file)
        writer.writerow(["product_id", "product_name", "category", "price", "created_at"])

        for _ in range(num_records):
            writer.writerow([
                str(uuid.uuid4()),
                "Product_" + str(random.randint(100, 999)),
                random.choice(categories),
                round(random.uniform(10.0, 500.0), 2),
                datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
            ])

    print(f"Generated {num_records} records at {filename}")


In [0]:
def generate_product_csv_new(path="/dataingestion/product", num_records=5):
    """
    Generate a CSV file of dummy product data under the "/dbfs" local mount.

    Each call writes a new file named ``data_<uuid4 hex>.csv`` inside the
    target directory, so repeated calls never overwrite one another.

    Columns: product_id, product_name, category, price, created_at,
    currency, tags. The ``tags`` column holds 2-3 distinct tags joined with
    commas in a single field; the csv writer quotes that field so the
    embedded commas do not split it into extra columns.

    Args:
        path: Target directory, given as an absolute path starting with "/".
            The file itself is written to "/dbfs" + path.
        num_records: Number of data rows written after the header row.

    Returns:
        None. The location of the generated file is printed.

    Raises:
        OSError: If the target directory cannot be created or the file
            cannot be opened for writing.
    """
    # Function-scope import so this block does not depend on edits elsewhere.
    import os

    categories = ["Electronics", "Clothing", "Books", "Home", "Toys"]
    currencies = ["USD", "EUR"]
    tags_pool = ["sale", "new", "popular", "limited", "exclusive"]

    local_dir = f"/dbfs{path}"

    # Make sure the target directory exists before opening the file; without
    # this, a fresh path fails with FileNotFoundError.
    os.makedirs(local_dir, exist_ok=True)

    # A random hex UUID in the name makes every generated file unique.
    filename = f"{local_dir}/data_{uuid.uuid4().hex}.csv"

    # newline="" lets the csv module control line endings itself.
    with open(filename, mode="w", newline="", encoding="utf-8") as file:
        writer = csv.writer(file)
        writer.writerow([
            "product_id",
            "product_name",
            "category",
            "price",
            "created_at",
            "currency",
            "tags",
        ])

        for _ in range(num_records):
            # random.sample picks without replacement, so tags never repeat
            # within a row.
            selected_tags = ",".join(
                random.sample(tags_pool, k=random.randint(2, 3))
            )
            writer.writerow([
                str(uuid.uuid4()),
                "Product_" + str(random.randint(100, 999)),
                random.choice(categories),
                round(random.uniform(10.0, 500.0), 2),
                datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
                random.choice(currencies),
                selected_tags,
            ])

    print(f"Generated {num_records} records at {filename}")
