In [0]:
%pip install Faker

In [0]:
# Restart the Python process after the %pip install in the previous cell.
# NOTE(review): presumably required so the freshly installed Faker package
# becomes importable in this notebook session — confirm against Databricks docs.
dbutils.library.restartPython()

In [0]:
import os
import json
import random
from datetime import datetime
from faker import Faker

# Single Faker instance shared by the customer, book and order generators
# below. No seed is set here, so generated values are not pinned between runs.
fake = Faker()

# -------------------- Meta Directory Helpers --------------------

# Directory holding one <entity>.json file per entity type; each file stores
# the last ID handed out. The path is relative, so it resolves against the
# current working directory.
META_DIR = "meta"
# Created as soon as this cell/module runs so the helpers can write into it.
os.makedirs(META_DIR, exist_ok=True)

def get_meta_file(entity):
    """Return the path of the metadata file for *entity* inside META_DIR."""
    filename = "{}.json".format(entity)
    return os.path.join(META_DIR, filename)

def load_last_id(entity):
    """Return the last ID recorded in meta/<entity>.json.

    Falls back to 0 when the metadata file does not exist or when it has no
    "last_id" key.
    """
    path = get_meta_file(entity)
    # Guard clause: nothing persisted yet for this entity.
    if not os.path.exists(path):
        return 0

    with open(path, "r") as handle:
        stored = json.load(handle)
    return stored.get("last_id", 0)

def save_last_id(entity, last_id):
    """Persist *last_id* as the latest used ID in meta/<entity>.json.

    Any previous content of the metadata file is overwritten.
    """
    payload = {"last_id": last_id}
    target = get_meta_file(entity)
    with open(target, "w") as handle:
        json.dump(payload, handle)

# -------------------- Customer --------------------

def generate_random_customer(customer_id):
    """Build one fake customer record as a plain dict.

    The record carries the given *customer_id*, a top-level "email", a nested
    "profile" (name, age between 18 and 70 inclusive, and the same email), and
    an "update" ISO-8601 timestamp drawn from the last 30 days.
    """
    # Draw values in a fixed order: name, email, age, timestamp.
    full_name = fake.name()
    address = fake.email()
    years = random.randint(18, 70)
    updated_at = fake.date_time_between(start_date='-30d', end_date='now')

    record = {"customer_id": customer_id, "email": address}
    record["profile"] = {"name": full_name, "age": years, "email": address}
    record["update"] = updated_at.isoformat()
    return record

def generate_customers(n, output_dir="customers"):
    """Write *n* new customer JSON files into *output_dir*.

    IDs continue from the value persisted for the "customer" entity. Once all
    files are written, the persisted counter is advanced by *n*.

    Args:
        n: Number of customers to create; must not be negative.
        output_dir: Directory receiving ``customer_<id>.json`` files. It is
            created when missing.

    Raises:
        ValueError: If *n* is negative. A negative count would write nothing
            yet rewind the persisted counter, so later runs would reuse IDs
            and overwrite existing files.
    """
    # Validate before any side effect (directory creation, counter update).
    if n < 0:
        raise ValueError(f"n must be non-negative, got {n}")

    os.makedirs(output_dir, exist_ok=True)
    last_id = load_last_id("customer")

    for cid in range(last_id + 1, last_id + n + 1):
        data = generate_random_customer(cid)
        with open(os.path.join(output_dir, f"customer_{cid}.json"), "w") as f:
            json.dump(data, f, indent=2)

    # The counter is only advanced after every file was written successfully.
    save_last_id("customer", last_id + n)
    print(f"✅ Created {n} customers starting from ID {last_id + 1}")

# -------------------- Books --------------------

def generate_random_book(book_id):
    """Build one fake book record as a plain dict.

    The record holds the given *book_id*, a generated title and author, a
    category picked from a fixed list, and a price between 20 and 100 rounded
    to two decimals.
    """
    topics = ["Data Engineering", "Cloud", "Machine Learning", "Databases"]

    # Draw values in a fixed order: title, author, category, price.
    title = fake.catch_phrase()
    author = fake.name()
    category = random.choice(topics)
    price = round(random.uniform(20, 100), 2)

    book = {"book_id": book_id}
    book["title"] = title
    book["author"] = author
    book["category"] = category
    book["price"] = price
    return book

def generate_books(n, output_dir="books"):
    """Write *n* new book JSON files into *output_dir*.

    IDs continue from the value persisted for the "book" entity. Once all
    files are written, the persisted counter is advanced by *n*.

    Args:
        n: Number of books to create; must not be negative.
        output_dir: Directory receiving ``book_<id>.json`` files. It is
            created when missing.

    Raises:
        ValueError: If *n* is negative. A negative count would write nothing
            yet rewind the persisted counter, so later runs would reuse IDs
            and overwrite existing files.
    """
    # Validate before any side effect (directory creation, counter update).
    if n < 0:
        raise ValueError(f"n must be non-negative, got {n}")

    os.makedirs(output_dir, exist_ok=True)
    last_id = load_last_id("book")

    for bid in range(last_id + 1, last_id + n + 1):
        data = generate_random_book(bid)
        with open(os.path.join(output_dir, f"book_{bid}.json"), "w") as f:
            json.dump(data, f, indent=2)

    # The counter is only advanced after every file was written successfully.
    save_last_id("book", last_id + n)
    print(f"📚 Created {n} books starting from ID {last_id + 1}")

# -------------------- Orders --------------------

def generate_random_order(order_id, customer_ids, book_ids):
    """Build one fake order record as a plain dict.

    A customer ID and a book ID are picked at random from the given
    sequences. Quantity is 1-5, the unit price is drawn between 10 and 60,
    and "total" is quantity times unit price rounded to two decimals. The
    timestamp is an ISO-8601 string drawn from the last 15 days.
    """
    # Draw values in a fixed order: customer, book, quantity, price, timestamp.
    chosen_customer = random.choice(customer_ids)
    chosen_book = random.choice(book_ids)
    units = random.randint(1, 5)
    unit_price = round(random.uniform(10, 60), 2)
    placed_at = fake.date_time_between(start_date='-15d', end_date='now')

    order = {
        "order_id": order_id,
        "timestamp": placed_at.isoformat(),
        "customer_id": chosen_customer,
        "quantity": units,
    }
    order["total"] = round(units * unit_price, 2)
    order["books"] = chosen_book
    return order

def _list_entity_ids(directory, prefix):
    """Return the integer IDs parsed from ``<prefix><id>.json`` files in *directory*.

    *prefix* is expected to end with an underscore (e.g. ``"customer_"``).
    A missing directory yields an empty list, and files whose ID part is not
    an integer are skipped instead of aborting the whole scan.
    """
    try:
        filenames = os.listdir(directory)
    except FileNotFoundError:
        # Nothing generated yet for this entity.
        return []

    ids = []
    for filename in filenames:
        if not (filename.startswith(prefix) and filename.endswith(".json")):
            continue
        try:
            ids.append(int(filename.split("_")[1].split(".")[0]))
        except ValueError:
            # Stray file such as "customer_.json" or "customer_x.json".
            continue
    return ids


def generate_orders(n, output_dir="orders", customer_dir="customers", book_dir="books"):
    """Write *n* new order JSON files that reference existing customers and books.

    Customer and book IDs are discovered from the filenames present in
    *customer_dir* and *book_dir*. When either set is empty, or the directory
    does not exist, a warning is printed and nothing is written. Order IDs
    continue from the value persisted for the "order" entity, which is
    advanced by *n* once all files are written.

    Args:
        n: Number of orders to create; must not be negative.
        output_dir: Directory receiving ``order_<id>.json`` files. It is
            created when missing.
        customer_dir: Directory scanned for ``customer_<id>.json`` files.
        book_dir: Directory scanned for ``book_<id>.json`` files.

    Raises:
        ValueError: If *n* is negative. A negative count would write nothing
            yet rewind the persisted counter, so later runs would reuse IDs
            and overwrite existing files.
    """
    # Validate before any side effect (directory creation, counter update).
    if n < 0:
        raise ValueError(f"n must be non-negative, got {n}")

    os.makedirs(output_dir, exist_ok=True)
    last_id = load_last_id("order")

    # Load existing customer and book IDs
    customer_ids = _list_entity_ids(customer_dir, "customer_")
    book_ids = _list_entity_ids(book_dir, "book_")

    if not customer_ids or not book_ids:
        print("⚠️  Cannot generate orders — make sure customers and books exist.")
        return

    for oid in range(last_id + 1, last_id + n + 1):
        data = generate_random_order(oid, customer_ids, book_ids)
        with open(os.path.join(output_dir, f"order_{oid}.json"), "w") as f:
            json.dump(data, f, indent=2)

    # The counter is only advanced after every file was written successfully.
    save_last_id("order", last_id + n)
    print(f"📦 Created {n} orders starting from ID {last_id + 1}")

# -------------------- Example Usage --------------------

# The `if __name__ == "__main__":` guard is commented out, so these calls run
# unconditionally whenever this cell executes.
# Customers and books are generated first because generate_orders picks its
# customer/book IDs from the files already present in those directories.
generate_customers(10)
generate_books(5)
generate_orders(5)