In [9]:
from faker import Faker
import pandas as pd
import numpy as np
import random

In [10]:
# Fixed seed so that a fresh "Restart & Run All" regenerates the same dataset.
SEED = 42

fake = Faker()
Faker.seed(SEED)   # seeds the random generator shared by Faker instances
random.seed(SEED)  # the create_* helpers also draw from the stdlib `random` module

In [7]:
# Users table
def create_users(n: int) -> pd.DataFrame:
    """Build a DataFrame of ``n`` synthetic user records.

    Every row gets a ``fake.uuid4()`` identifier plus a Faker-generated name,
    email, password, date of birth (requested age range 18-90) and a
    ``created_at`` value from ``fake.date_time_this_decade()``.

    Args:
        n: Number of users to generate. ``range(n)`` is used, so zero or a
            negative value produces no rows.

    Returns:
        A DataFrame with one row per user. The column set is fixed, so an
        empty result still exposes ``user_id`` and the other columns.
    """
    columns = ["user_id", "name", "email", "password", "date_of_birth", "created_at"]
    records = [
        {
            "user_id": fake.uuid4(),
            "name": fake.name(),
            "email": fake.email(),
            "password": fake.password(),
            "date_of_birth": fake.date_of_birth(minimum_age=18, maximum_age=90),
            "created_at": fake.date_time_this_decade(),
        }
        for _ in range(n)
    ]
    # Passing `columns` keeps the schema (and the CSV header) when no rows exist.
    return pd.DataFrame(records, columns=columns)

In [6]:
# Categories table
def create_categories(n: int) -> pd.DataFrame:
    """Build a DataFrame of ``n`` synthetic product categories.

    Each category has a ``fake.uuid4()`` identifier and a name taken from
    ``fake.word()``. Names are drawn independently for each row; nothing here
    enforces that they are distinct.

    Args:
        n: Number of categories to generate. Zero or a negative value
            produces no rows.

    Returns:
        A DataFrame with columns ``category_id`` and ``category_name``; the
        columns are present even when the frame is empty.
    """
    columns = ["category_id", "category_name"]
    records = [
        {
            "category_id": fake.uuid4(),
            "category_name": fake.word(),
        }
        for _ in range(n)
    ]
    # Passing `columns` keeps the schema (and the CSV header) when no rows exist.
    return pd.DataFrame(records, columns=columns)

In [7]:
# Products table
def create_products(n: int, category_ids) -> pd.DataFrame:
    """Build a DataFrame of ``n`` synthetic products.

    Each product gets a ``fake.uuid4()`` identifier, a ``fake.word()`` name, a
    price drawn uniformly from 10.00-1000.00 (rounded to 2 decimals), a
    category picked at random from ``category_ids`` and a ``created_at`` value
    from ``fake.date_time_this_decade()``.

    Args:
        n: Number of products to generate. Zero or a negative value produces
            no rows.
        category_ids: Sequence of category identifiers to choose from.

    Returns:
        A DataFrame with one row per product; the columns are present even
        when the frame is empty.

    Raises:
        IndexError: If ``n > 0`` and ``category_ids`` is empty (raised by
            ``random.choice``).
    """
    columns = ["product_id", "product_name", "price", "category_id", "created_at"]
    records = [
        {
            "product_id": fake.uuid4(),
            "product_name": fake.word(),
            "price": round(random.uniform(10.0, 1000.0), 2),
            "category_id": random.choice(category_ids),
            "created_at": fake.date_time_this_decade(),
        }
        for _ in range(n)
    ]
    # Passing `columns` keeps the schema (and the CSV header) when no rows exist.
    return pd.DataFrame(records, columns=columns)

In [8]:
# Orders table
def create_orders(n: int, user_ids) -> pd.DataFrame:
    """Build a DataFrame of ``n`` synthetic orders.

    Each order gets a ``fake.uuid4()`` identifier, a user picked at random
    from ``user_ids``, an ``order_date`` from ``fake.date_time_this_decade()``
    and a ``total_amount`` drawn uniformly from 50.00-5000.00 (rounded to 2
    decimals). The total is an independent random draw; it is not computed
    from any order items.

    Args:
        n: Number of orders to generate. Zero or a negative value produces no
            rows.
        user_ids: Sequence of user identifiers to choose from.

    Returns:
        A DataFrame with one row per order; the columns are present even when
        the frame is empty.

    Raises:
        IndexError: If ``n > 0`` and ``user_ids`` is empty (raised by
            ``random.choice``).
    """
    columns = ["order_id", "user_id", "order_date", "total_amount"]
    records = [
        {
            "order_id": fake.uuid4(),
            "user_id": random.choice(user_ids),
            "order_date": fake.date_time_this_decade(),
            "total_amount": round(random.uniform(50.0, 5000.0), 2),
        }
        for _ in range(n)
    ]
    # Passing `columns` keeps the schema (and the CSV header) when no rows exist.
    return pd.DataFrame(records, columns=columns)

In [9]:
# OrderItems table
def create_order_items(n: int, order_ids, product_ids) -> pd.DataFrame:
    """Build a DataFrame of ``n`` synthetic order line items.

    Each item gets a ``fake.uuid4()`` identifier, an order and a product
    picked at random from the given id sequences, a quantity between 1 and 10
    inclusive and a price drawn uniformly from 10.00-1000.00 (rounded to 2
    decimals). The price is an independent random draw; it is not looked up
    from the chosen product.

    Args:
        n: Number of items to generate. Zero or a negative value produces no
            rows.
        order_ids: Sequence of order identifiers to choose from.
        product_ids: Sequence of product identifiers to choose from.

    Returns:
        A DataFrame with one row per item; the columns are present even when
        the frame is empty.

    Raises:
        IndexError: If ``n > 0`` and either id sequence is empty (raised by
            ``random.choice``).
    """
    columns = ["order_item_id", "order_id", "product_id", "quantity", "price"]
    records = [
        {
            "order_item_id": fake.uuid4(),
            "order_id": random.choice(order_ids),
            "product_id": random.choice(product_ids),
            "quantity": random.randint(1, 10),
            "price": round(random.uniform(10.0, 1000.0), 2),
        }
        for _ in range(n)
    ]
    # Passing `columns` keeps the schema (and the CSV header) when no rows exist.
    return pd.DataFrame(records, columns=columns)

In [10]:
# Reviews table
def create_reviews(n: int, user_ids, product_ids) -> pd.DataFrame:
    """Build a DataFrame of ``n`` synthetic product reviews.

    Each review gets a ``fake.uuid4()`` identifier, a user and a product
    picked at random from the given id sequences, an integer rating between 1
    and 5 inclusive, a ``fake.sentence()`` comment and a ``review_date`` from
    ``fake.date_time_this_decade()``.

    Args:
        n: Number of reviews to generate. Zero or a negative value produces no
            rows.
        user_ids: Sequence of user identifiers to choose from.
        product_ids: Sequence of product identifiers to choose from.

    Returns:
        A DataFrame with one row per review; the columns are present even
        when the frame is empty.

    Raises:
        IndexError: If ``n > 0`` and either id sequence is empty (raised by
            ``random.choice``).
    """
    columns = ["review_id", "user_id", "product_id", "rating", "comment", "review_date"]
    records = [
        {
            "review_id": fake.uuid4(),
            "user_id": random.choice(user_ids),
            "product_id": random.choice(product_ids),
            "rating": random.randint(1, 5),
            "comment": fake.sentence(),
            "review_date": fake.date_time_this_decade(),
        }
        for _ in range(n)
    ]
    # Passing `columns` keeps the schema (and the CSV header) when no rows exist.
    return pd.DataFrame(records, columns=columns)

In [11]:
# Addresses table
def create_addresses(n: int, user_ids) -> pd.DataFrame:
    """Build a DataFrame of ``n`` synthetic postal addresses.

    Each address gets a ``fake.uuid4()`` identifier and a user picked at
    random from ``user_ids``. The address, city, state, zip code and country
    fields each come from a separate Faker call, so they are not guaranteed
    to be mutually consistent.

    Args:
        n: Number of addresses to generate. Zero or a negative value produces
            no rows.
        user_ids: Sequence of user identifiers to choose from.

    Returns:
        A DataFrame with one row per address; the columns are present even
        when the frame is empty.

    Raises:
        IndexError: If ``n > 0`` and ``user_ids`` is empty (raised by
            ``random.choice``).
    """
    columns = ["address_id", "user_id", "address", "city", "state", "zip_code", "country"]
    records = [
        {
            "address_id": fake.uuid4(),
            "user_id": random.choice(user_ids),
            "address": fake.address(),
            "city": fake.city(),
            "state": fake.state(),
            "zip_code": fake.zipcode(),
            "country": fake.country(),
        }
        for _ in range(n)
    ]
    # Passing `columns` keeps the schema (and the CSV header) when no rows exist.
    return pd.DataFrame(records, columns=columns)

In [12]:
# Payments table
def create_payments(n: int, order_ids) -> pd.DataFrame:
    """Build a DataFrame of ``n`` synthetic payments.

    Each payment gets a ``fake.uuid4()`` identifier, an order picked at random
    from ``order_ids``, a ``payment_date`` from
    ``fake.date_time_this_decade()``, one of three payment methods and an
    amount drawn uniformly from 50.00-5000.00 (rounded to 2 decimals). The
    amount is an independent random draw; it is not tied to the chosen
    order's total, and an order may receive zero or several payments.

    Args:
        n: Number of payments to generate. Zero or a negative value produces
            no rows.
        order_ids: Sequence of order identifiers to choose from.

    Returns:
        A DataFrame with one row per payment; the columns are present even
        when the frame is empty.

    Raises:
        IndexError: If ``n > 0`` and ``order_ids`` is empty (raised by
            ``random.choice``).
    """
    columns = ["payment_id", "order_id", "payment_date", "payment_method", "amount"]
    payment_methods = ["Credit Card", "PayPal", "Bank Transfer"]
    records = [
        {
            "payment_id": fake.uuid4(),
            "order_id": random.choice(order_ids),
            "payment_date": fake.date_time_this_decade(),
            "payment_method": random.choice(payment_methods),
            "amount": round(random.uniform(50.0, 5000.0), 2),
        }
        for _ in range(n)
    ]
    # Passing `columns` keeps the schema (and the CSV header) when no rows exist.
    return pd.DataFrame(records, columns=columns)

In [2]:
# Table sizes: how many rows each create_* call should produce.
# Tables whose ids are handed to other generators.
num_users, num_categories, num_products, num_orders = 100, 10, 100, 200
# Tables that only reference ids from the tables above.
num_order_items, num_reviews, num_addresses, num_payments = 500, 300, 150, 200

In [14]:
# Parent tables are generated before the tables that reference their ids.
users_df = create_users(num_users)
categories_df = create_categories(num_categories)

# Extract each key column once; the same lists feed several child tables.
user_ids = users_df['user_id'].tolist()
category_ids = categories_df['category_id'].tolist()

products_df = create_products(num_products, category_ids)
product_ids = products_df['product_id'].tolist()

orders_df = create_orders(num_orders, user_ids)
order_ids = orders_df['order_id'].tolist()

order_items_df = create_order_items(num_order_items, order_ids, product_ids)
reviews_df = create_reviews(num_reviews, user_ids, product_ids)
addresses_df = create_addresses(num_addresses, user_ids)
payments_df = create_payments(num_payments, order_ids)

# Sanity checks: fail here rather than writing inconsistent CSVs later.
for key_name, key_values in [
    ('user_id', user_ids),
    ('category_id', category_ids),
    ('product_id', product_ids),
    ('order_id', order_ids),
]:
    assert len(set(key_values)) == len(key_values), f"duplicate {key_name} values generated"

for table_name, child_df, fk_column, parent_ids in [
    ('products', products_df, 'category_id', category_ids),
    ('orders', orders_df, 'user_id', user_ids),
    ('order_items', order_items_df, 'order_id', order_ids),
    ('order_items', order_items_df, 'product_id', product_ids),
    ('reviews', reviews_df, 'user_id', user_ids),
    ('reviews', reviews_df, 'product_id', product_ids),
    ('addresses', addresses_df, 'user_id', user_ids),
    ('payments', payments_df, 'order_id', order_ids),
]:
    # A zero-row frame may have no columns at all, so only check populated ones.
    if not child_df.empty:
        assert child_df[fk_column].isin(parent_ids).all(), (
            f"{table_name}.{fk_column} contains ids missing from the parent table"
        )

In [15]:
# Export every generated table to its own CSV file (relative paths, no index column).
csv_targets = [
    (users_df, 'users.csv'),
    (categories_df, 'categories.csv'),
    (products_df, 'products.csv'),
    (orders_df, 'orders.csv'),
    (order_items_df, 'order_items.csv'),
    (reviews_df, 'reviews.csv'),
    (addresses_df, 'addresses.csv'),
    (payments_df, 'payments.csv'),
]
for table_df, csv_name in csv_targets:
    table_df.to_csv(csv_name, index=False)