In [1]:
import database_utils
import utils
DB_PATH = "products.db"

In [2]:
database_utils.create_db(DB_PATH, n_employees=200, n_products=20000, n_txns_per_product=200)

SQLite database 'products.db' created with a 'transactions' table (event-sourced), 'products' table, and 'employees' table.


In [3]:
schema_text = database_utils.get_schema(DB_PATH)
utils.print_html(schema_text, f"Table Schemas in {DB_PATH}")

In [4]:
query = """
    select * from products where is_limited=1
    order by release_date asc
    limit 10
    """
results = database_utils.execute_sql(
    query=query,
    db_path=DB_PATH)
utils.print_html(results, query)

id,name,brand,category,color,is_limited,notes,release_date
13621,New Balance hoodie,New,Balance,green,1,,1996-05-08 00:29:04.012810
3041,Puma hat,Puma,hat,white,1,,1996-05-08 04:17:49.268379
2400,Nike shoes,Nike,shoes,cream,1,,1996-05-09 02:35:41.688484
3702,New Balance cleats,New,Balance,cream,1,,1996-05-10 00:13:30.332409
18635,Nike shoes,Nike,shoes,cream,1,,1996-05-13 16:55:32.460942
1673,Adidas hoodie,Adidas,hoodie,black,1,,1996-05-25 22:28:09.848026
15506,Adidas hat,Adidas,hat,cream,1,,1996-06-12 02:26:20.692115
10816,Nike t-shirt,Nike,t-shirt,cream,1,,1996-07-15 03:29:58.799178
4858,New Balance hoodie,New,Balance,white,1,,1996-08-01 13:45:41.154169
4470,Reebok hat,Reebok,hat,black,1,,1996-08-11 19:12:59.373602


Let's do an example of joining tables...

Which employee is responsible for releasing the most products?

In [5]:
query = """
    select employees.name, COUNT(transactions.id)
    from employees
    join transactions on employees.id=transactions.employee_id
    where transactions.action="insert"
    """
results = database_utils.execute_sql(
    query=query,
    db_path=DB_PATH)
utils.print_html(results, "Which employee is responsible for releasing the most products?")

name,COUNT(transactions.id)
Watanabe Deepti,20000


Because of how I've seeded the database, all insertions were credited to a single employee. This is not an ideal situation, but is not unlike what could occur in real databases.

Now let's investigate sales

_Who are the top performing employees in terms of sales?_

Note that, due to how the database is currently setup (and how the business is flattened), there is more than one valid way to answer this question.
1. Of all employees, who has accumulated the highest total sales
2. Of only employees in the sales organization, who has the highest total sales

In [6]:
query="""
    select employees.name, employees.department, SUM(-transactions.qty_delta * transactions.unit_price) as total_revenue
    from employees
    join transactions on employees.id=transactions.employee_id
    where transactions.action="sale"
    group by employees.name
    order by total_revenue desc
    limit 10;
    """
results = database_utils.execute_sql(
    query=query,
    db_path=DB_PATH)
utils.print_html(results, "Who are the top performing employees in terms of sales?")

name,department,total_revenue
Zephyr Ishi,stock,33400011.81
Goodall Zephyr,stock,27635131.59
Bruno Charles,sales,27629624.41
Eileen Zephyr,sales,27558541.54
Abby Bruno,corporate,22456015.54
Henrik Frederic,stock,22399749.95
Watanabe Zephyr,fitting,22362481.22
Deepti Watanabe,corporate,22348796.71
Henrik Deepti,fitting,22345446.3
Ishi Henrik,stock,22312047.2


_Who are the top performing employees in terms of sales this calendar year?_

In [7]:
# query=f"""
#     select employees.name, employees.department, SUM(-transactions.qty_delta * transactions.unit_price) as total_revenue
    # group by employees.name
query=f"""
    select employees.name, employees.department, transactions.ts as transaction_date, -transactions.qty_delta * transactions.unit_price as revenue
    from employees
    join transactions on employees.id=transactions.employee_id
    where transactions.action="sale" and transactions.ts>=date('now', 'start of year')
    order by transaction_date asc
    limit 10;
    """
results = database_utils.execute_sql(
    query=query,
    db_path=DB_PATH)
utils.print_html(results, "Who are the top performing employees in terms of sales this calendar year?")

name,department,transaction_date,revenue
Eileen Abby,sales,2025-10-13 21:23:34,622.26
Abby Bruno,corporate,2025-10-13 21:23:34,622.26
Watanabe Charles,fitting,2025-10-13 21:23:34,483.98
Goodall Zephyr,stock,2025-10-13 21:23:34,414.84
Ishi Charles,corporate,2025-10-13 21:23:34,622.26
Frederic Charles,stock,2025-10-13 21:23:34,622.26
Bruno Charles,sales,2025-10-13 21:23:34,553.12
Deepti Frederic,sales,2025-10-13 21:23:34,132.76
Bruno Zephyr,stock,2025-10-13 21:23:34,62.17
Watanabe Bruno,sales,2025-10-13 21:23:34,248.68


Unfortunately, because the timestamp field for transactions is autogenerated based on when the database entry is entered, it appears that it is not possible to look at different segments of time. All the data was generated at the time of running `database_utils.create_db(...)`. So I may need to add a column to the transactions table to backdate some of them.

In [8]:
database_utils.execute_sql(
    query="select date('now', 'start of year')",
    db_path=DB_PATH)

Unnamed: 0,"date('now', 'start of year')"
0,2025-01-01
