# Chapter 11: ORM API Features for Querying

Note: if you encounter strange errors, delete "store.db" and re-run all cells one by one.

Let's reset the database for this chapter (you need to change the database URL accordingly in "models.py"):

In [1]:
%run ../part2/models.py

2024-03-29 14:22:19,677 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2024-03-29 14:22:19,678 INFO sqlalchemy.engine.Engine PRAGMA main.table_info("employee")
2024-03-29 14:22:19,679 INFO sqlalchemy.engine.Engine [raw sql] ()
2024-03-29 14:22:19,679 INFO sqlalchemy.engine.Engine PRAGMA temp.table_info("employee")
2024-03-29 14:22:19,680 INFO sqlalchemy.engine.Engine [raw sql] ()
2024-03-29 14:22:19,680 INFO sqlalchemy.engine.Engine PRAGMA main.table_info("customer")
2024-03-29 14:22:19,681 INFO sqlalchemy.engine.Engine [raw sql] ()
2024-03-29 14:22:19,682 INFO sqlalchemy.engine.Engine PRAGMA temp.table_info("customer")
2024-03-29 14:22:19,682 INFO sqlalchemy.engine.Engine [raw sql] ()
2024-03-29 14:22:19,683 INFO sqlalchemy.engine.Engine PRAGMA main.table_info("order")
2024-03-29 14:22:19,683 INFO sqlalchemy.engine.Engine [raw sql] ()
2024-03-29 14:22:19,684 INFO sqlalchemy.engine.Engine PRAGMA temp.table_info("order")
2024-03-29 14:22:19,685 INFO sqlalchemy.engine.Engine [raw sql] ()

In [2]:
%run ../part2/insert.py

2024-03-29 14:22:19,776 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2024-03-29 14:22:19,777 INFO sqlalchemy.engine.Engine INSERT INTO employee (manager_id, name, is_manager, hire_date) VALUES (?, ?, ?, ?)
2024-03-29 14:22:19,778 INFO sqlalchemy.engine.Engine [generated in 0.00041s] (None, 'Alice', 1, '2024-03-28')
2024-03-29 14:22:19,779 INFO sqlalchemy.engine.Engine INSERT INTO employee (manager_id, name, is_manager, hire_date) VALUES (?, ?, ?, ?) RETURNING employee_id
2024-03-29 14:22:19,779 INFO sqlalchemy.engine.Engine [generated in 0.00010s (insertmanyvalues) 1/2 (ordered; batch not supported)] (1, 'Bob', 0, '2024-03-29')
2024-03-29 14:22:19,780 INFO sqlalchemy.engine.Engine INSERT INTO employee (manager_id, name, is_manager, hire_date) VALUES (?, ?, ?, ?) RETURNING employee_id
2024-03-29 14:22:19,780 INFO sqlalchemy.engine.Engine [insertmanyvalues 2/2 (ordered; batch not supported)] (1, 'Cathy', 0, '2024-03-29')
2024-03-29 14:22:19,781 INFO sqlalchemy.engine.Engine INSERT INTO

## ORM Loader Options

Load models and other necessary symbols:

In [3]:
import os
import sys

# Make the modules in the sibling "part2" directory importable, so that the
# later `from models import ...` cells can be resolved.

# Get the current working directory
current_dir = os.getcwd()

# Get the parent directory of the current directory
parent_dir = os.path.abspath(os.path.join(current_dir, os.pardir))

# Get the sibling directory's path
sibling_dir = os.path.join(parent_dir, 'part2')

# Add the sibling directory to the Python path, unless an earlier run of this
# cell already did so; re-running would otherwise pile up duplicate entries.
if sibling_dir not in sys.path:
    sys.path.append(sibling_dir)

In [4]:
from models import *

Create a session object:

In [5]:
session = SessionMaker()

### Column Loading Options

Necessary imports:

In [6]:
import logging
import threading

from models import Customer, Employee, Order, OrderDetail, SessionMaker
from sqlalchemy import select, update
from sqlalchemy.exc import SQLAlchemyError
from sqlalchemy.orm import (Session, defer, load_only, selectinload, undefer,
                            undefer_group)

Using the `load_only()` loader option:

In [7]:
# load_only() restricts the initial SELECT to the listed column(s); the
# printed SQL for this statement also includes the primary key.
stmt = (
    select(Employee)
    .filter_by(employee_id=1)
    .options(load_only(Employee.name))
)

The generated SQL selects only the primary key and "name" columns:

In [8]:
print(stmt)

SELECT employee.employee_id, employee.name 
FROM employee 
WHERE employee.employee_id = :employee_id_1


In [9]:
employee = session.scalar(stmt)

2024-03-29 14:22:19,858 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2024-03-29 14:22:19,859 INFO sqlalchemy.engine.Engine SELECT employee.employee_id, employee.name 
FROM employee 
WHERE employee.employee_id = ?
2024-03-29 14:22:19,860 INFO sqlalchemy.engine.Engine [generated in 0.00052s] (1,)


When an unloaded attribute is accessed, lazy loading is triggered. In this case,
it fetches the "hire_date" column, emitting the following SQL:

In [10]:
if employee is not None:
    print("# Accessing employee.hire_date (using lazy loading):")
    print(f"{employee.name} is hired on {employee.hire_date}.")

# Accessing employee.hire_date (using lazy loading):
2024-03-29 14:22:19,864 INFO sqlalchemy.engine.Engine SELECT employee.hire_date AS employee_hire_date 
FROM employee 
WHERE employee.employee_id = ?
2024-03-29 14:22:19,865 INFO sqlalchemy.engine.Engine [generated in 0.00064s] (1,)
Alice is hired on 2024-03-28.


Remove the instance from the session, since we're going to load the same row again later:

In [11]:
session.expunge(employee)

Using the `defer()` loader option:

In [12]:
# defer() is the inverse of load_only(): name each column to leave out of the
# initial SELECT. Deferring these three leaves only employee_id and name in
# the printed SQL.
stmt = (
    select(Employee)
    .filter_by(employee_id=1)
    .options(
        defer(Employee.hire_date),
        defer(Employee.manager_id),
        defer(Employee.is_manager),
    )
)

In [13]:
print(stmt)

SELECT employee.employee_id, employee.name 
FROM employee 
WHERE employee.employee_id = :employee_id_1


In [14]:
employee = session.scalar(stmt)

2024-03-29 14:22:19,882 INFO sqlalchemy.engine.Engine SELECT employee.employee_id, employee.name 
FROM employee 
WHERE employee.employee_id = ?
2024-03-29 14:22:19,882 INFO sqlalchemy.engine.Engine [generated in 0.00068s] (1,)


In [15]:
if employee is not None:
    print("# Loading deferred column:")
    print(f"{employee.name} is hired on {employee.hire_date}.")

# Loading deferred column:
2024-03-29 14:22:19,887 INFO sqlalchemy.engine.Engine SELECT employee.hire_date AS employee_hire_date 
FROM employee 
WHERE employee.employee_id = ?
2024-03-29 14:22:19,888 INFO sqlalchemy.engine.Engine [generated in 0.00069s] (1,)
Alice is hired on 2024-03-28.


In [16]:
session.expunge(employee)

Configuring Deferral on Column Mappings:

```python
class Customer(Base):
    __tablename__ = "customer"

    address: Mapped[str_255] = mapped_column(
        deferred=True,
        deferred_group="customer_attributes",
    )
    ...
```

In [17]:
stmt = select(Customer)

The "address" column is excluded from the initial fetch:

In [18]:
print(stmt)

SELECT customer.customer_id, customer.first_name, customer.last_name, customer.email 
FROM customer


`undefer()` and `undefer_group()`:

Applying the `undefer()` loader option to a deferred column:

In [19]:
stmt = select(Customer).options(undefer(Customer.address))

`Customer.address` is now eagerly loaded:

In [20]:
print(stmt)

SELECT customer.customer_id, customer.first_name, customer.last_name, customer.address, customer.email 
FROM customer


Alternatively, you can use `undefer_group()` with the target group name to
achieve the same effect:

In [21]:
stmt = select(Customer).options(undefer_group("customer_attributes"))

In [22]:
print(stmt)

SELECT customer.customer_id, customer.first_name, customer.last_name, customer.address, customer.email 
FROM customer


To undefer all deferred columns at once, you can use a wildcard:

In [23]:
stmt = select(Customer).options(undefer("*"))

In [24]:
print(stmt)

SELECT NULL AS anon_1, customer.customer_id, customer.first_name, customer.last_name, customer.address, customer.email 
FROM customer


Enabling `raiseload` on deferred columns:

In [25]:
# Same column selection as plain load_only(Employee.name), but with
# raiseload=True accessing a column that was not loaded raises an error
# rather than being fetched on access.
stmt = (
    select(Employee)
    .filter_by(employee_id=1)
    .options(load_only(Employee.name, raiseload=True))
)

In [26]:
print(stmt)

SELECT employee.employee_id, employee.name 
FROM employee 
WHERE employee.employee_id = :employee_id_1


In [27]:
employee = session.scalar(stmt)

2024-03-29 14:22:19,938 INFO sqlalchemy.engine.Engine SELECT employee.employee_id, employee.name 
FROM employee 
WHERE employee.employee_id = ?
2024-03-29 14:22:19,939 INFO sqlalchemy.engine.Engine [generated in 0.00058s] (1,)


In [28]:
print("Accessing employee name:", employee.name)

Accessing employee name: Alice


An error occurs due to `raiseload=True`:

In [29]:
try:
    # hire_date was excluded by load_only(..., raiseload=True), so this
    # attribute access raises instead of lazy loading the column.
    print("Accessing other columns:", employee.hire_date)
except SQLAlchemyError as e:
    # Catch only SQLAlchemy's own errors (imported above) so that unrelated
    # failures, e.g. a typo in the attribute name, are not silently printed.
    print(e)

'Employee.hire_date' is not available due to raiseload=True


In [30]:
session.expunge(employee)

### Relationship Loading Techniques

Relationship loading strategies include lazy loading (the default behavior), select IN
eager loading, joined eager loading, raise loading, and others.

You can configure loading strategies with the `lazy` parameter in
`relationship()`.

`lazy="select"` (lazy loading) is the default strategy; other possible values
include `"selectin"` and `"joined"`:

```python
class Customer(Base):

    orders: Mapped[list[Order]] = relationship(
        lazy="select",
        back_populates="customer",
        init=False,
        repr=False,
        order_by="desc(Order.order_id)",
    )
    ...
```

Eagerly loading relationships using `Select.options()`:

In [31]:
# Loader options can be chained along a relationship path:
#   Order.order_details  -> loaded by a second "SELECT ... IN" query
#   OrderDetail.product  -> fetched within that second query via a JOIN
# The statement itself still selects only from "order"; the extra query is
# emitted when the statement is executed.
stmt = (
    select(Order)
    .options(
        selectinload(Order.order_details)
        .joinedload(OrderDetail.product)
    )
    .filter_by(order_id=1)
)

In [32]:
print(stmt)

SELECT "order".order_id, "order".customer_id, "order".employee_id, "order".order_datetime, "order".is_shipped 
FROM "order" 
WHERE "order".order_id = :order_id_1


The SQL emitted eagerly loads both order details and products:

In [33]:
order = session.scalar(stmt)

2024-03-29 14:22:19,963 INFO sqlalchemy.engine.Engine SELECT "order".order_id, "order".customer_id, "order".employee_id, "order".order_datetime, "order".is_shipped 
FROM "order" 
WHERE "order".order_id = ?
2024-03-29 14:22:19,964 INFO sqlalchemy.engine.Engine [generated in 0.00072s] (1,)
2024-03-29 14:22:19,970 INFO sqlalchemy.engine.Engine SELECT order_detail.order_id AS order_detail_order_id, order_detail.product_id AS order_detail_product_id, order_detail.quantity AS order_detail_quantity, product_1.product_id AS product_1_product_id, product_1.product_name AS product_1_product_name, product_1.unit_price AS product_1_unit_price, product_1.units_in_stock AS product_1_units_in_stock, product_1.type AS product_1_type 
FROM order_detail LEFT OUTER JOIN product AS product_1 ON product_1.product_id = order_detail.product_id 
WHERE order_detail.order_id IN (?)
2024-03-29 14:22:19,978 INFO sqlalchemy.engine.Engine [generated in 0.00861s] (1,)


```sql
SELECT
    order_detail.order_id AS order_detail_order_id,
    order_detail.product_id AS order_detail_product_id,
    order_detail.quantity AS order_detail_quantity,
    product_1.product_id AS product_1_product_id,
    product_1.product_name AS product_1_product_name,
    product_1.unit_price AS product_1_unit_price,
    product_1.units_in_stock AS product_1_units_in_stock,
    product_1.type AS product_1_type
FROM
    order_detail
LEFT OUTER JOIN product AS product_1
ON
    product_1.product_id = order_detail.product_id
WHERE
    order_detail.order_id IN (1)
```

In [34]:
if order is not None:
    print("# Accessing related data does not trigger lazy loading: ")
    print(f"Content of order #{order.order_id}:")
    for od in order.order_details:
        print(f"{od.product.product_name} x{od.quantity}")

# Accessing related data does not trigger lazy loading: 
Content of order #1:
phone x1
phone screen protector x1
headphone x1


In [35]:
session.expunge(order)

### The "N + 1" problem

An example of the "N + 1" problem:

In [36]:
# The "1" of "N + 1": a single SELECT fetches all customers.
customers = session.scalars(select(Customer))

for customer in customers:
    print(f"> Customer: #{customer.customer_id}")
    # The "N" of "N + 1": accessing customer.orders lazy loads that
    # customer's orders, i.e. one additional SELECT per customer.
    for order in customer.orders:  # the N part of the "N+1" problem
        print(f">   order #{order.order_id} at {order.order_datetime}")

2024-03-29 14:22:19,999 INFO sqlalchemy.engine.Engine SELECT customer.customer_id, customer.first_name, customer.last_name, customer.email 
FROM customer
2024-03-29 14:22:20,000 INFO sqlalchemy.engine.Engine [generated in 0.00126s] ()
> Customer: #1
2024-03-29 14:22:20,005 INFO sqlalchemy.engine.Engine SELECT "order".order_id AS order_order_id, "order".customer_id AS order_customer_id, "order".employee_id AS order_employee_id, "order".order_datetime AS order_order_datetime, "order".is_shipped AS order_is_shipped 
FROM "order" 
WHERE ? = "order".customer_id ORDER BY "order".order_id DESC
2024-03-29 14:22:20,006 INFO sqlalchemy.engine.Engine [generated in 0.00072s] (1,)
>   order #2 at 2024-03-29 14:22:19.817572
>   order #1 at 2024-03-29 14:22:19.814764
> Customer: #2
2024-03-29 14:22:20,007 INFO sqlalchemy.engine.Engine SELECT "order".order_id AS order_order_id, "order".customer_id AS order_customer_id, "order".employee_id AS order_employee_id, "order".order_datetime AS order_order_dat

In [37]:
session.expunge_all()

Tackling the N+1 problem with eager loading:

In [38]:
# selectinload() fetches the orders of all selected customers with one extra
# "SELECT ... WHERE customer_id IN (...)" query, so iterating customer.orders
# below does not emit any further SQL.
customers = session.scalars(select(Customer).options(selectinload(Customer.orders)))
for customer in customers:
    print(f"> Customer: #{customer.customer_id}")
    for order in customer.orders:
        print(f">   order #{order.order_id} at {order.order_datetime}")

2024-03-29 14:22:20,019 INFO sqlalchemy.engine.Engine SELECT customer.customer_id, customer.first_name, customer.last_name, customer.email 
FROM customer
2024-03-29 14:22:20,019 INFO sqlalchemy.engine.Engine [generated in 0.00098s] ()
2024-03-29 14:22:20,022 INFO sqlalchemy.engine.Engine SELECT "order".customer_id AS order_customer_id, "order".order_id AS order_order_id, "order".employee_id AS order_employee_id, "order".order_datetime AS order_order_datetime, "order".is_shipped AS order_is_shipped 
FROM "order" 
WHERE "order".customer_id IN (?, ?) ORDER BY "order".order_id DESC
2024-03-29 14:22:20,022 INFO sqlalchemy.engine.Engine [generated in 0.00052s] (1, 2)
> Customer: #1
>   order #2 at 2024-03-29 14:22:19.817572
>   order #1 at 2024-03-29 14:22:19.814764
> Customer: #2
>   order #3 at 2024-03-29 14:22:19.819909


In [39]:
session.expunge_all()

Tackling the N+1 problem with a join:

In [40]:
# A single JOIN query returns one flat row per (customer, order) pair, so no
# follow-up queries are needed.
# NOTE: the emitted SQL is an inner JOIN, so a customer without any orders
# would not be listed at all.
stmt = (
    select(
        Customer.customer_id,
        Order.order_id,
        Order.order_datetime,
    )
    .select_from(Customer)
    .join(Customer.orders)
    # Sorting by customer first keeps each customer's rows adjacent, which
    # the grouping loop below relies on.
    .order_by(Customer.customer_id, Order.order_id)
)
results = session.execute(stmt)

# Print a customer header each time the customer_id changes between rows.
current_customer_id = None
for row in results:
    customer_id = row.customer_id
    if current_customer_id != customer_id:
        current_customer_id = customer_id
        print(f"> Customer: #{current_customer_id}")
    print(f">   order #{row.order_id} at {row.order_datetime}")

2024-03-29 14:22:20,034 INFO sqlalchemy.engine.Engine SELECT customer.customer_id, "order".order_id, "order".order_datetime 
FROM customer JOIN "order" ON customer.customer_id = "order".customer_id ORDER BY customer.customer_id, "order".order_id
2024-03-29 14:22:20,036 INFO sqlalchemy.engine.Engine [generated in 0.00194s] ()
> Customer: #1
>   order #1 at 2024-03-29 14:22:19.814764
>   order #2 at 2024-03-29 14:22:19.817572
> Customer: #2
>   order #3 at 2024-03-29 14:22:19.819909


In [41]:
session.expunge_all()

## ORM Execution Options

The following function sets the shipping status of a customer's earliest order using its own session; it will be run in another thread:

In [42]:
def change_shipping_status(customer_id, status):
    """Set ``is_shipped`` on one order belonging to the given customer.

    The order chosen is the first one when the customer's orders are sorted
    by ``order_datetime``. The work is done in a session opened here, and the
    change is committed before returning. Nothing is written when the
    customer has no orders.

    Args:
        customer_id: Value matched against ``Order.customer_id``.
        status: New value for ``Order.is_shipped``.
    """
    first_order_query = (
        select(Order.order_id)
        .where(Order.customer_id == customer_id)
        .order_by(Order.order_datetime)
        .limit(1)
    )
    with SessionMaker() as db:
        target_order_id = db.scalar(first_order_query)
        if target_order_id is None:
            # No order for this customer: nothing to update or commit.
            return
        db.execute(
            update(Order)
            .where(Order.order_id == target_order_id)
            .values(is_shipped=status)
        )
        db.commit()

Demonstrating the usage of the `populate_existing` execution option:

In [43]:
TARGET_CUSTOMER_ID = 2

In [44]:
stmt = (
    select(Customer)
    .where(Customer.customer_id == TARGET_CUSTOMER_ID)
    .options(selectinload(Customer.orders))
)

Initial loading populates the identity map:

In [45]:
print("(1) Initial loading:")
customer = session.scalar(stmt)
if customer is not None:
    print("Original:", customer.orders)

(1) Initial loading:
2024-03-29 14:22:20,062 INFO sqlalchemy.engine.Engine SELECT customer.customer_id, customer.first_name, customer.last_name, customer.email 
FROM customer 
WHERE customer.customer_id = ?
2024-03-29 14:22:20,062 INFO sqlalchemy.engine.Engine [generated in 0.00069s] (2,)
2024-03-29 14:22:20,064 INFO sqlalchemy.engine.Engine SELECT "order".customer_id AS order_customer_id, "order".order_id AS order_order_id, "order".employee_id AS order_employee_id, "order".order_datetime AS order_order_datetime, "order".is_shipped AS order_is_shipped 
FROM "order" 
WHERE "order".customer_id IN (?) ORDER BY "order".order_id DESC
2024-03-29 14:22:20,064 INFO sqlalchemy.engine.Engine [cached since 0.04216s ago] (2,)
Original: [Order(order_id=3, customer_id=2, employee_id=None, order_datetime=datetime.datetime(2024, 3, 29, 14, 22, 19, 819909), is_shipped=False)]


Change the shipping status of the customer's order to true in another thread:

In [46]:
# Run the update in a separate thread; change_shipping_status() opens its own
# session there, independent of the notebook's `session`.
worker_thread = threading.Thread(
    target=change_shipping_status,
    args=(TARGET_CUSTOMER_ID, True),
)
worker_thread.start()
# Wait for the worker to finish before continuing with the next cell.
worker_thread.join()

2024-03-29 14:22:20,070 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2024-03-29 14:22:20,071 INFO sqlalchemy.engine.Engine SELECT "order".order_id 
FROM "order" 
WHERE "order".customer_id = ? ORDER BY "order".order_datetime
 LIMIT ? OFFSET ?
2024-03-29 14:22:20,072 INFO sqlalchemy.engine.Engine [generated in 0.00039s] (2, 1, 0)
2024-03-29 14:22:20,073 INFO sqlalchemy.engine.Engine UPDATE "order" SET is_shipped=? WHERE "order".order_id = ?
2024-03-29 14:22:20,074 INFO sqlalchemy.engine.Engine [generated in 0.00042s] (1, 3)
2024-03-29 14:22:20,075 INFO sqlalchemy.engine.Engine COMMIT


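Normal loading will not refresh the orders in the identity map. Note that the
SELECT statements are still emitted, but objects that are already present in
the identity map keep their previously loaded attribute values: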

In [47]:
print("(2) Normal loading after the update in another thread:")
customer = session.scalar(stmt)
if customer is not None:
    print("Updated but not refreshed:", customer.orders)

(2) Normal loading after the update in another thread:
2024-03-29 14:22:20,082 INFO sqlalchemy.engine.Engine SELECT customer.customer_id, customer.first_name, customer.last_name, customer.email 
FROM customer 
WHERE customer.customer_id = ?
2024-03-29 14:22:20,082 INFO sqlalchemy.engine.Engine [cached since 0.02087s ago] (2,)
2024-03-29 14:22:20,084 INFO sqlalchemy.engine.Engine SELECT "order".customer_id AS order_customer_id, "order".order_id AS order_order_id, "order".employee_id AS order_employee_id, "order".order_datetime AS order_order_datetime, "order".is_shipped AS order_is_shipped 
FROM "order" 
WHERE "order".customer_id IN (?) ORDER BY "order".order_id DESC
2024-03-29 14:22:20,084 INFO sqlalchemy.engine.Engine [cached since 0.06211s ago] (2,)
Updated but not refreshed: [Order(order_id=3, customer_id=2, employee_id=None, order_datetime=datetime.datetime(2024, 3, 29, 14, 22, 19, 819909), is_shipped=False)]


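With the `populate_existing` execution option enabled, objects already present
in the identity map are overwritten with the freshly fetched data, so the
change becomes visible: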

In [48]:
print("(3) Loading with populate existing enabled:")
# Rebind stmt to the statement returned by execution_options(), which now
# carries populate_existing=True.
stmt = stmt.execution_options(populate_existing=True)
customer = session.scalar(stmt)
if customer is not None:
    print("Updated and refreshed:", customer.orders)

(3) Loading with populate existing enabled:
2024-03-29 14:22:20,090 INFO sqlalchemy.engine.Engine SELECT customer.customer_id, customer.first_name, customer.last_name, customer.email 
FROM customer 
WHERE customer.customer_id = ?
2024-03-29 14:22:20,091 INFO sqlalchemy.engine.Engine [generated in 0.00075s] (2,)
2024-03-29 14:22:20,092 INFO sqlalchemy.engine.Engine SELECT "order".customer_id AS order_customer_id, "order".order_id AS order_order_id, "order".employee_id AS order_employee_id, "order".order_datetime AS order_order_datetime, "order".is_shipped AS order_is_shipped 
FROM "order" 
WHERE "order".customer_id IN (?) ORDER BY "order".order_id DESC
2024-03-29 14:22:20,092 INFO sqlalchemy.engine.Engine [cached since 0.07032s ago] (2,)
Updated and refreshed: [Order(order_id=3, customer_id=2, employee_id=None, order_datetime=datetime.datetime(2024, 3, 29, 14, 22, 19, 819909), is_shipped=True)]


This example serves solely to illustrate the `populate_existing` execution
option. It is important to keep in mind that the recommended approach for
obtaining updated status is to simply commit the session (and re-query).
Alternatively, you can expire session objects to achieve the same result.

Remember to close the session:

In [49]:
session.close()

2024-03-29 14:22:20,098 INFO sqlalchemy.engine.Engine ROLLBACK
