# Data Modeling and Partitioning in Azure Cosmos DB

## WebStore Demo

In [1]:
# Setup

import json
import uuid

def printJson(result):
    """Pretty-print ``result`` as JSON text with a 4-space indent.

    ``result`` must be serializable by ``json.dumps``; nothing is returned.
    """
    formatted = json.dumps(result, indent=4)
    print(formatted)
    
def printCount(result):
    """Print how many documents ``result`` holds (any object supporting ``len``)."""
    print(f"Retrieved {len(result)} document(s)")
    
def printRequestCharge(container):
    """Print the request charge found in the last response headers.

    Reads the "x-ms-request-charge" entry from
    ``container.client_connection.last_response_headers`` and prints it
    as ``Cost: <charge> RU(s)``. The header value is expected to be a string.
    """
    headers = container.client_connection.last_response_headers
    charge = headers["x-ms-request-charge"]
    print(" ".join(("Cost:", charge, "RU(s)")))

In [2]:
# Get a client for the "webstore-v2" database, then one client per container used below.
database = cosmos_client.get_database_client("webstore-v2")
customerContainer = database.get_container_client("customer")
productCategoryContainer = database.get_container_client("productCategory")
productContainer = database.get_container_client("product")

### Querying for Customers

In [3]:
# Point read: fetch one customer document by item id and partition key value
# (the same GUID is passed for both arguments here).

customer = customerContainer.read_item(
    item="46192BCF-E8BB-4140-A0F1-B8764A7941E7",
    partition_key="46192BCF-E8BB-4140-A0F1-B8764A7941E7",
)

printJson(customer)
printRequestCharge(customerContainer)

{
    "id": "46192BCF-E8BB-4140-A0F1-B8764A7941E7",
    "title": "",
    "firstName": "Amanda",
    "lastName": "Cook",
    "emailAddress": "amanda3@adventure-works.com",
    "phoneNumber": "252-555-0177",
    "creationDate": "2013-08-03T00:00:00",
    "addresses": [
        {
            "addressLine1": "4098 Woodcrest Dr.",
            "addressLine2": "",
            "city": "Everett",
            "state": "WA ",
            "country": "US",
            "zipCode": "98201"
        },
        {
            "addressLine1": "9187 Vista Del Sol",
            "addressLine2": "",
            "city": "Everett",
            "state": "WA ",
            "country": "US",
            "zipCode": "98201"
        }
    ],
    "password": {
        "hash": "TME3aaOlz5NEtcLKhgRxwgjyS/lZfiX+IDHTTQYBtjI=",
        "salt": "wNb+tCk="
    },
    "_rid": "2GxpAORPEkqpFAAAAAAAAA==",
    "_self": "dbs/2GxpAA==/colls/2GxpAORPEko=/docs/2GxpAORPEkqpFAAAAAAAAA==/",
    "_etag": "\"0000f614-0000-0100-0000-5ee3b26

In [4]:
# Same customer lookup, expressed as a SQL query instead of a point read

customerQuery = "SELECT * FROM c WHERE c.id = '46192BCF-E8BB-4140-A0F1-B8764A7941E7'"
matchingCustomers = list(customerContainer.query_items(query=customerQuery))
customer = matchingCustomers[0]  # keep only the first document returned

printJson(customer)
printRequestCharge(customerContainer)

{
    "id": "46192BCF-E8BB-4140-A0F1-B8764A7941E7",
    "title": "",
    "firstName": "Amanda",
    "lastName": "Cook",
    "emailAddress": "amanda3@adventure-works.com",
    "phoneNumber": "252-555-0177",
    "creationDate": "2013-08-03T00:00:00",
    "addresses": [
        {
            "addressLine1": "4098 Woodcrest Dr.",
            "addressLine2": "",
            "city": "Everett",
            "state": "WA ",
            "country": "US",
            "zipCode": "98201"
        },
        {
            "addressLine1": "9187 Vista Del Sol",
            "addressLine2": "",
            "city": "Everett",
            "state": "WA ",
            "country": "US",
            "zipCode": "98201"
        }
    ],
    "password": {
        "hash": "TME3aaOlz5NEtcLKhgRxwgjyS/lZfiX+IDHTTQYBtjI=",
        "salt": "wNb+tCk="
    },
    "_rid": "2GxpAORPEkqpFAAAAAAAAA==",
    "_self": "dbs/2GxpAA==/colls/2GxpAORPEko=/docs/2GxpAORPEkqpFAAAAAAAAA==/",
    "_etag": "\"0000f614-0000-0100-0000-5ee3b26

### Querying for Product Categories

In [5]:
# Fetch every document in the productCategory container whose type is 'category'

categoryQuery = "SELECT * FROM c WHERE c.type = 'category'"
productCategories = list(productCategoryContainer.query_items(query=categoryQuery))

printJson(productCategories)
printCount(productCategories)
printRequestCharge(productCategoryContainer)

[
    {
        "id": "1B6FBF23-C483-4A0E-93D2-831C57E304A1",
        "name": "Accessories, Pumps",
        "type": "category",
        "_rid": "2GxpAMjSFoABAAAAAAAAAA==",
        "_self": "dbs/2GxpAA==/colls/2GxpAMjSFoA=/docs/2GxpAMjSFoABAAAAAAAAAA==/",
        "_etag": "\"0000af97-0000-0100-0000-5ee3b2990000\"",
        "_attachments": "attachments/",
        "_ts": 1591980697
    },
    {
        "id": "DFD4B838-464C-445F-A665-885C67FCA3CA",
        "name": "Components, Road Frames",
        "type": "category",
        "_rid": "2GxpAMjSFoACAAAAAAAAAA==",
        "_self": "dbs/2GxpAA==/colls/2GxpAMjSFoA=/docs/2GxpAMjSFoACAAAAAAAAAA==/",
        "_etag": "\"0000b097-0000-0100-0000-5ee3b2990000\"",
        "_attachments": "attachments/",
        "_ts": 1591980697
    },
    {
        "id": "00E05695-E93C-4EFD-BD23-147FE6BF1562",
        "name": "Components, Handlebars",
        "type": "category",
        "_rid": "2GxpAMjSFoADAAAAAAAAAA==",
        "_self": "dbs/2GxpAA==/colls/2GxpAMjS

In [6]:
# Same category filter, but the SELECT list projects only the id and name properties

projectedCategoryQuery = "SELECT c.id, c.name FROM c WHERE c.type = 'category'"
productCategories = list(productCategoryContainer.query_items(query=projectedCategoryQuery))

printJson(productCategories)
printCount(productCategories)
printRequestCharge(productCategoryContainer)

[
    {
        "id": "1B6FBF23-C483-4A0E-93D2-831C57E304A1",
        "name": "Accessories, Pumps"
    },
    {
        "id": "DFD4B838-464C-445F-A665-885C67FCA3CA",
        "name": "Components, Road Frames"
    },
    {
        "id": "00E05695-E93C-4EFD-BD23-147FE6BF1562",
        "name": "Components, Handlebars"
    },
    {
        "id": "19F44083-6506-413F-BF5B-FA4F2B846B6E",
        "name": "Components, Chains"
    },
    {
        "id": "556E8ABE-0DC2-4AFF-8843-12D664B0B236",
        "name": "Accessories, Bottles and Cages"
    },
    {
        "id": "F7456100-FCCF-4F0D-A45B-FAC2FC5CF38C",
        "name": "Clothing, Tights"
    },
    {
        "id": "4792FDEC-E87F-4776-BD22-38D62BE87276",
        "name": "Clothing, Gloves"
    },
    {
        "id": "E8424F0A-F986-445A-B2D2-DA4F8B7AC934",
        "name": "Clothing, Bib-Shorts"
    },
    {
        "id": "6A8AF598-881C-42B1-80A8-9048097510A2",
        "name": "Components, Brakes"
    },
    {
        "id": "FCBB21EC-100A-4E9F-8D2

### Using the Change Feed for Denormalization

In [7]:
# Switch to the "webstore-v3" database and rebind the two container clients used in this section.
database = cosmos_client.get_database_client("webstore-v3")
productCategoryContainer = database.get_container_client("productCategory")
productContainer = database.get_container_client("product")

In [8]:
# Retrieve the first 5 products from category 'Clothing, Shorts'

top5Products = list(productContainer.query_items(
    query = "SELECT TOP 5 * FROM c WHERE c.categoryId = 'C7324EF3-D951-45D9-A345-A82EAE344394'"))

printJson(top5Products)
# Report the charge through the container that actually ran the query,
# so this cell does not depend on a customer container bound in another section.
printRequestCharge(productContainer)

[
    {
        "id": "BCFA4C0C-2E72-4C3F-AA36-6C8D933EB4A4",
        "categoryId": "C7324EF3-D951-45D9-A345-A82EAE344394",
        "categoryName": "Clothing, Shorts",
        "sku": "SH-M897-L",
        "name": "Men's Sports Shorts, L",
        "description": "The product called \"Men's Sports Shorts, L\"",
        "price": 59.99,
        "tags": [
            {
                "id": "4C4BF0B5-2586-440B-B43E-49BAFEC89234",
                "name": "Tag-33"
            },
            {
                "id": "B8ED6540-C2CD-496C-A6B7-28AFB6957EA4",
                "name": "Tag-152"
            }
        ],
        "_rid": "iGt5AOS4B2gvAAAAAAAAAA==",
        "_self": "dbs/iGt5AA==/colls/iGt5AOS4B2g=/docs/iGt5AOS4B2gvAAAAAAAAAA==/",
        "_etag": "\"1500c584-0000-0100-0000-5ee3e8310000\"",
        "_attachments": "attachments/",
        "_ts": 1591994417
    },
    {
        "id": "F59ECC09-CAA5-4D3C-87A7-16945A92EA2D",
        "categoryId": "C7324EF3-D951-45D9-A345-A82EAE344394",
      

In [31]:
# Rename product category to 'Clothing, Fun Shorts' (trigger change feed for Azure Function)

# Read the category document, printing the cost of the read.
productCategory = productCategoryContainer.read_item(
    item="C7324EF3-D951-45D9-A345-A82EAE344394",
    partition_key="category",
)
printRequestCharge(productCategoryContainer)

# Change the name on the in-memory copy only.
productCategory["name"] = "Clothing, Fun Shorts"

# Write the modified document back, printing the cost of the replace.
productCategoryContainer.replace_item(
    item="C7324EF3-D951-45D9-A345-A82EAE344394",
    body=productCategory,
)
printRequestCharge(productCategoryContainer)

Cost: 1 RU(s)
Cost: 10.67 RU(s)


In [33]:
# Retrieve the same products again (category name is updated by Azure Function)

top5Products = list(productContainer.query_items(
    query = "SELECT TOP 5 * FROM c WHERE c.categoryId = 'C7324EF3-D951-45D9-A345-A82EAE344394'"))

printJson(top5Products)
# Report the charge through the container that actually ran the query,
# so this cell does not depend on a customer container bound in another section.
printRequestCharge(productContainer)

[
    {
        "id": "BCFA4C0C-2E72-4C3F-AA36-6C8D933EB4A4",
        "categoryId": "C7324EF3-D951-45D9-A345-A82EAE344394",
        "categoryName": "Clothing, Fun Shorts",
        "sku": "SH-M897-L",
        "name": "Men's Sports Shorts, L",
        "description": "The product called \"Men's Sports Shorts, L\"",
        "price": 59.99,
        "tags": [
            {
                "id": "4C4BF0B5-2586-440B-B43E-49BAFEC89234",
                "name": "Tag-33"
            },
            {
                "id": "B8ED6540-C2CD-496C-A6B7-28AFB6957EA4",
                "name": "Tag-152"
            }
        ],
        "_rid": "iGt5AOS4B2gvAAAAAAAAAA==",
        "_self": "dbs/iGt5AA==/colls/iGt5AOS4B2g=/docs/iGt5AOS4B2gvAAAAAAAAAA==/",
        "_etag": "\"2a00adab-0000-0100-0000-5ee4e7360000\"",
        "_attachments": "attachments/",
        "_ts": 1592059702
    },
    {
        "id": "F59ECC09-CAA5-4D3C-87A7-16945A92EA2D",
        "categoryId": "C7324EF3-D951-45D9-A345-A82EAE344394",
  

### Querying for Sales Orders

In [34]:
# Switch to the "webstore-v4" database and rebind the customer container client.
database = cosmos_client.get_database_client("webstore-v4")
customerContainer = database.get_container_client("customer")

In [35]:
# Retrieve all sales orders for a customer

# Filter on both the customerId and the 'salesOrder' document type.
salesOrderQuery = """
        SELECT *
        FROM c
        WHERE c.customerId = '46192BCF-E8BB-4140-A0F1-B8764A7941E7' AND c.type = 'salesOrder'
    """
orders = list(customerContainer.query_items(query=salesOrderQuery))

printJson(orders)
printCount(orders)
printRequestCharge(customerContainer)

[
    {
        "id": "5FA7A020-A5FD-4EC8-B458-C0C5AFA9F365",
        "type": "salesOrder",
        "customerId": "46192BCF-E8BB-4140-A0F1-B8764A7941E7",
        "orderDate": "2013-08-03T00:00:00",
        "shipDate": "2013-08-10T00:00:00",
        "details": [
            {
                "sku": "PK-7098",
                "name": "Patch Kit/8 Patches",
                "price": 2.29,
                "quantity": 1
            }
        ],
        "_rid": "3TFFAInuwsN-eAAAAAAAAA==",
        "_self": "dbs/3TFFAA==/colls/3TFFAInuwsM=/docs/3TFFAInuwsN-eAAAAAAAAA==/",
        "_etag": "\"04006b42-0000-0100-0000-5ee3b7ff0000\"",
        "_attachments": "attachments/",
        "_ts": 1591982079
    },
    {
        "id": "60EF6711-FE5B-4291-8088-96BCC7F56B4E",
        "type": "salesOrder",
        "customerId": "46192BCF-E8BB-4140-A0F1-B8764A7941E7",
        "orderDate": "2013-11-22T00:00:00",
        "shipDate": "2013-11-29T00:00:00",
        "details": [
            {
                "sku":

In [36]:
# Retrieve a customer together with all of their sales orders

# No type filter: every document with this customerId is returned, ordered by type.
customerAndOrdersQuery = """
        SELECT *
        FROM c
        WHERE c.customerId = '46192BCF-E8BB-4140-A0F1-B8764A7941E7'
        ORDER BY c.type
    """
customerWithOrders = list(customerContainer.query_items(query=customerAndOrdersQuery))

printJson(customerWithOrders)
printCount(customerWithOrders)
printRequestCharge(customerContainer)

[
    {
        "id": "46192BCF-E8BB-4140-A0F1-B8764A7941E7",
        "type": "customer",
        "customerId": "46192BCF-E8BB-4140-A0F1-B8764A7941E7",
        "title": "",
        "firstName": "Amanda",
        "lastName": "Cook",
        "emailAddress": "amanda3@adventure-works.com",
        "phoneNumber": "252-555-0177",
        "creationDate": "2013-08-03T00:00:00",
        "addresses": [
            {
                "addressLine1": "4098 Woodcrest Dr.",
                "addressLine2": "",
                "city": "Everett",
                "state": "WA ",
                "country": "US",
                "zipCode": "98201"
            },
            {
                "addressLine1": "9187 Vista Del Sol",
                "addressLine2": "",
                "city": "Everett",
                "state": "WA ",
                "country": "US",
                "zipCode": "98201"
            }
        ],
        "password": {
            "hash": "TME3aaOlz5NEtcLKhgRxwgjyS/lZfiX+IDHTTQYBtjI

### Querying for Top Customers

In [37]:
# Retrieve the top 10 customers by number of sales orders

# This attempt is expected to fail: the query is not scoped to a partition key and
# cross-partition querying has not been explicitly enabled. The error is caught and
# printed because the error message itself is what this cell demonstrates.
top10Query = """
            SELECT TOP 10 c.id, c.firstName, c.lastName, c.salesOrderCount
            FROM c
            WHERE c.type = 'customer'
            ORDER BY c.salesOrderCount DESC
        """
try:
    top10Customers = list(customerContainer.query_items(query=top10Query))
except Exception as error:
    print(error)

(BadRequest) Cross partition query is required but disabled. Please set x-ms-documentdb-query-enablecrosspartition to true, specify x-ms-documentdb-partitionkey, or revise your query to avoid this exception.
ActivityId: c29e8aa4-6816-4c18-8d7c-3c06cdcf7649, Microsoft.Azure.Documents.Common/2.11.0


In [38]:
# Second attempt: identical query text, now with cross-partition querying switched on

top10Query = """
        SELECT TOP 10 c.id, c.firstName, c.lastName, c.salesOrderCount
        FROM c
        WHERE c.type = 'customer'
        ORDER BY c.salesOrderCount DESC
    """
top10Customers = list(
    customerContainer.query_items(
        query=top10Query,
        enable_cross_partition_query=True,
    )
)

printJson(top10Customers)
printRequestCharge(customerContainer)

[
    {
        "id": "7708618E-D6F8-4975-91A5-7F202848F9B4",
        "firstName": "Mason",
        "lastName": "Roberts",
        "salesOrderCount": 28
    },
    {
        "id": "44A6D5F6-AF44-4B34-8AB5-21C5DC50926E",
        "firstName": "Dalton",
        "lastName": "Perez",
        "salesOrderCount": 28
    },
    {
        "id": "E46CE3B0-3154-42E9-84CB-447742A7A7A2",
        "firstName": "Daniel",
        "lastName": "Davis",
        "salesOrderCount": 27
    },
    {
        "id": "DB085093-192F-460B-9291-C2A3FC4BC4B5",
        "firstName": "Hailey",
        "lastName": "Patterson",
        "salesOrderCount": 27
    },
    {
        "id": "D3DC2659-70A4-4850-ACF4-0A8CABA660B1",
        "firstName": "Jason",
        "lastName": "Griffin",
        "salesOrderCount": 27
    },
    {
        "id": "CF120FE8-B4D8-4C5A-BC21-16A8F50EC3EB",
        "firstName": "Charles",
        "lastName": "Jackson",
        "salesOrderCount": 27
    },
    {
        "id": "B833E0CB-67D3-4657-A773-70

In [39]:
# Create a new sales order for a customer

customerId = "44A6D5F6-AF44-4B34-8AB5-21C5DC50926E"
salesOrderId = str(uuid.uuid4())  # fresh random id, so every run creates a distinct order

newOrder = {
    "id": salesOrderId,
    "type": "salesOrder",
    "customerId": customerId,
    "details": [
        {
            "sku": "BK-R50R-44",
            "name": "Road-650 Red, 44",
            "price": 419.4589,
            "quantity": 1
        },
        {
            "sku": "BK-R68R-52",
            "name": "Road-450 Red, 52",
            "price": 874.794,
            "quantity": 1
        }
    ]
}

# Call the stored procedure to insert the new order and update the customer order count in a transaction.
# The call is scoped to the customer's partition key value. `params` is documented as a
# list of stored-procedure arguments, so the order document is passed as a one-element list.
customerContainer.scripts.execute_stored_procedure(
    "spCreateSalesOrder",
    partition_key = customerId,
    params = [newOrder])

printRequestCharge(customerContainer)

Cost: 22.14 RU(s)


In [40]:
# Re-run the cross-partition top 10 query; it now reflects the salesOrderCount
# incremented by the stored procedure

top10Query = """
        SELECT TOP 10 c.id, c.firstName, c.lastName, c.salesOrderCount
        FROM c
        WHERE c.type = 'customer'
        ORDER BY c.salesOrderCount DESC
    """
top10Customers = list(
    customerContainer.query_items(
        query=top10Query,
        enable_cross_partition_query=True,
    )
)

printJson(top10Customers)
printRequestCharge(customerContainer)

[
    {
        "id": "44A6D5F6-AF44-4B34-8AB5-21C5DC50926E",
        "firstName": "Dalton",
        "lastName": "Perez",
        "salesOrderCount": 29
    },
    {
        "id": "7708618E-D6F8-4975-91A5-7F202848F9B4",
        "firstName": "Mason",
        "lastName": "Roberts",
        "salesOrderCount": 28
    },
    {
        "id": "E46CE3B0-3154-42E9-84CB-447742A7A7A2",
        "firstName": "Daniel",
        "lastName": "Davis",
        "salesOrderCount": 27
    },
    {
        "id": "DB085093-192F-460B-9291-C2A3FC4BC4B5",
        "firstName": "Hailey",
        "lastName": "Patterson",
        "salesOrderCount": 27
    },
    {
        "id": "D3DC2659-70A4-4850-ACF4-0A8CABA660B1",
        "firstName": "Jason",
        "lastName": "Griffin",
        "salesOrderCount": 27
    },
    {
        "id": "CF120FE8-B4D8-4C5A-BC21-16A8F50EC3EB",
        "firstName": "Charles",
        "lastName": "Jackson",
        "salesOrderCount": 27
    },
    {
        "id": "B833E0CB-67D3-4657-A773-70