# Container Queries

In [3]:
# Setup
%run "notebooks/SQL Queries - setup.ipynb"

## FROM Clause

**Can just use 'c' for container**

In [4]:
query(families, """

SELECT * FROM c

""")

3 document(s) selected (charge= 2.36 RUs)

[
    {
        "id": "AndersenFamily",
        "lastName": "Andersen",
        "parents": [
            {
                "firstName": "Thomas",
                "relationship": "father"
            },
            {
                "firstName": "Mary Kay",
                "relationship": "mother"
            }
        ],
        "children": [
            {
                "firstName": "Henriette Thaulow",
                "gender": "female",
                "grade": 5,
                "pets": [
                    {
                        "givenName": "Fluffy",
                        "type": "Rabbit"
                    }
                ]
            }
        ],
        "location": {
            "state": "WA",
            "county": "King",
            "city": "Seattle"
        },
        "geo": {
            "type": "Point",
            "coordinates": [
                -122.3295,
                47.60357
            ]
        },
        "is

**Get city and state**

In [5]:
query(families, """

SELECT
    c.location.city,
    c.location.state
FROM
    c

""")

3 document(s) selected (charge= 2.33 RUs)

[
    {
        "city": "Seattle",
        "state": "WA"
    },
    {
        "city": "Forest Hills",
        "state": "NY"
    },
    {
        "city": "NY",
        "state": "NY"
    }
]


**Can use container name**

In [6]:
query(families, """

SELECT
    Families.location.city,
    Families.location.state
FROM
    Families

""")

3 document(s) selected (charge= 2.33 RUs)

[
    {
        "city": "Seattle",
        "state": "WA"
    },
    {
        "city": "Forest Hills",
        "state": "NY"
    },
    {
        "city": "NY",
        "state": "NY"
    }
]


**Can "double"-alias**

In [7]:
query(families, """

SELECT
    f.location.city,
    f.location.state
FROM
    Families AS f

""")

3 document(s) selected (charge= 2.33 RUs)

[
    {
        "city": "Seattle",
        "state": "WA"
    },
    {
        "city": "Forest Hills",
        "state": "NY"
    },
    {
        "city": "NY",
        "state": "NY"
    }
]


**Reduce source to subset of children (per family)**

In [8]:
query(families, """

SELECT *
FROM c.children

""")

3 document(s) selected (charge= 3.11 RUs)

[
    [
        {
            "firstName": "Henriette Thaulow",
            "gender": "female",
            "grade": 5,
            "pets": [
                {
                    "givenName": "Fluffy",
                    "type": "Rabbit"
                }
            ]
        }
    ],
    [
        {
            "givenName": "Michelle",
            "gender": "female",
            "grade": 1
        },
        {
            "givenName": "John",
            "gender": "male",
            "grade": 7,
            "pets": [
                {
                    "givenName": "Tweetie",
                    "type": "Bird"
                }
            ]
        }
    ],
    [
        {
            "familyName": "Merriam",
            "givenName": "Jesse",
            "gender": "female",
            "grade": 6,
            "pets": [
                {
                    "givenName": "Charlie Brown",
                    "type": "Dog"
                }

**With reduced source, can't query outside the subset**

In [9]:
query(families, """

SELECT *
FROM c.children
WHERE c.location.state = 'NY'

""")

(BadRequest) Message: {"errors":[{"severity":"Error","location":{"start":33,"end":34},"code":"SC2001","message":"Identifier 'c' could not be resolved."}]}
ActivityId: a66502d9-a82c-4b45-a146-6c8c4cd903e6, Microsoft.Azure.Documents.Common/2.11.0


**All queryable properties need to be in scope**

In [10]:
query(families, """

SELECT c.children
FROM c
WHERE c.location.state = 'NY'

""")

2 document(s) selected (charge= 2.93 RUs)

[
    {
        "children": [
            {
                "givenName": "Michelle",
                "gender": "female",
                "grade": 1
            },
            {
                "givenName": "John",
                "gender": "male",
                "grade": 7,
                "pets": [
                    {
                        "givenName": "Tweetie",
                        "type": "Bird"
                    }
                ]
            }
        ]
    },
    {
        "children": [
            {
                "familyName": "Merriam",
                "givenName": "Jesse",
                "gender": "female",
                "grade": 6,
                "pets": [
                    {
                        "givenName": "Charlie Brown",
                        "type": "Dog"
                    },
                    {
                        "givenName": "Tiger",
                        "type": "Cat"
                    }

## FROM with IN

Use IN to break each item into multiple documents on a child array.

**Break each family into multiple documents on children array element**

In [11]:
query(families, """

SELECT *
FROM ch IN c.children

""")

5 document(s) selected (charge= 3.11 RUs)

[
    {
        "firstName": "Henriette Thaulow",
        "gender": "female",
        "grade": 5,
        "pets": [
            {
                "givenName": "Fluffy",
                "type": "Rabbit"
            }
        ]
    },
    {
        "givenName": "Michelle",
        "gender": "female",
        "grade": 1
    },
    {
        "givenName": "John",
        "gender": "male",
        "grade": 7,
        "pets": [
            {
                "givenName": "Tweetie",
                "type": "Bird"
            }
        ]
    },
    {
        "familyName": "Merriam",
        "givenName": "Jesse",
        "gender": "female",
        "grade": 6,
        "pets": [
            {
                "givenName": "Charlie Brown",
                "type": "Dog"
            },
            {
                "givenName": "Tiger",
                "type": "Cat"
            },
            {
                "givenName": "Princess",
                "type": 

**Try to produce a consistent shape (properties missing from a child are simply left out of its result)**

In [12]:
query(families, """

SELECT
    ch.firstName,
    ch.givenName,
    ch.grade,
    ARRAY_LENGTH(ch.pets) AS numberOfPets,
    ch.pets
FROM
    ch IN c.children

""")

5 document(s) selected (charge= 3.17 RUs)

[
    {
        "firstName": "Henriette Thaulow",
        "grade": 5,
        "numberOfPets": 1,
        "pets": [
            {
                "givenName": "Fluffy",
                "type": "Rabbit"
            }
        ]
    },
    {
        "givenName": "Michelle",
        "grade": 1
    },
    {
        "givenName": "John",
        "grade": 7,
        "numberOfPets": 1,
        "pets": [
            {
                "givenName": "Tweetie",
                "type": "Bird"
            }
        ]
    },
    {
        "givenName": "Jesse",
        "grade": 6,
        "numberOfPets": 3,
        "pets": [
            {
                "givenName": "Charlie Brown",
                "type": "Dog"
            },
            {
                "givenName": "Tiger",
                "type": "Cat"
            },
            {
                "givenName": "Princess",
                "type": "Cat"
            }
        ]
    },
    {
        "givenName"

**Resolve schema differences**

In [13]:
query(families, """

SELECT
    ch.givenName ?? ch.firstName AS childName,
    ch.grade,
    ARRAY_LENGTH(ch.pets) ?? 0 AS numberOfPets,
    ch.pets ?? [] AS pets
FROM
    ch IN c.children

""")

5 document(s) selected (charge= 3.16 RUs)

[
    {
        "childName": "Henriette Thaulow",
        "grade": 5,
        "numberOfPets": 1,
        "pets": [
            {
                "givenName": "Fluffy",
                "type": "Rabbit"
            }
        ]
    },
    {
        "childName": "Michelle",
        "grade": 1,
        "numberOfPets": 0,
        "pets": []
    },
    {
        "childName": "John",
        "grade": 7,
        "numberOfPets": 1,
        "pets": [
            {
                "givenName": "Tweetie",
                "type": "Bird"
            }
        ]
    },
    {
        "childName": "Jesse",
        "grade": 6,
        "numberOfPets": 3,
        "pets": [
            {
                "givenName": "Charlie Brown",
                "type": "Dog"
            },
            {
                "givenName": "Tiger",
                "type": "Cat"
            },
            {
                "givenName": "Princess",
                "type": "Cat"
         

## The **%%sql** magic

Use the built-in **%%sql** magic to run SQL queries in a notebook without writing any Python code.

Output is rendered in tabular form.

**Set the default database and container names.**

In [14]:
%database Families

In [15]:
%container Families

**JSON results are converted from object array to tabular format for display.**

In [14]:
%%sql

SELECT
    c.id,
    c.location.city,
    c.location.state,
    c.children
FROM
    c

Unnamed: 0,id,city,state,children
0,AndersenFamily,Seattle,WA,"[{'firstName': 'Henriette Thaulow', 'gender': ..."
1,SmithFamily,Forest Hills,NY,"[{'givenName': 'Michelle', 'gender': 'female',..."
2,WakefieldFamily,NY,NY,"[{'familyName': 'Merriam', 'givenName': 'Jesse..."


## FROM with JOIN

Use JOIN to perform an "intra-document" join between a parent object and child array.

This works similarly to IN, but also keeps the parent properties in scope, much like an INNER JOIN in regular SQL.

**JOIN duplicates parent-level properties for each child**

In [15]:
%%sql

SELECT
    f.id,
    f.location.city,
    f.location.state,
    ch.givenName ?? ch.firstName AS childName,
    ARRAY_LENGTH(ch.pets) AS numberOfPets,
    ch.pets
FROM
    c AS f
    JOIN ch IN f.children

Unnamed: 0,id,city,state,childName,numberOfPets,pets
0,AndersenFamily,Seattle,WA,Henriette Thaulow,1.0,"[{'givenName': 'Fluffy', 'type': 'Rabbit'}]"
1,SmithFamily,Forest Hills,NY,Michelle,,
2,SmithFamily,Forest Hills,NY,John,1.0,"[{'givenName': 'Tweetie', 'type': 'Bird'}]"
3,WakefieldFamily,NY,NY,Jesse,3.0,"[{'givenName': 'Charlie Brown', 'type': 'Dog'}..."
4,WakefieldFamily,NY,NY,Lisa,1.0,"[{'givenName': 'Jake', 'type': 'Snake'}]"


**Use coalesce (??) to clean up the missing pet information**

In [16]:
%%sql

SELECT
    f.id,
    f.location.city,
    f.location.state,
    ch.givenName ?? ch.firstName AS childName,
    ARRAY_LENGTH(ch.pets) ?? 0 AS numberOfPets,
    ch.pets ?? [] AS pets
FROM
    c AS f
    JOIN ch IN f.children

Unnamed: 0,id,city,state,childName,numberOfPets,pets
0,AndersenFamily,Seattle,WA,Henriette Thaulow,1,"[{'givenName': 'Fluffy', 'type': 'Rabbit'}]"
1,SmithFamily,Forest Hills,NY,Michelle,0,[]
2,SmithFamily,Forest Hills,NY,John,1,"[{'givenName': 'Tweetie', 'type': 'Bird'}]"
3,WakefieldFamily,NY,NY,Jesse,3,"[{'givenName': 'Charlie Brown', 'type': 'Dog'}..."
4,WakefieldFamily,NY,NY,Lisa,1,"[{'givenName': 'Jake', 'type': 'Snake'}]"


**Double JOIN to break on children and pets**

In [17]:
%%sql

SELECT
    f.id,
    ch.givenName ?? ch.firstName AS childName,
    p.givenName AS petName,
    p.type
FROM
    c AS f
    JOIN ch IN f.children
    JOIN p IN ch.pets


Unnamed: 0,id,childName,petName,type
0,AndersenFamily,Henriette Thaulow,Fluffy,Rabbit
1,SmithFamily,John,Tweetie,Bird
2,WakefieldFamily,Jesse,Charlie Brown,Dog
3,WakefieldFamily,Jesse,Tiger,Cat
4,WakefieldFamily,Jesse,Princess,Cat
5,WakefieldFamily,Lisa,Jake,Snake


**Just need a list of pet names**

In [18]:
%%sql

SELECT p.givenName 
FROM c AS f
JOIN ch IN f.children 
JOIN p IN ch.pets

Unnamed: 0,givenName
0,Fluffy
1,Tweetie
2,Charlie Brown
3,Tiger
4,Princess
5,Jake


**Get the list of pet names as JSON**

In [19]:
query(families, """

SELECT p.givenName 
FROM c AS f
JOIN ch IN f.children 
JOIN p IN ch.pets

""")

6 document(s) selected (charge= 3.13 RUs)

[
    {
        "givenName": "Fluffy"
    },
    {
        "givenName": "Tweetie"
    },
    {
        "givenName": "Charlie Brown"
    },
    {
        "givenName": "Tiger"
    },
    {
        "givenName": "Princess"
    },
    {
        "givenName": "Jake"
    }
]


**Use VALUE to get a string array and not an object array**

In [20]:
query(families, """

SELECT VALUE p.givenName 
FROM c AS f
JOIN ch IN f.children 
JOIN p IN ch.pets

""")

6 document(s) selected (charge= 3.13 RUs)

[
    "Fluffy",
    "Tweetie",
    "Charlie Brown",
    "Tiger",
    "Princess",
    "Jake"
]


**Using VALUE with %%sql magic returns raw JSON instead of tabular output**

In [21]:
%%sql

SELECT VALUE p.givenName 
FROM c AS f
JOIN ch IN f.children 
JOIN p IN ch.pets

['Fluffy', 'Tweetie', 'Charlie Brown', 'Tiger', 'Princess', 'Jake']

## Projections

**Project all properties from all stores**

In [22]:
query(mystore, """

SELECT * FROM c

""")

701 document(s) selected (charge= 2.28 RUs)

[
    {
        "name": "Fitness Department Stores",
        "address": {
            "addressType": "Main Office",
            "addressLine1": "7750 E Marching Rd",
            "location": {
                "city": "Scottsdale",
                "stateProvinceName": "Arizona"
            },
            "postalCode": "85257",
            "countryRegionName": "United States"
        },
        "id": "01fbb9f5-b421-48d4-8ba3-7c1f38fb79f4",
        "_rid": "A6VaAJTIj7QBAAAAAAAAAA==",
        "_self": "dbs/A6VaAA==/colls/A6VaAJTIj7Q=/docs/A6VaAJTIj7QBAAAAAAAAAA==/",
        "_etag": "\"000088f4-0000-0100-0000-5ee4dd3a0000\"",
        "_attachments": "attachments/",
        "_ts": 1592057147
    },
    {
        "name": "Extreme Toy Store",
        "address": {
            "addressType": "Main Office",
            "addressLine1": "7725 Luddington Lane",
            "location": {
                "city": "Birmingham",
                "stateProvinceN

**Project just the ID and store name properties**

In [23]:
query(mystore, """

SELECT c.id, c.name
FROM c

""")

701 document(s) selected (charge= 2.27 RUs)

[
    {
        "id": "01fbb9f5-b421-48d4-8ba3-7c1f38fb79f4",
        "name": "Fitness Department Stores"
    },
    {
        "id": "edf5635f-38c5-44db-bf53-d88e8fe5dd0e",
        "name": "Extreme Toy Store"
    },
    {
        "id": "0a788003-b60e-4ad3-8c3d-f7556d545d3b",
        "name": "Real Sporting Goods"
    },
    {
        "id": "4fe1b1a7-2da4-46fe-a8f2-ba1f2d6f6cc3",
        "name": "Exchange Parts Inc."
    },
    {
        "id": "d14c086c-73e7-4830-8210-982ba78a1e90",
        "name": "Reliable Brake Systems"
    },
    {
        "id": "5760145c-4200-4682-95f8-169b2eb3909f",
        "name": "Metal Processing Company"
    },
    {
        "id": "346f8118-4922-4dfe-ae9f-d283227b8b97",
        "name": "Two Wheels Cycle Store"
    },
    {
        "id": "3fb10591-b621-4729-b983-be82100418d4",
        "name": "Capable Sales and Service"
    },
    {
        "id": "958198d3-485a-4d3e-bf50-a8ab7785d996",
        "name": "Atypical Bike C

**Project just the store name property**

In [24]:
query(mystore, """

SELECT c.name
FROM c

""")

701 document(s) selected (charge= 2.27 RUs)

[
    {
        "name": "Fitness Department Stores"
    },
    {
        "name": "Extreme Toy Store"
    },
    {
        "name": "Real Sporting Goods"
    },
    {
        "name": "Exchange Parts Inc."
    },
    {
        "name": "Reliable Brake Systems"
    },
    {
        "name": "Metal Processing Company"
    },
    {
        "name": "Two Wheels Cycle Store"
    },
    {
        "name": "Capable Sales and Service"
    },
    {
        "name": "Atypical Bike Company"
    },
    {
        "name": "Fitness Supplies"
    }
]


**Project just the store name property as a string array using VALUE**

In [25]:
# Get all names as a string array using VALUE
query(mystore, """

SELECT VALUE c.name
FROM c

""")

701 document(s) selected (charge= 2.73 RUs)

[
    "Fitness Department Stores",
    "Extreme Toy Store",
    "Real Sporting Goods",
    "Exchange Parts Inc.",
    "Reliable Brake Systems",
    "Metal Processing Company",
    "Two Wheels Cycle Store",
    "Capable Sales and Service",
    "Atypical Bike Company",
    "Fitness Supplies"
]


**Use string concatenation for single value**

In [26]:
query(mystore, """

SELECT VALUE c.name || ', ' || c.address.countryRegionName
FROM c

""")

701 document(s) selected (charge= 2.74 RUs)

[
    "Fitness Department Stores, United States",
    "Extreme Toy Store, United States",
    "Real Sporting Goods, United States",
    "Exchange Parts Inc., United States",
    "Reliable Brake Systems, United States",
    "Metal Processing Company, Canada",
    "Two Wheels Cycle Store, United States",
    "Capable Sales and Service, United States",
    "Atypical Bike Company, France",
    "Fitness Supplies, United States"
]


**Project entirely different shape using inline JSON**

In [27]:
query(mystore, """

SELECT
  c.address.countryRegionName AS country,
  {
    "storeName": c.name,
    "cityStateZip": [
        c.address.location.city,
        c.address.location.stateProvinceName
    ],
    "metadata": {
        "internalId": c.id,
        "timestamp": c._ts
    }
  } AS storeInfo
FROM c

""")

701 document(s) selected (charge= 2.29 RUs)

[
    {
        "country": "United States",
        "storeInfo": {
            "storeName": "Fitness Department Stores",
            "cityStateZip": [
                "Scottsdale",
                "Arizona"
            ],
            "metadata": {
                "internalId": "01fbb9f5-b421-48d4-8ba3-7c1f38fb79f4",
                "timestamp": 1592057147
            }
        }
    },
    {
        "country": "United States",
        "storeInfo": {
            "storeName": "Extreme Toy Store",
            "cityStateZip": [
                "Birmingham",
                "Alabama"
            ],
            "metadata": {
                "internalId": "edf5635f-38c5-44db-bf53-d88e8fe5dd0e",
                "timestamp": 1592057147
            }
        }
    },
    {
        "country": "United States",
        "storeInfo": {
            "storeName": "Real Sporting Goods",
            "cityStateZip": [
                "Phoenix",
                "

## Range Queries

**Set the default database and container names.**

In [30]:
%database mydb

In [31]:
%container mystore

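**Get A-K** (string comparison: a name that merely starts with 'K' sorts after 'K', so in practice this returns names starting with A through J)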

In [28]:
%%sql

SELECT c.name, c.address.countryRegionName
FROM c
WHERE c.name >= 'A' AND c.name <= 'K'

Unnamed: 0,name,countryRegionName
0,Fitness Department Stores,United States
1,Extreme Toy Store,United States
2,Exchange Parts Inc.,United States
3,Capable Sales and Service,United States
4,Atypical Bike Company,France
...,...,...
298,Great Bicycle Supply,United States
299,Farthest Bike Store,Canada
300,Grand Cycle Store,United States
301,Finer Riding Supplies,Canada


**Same thing using BETWEEN**

In [29]:
%%sql

SELECT c.name, c.address.countryRegionName
FROM c
WHERE c.name BETWEEN 'A' AND 'K'

Unnamed: 0,name,countryRegionName
0,Fitness Department Stores,United States
1,Extreme Toy Store,United States
2,Exchange Parts Inc.,United States
3,Capable Sales and Service,United States
4,Atypical Bike Company,France
...,...,...
298,Great Bicycle Supply,United States
299,Farthest Bike Store,Canada
300,Grand Cycle Store,United States
301,Finer Riding Supplies,Canada


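**Get L-Z** (likewise, names that start with 'Z' but are longer than 'Z', and names starting with a lowercase letter, fall outside this range)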

In [30]:
%%sql

SELECT c.name, c.address.countryRegionName
FROM c
WHERE c.name BETWEEN 'L' AND 'Z'

Unnamed: 0,name,countryRegionName
0,Real Sporting Goods,United States
1,Reliable Brake Systems,United States
2,Metal Processing Company,Canada
3,Two Wheels Cycle Store,United States
4,Travel Sports,United States
...,...,...
389,Thrilling Bike Tours,United States
390,Variety Cycling,United Kingdom
391,Utilitarian Sporting Goods,United States
392,One-Piece Handle Bars,United States


## ORDER BY

Sort ascending and descending on any property.

Note that sorting on multiple properties is supported, but requires that you first create a custom composite index against the desired properties.

**Sort ascending**

In [31]:
%%sql

SELECT
    c.name,
    c.address.location.stateProvinceName,
    c.address.location.city
FROM c
ORDER BY c.name

Unnamed: 0,name,stateProvinceName,city
0,A Bicycle Association,New York,De Witt
1,A Bike Store,Washington,Seattle
2,A Cycle Shop,Oregon,Albany
3,A Great Bicycle Company,Missouri,Jefferson City
4,A Typical Bike Shop,Texas,Round Rock
...,...,...,...
696,World of Bikes,Missouri,Saint Louis
697,Worthwhile Activity Store,Florida,Miami
698,Year-Round Sports,Washington,Kent
699,Yellow Bicycle Company,Missouri,Saint Louis


**Sort descending**

In [32]:
%%sql

SELECT
    c.name,
    c.address.location.stateProvinceName,
    c.address.location.city
FROM c
ORDER BY c.name DESC

Unnamed: 0,name,stateProvinceName,city
0,eCommerce Bikes,Mississippi,Gulfport
1,Yellow Bicycle Company,Missouri,Saint Louis
2,Year-Round Sports,Washington,Kent
3,Worthwhile Activity Store,Florida,Miami
4,World of Bikes,Missouri,Saint Louis
...,...,...,...
696,A Typical Bike Shop,Texas,Round Rock
697,A Great Bicycle Company,Missouri,Jefferson City
698,A Cycle Shop,Oregon,Albany
699,A Bike Store,Washington,Seattle


## TOP and OFFSET...LIMIT

Get the first page (TOP) or any page (OFFSET...LIMIT) of a sorted result set.

**Limit results with TOP**

In [33]:
%%sql

SELECT TOP 10 c.name, c.address.location.city
FROM c
WHERE c.address.countryRegionName = 'United States'
ORDER BY c.name

Unnamed: 0,name,city
0,A Bicycle Association,De Witt
1,A Bike Store,Seattle
2,A Cycle Shop,Albany
3,A Great Bicycle Company,Jefferson City
4,A Typical Bike Shop,Round Rock
5,Acclaimed Bicycle Company,Mcdonough
6,Active Cycling,Heath
7,Active Systems,Duluth
8,Active Transport Inc.,North Randall
9,Activity Center,Crossville


**Paged results with OFFSET...LIMIT**

In [34]:
%%sql

SELECT c.name, c.address.location.city
FROM c
WHERE c.address.countryRegionName = 'United States'
ORDER BY c.name
OFFSET 10 LIMIT 10

Unnamed: 0,name,city
0,Advanced Bike Components,Irving
1,Aerobic Exercise Company,Camarillo
2,Affordable Sports Equipment,Lake Elsinore
3,All Cycle Shop,Bothell
4,All Seasons Sports Supply,Houston
5,Alpine Ski House,Elk Grove
6,Alternative Vehicles,Washougal
7,Another Bicycle Company,Milwaukie
8,Another Sporting Goods Company,Westminster
9,Area Bike Accessories,Modesto


## Subset filtering

**Get all city and state names**

In [35]:
%%sql

SELECT
 c.address.location.city,
 c.address.location.stateProvinceName
FROM c

Unnamed: 0,city,stateProvinceName
0,Scottsdale,Arizona
1,Birmingham,Alabama
2,Phoenix,Arizona
3,Raleigh,North Carolina
4,Memphis,Tennessee
...,...,...
696,Tupelo,Mississippi
697,Spokane,Washington
698,Burnaby,British Columbia
699,Milton Keynes,England


**Simpler by reducing subset**

In [36]:
%%sql

SELECT *
FROM c.address.location

Unnamed: 0,city,stateProvinceName
0,Scottsdale,Arizona
1,Birmingham,Alabama
2,Phoenix,Arizona
3,Raleigh,North Carolina
4,Memphis,Tennessee
...,...,...
696,Tupelo,Mississippi
697,Spokane,Washington
698,Burnaby,British Columbia
699,Milton Keynes,England


**Can't filter on subset without an alias**

In [41]:
%%sql

SELECT *
FROM c.address.location
WHERE c.address.location.stateProvinceName = 'Florida'

%%sql failed! Microsoft.Azure.Cosmos.CosmosException : Response status code does not indicate success: 400 Substatus: 0 Reason: (Microsoft.Azure.Cosmos.CosmosException : Response status code does not indicate success: 400 Substatus: 0 Reason: (Microsoft.Azure.Documents.DocumentClientException: Gateway Failed to Retrieve Query Plan: Message: {"errors":[{"severity":"Error","location":{"start":39,"end":40},"code":"SC2001","message":"Identifier 'c' could not be resolved."}]}ActivityId: aaab2108-adcf-4f75-8a41-1419c789bd91, Microsoft.Azure.Documents.Common/2.11.0, Microsoft.Azure.Documents.Common/2.11.0, Linux/18.04 cosmos-netstandard-sdk/3.4.2   at Microsoft.Azure.Cosmos.GatewayStoreClient.ParseResponseAsync(HttpResponseMessage responseMessage, JsonSerializerSettings serializerSettings, DocumentServiceRequest request)   at Microsoft.Azure.Cosmos.GatewayStoreClient.InvokeAsync(DocumentServiceRequest request, ResourceType resourceType, Uri physicalAddress, CancellationToken cancellationToken

**Alias the subset to filter within the subset**

In [37]:
%%sql

SELECT *
FROM c.address.location AS l
WHERE l.stateProvinceName IN ('Florida', 'Illinois')

Unnamed: 0,city,stateProvinceName
0,Tampa,Florida
1,Tuscola,Illinois
2,Destin,Florida
3,Norridge,Illinois
4,Sunrise,Florida
5,Chicago,Illinois
6,Lakeland,Florida
7,Vero Beach,Florida
8,Orlando,Florida
9,Miami,Florida


## Calculated properties

**Boolean expression property (on = comparison)**

In [38]:
%%sql

SELECT
 c.name,
 c.address.location.city,
 c.address.location.stateProvinceName,
 c.address.location.city = c.address.location.stateProvinceName AS isSameCityState
FROM c
WHERE STARTSWITH(c.name, 'Onl') = true
ORDER BY c.address.location.stateProvinceName

Unnamed: 0,name,city,stateProvinceName,isSameCityState
0,Online Bike Sellers,Midland,Michigan,False
1,Online Bike Catalog,Lavender Bay,New South Wales,False
2,Only Bikes and Accessories,New York,New York,True
3,Online Bike Warehouse,Aurora,Ontario,False


**Ternary (?) operator**

In [39]:
%%sql

SELECT
 c.name,
 c.address.countryRegionName AS region,
 (c.address.countryRegionName = 'United States' ? 'USD' : 'CAD' ) AS currency
FROM c
WHERE c.address.countryRegionName IN ('United States', 'Canada')

Unnamed: 0,name,region,currency
0,Fitness Department Stores,United States,USD
1,Extreme Toy Store,United States,USD
2,Real Sporting Goods,United States,USD
3,Exchange Parts Inc.,United States,USD
4,Reliable Brake Systems,United States,USD
...,...,...,...
536,Farthest Bike Store,Canada,CAD
537,One-Piece Handle Bars,United States,USD
538,Grand Cycle Store,United States,USD
539,Finer Riding Supplies,Canada,CAD


**Nested ternary (?) operator**

In [40]:
%%sql

SELECT
 c.name,
 c.address.countryRegionName AS region,
 (c.address.countryRegionName = 'United States' ? 'USD' :
  (c.address.countryRegionName = 'Canada' ? 'CAD' :
   'EUR')) AS currency
FROM c
WHERE c.address.countryRegionName IN ('United States', 'Canada', 'France')

Unnamed: 0,name,region,currency
0,Fitness Department Stores,United States,USD
1,Extreme Toy Store,United States,USD
2,Real Sporting Goods,United States,USD
3,Exchange Parts Inc.,United States,USD
4,Reliable Brake Systems,United States,USD
...,...,...,...
576,Farthest Bike Store,Canada,CAD
577,One-Piece Handle Bars,United States,USD
578,Grand Cycle Store,United States,USD
579,Finer Riding Supplies,Canada,CAD


## Aggregation queries

**Count for zip code**

In [41]:
query(mystore, """

SELECT COUNT(c) AS Zip14111Count
FROM c
WHERE
 c.address.postalCode = '14111'
 
""", partitionKey = '14111')

1 document(s) selected (charge= 3.16 RUs)

[
    {
        "Zip14111Count": 3
    }
]


**Can't count across partitions without VALUE**

In [42]:
query(mystore, """

SELECT COUNT(c) AS USCount
FROM c
WHERE
 c.address.countryRegionName = 'United States'
 
""")

(BadRequest) Message: {"Errors":["Cross partition query only supports 'VALUE <AggreateFunc>' for aggregates."]}
ActivityId: 728da26c-f75a-46e1-91f3-8c078e02c5a3, Microsoft.Azure.Documents.Common/2.11.0


**Use VALUE for cross partition aggregates**

In [43]:
query(mystore, """

SELECT VALUE COUNT(c)
FROM c
WHERE
 c.address.countryRegionName = 'United States'
 
""")

1 document(s) selected (charge= 3.16 RUs)

[
    427
]


**Aggregates for NY children**

In [44]:
query(families, """

SELECT
 COUNT(ch) AS NyKidsCount,
 MIN(ch.grade) AS NyMinGrade,
 MAX(ch.grade) AS NyMaxGrade,
 SUM(ARRAY_LENGTH(ch.pets)) AS NyPetsCount
FROM c JOIN ch IN c.children
WHERE c.location.state = 'NY'

""", partitionKey = "NY")

1 document(s) selected (charge= 3.3 RUs)

[
    {
        "NyKidsCount": 4,
        "NyMinGrade": 1,
        "NyMaxGrade": 7,
        "NyPetsCount": 5
    }
]


**All U.S. stores**

In [45]:
%%sql --database mydb --container mystore

SELECT
    c.name,
    c.address.location.stateProvinceName
FROM c
WHERE c.address.countryRegionName = 'United States'
ORDER BY c.name

Unnamed: 0,name,stateProvinceName
0,A Bicycle Association,New York
1,A Bike Store,Washington
2,A Cycle Shop,Oregon
3,A Great Bicycle Company,Missouri
4,A Typical Bike Shop,Texas
...,...,...
422,World of Bikes,Missouri
423,Worthwhile Activity Store,Florida
424,Year-Round Sports,Washington
425,Yellow Bicycle Company,Missouri


**GROUP BY**

*GROUP BY does not work with cross-partition queries in Jupyter Notebooks. Run the query in Data Explorer instead, or use version 3 of the SDK.*

In [46]:
query(mystore, """

SELECT
    COUNT(c) AS StoreCount,
    c.address.location.stateProvinceName
FROM c
WHERE c.address.countryRegionName = 'United States'
GROUP BY c.address.location.stateProvinceName

""")

(BadRequest) Message: {"Errors":["Cross partition query only supports 'VALUE <AggreateFunc>' for aggregates."]}
ActivityId: 98507eeb-ca51-448e-9739-04feac1bcb74, Microsoft.Azure.Documents.Common/2.11.0


**Capture all U.S. stores into a Pandas dataframe**

In [47]:
%%sql --database mydb --container mystore --output df_usStores

SELECT
    c.name,
    c.address.location.stateProvinceName
FROM c
WHERE c.address.countryRegionName = 'United States'
ORDER BY c.name

In [48]:
display(df_usStores)

Unnamed: 0,name,stateProvinceName
0,A Bicycle Association,New York
1,A Bike Store,Washington
2,A Cycle Shop,Oregon
3,A Great Bicycle Company,Missouri
4,A Typical Bike Shop,Texas
...,...,...
422,World of Bikes,Missouri
423,Worthwhile Activity Store,Florida
424,Year-Round Sports,Washington
425,Yellow Bicycle Company,Missouri


**Use .groupby on Pandas dataframe to get counts by state**

In [49]:
# Bucket the U.S. stores by state, then count the rows in each bucket.
# After reset_index() the state is an ordinary column again and the
# per-state count is carried in the remaining "name" column.
stores_grouped_by_state = df_usStores.groupby("stateProvinceName")
df_byState = stores_grouped_by_state.count().reset_index()

display(df_byState)

Unnamed: 0,stateProvinceName,name
0,Alabama,6
1,Arizona,12
2,California,78
3,Colorado,9
4,Connecticut,9
5,Florida,27
6,Georgia,13
7,Idaho,3
8,Illinois,15
9,Indiana,9


**Enable paging, filtering, sorting, and charting**

In [50]:
# Switch on pandas' "display.html.table_schema" option only while this frame
# is displayed. option_context puts the previous value back on exit -- even if
# display() raises -- instead of unconditionally forcing the option to False.
with pd.option_context("display.html.table_schema", True):
    display(df_byState)

Unnamed: 0,stateProvinceName,name
0,Alabama,6
1,Arizona,12
2,California,78
3,Colorado,9
4,Connecticut,9
5,Florida,27
6,Georgia,13
7,Idaho,3
8,Illinois,15
9,Indiana,9


**Cross-partition aggregates require VALUE**

In [51]:
# Cross-partition aggregates, run one after another against the families
# container. Each entry pairs the text passed as `desc` with the SQL passed
# as `sql`; the order of the list is the order in which the queries run.
aggregate_queries = [
    ("Total families", "SELECT VALUE COUNT(c) FROM c"),
    ("Lowest grade", "SELECT VALUE MIN(ch.grade) FROM ch IN c.children"),
    ("Highest grade", "SELECT VALUE MAX(ch.grade) FROM ch IN c.children"),
    ("Average grade", "SELECT VALUE AVG(ch.grade) FROM ch IN c.children"),
    ("Fewest children", "SELECT VALUE MIN(ARRAY_LENGTH(c.children)) FROM c"),
    ("Most children", "SELECT VALUE MAX(ARRAY_LENGTH(c.children)) FROM c"),
    ("Total children", "SELECT VALUE SUM(ARRAY_LENGTH(c.children)) FROM c"),
    ("Total pets", "SELECT VALUE SUM(ARRAY_LENGTH(ch.pets)) FROM ch IN c.children"),
]

for description, statement in aggregate_queries:
    query(families, showStats=False, desc=description, sql=statement)


Total families
[
    3
]
Lowest grade
[
    1
]
Highest grade
[
    7
]
Average grade
[
    4.4
]
Fewest children
[
    1
]
Most children
[
    2
]
Total children
[
    5
]
Total pets
[
    6
]


## Spatial Queries

**Each family document has GeoJSON coordinates in the geo property**

In [52]:
query(families, """

SELECT
 c.id || ', ' || c.location.city || ' (' || c.location.county || '), ' || c.location.state AS family,
 c.geo
FROM c

""")

3 document(s) selected (charge= 2.37 RUs)

[
    {
        "family": "AndersenFamily, Seattle (King), WA",
        "geo": {
            "type": "Point",
            "coordinates": [
                -122.3295,
                47.60357
            ]
        }
    },
    {
        "family": "SmithFamily, Forest Hills (Queens), NY",
        "geo": {
            "type": "Point",
            "coordinates": [
                -73.84791,
                40.72266
            ]
        }
    },
    {
        "family": "WakefieldFamily, NY (Manhattan), NY",
        "geo": {
            "type": "Point",
            "coordinates": [
                -73.992,
                40.731
            ]
        }
    }
]


**Show each family's distance (in meters) from NYC**

In [53]:
%%sql --database Families --container Families

SELECT
 c.id || ', ' || c.location.city || ' (' || c.location.county || '), ' || c.location.state AS family,
 ST_DISTANCE(
  c.geo,
  { 'type': 'Point', 'coordinates': [-73.992, 40.73104] }
 ) AS metersFromNyc
FROM c

Unnamed: 0,family,metersFromNyc
0,"AndersenFamily, Seattle (King), WA",3875551.0
1,"SmithFamily, Forest Hills (Queens), NY",12208.47
2,"WakefieldFamily, NY (Manhattan), NY",4.441948


**Show each family's distance (in kilometers) from NYC**

In [54]:
%%sql --database Families --container Families

SELECT
 c.id || ', ' || c.location.city || ' (' || c.location.county || '), ' || c.location.state AS family,
 ST_DISTANCE(
  c.geo,
  { 'type': 'Point', 'coordinates': [-73.992, 40.73104] }
 ) / 1000 AS kmFromNyc
FROM c

Unnamed: 0,family,kmFromNyc
0,"AndersenFamily, Seattle (King), WA",3875.551243
1,"SmithFamily, Forest Hills (Queens), NY",12.208466
2,"WakefieldFamily, NY (Manhattan), NY",0.004442


**Show each family's distance (in miles) from NYC**

In [55]:
%%sql --database Families --container Families

SELECT
 c.id || ', ' || c.location.city || ' (' || c.location.county || '), ' || c.location.state AS family,
 ST_DISTANCE(
  c.geo,
  { 'type': 'Point', 'coordinates': [-73.992, 40.73104] }
 ) / 1000 * .62137119 AS milesFromNyc
FROM c

Unnamed: 0,family,milesFromNyc
0,"AndersenFamily, Seattle (King), WA",2408.155888
1,"SmithFamily, Forest Hills (Queens), NY",7.585989
2,"WakefieldFamily, NY (Manhattan), NY",0.00276


**Get families within 8 miles of NYC**

In [56]:
%%sql --database Families --container Families

SELECT
 c.id || ', ' || c.location.city || ' (' || c.location.county || '), ' || c.location.state AS family
FROM c
WHERE
 ST_DISTANCE(c.geo,
  {
   'type': 'Point',
   'coordinates':
     [-73.992, 40.73104]
  }
 ) <= 8 / .62137119 * 1000

Unnamed: 0,family
0,"SmithFamily, Forest Hills (Queens), NY"
1,"WakefieldFamily, NY (Manhattan), NY"


**Get families within NYC**

In [57]:
%%sql --database Families --container Families

SELECT
 c.id || ', ' || c.location.city || ' (' || c.location.county || '), ' || c.location.state AS Family
FROM c
WHERE
 ST_INTERSECTS(c.geo, {
   'type': 'Polygon',
   'coordinates': [ [
     [ -74.01326, 40.7003  ], [ -73.99884, 40.70875 ], [ -73.97893, 40.71129 ], [ -73.97121, 40.72697 ],
     [ -73.97387, 40.73477 ], [ -73.97142, 40.74401 ], [ -73.94205, 40.77606 ], [ -73.94414, 40.78217 ],
     [ -73.9396 , 40.78535 ], [ -73.93622, 40.79111 ], [ -73.92938, 40.79567 ], [ -73.92905, 40.80088 ],
     [ -73.93481, 40.80926 ], [ -73.934  , 40.81644 ], [ -73.93531, 40.83477 ], [ -73.92764, 40.84868 ],
     [ -73.92286, 40.85595 ], [ -73.91137, 40.86855 ], [ -73.91035, 40.87121 ], [ -73.91198, 40.87332 ],
     [ -73.91451, 40.87431 ], [ -73.92838, 40.86712 ], [ -73.9337 , 40.85868 ], [ -73.94451, 40.84999 ],
     [ -73.94494, 40.84232 ], [ -74.00786, 40.75435 ], [ -74.01069, 40.72892 ], [ -74.0185 , 40.70381 ],
     [ -74.01326, 40.7003  ]
   ] ]
  })

Unnamed: 0,Family
0,"WakefieldFamily, NY (Manhattan), NY"


<img src="https://cdbdemos.blob.core.windows.net/demos/cosmos-geo-nyc.png" />

**Check for valid GeoJSON**

In [58]:
# Validate a Point literal whose coordinates are [-122.3295, 47.60357].
valid_point_sql = """

SELECT
 ST_ISVALID(
  {
   'type': 'Point',
   'coordinates': [-122.3295, 47.60357]
  }
 ) AS isValidSpatial
 
 """

query(families, valid_point_sql)

1 document(s) selected (charge= 2.25 RUs)

[
    {
        "isValidSpatial": true
    }
]


**GeoJSON coordinates are ordered [longitude, latitude]; with the two values reversed, this GeoJSON is invalid**

In [59]:
# Validate the same Point with its two coordinate values swapped.
reversed_point_sql = """

SELECT
 ST_ISVALID(
  {
   'type': 'Point',
   'coordinates': [47.60357, -122.3295]
  }
 ) AS isValidSpatial
 
 """

query(families, reversed_point_sql)

1 document(s) selected (charge= 2.25 RUs)

[
    {
        "isValidSpatial": false
    }
]


**Find out why the GeoJSON is invalid**

In [60]:
# Same swapped-coordinate Point, but via ST_ISVALIDDETAILED, which reports a
# "valid" flag together with a "reason" string.
reversed_point_detail_sql = """

SELECT
 ST_ISVALIDDETAILED(
  {
   'type': 'Point',
   'coordinates': [47.60357, -122.3295]
  }
 ) AS isValidSpatial
 
"""

query(families, reversed_point_detail_sql)

1 document(s) selected (charge= 2.25 RUs)

[
    {
        "isValidSpatial": {
            "valid": false,
            "reason": "Latitude values must be between -90 and 90 degrees."
        }
    }
]
