## Azure DocumentDB Lab - The Basics

### Install the DocumentDB Python SDK

In [12]:
# Install the pydocumentdb package by shelling out to pip.
# NOTE(review): `!pip` runs whichever pip is found on PATH, which may not be the
#   kernel's own environment, and no version is pinned - confirm this is intended.
!pip install pydocumentdb

[33mYou are using pip version 8.1.2, however version 9.0.1 is available.
You should consider upgrading via the 'pip install --upgrade pip' command.[0m


### Imports

In [13]:
import json
import os
import pprint
import traceback
from http.client import HTTPException
from urllib.error import HTTPError, URLError
from urllib.request import urlopen, Request

import pydocumentdb.documents as documents
import pydocumentdb.document_client as document_client
import pydocumentdb.errors as errors

In [32]:
# Account settings for the lab.
# Prefer supplying these through the DOCUMENTDB_ACCOUNT / DOCUMENTDB_KEY
# environment variables so the master key never has to be saved inside the
# notebook. The placeholder strings are used only when a variable is not set;
# they can still be edited in place as before.
DOCUMENTDB_ACCOUNT = os.environ.get('DOCUMENTDB_ACCOUNT', 'your docdb account name here')
KEY = os.environ.get('DOCUMENTDB_KEY', 'your key here==')

# Host or URI of the account endpoint, built from the account name
URI = 'https://%s.documents.azure.com:443/' % DOCUMENTDB_ACCOUNT

# Name of the database and collection (these will get created in lab)
DATABASE = 'test_docdb'
COLLECTION = 'test_coll'

### Create a database

In [37]:
# Open a client connection to the DocumentDB account, authenticating
#   with the master key from the configuration cell
client = document_client.DocumentClient(URI, {'masterKey': KEY})

# Ask the account for the database whose id matches our configured name;
#   the result is an iterable over the matching databases
db_query = 'SELECT * FROM root r WHERE r.id="%s"' % DATABASE
query_iterable = client.QueryDatabases(db_query)
it = iter(query_iterable)

# Take the first match; None means no database with that id exists yet
test_db = next(it, None)

# Only create the database when the lookup came back empty
#   (expected on a fresh run, or after a proper tear-down)
database_missing = test_db is None
if database_missing:
    test_db = client.CreateDatabase({'id': DATABASE})
    print("Created database: ", DATABASE)

INFO:Starting new HTTPS connection (1): training-docdb.documents.azure.com
INFO:Starting new HTTPS connection (1): training-docdb-westus.documents.azure.com


### Create a collection

In [38]:
# Print every database in the account so we can confirm ours is listed
existing_databases = list(client.ReadDatabases())
print(existing_databases)

# Look inside our database for a collection whose id matches the configured name
coll_query = 'SELECT * FROM root r WHERE r.id="%s"' % COLLECTION
query_iterable = client.QueryCollections(test_db['_self'], coll_query)
it = iter(query_iterable)

# Take the first match; None means no collection with that id exists yet
test_coll = next(it, None)

# Only create the collection when the lookup came back empty
#   (expected on a fresh run, or after a proper tear-down)
collection_missing = test_coll is None
if collection_missing:
    test_coll = client.CreateCollection(test_db['_self'], {'id': COLLECTION})
    print("Created collection: ", COLLECTION)

# Display the collections that now exist in the database
list(client.ReadCollections(test_db['_self']))

[{'_colls': 'colls/', '_self': 'dbs/X2A0AA==/', '_users': 'users/', 'id': 'ToDoList', '_ts': 1483569552, '_rid': 'X2A0AA==', '_etag': '"00007100-0000-0000-0000-586d79950000"'}, {'_colls': 'colls/', '_self': 'dbs/lShQAA==/', '_users': 'users/', 'id': 'test_docdb', '_ts': 1483572531, '_rid': 'lShQAA==', '_etag': '"00002301-0000-0000-0000-586d85380000"'}]


[{'_conflicts': 'conflicts/',
  '_docs': 'docs/',
  '_etag': '"00004701-0000-0000-0000-586e337a0000"',
  '_rid': 'lShQAOYrOA0=',
  '_self': 'dbs/lShQAA==/colls/lShQAOYrOA0=/',
  '_sprocs': 'sprocs/',
  '_triggers': 'triggers/',
  '_ts': 1483617140,
  '_udfs': 'udfs/',
  'defaultTtl': 5,
  'id': 'test_coll',
  'indexingPolicy': {'automatic': True,
   'excludedPaths': [],
   'includedPaths': [{'indexes': [{'dataType': 'Number',
       'kind': 'Range',
       'precision': -1},
      {'dataType': 'String', 'kind': 'Hash', 'precision': 3}],
     'path': '/*'}],
   'indexingMode': 'consistent'}}]

### Working with documents

In [39]:
# Download the sample json documents and parse each one into a dictionary.
# Nothing is written to DocumentDB in this cell; the parsed documents are
# collected in doc_definitions for the next step.

# a place to store the document definitions
doc_definitions = []

urls = ['https://gist.githubusercontent.com/michhar/dfa446fd2336f9661a7b3938bd692970/raw/59d38e137c3d86b1052b3a9be2aa7fbe16bb3c05/movie001.json',
        'https://gist.githubusercontent.com/michhar/dfa446fd2336f9661a7b3938bd692970/raw/59d38e137c3d86b1052b3a9be2aa7fbe16bb3c05/movie002.json',
        'https://gist.githubusercontent.com/michhar/dfa446fd2336f9661a7b3938bd692970/raw/59d38e137c3d86b1052b3a9be2aa7fbe16bb3c05/movie003.json']

# collect all json documents from the URLs
for url in urls:

    # A failed download is reported and the loop moves on to the next URL,
    # so one bad link does not abort the whole cell.
    try:
        # The context manager closes the HTTP response even if reading fails;
        # the timeout keeps an unreachable host from hanging the cell.
        with urlopen(url, timeout=10) as response:
            # The body arrives as bytes in Python 3 and must be decoded
            respjson = response.read().decode("utf-8")

        # Convert the json text to a python dictionary
        respdict = json.loads(respjson)

        doc_definitions.append(respdict)

    # HTTPError is a subclass of URLError, so it has to be handled first
    except HTTPError as e:
        print('HTTP Error message: %s' % e.reason)
    except URLError as e:
        print('HTTP Error message: %s' % e.reason)
    except HTTPException as e:
        # Wrap in a tuple: e.args may be empty or hold several items, either of
        # which would make a bare "%s" % e.args raise TypeError
        print('HTTP Error message: %s' % (e,))
    except Exception:
        print('generic exception: ' + traceback.format_exc())

In [40]:
# An empty container to save document ids for later
#   (the ids are read back from each document returned by CreateDocument)
doc_ids = []

# Holds the most recently created document; stays None when nothing was created
created_doc = None

# Go through each document definition gathered in doc_definitions and
#  create a document (insert one) in our DocumentDB collection
for doc_def in doc_definitions:

    # Create the document in the collection.
    #   - re-running this cell inserts the same definitions again
    #   - the ids are saved so the documents can be referenced later
    #   - or one could give the doc an id up front (e.g., doc['id'] = filename)
    created_doc = client.CreateDocument(test_coll['_self'], doc_def)

    # Save the document id in case we need to reference these later
    doc_ids.append(created_doc['id'])

# Check location from last doc added using the document's link.
# Guarded because doc_definitions is empty whenever the downloads failed, in
# which case there is no document to read back.
if created_doc is None:
    print('No documents were created - doc_definitions is empty')
else:
    client.ReadDocument(created_doc['_self'])
    print(client.last_response_headers)

{'Cache-Control': 'no-store, no-cache', 'x-ms-session-token': '0:59', 'Content-Type': 'application/json', 'x-ms-xp-role': '1', 'Pragma': 'no-cache', 'x-ms-alt-content-path': 'dbs/test_docdb/colls/test_coll', 'Content-Location': 'https://training-docdb-westus.documents.azure.com/dbs/lShQAA==/colls/lShQAOYrOA0=/docs/lShQAOYrOA0GAAAAAAAAAA==/', 'x-ms-gatewayversion': 'version=1.10.85.2', 'Transfer-Encoding': 'chunked', 'Date': 'Thu, 05 Jan 2017 11:57:07 GMT', 'x-ms-request-charge': '1', 'x-ms-schemaversion': '1.2', 'x-ms-resource-usage': 'documentSize=0;documentsSize=5;collectionSize=5;', 'etag': '"1f02548b-0000-0000-0000-586e34910000"', 'x-ms-serviceversion': 'version=1.10.85.2', 'Strict-Transport-Security': 'max-age=31536000', 'Server': 'Microsoft-HTTPAPI/2.0', 'x-ms-last-state-change-utc': 'Thu, 05 Jan 2017 08:02:06.014 GMT', 'x-ms-resource-quota': 'documentSize=10240;documentsSize=10485760;collectionSize=10485760;', 'x-ms-activity-id': '14775447-4e99-4fbf-8631-22251b91b659'}


### Query the documents

In [41]:
# Query spec: select every document in the collection (no filter, no ordering)
query = {'query': 'SELECT * FROM test_coll'}

# Options passed along with the query.
# NOTE(review): maxItemCount presumably limits how many items come back per
#   page rather than in total - confirm against the SDK documentation.
options = {
    'enableCrossPartitionQuery': True,
    'maxItemCount': 2,
}

# The collection is addressed by its '_self' link taken from test_coll
result_iterable = client.QueryDocuments(test_coll['_self'], query, options)

In [42]:
# Pretty-printer used to show each returned document in a readable layout
pp = pprint.PrettyPrinter(indent=4)

# Walk the query results and print each document followed by a blank gap.
# The loop variable is deliberately not `_`: IPython uses `_` for the last
# cell output and stops updating it once user code rebinds the name.
it = iter(result_iterable)
for movie_doc in it:
    pp.pprint(movie_doc)
    print('\n')

{   '_attachments': 'attachments/',
    '_etag': '"1f02518b-0000-0000-0000-586e34910000"',
    '_rid': 'lShQAOYrOA0EAAAAAAAAAA==',
    '_self': 'dbs/lShQAA==/colls/lShQAOYrOA0=/docs/lShQAOYrOA0EAAAAAAAAAA==/',
    '_ts': 1483617425,
    'actors': [   'Chris Pratt',
                  'Bryce Dallas Howard',
                  'Irrfan Khan',
                  "Vincent D'Onofrio"],
    'director': ['Colin Trevorrow'],
    'genre': 'Action, Adventure, Sci-Fi',
    'id': '2140a235-a238-46f7-896b-090b87ced864',
    'imdbid': 'tt0369610',
    'language': ['English'],
    'plot': 'A new theme park is built on the original site of Jurassic Park. '
            "Everything is going well until the park's newest attraction - a "
            'genetically modified giant stealth killing machine - escapes '
            'containment and goes on a killing spree.',
    'released': '12 Jun 2015',
    'runtime': '124 min',
    'title': 'Jurassic World',
    'titleid': 1,
    'year': '2015'}


{   '_attachment

### Delete the collection (which also deletes the documents)

In [31]:
# Self-links of the collection to remove and of the database that holds it,
#   taken from the objects returned when they were created or looked up
collection_link = test_coll['_self']
database_link = test_db['_self']

# Delete the collection; the documents stored in it are removed along with it
del_coll = client.DeleteCollection(collection_link)

# (The collection could also have been found with a query and its link taken from that result)

# Display the collections remaining in the database
list(client.ReadCollections(database_link))

[]