In [6]:
import pymongo
import pprint
from connect import uri

In [7]:
# Open a MongoDB client using the connection string that the `uri` helper
# (imported from `connect`) returns for the 'local' key.
# NOTE(review): the variable is named `free_tier_client` but the key passed is
# 'local' — confirm which deployment this notebook is meant to target.
free_tier_client = pymongo.MongoClient(uri('local'))

In [8]:
# Handle to the "people-raw" collection of the "cleansing" database
# (the dataset from the Cleansing Data with Updates assessment).
people = free_tier_client["cleansing"]["people-raw"]

In [9]:
# This is a helper function to reduce the output of explain to a few key metrics
def distilled_explain(explain_output):
    """Reduce an explain() result to three headline execution metrics.

    Args:
        explain_output: Mapping with an 'executionStats' entry that itself
            holds 'executionTimeMillis', 'totalDocsExamined' and 'nReturned'.

    Returns:
        A new dict containing only those three entries, in that order.

    Raises:
        KeyError: If 'executionStats' or one of the three metrics is missing.
    """
    execution_stats = explain_output['executionStats']
    wanted_metrics = ('executionTimeMillis', 'totalDocsExamined', 'nReturned')
    return {metric: execution_stats[metric] for metric in wanted_metrics}

In [10]:
# Filters for the two queries whose explain output is captured below.
nebraska_miller_filter = {"address.state": "Nebraska", "last_name": "Miller"}
harry_reed_filter = {"first_name": "Harry", "last_name": "Reed"}

# Capture the explain output of each query for later summarising.
query_1_stats = people.find(nebraska_miller_filter).explain()
query_2_stats = people.find(harry_reed_filter).explain()

In [12]:
# Baseline: print the distilled metrics of both queries, one per line,
# to show how long they currently take to execute.
for explain_result in (query_1_stats, query_2_stats):
    print(distilled_explain(explain_result))

{'executionTimeMillis': 1513, 'totalDocsExamined': 50474, 'nReturned': 6}
{'executionTimeMillis': 68, 'totalDocsExamined': 50474, 'nReturned': 1}


In [38]:
# Index definitions considered for speeding up the two queries above.
# Only `single_index` is passed to create_index in this cell; `multi_index`
# is defined but not used here.
# NOTE(review): execution counts jump from 12 to 38 in this notebook, so
# indexes created by earlier runs may still exist on the collection —
# confirm the timings below against a collection with only this index.
multi_index = [
    ("last_name", pymongo.DESCENDING),
    ("first_name", pymongo.ASCENDING),
    ("address.state", pymongo.ASCENDING),
]
single_index = [("last_name", pymongo.DESCENDING)]
# Kept as the cell's last expression so the returned index name is displayed.
people.create_index(single_index)

'last_name_-1'

In [39]:
# Re-run the same two queries and capture their explain output again,
# overwriting the earlier results.
nebraska_miller_filter = {"address.state": "Nebraska", "last_name": "Miller"}
harry_reed_filter = {"first_name": "Harry", "last_name": "Reed"}

query_1_stats = people.find(nebraska_miller_filter).explain()
query_2_stats = people.find(harry_reed_filter).explain()

In [40]:
# If everything went well, both queries should now show *much* lower
# execution times and far fewer documents examined than the baseline.
for explain_result in (query_1_stats, query_2_stats):
    print(distilled_explain(explain_result))

{'executionTimeMillis': 8, 'totalDocsExamined': 6, 'nReturned': 6}
{'executionTimeMillis': 0, 'totalDocsExamined': 1, 'nReturned': 1}
