In [1]:
import pymongo
import pprint

In [2]:
# Connect to the MongoDB server listening on localhost:27017.
# To target an Atlas cluster instead, pass the connection URI shown in the Atlas UI as `host`.
free_tier_client = pymongo.MongoClient(host='localhost:27017')

In [3]:
# Collection handle for the people-raw dataset (from the Cleansing Data with Updates
# assessment), which lives in the "cleansing" database.
people = free_tier_client["cleansing"]["people-raw"]

In [4]:
# This is a helper function to reduce the output of explain to a few key metrics
def distilled_explain(explain_output):
    """Pick the headline execution metrics out of an explain result.

    Args:
        explain_output: Mapping that holds an 'executionStats' entry, which
            in turn holds 'executionTimeMillis', 'totalDocsExamined' and
            'nReturned'.

    Returns:
        A new dict with exactly those three metrics, in that order.
    """
    execution_stats = explain_output['executionStats']
    metric_names = ('executionTimeMillis', 'totalDocsExamined', 'nReturned')
    return {metric: execution_stats[metric] for metric in metric_names}

In [5]:
# Query 1 filters on state and last name; query 2 on first and last name.
state_and_last_name = {
    "address.state": "Nebraska",
    "last_name": "Miller",
}
first_and_last_name = {
    "first_name": "Harry",
    "last_name": "Reed",
}

# Capture the explain output of each query so its metrics can be inspected below.
query_1_stats = people.find(state_and_last_name).explain()
query_2_stats = people.find(first_and_last_name).explain()

In [6]:
# Baseline: key metrics for both queries, measured before the index is created further down
for baseline_stats in (query_1_stats, query_2_stats):
    print(distilled_explain(baseline_stats))

{'executionTimeMillis': 19, 'totalDocsExamined': 50474, 'nReturned': 6}
{'executionTimeMillis': 18, 'totalDocsExamined': 50474, 'nReturned': 1}


In [9]:
# Both queries above filter on last_name, so a single index on that shared field serves them both.
# The call returns the name of the index, which the notebook displays as this cell's output.
people.create_index(
    keys=[("last_name", pymongo.DESCENDING)],
)

'last_name_-1'

In [10]:
# Re-run the same two queries so their explain output reflects the index created above.
state_and_last_name = {
    "address.state": "Nebraska",
    "last_name": "Miller",
}
first_and_last_name = {
    "first_name": "Harry",
    "last_name": "Reed",
}

# These assignments overwrite the explain results captured earlier in the notebook.
query_1_stats = people.find(state_and_last_name).explain()
query_2_stats = people.find(first_and_last_name).explain()

In [11]:
# With the index in place, both queries should report *much* lower execution times
# and far fewer documents examined than the baseline run.
for indexed_stats in (query_1_stats, query_2_stats):
    print(distilled_explain(indexed_stats))

{'executionTimeMillis': 1, 'totalDocsExamined': 533, 'nReturned': 6}
{'executionTimeMillis': 0, 'totalDocsExamined': 114, 'nReturned': 1}
