# Connect to DB via MongoS (our mongos is running at port 27200 on localhost)

In [1]:
import pymongo
cli = pymongo.MongoClient('localhost', 27200)
db = cli.mongoMart
coll = db.restaurants

In [2]:
coll.drop_indexes()

# Covered queries

In [3]:
coll.find_one()

{'_id': ObjectId('60abc72b99e7e0f50266f00f'),
 'name': 'Altius',
 'cuisine': 'Japanese',
 'stars': 3.1,
 'address': {'street': '1178 Ocoge Glen',
  'city': 'Ratizbu',
  'state': 'SD',
  'zipcode': '14557'}}

In [4]:
coll.create_index([
    ('borough', 1),
    ('cuisine', 1),
    ('name', 1)
])

'borough_1_cuisine_1_name_1'

In [5]:
q = coll.find(
    {
        'borough': 'Bronx'
    },
    {
        'cuisine': 1,
        'name': 1
    }
)
list(q.limit(5))

[]

In [6]:
from pprint import pprint
def plan_summary(plan):
    winningPlan = plan['queryPlanner']['winningPlan']
    stages = []
    stage = plan['executionStats']['executionStages']
    while stage:
        inputStage = stage.pop('inputStage', None)
        stages.append(stage)
        stage = inputStage
    stages.reverse()
    print('Execution Stages')
    for stage in stages:
        pprint(stage)
        print('--')

In [7]:
plan_summary(q.explain())

Execution Stages
{'executionTimeMillis': 1,
 'nReturned': 0,
 'shards': [{'allPlansExecution': [],
             'executionStages': {'advanced': 0,
                                 'executionTimeMillisEstimate': 0,
                                 'inputStage': {'advanced': 0,
                                                'executionTimeMillisEstimate': 0,
                                                'inputStage': {'advanced': 0,
                                                               'chunkSkips': 0,
                                                               'executionTimeMillisEstimate': 0,
                                                               'inputStage': {'advanced': 0,
                                                                              'alreadyHasObj': 0,
                                                                              'docsExamined': 0,
                                                                              'executionTimeMillis

In [8]:
q = coll.find(
    {
        'borough': 'Bronx'
    },
    {
        'cuisine': 1,
        'name': 1,
        '_id': 0
    }
)
list(q.limit(5))

[]

In [9]:
plan_summary(q.explain())

Execution Stages
{'executionTimeMillis': 1,
 'nReturned': 0,
 'shards': [{'allPlansExecution': [],
             'executionStages': {'advanced': 0,
                                 'executionTimeMillisEstimate': 0,
                                 'inputStage': {'advanced': 0,
                                                'executionTimeMillisEstimate': 0,
                                                'inputStage': {'advanced': 0,
                                                               'chunkSkips': 0,
                                                               'executionTimeMillisEstimate': 0,
                                                               'inputStage': {'advanced': 0,
                                                                              'alreadyHasObj': 0,
                                                                              'docsExamined': 0,
                                                                              'executionTimeMillis

                                                               'needYield': 0,
                                                               'restoreState': 0,
                                                               'saveState': 0,
                                                               'stage': 'SHARDING_FILTER',
                                                               'works': 1},
                                                'isEOF': 1,
                                                'nReturned': 0,
                                                'needTime': 0,
                                                'needYield': 0,
                                                'restoreState': 0,
                                                'saveState': 0,
                                                'stage': 'PROJECTION_SIMPLE',
                                                'transformBy': {'_id': 0,
                                                           

# Geospatial Indexing

In [10]:
coll.create_index([
    ('address.coord', '2dsphere')  # longitude, latitude 
])

'address.coord_2dsphere'

In [11]:
doc = coll.find_one()

In [12]:
doc

{'_id': ObjectId('60abc72b99e7e0f50266f015'),
 'name': 'Daniel',
 'cuisine': 'Sushi',
 'stars': 1.8,
 'address': {'street': '844 Ozaiti Terrace',
  'city': 'Puugilu',
  'state': 'DC',
  'zipcode': '87625'}}

In [14]:
q = coll.find(
    {'address.coord': {
        '$nearSphere': doc['address']['coord']
    }},
    {'_id': 0, 'name': 1, 'address.coord': 1}
).limit(5)
list(q)

KeyError: 'coord'

In [None]:
point = {
    '$type': 'Point',
    'coordinates': doc['address']['coord']
}
q = coll.find(
    {'address.coord': {'$nearSphere': {'$geometry': point}}},
    {'_id': 0, 'name': 1, 'address.coord': 1}
).limit(5)
list(q)

In [None]:
res = db.command('geoNear', 'restaurants', near=point, spherical=True)
res

In [None]:
for i, doc in enumerate(res['results']):
    if i > 5:
        break
    print('{:.1f} meters away: {}'.format(doc['dis'], doc['obj']['name']))

# Full-text search and indexing

In [15]:
import re
q_re = coll.find({'name': re.compile('^Dunkin')})
q_re.count()

  This is separate from the ipykernel package so we can avoid doing imports until


0

In [16]:
coll.create_index([('name', 'text')])

'name_text'

In [17]:
q_text = coll.find({'$text': {
    '$search': 'Dunkin',
    '$caseSensitive': True
}})
q_text.count()

  """


0

In [None]:
found_with_re = {doc['_id'] for doc in q_re}
found_with_text = {doc['_id'] for doc in q_text}

In [None]:
print('Found with re but not text:')
for _id in found_with_re - found_with_text:
    print(coll.find_one({'_id': _id}, {'name': 1}))
print('Found with text but not re:')
for _id in found_with_text - found_with_re:
    print(coll.find_one({'_id': _id}, {'name': 1}))

In [None]:
coll.drop_indexes()

In [None]:
coll.create_index([("$**", "text")])

In [None]:
q_text = coll.find({'$text': {
    '$search': 'donut'     # finds everything with donut in name, cuisine, borough, or address
}})
q_text.count()

In [None]:
q_text = coll.find({'$text': {
    '$search': 'queen'     # finds everything with queen in name, cuisine, borough, or address (incl. "Queens")
}})
q_text.count()