In [None]:
!pip install pymongo

In [None]:
from pymongo import MongoClient

In [None]:
# Connection setup for the aggregation examples
# (the kind of real-time analytics used for dashboards and reports).
# Handles created here: server -> 'test' database -> 'cars' collection.

br = '\n'
client = MongoClient(host='localhost', port=27017)
db = client['test']
cars = db['cars']

In [None]:
# A pipeline is a list of stages; the documents produced by one stage
# are the input of the following stage.
# Put $match and $sort at the front of the pipeline, where they can use
# indexes; $match goes first of all because it shrinks the set of
# documents every later stage has to process.

stages = [
    {'$match': {'Cylinders': 4}},
    {'$sort': {'Car': 1}},
    {'$project': {'Car': 1, 'Cylinders': 1, 'Origin': 1, 'MPG': 1}},
    {'$limit': 3},
]
pipe = cars.aggregate(stages)

# one tuple per returned document (displayed as the cell result)
[(doc['_id'], doc['Car'], doc['Cylinders'], doc['Origin'])
 for doc in pipe]

In [None]:
# Reads much like an ordinary query, but runs as an aggregation pipeline.
# Same match / sort / limit as the previous pipeline; the projection here
# names the fields to drop (value 0) instead of the fields to keep.
# None of the fields shown below are excluded, so the tuples match.

stages = [
    {'$match': {'Cylinders': 4}},
    {'$sort': {'Car': 1}},
    {'$project': {'Acceleration': 0, 'Displacement': 0, 'HP': 0,
                  'Model': 0, 'Weight': 0}},
    {'$limit': 3},
]
pipe = cars.aggregate(stages)

# one tuple per returned document (displayed as the cell result)
[(doc['_id'], doc['Car'], doc['Cylinders'], doc['Origin'])
 for doc in pipe]

In [None]:
# Exercise: why does this output differ from the previous two pipelines?
# (hint: compare the direction given to $sort)

stages = [
    {'$match': {'Cylinders': 4}},
    {'$sort': {'Car': -1}},
    {'$project': {'Car': 1, 'Cylinders': 1, 'Origin': 1, 'MPG': 1}},
    {'$limit': 3},
]
pipe = cars.aggregate(stages)

# one tuple per returned document (displayed as the cell result)
[(doc['_id'], doc['Car'], doc['Cylinders'], doc['Origin'])
 for doc in pipe]

In [None]:
# Demonstrates two more operators:
#   '$or'  - match documents whose 'Cylinders' is 5.0 or 3.0
#   '$out' - write the pipeline result to the 'cyl_odd' collection
# The matched documents are sorted by '_id' before being written.

odd_cylinders = {'$or': [{'Cylinders': 5.0},
                         {'Cylinders': 3.0}]}

pipe = cars.aggregate([
    {'$match': odd_cylinders},
    {'$sort': {'_id': 1}},
    {'$project': {'Car': 1, 'Cylinders': 1, 'Origin': 1}},
    {'$out': 'cyl_odd'},
])

In [None]:
# '$out' created the 'cyl_odd' collection, so it can be queried like
# any other collection. You can also confirm it exists by inspecting
# the database from the command line.

q = db['cyl_odd'].find()

# one tuple per stored document (displayed as the cell result)
[(doc['_id'], doc['Car'], doc['Cylinders'], doc['Origin'])
 for doc in q]

In [None]:
# Pipeline output can be stored in a variable for later use.
# Here every returned document is kept whole, as a dictionary.

stages = [
    {'$match': {'Cylinders': 6}},
    {'$sort': {'_id': 1}},
    {'$project': {'Car': 1, 'Cylinders': 1, 'Origin': 1, 'MPG': 1}},
    {'$limit': 10},
]
pipe = cars.aggregate(stages)

# drain the cursor into a list of documents
data_dict = list(pipe)

In [None]:
# Show the first document saved in 'data_dict' and its Python type

first_doc = data_dict[0]
print(first_doc)
print(type(first_doc))

In [None]:
# This time only a slice (a tuple of selected fields) of each document
# is saved. The pipeline has to be executed again: the previous cursor
# was already consumed and is not persistent.

stages = [
    {'$match': {'Cylinders': 6}},
    {'$sort': {'_id': 1}},
    {'$project': {'Car': 1, 'Cylinders': 1, 'Origin': 1, 'MPG': 1}},
    {'$limit': 10},
]
pipe = cars.aggregate(stages)

data_slice = [
    (doc['_id'], doc['Car'], doc['Cylinders'], doc['Origin'])
    for doc in pipe
]

In [None]:
# Show the first tuple saved in 'data_slice' and its Python type

first_slice = data_slice[0]
print(first_slice)
print(type(first_slice))

In [None]:
# Print every saved document (dictionary form) next to its position

for position in range(len(data_dict)):
    print(position, data_dict[position])

In [None]:
# Print every saved slice (tuple form) next to its position

for position in range(len(data_slice)):
    print(position, data_slice[position])

In [None]:
# Anatomy of a slice result: print each field of every tuple separately,
# then report the Python type of the last tuple.

for idx, record in enumerate(data_slice):
    # each slice was built as (_id, Car, Cylinders, Origin)
    doc_id, car, cylinders, origin = record
    print(idx, doc_id, car, cylinders, origin)

# the footer follows the last row, and only if there was at least one
if data_slice:
    print()
    print('slice datatype:', type(data_slice[-1]))