In [3]:
from pymongo import MongoClient

In [4]:
client = MongoClient()
db = client.aggregation
insert_docs = db.docs.insert_many([
    {"x": 1, "tags": ["dog", "cat"]},
    {"x": 2, "tags": ["cat"]},
    {"x": 2, "tags": ["mouse", "cat", "dog"]},
    {"x": 3, "tags": []}])

In [5]:
insert_docs.inserted_ids

[ObjectId('5a52e925b6c97a381010273a'),
 ObjectId('5a52e925b6c97a381010273b'),
 ObjectId('5a52e925b6c97a381010273c'),
 ObjectId('5a52e925b6c97a381010273d')]

In [6]:
from bson.son import SON
import pprint
pipeline = [
    {"$unwind":"$tags"},
    {"$group":{"_id":"$tags","count":{'$sum':1}}},
    #{"$sort": SON([("count", -1), ("_id", -1)])}
]
#aggregate to group by the tag and its count in overall, say how many times 'dog' occured in the doc
pprint.pprint(list(db.docs.aggregate(pipeline)))

[{'_id': 'cat', 'count': 12},
 {'_id': 'mouse', 'count': 4},
 {'_id': 'dog', 'count': 8}]


In [7]:
for a in db.docs.find():
    pprint.pprint(a)

{'_id': ObjectId('5a5120f5b6c97a3648389a69'), 'tags': ['dog', 'cat'], 'x': 1}
{'_id': ObjectId('5a5120f5b6c97a3648389a6a'), 'tags': ['cat'], 'x': 2}
{'_id': ObjectId('5a5120f5b6c97a3648389a6b'),
 'tags': ['mouse', 'cat', 'dog'],
 'x': 2}
{'_id': ObjectId('5a5120f5b6c97a3648389a6c'), 'tags': [], 'x': 3}
{'_id': ObjectId('5a512395b6c97a3648389a6e'), 'tags': ['dog', 'cat'], 'x': 1}
{'_id': ObjectId('5a512395b6c97a3648389a6f'), 'tags': ['cat'], 'x': 2}
{'_id': ObjectId('5a512395b6c97a3648389a70'),
 'tags': ['mouse', 'cat', 'dog'],
 'x': 2}
{'_id': ObjectId('5a512395b6c97a3648389a71'), 'tags': [], 'x': 3}
{'_id': ObjectId('5a51239fb6c97a3648389a73'), 'tags': ['dog', 'cat'], 'x': 1}
{'_id': ObjectId('5a51239fb6c97a3648389a74'), 'tags': ['cat'], 'x': 2}
{'_id': ObjectId('5a51239fb6c97a3648389a75'),
 'tags': ['mouse', 'cat', 'dog'],
 'x': 2}
{'_id': ObjectId('5a51239fb6c97a3648389a76'), 'tags': [], 'x': 3}
{'_id': ObjectId('5a52e925b6c97a381010273a'), 'tags': ['dog', 'cat'], 'x': 1}
{'_id': Ob

### MAP/REDUCE

In [11]:
from bson.code import Code
#defining map function using Code function in java script for MongoDB
mapper = Code("""
              function () {
                this.tags.forEach(function(z) {
                  emit(z, 1);
                 });
               }
             """)

reducer = Code("""
               function(key,values){
                   var total = 0;
                   for (var i = 0; i < values.length; i++) {
                       total += values[i];
                   }
                   return total;
               }
               """)

In [12]:
result = db.docs.map_reduce(mapper,reducer,'myresults')

In [13]:
for doc in result.find():
    pprint.pprint(doc)

{'_id': 'cat', 'value': 12.0}
{'_id': 'dog', 'value': 8.0}
{'_id': 'mouse', 'value': 4.0}


In [14]:
pprint.pprint(db.docs.map_reduce(mapper,reducer,'myresults',full_response=True))

{'counts': {'emit': 24, 'input': 16, 'output': 3, 'reduce': 3},
 'ok': 1.0,
 'result': 'myresults',
 'timeMillis': 36}
