# $group

https://mongodb-developer.github.io/aggregation-pipeline-lab/docs/grouping/group

In [1]:
from pymongo import MongoClient

# Connection string for the MongoDB server on localhost:27017; the
# credentials are embedded directly in the URI.
uri = "mongodb://admin:mongodb@localhost:27017/"
client = MongoClient(uri)

# Handles for the "library" database and its "books" collection, which the
# aggregation cells below run against.
library_database = client.get_database("library")
books = library_database.get_collection("books")

In [11]:
# Stage 1: bucket the books by their `year` field and add up `pages`
# within each bucket.
group_by_year = {
    "$group": {
        "_id": "$year",
        "totalPages": {"$sum": "$pages"},
    }
}
# Stage 2: keep only ten of the groups. There is no $sort stage here, so
# which ten come back (and in what order) is not controlled by the pipeline.
first_ten = {"$limit": 10}

pipeline = [group_by_year, first_ten]

cursor = books.aggregate(pipeline)

# Print each result document followed by a period.
for doc in cursor:
    print(f"{doc}.")


{'_id': 1993, 'totalPages': 66571}.
{'_id': 2018, 'totalPages': 424}.
{'_id': 1985, 'totalPages': 21399}.
{'_id': 2017, 'totalPages': 455}.
{'_id': 2021, 'totalPages': 1202}.
{'_id': 2012, 'totalPages': 2351}.
{'_id': 2020, 'totalPages': 2429}.
{'_id': 2011, 'totalPages': 2224}.
{'_id': 1975, 'totalPages': 6462}.
{'_id': 1982, 'totalPages': 19086}.


### Get the results sorted by year in ascending order

In [12]:
# Stage 1: bucket the books by their `year` field and add up `pages`
# within each bucket.
group_by_year = {
    "$group": {
        "_id": "$year",
        "totalPages": {"$sum": "$pages"},
    }
}
# Stage 2: order the groups by their key (the year), ascending.
oldest_first = {"$sort": {"_id": 1}}
# Stage 3: keep the first ten groups of that ordering.
first_ten = {"$limit": 10}

pipeline = [group_by_year, oldest_first, first_ten]

cursor = books.aggregate(pipeline)

# Print each result document followed by a period.
for doc in cursor:
    print(f"{doc}.")

{'_id': None, 'totalPages': 825}.
{'_id': 1899, 'totalPages': 128}.
{'_id': 1900, 'totalPages': 324}.
{'_id': 1930, 'totalPages': 384}.
{'_id': 1935, 'totalPages': 181}.
{'_id': 1936, 'totalPages': 670}.
{'_id': 1939, 'totalPages': 296}.
{'_id': 1948, 'totalPages': 476}.
{'_id': 1949, 'totalPages': 334}.
{'_id': 1951, 'totalPages': 793}.


### Get the average number of pages per book for each year

In [13]:
# Stage 1: bucket the books by their `year` field and average `pages`
# within each bucket.
# NOTE: the output field is still called "totalPages" even though it holds
# an average here, not a sum.
average_by_year = {
    "$group": {
        "_id": "$year",
        "totalPages": {"$avg": "$pages"},
    }
}
# Stage 2: order the groups by their key (the year), ascending.
oldest_first = {"$sort": {"_id": 1}}
# Stage 3: keep the first ten groups of that ordering.
first_ten = {"$limit": 10}

pipeline = [average_by_year, oldest_first, first_ten]

cursor = books.aggregate(pipeline)

# Print each result document followed by a period.
for doc in cursor:
    print(f"{doc}.")

{'_id': None, 'totalPages': 412.5}.
{'_id': 1899, 'totalPages': 128.0}.
{'_id': 1900, 'totalPages': 324.0}.
{'_id': 1930, 'totalPages': 192.0}.
{'_id': 1935, 'totalPages': 181.0}.
{'_id': 1936, 'totalPages': 670.0}.
{'_id': 1939, 'totalPages': 148.0}.
{'_id': 1948, 'totalPages': 238.0}.
{'_id': 1949, 'totalPages': 334.0}.
{'_id': 1951, 'totalPages': 396.5}.


### Which year has the highest total page count?

In [14]:
# Stage 1: bucket the books by their `year` field and add up `pages`
# within each bucket.
group_by_year = {
    "$group": {
        "_id": "$year",
        "totalPages": {"$sum": "$pages"},
    }
}
# Stage 2: put the group with the largest page total first.
largest_total_first = {"$sort": {"totalPages": -1}}
# Stage 3: keep only that top group.
top_one = {"$limit": 1}

pipeline = [group_by_year, largest_total_first, top_one]

cursor = books.aggregate(pipeline)

# Print the single result document followed by a period.
for doc in cursor:
    print(f"{doc}.")

{'_id': 2001, 'totalPages': 162213}.
