In [1]:
import requests
from pymongo import MongoClient

In [3]:
# With no arguments, MongoClient targets the default host ("localhost")
client = MongoClient()

# Attribute access selects the same database as client["nobel"];
# the local "nobel" database is created on the fly
db = client.nobel

In [None]:
# One-time data load, left commented out (presumably so that re-running the
# notebook does not insert the same documents a second time -- confirm before
# re-enabling). Uncommented, it requests prize.json and laureate.json from the
# Nobel Prize API and inserts the "prizes" / "laureates" lists from each
# response into the collection of the same name.
# for collection_name in ["prizes", "laureates"]:
#     # collect the data from the API
#     response = requests.get("http://api.nobelprize.org/v1/{}.json".\
#         format(collection_name[:-1] ))

#     # convert the data to json
#     documents = response.json()[collection_name]

#     # Create collections on the fly
#     db[collection_name].insert_many(documents)


In [38]:
from collections import OrderedDict
from itertools import groupby
from operator import itemgetter

# Categories that were awarded in 1901 (years are stored as strings).
original_categories = set(db.prizes.distinct("category", {"year": "1901"}))

# Pipeline stages: keep only original-category prizes, reduce each document to
# its year and category, and sort by year (descending) so that prizes from the
# same year are adjacent -- itertools.groupby only merges neighbouring items.
keep_original = {"$match": {"category": {"$in": list(original_categories)}}}
year_and_category = {"$project": {"year": 1, "category": 1}}
by_year_desc = {"$sort": OrderedDict(year=-1)}
pipeline = [keep_original, year_and_category, by_year_desc]
cursor = db.prizes.aggregate(pipeline)

# Report every year in which at least one original category has no prize.
for key, group in groupby(cursor, key=itemgetter("year")):
    awarded = set(map(itemgetter("category"), group))
    missing = original_categories.difference(awarded)
    if not missing:
        continue
    print("{year}: {missing}".format(year=key, missing=", ".join(sorted(missing))))

### Sort, Skip, Limit to create pages

In [20]:
from pprint import pprint

#select all the laureates with "particle" in prizes motivation and paginate them using sort, skip and limit

# Write a function to retrieve a page of data
def get_particle_laureates(page_number=1, page_size=3):
    """Return one page of laureates whose prize motivation mentions "particle".

    Matching documents are ordered by ``prizes.year`` and then ``surname``
    (both ascending). ``_id`` is used as a final tiebreaker so that documents
    with equal sort keys always come back in the same order, which keeps page
    boundaries stable between calls.

    Parameters
    ----------
    page_number : int
        1-based index of the page to fetch.
    page_size : int
        Maximum number of laureates on a page; must be at least 1.

    Returns
    -------
    list of dict
        At most ``page_size`` documents restricted to ``firstname``,
        ``surname`` and ``prizes`` (plus the default ``_id``).

    Raises
    ------
    ValueError
        If ``page_number`` or ``page_size`` is not a positive integer.
    """
    # Check the type *before* comparing: `"1" < 1` or `None < 1` would raise
    # TypeError and bypass the intended ValueError.
    if not isinstance(page_number, int) or page_number < 1:
        raise ValueError("Pages are natural numbers (starting from 1).")
    # A limit of 0 means "no limit" to the cursor, and a negative size would
    # produce a negative skip, so neither makes sense for pagination.
    if not isinstance(page_size, int) or page_size < 1:
        raise ValueError("Page size must be a positive integer.")

    cursor = (
        db.laureates.find(
            {"prizes.motivation": {"$regex": "particle"}},  # filter
            ["firstname", "surname", "prizes"],             # projection
        )
        .sort([("prizes.year", 1), ("surname", 1), ("_id", 1)])
        .skip(page_size * (page_number - 1))
        .limit(page_size)
    )
    return list(cursor)

# Collect and save the first nine pages.
# range() excludes its stop value, so pages 1..N_PAGES need a stop of N_PAGES + 1.
N_PAGES = 9
pages = [get_particle_laureates(page_number=page) for page in range(1, N_PAGES + 1)]
pprint(pages[0])

[{'_id': ObjectId('63d80401a0e8f2e801a5e40c'),
  'firstname': 'C.T.R.',
  'prizes': [{'affiliations': [{'city': 'Cambridge',
                                'country': 'United Kingdom',
                                'name': 'University of Cambridge'}],
              'category': 'physics',
              'motivation': '"for his method of making the paths of '
                            'electrically charged particles visible by '
                            'condensation of vapour"',
              'share': '2',
              'year': '1927'}],
  'surname': 'Wilson'},
 {'_id': ObjectId('63d80401a0e8f2e801a5e422'),
  'firstname': 'John',
  'prizes': [{'affiliations': [{'city': 'Harwell, Berkshire',
                                'country': 'United Kingdom',
                                'name': 'Atomic Energy Research '
                                        'Establishment'}],
              'category': 'physics',
              'motivation': '"for their pioneer work on the transmutati

In [7]:
# Display pymongo's built-in documentation for Collection.find
# (it describes the `filter` and `projection` arguments).
help(db.laureates.find)

Help on method find in module pymongo.collection:

find(*args: Any, **kwargs: Any) -> pymongo.cursor.Cursor[~_DocumentType] method of pymongo.collection.Collection instance
    Query the database.
    
    The `filter` argument is a query document that all results
    must match. For example:
    
    >>> db.test.find({"hello": "world"})
    
    only matches documents that have a key "hello" with value
    "world".  Matches can have other keys *in addition* to
    "hello". The `projection` argument is used to specify a subset
    of fields that should be included in the result documents. By
    limiting results to a certain subset of fields you can cut
    down on network traffic and decoding time.
    
    Raises :class:`TypeError` if any of the arguments are of
    improper type. Returns an instance of
    :class:`~pymongo.cursor.Cursor` corresponding to this query.
    
    The :meth:`find` method obeys the :attr:`read_preference` of
    this :class:`Collection`.
    
    :Paramete

### Using Aggregation

In [6]:
# Total number of prizes awarded (at least partly) to organizations:
# take the length of each organization's "prizes" array and add them all up.
only_orgs = {"$match": {"gender": "org"}}
prizes_per_org = {"$project": {"n_prizes": {"$size": "$prizes"}}}
# Grouping on _id=None folds every document into a single result document.
overall_sum = {"$group": {"_id": None, "n_prizes_total": {"$sum": "$n_prizes"}}}
pipeline = [only_orgs, prizes_per_org, overall_sum]

org_totals = list(db.laureates.aggregate(pipeline))
print(org_totals)

[{'_id': None, 'n_prizes_total': 30}]
