In [1]:
from pymongo import MongoClient

In [2]:
# Connect to a MongoDB server on port 27017; no host is given, so pymongo's default host applies.
client = MongoClient(port=27017)

In [3]:
# Handle to the "nobel" database, obtained by attribute access on the client.
db = client.nobel

In [4]:
# Fetch a single document from the "prizes" collection to inspect its structure.
prize = db.prizes.find_one()

In [5]:
# Fetch a single document from the "laureates" collection to inspect its structure.
laureate = db.laureates.find_one()

In [6]:
# Display the sample prize document.
prize

{'_id': ObjectId('613e30001d9ba63fef1deae7'),
 'year': '2018',
 'category': 'physics',
 'overallMotivation': '“for groundbreaking inventions in the field of laser physics”',
 'laureates': [{'id': '960',
   'firstname': 'Arthur',
   'surname': 'Ashkin',
   'motivation': '"for the optical tweezers and their application to biological systems"',
   'share': '2'},
  {'id': '961',
   'firstname': 'Gérard',
   'surname': 'Mourou',
   'motivation': '"for their method of generating high-intensity, ultra-short optical pulses"',
   'share': '4'},
  {'id': '962',
   'firstname': 'Donna',
   'surname': 'Strickland',
   'motivation': '"for their method of generating high-intensity, ultra-short optical pulses"',
   'share': '4'}]}

In [7]:
# Display the sample laureate document.
laureate

{'_id': ObjectId('613e315452d9d355d2bc17b6'),
 'id': '102',
 'firstname': 'Aage Niels',
 'surname': 'Bohr',
 'born': '1922-06-19',
 'died': '2009-09-08',
 'bornCountry': 'Denmark',
 'bornCountryCode': 'DK',
 'bornCity': 'Copenhagen',
 'diedCountry': 'Denmark',
 'diedCountryCode': 'DK',
 'diedCity': 'Copenhagen',
 'gender': 'male',
 'prizes': [{'year': '1975',
   'category': 'physics',
   'share': '3',
   'motivation': '"for the discovery of the connection between collective motion and particle motion in atomic nuclei and the development of the theory of the structure of the atomic nucleus based on this connection"',
   'affiliations': [{'name': 'Niels Bohr Institute',
     'city': 'Copenhagen',
     'country': 'Denmark'}]}]}

In [8]:
# Top-level field names of the sample prize document (iterating a dict yields its keys).
list(prize)

['_id', 'year', 'category', 'overallMotivation', 'laureates']

In [9]:
# Top-level field names of the sample laureate document (iterating a dict yields its keys).
list(laureate)

['_id',
 'id',
 'firstname',
 'surname',
 'born',
 'died',
 'bornCountry',
 'bornCountryCode',
 'bornCity',
 'diedCountry',
 'diedCountryCode',
 'diedCity',
 'gender',
 'prizes']

In [10]:
# Goal: examine the laureates of the 1903 physics prize and how they split it.
# Which projection(s) fetch ONLY the laureates' full names and prize share info?
physics_1903 = {"prizes": {"$elemMatch": {"category": "physics", "year": "1903"}}}

# Candidate 1: list-form projection. This is not the cell's last expression,
# so its result is evaluated but not displayed.
db.laureates.find_one(physics_1903, ["firstname", "surname", "prizes.share"])

# Candidate 2: dict-form projection that also switches "_id" off; being the
# last expression, this is the result the cell displays.
db.laureates.find_one(
    physics_1903,
    {"firstname": 1, "surname": 1, "prizes.share": 1, "_id": 0},
)

{'firstname': 'Antoine Henri',
 'surname': 'Becquerel',
 'prizes': [{'share': '2'}]}

In [11]:
# Find laureates whose first name starts with "G" and whose surname starts
# with "S", returning only the two name fields.
docs = db.laureates.find(
    filter={"firstname": {"$regex": "^G"}, "surname": {"$regex": "^S"}},
    projection={"firstname": 1, "surname": 1, "_id": 0},
)

# Walk the cursor once, joining first name and surname with a single space
full_names = [" ".join((doc["firstname"], doc["surname"])) for doc in docs]

# Print the full names
print(full_names)

['George D. Snell', 'Gustav Stresemann', 'Glenn Theodore Seaborg', 'George J. Stigler', 'George F. Smoot', 'George E. Smith', 'George P. Smith', 'George Bernard Shaw', 'Giorgos Seferis']


In [12]:
from fractions import Fraction

# Save documents, projecting out laureates share
prizes = db.prizes.find({}, ["laureates.share"])

# Each share is stored as a denominator string ("2" means half of the prize),
# so the reciprocals of one prize's shares should add up to exactly one.
# Fractions keep the sum exact, which makes the equality test below reliable.
# The loop variables are deliberately not called `prize` / `laureate`, so the
# sample documents held under those names are left untouched.
for prize_doc in prizes:
    total_share = sum(
        (1 / Fraction(winner["share"]) for winner in prize_doc["laureates"]),
        Fraction(0),
    )

    # Print the total share (as a float) if it is not exactly one
    if total_share != 1:
        print(float(total_share))

In [13]:
# How is the result ordered? Ascending by prize year, then descending by birth date.
docs = list(
    db.laureates.find(
        filter={"born": {"$gte": "1900"}, "prizes.year": {"$gte": "1954"}},
        projection={"born": 1, "prizes.year": 1, "_id": 0},
        sort=[("prizes.year", 1), ("born", -1)],
    )
)

# Show the first five documents
for doc in docs[:5]:
    print(doc)

{'born': '1916-08-25', 'prizes': [{'year': '1954'}]}
{'born': '1915-06-15', 'prizes': [{'year': '1954'}]}
{'born': '1901-02-28', 'prizes': [{'year': '1954'}, {'year': '1962'}]}
{'born': '1913-07-12', 'prizes': [{'year': '1955'}]}
{'born': '1911-01-26', 'prizes': [{'year': '1955'}]}


In [14]:
from operator import itemgetter

def all_laureates(prize):
    """Return the surnames of a prize's laureates, sorted and joined by " and ".

    ``prize`` is a mapping whose "laureates" entry is an iterable of mappings,
    each providing a "surname" key.
    """
    surnames = sorted(winner["surname"] for winner in prize["laureates"])
    return " and ".join(surnames)

# Smoke-test the function on the prize document returned by find_one()
print(all_laureates(db.prizes.find_one()))

Ashkin and Mourou and Strickland


In [15]:
# Physics prizes only, keeping the year and the laureates' names, oldest first
docs = db.prizes.find(
    {"category": "physics"},
    ["year", "laureates.firstname", "laureates.surname"],
    sort=[("year", 1)],
)

# Render "<year>: <names>" for every prize (names come from all_laureates),
# then display the first three entries
summaries = [
    "{year}: {names}".format(year=doc["year"], names=all_laureates(doc))
    for doc in docs
]
summaries[:3]

['1901: Röntgen',
 '1902: Lorentz and Zeeman',
 '1903: Becquerel and Curie and Curie, née Sklodowska']

In [16]:
# Original categories: distinct "category" values among prizes whose year is "1901"
original_categories = db.prizes.distinct("category", {"year": "1901"})
print(original_categories)

# Project only year and category (dropping "_id"); sort by year descending,
# then by category ascending
docs = db.prizes.find(
        filter={},
        projection= {"year": 1, "category": 1, "_id": 0},
        sort=[("year", -1), ("category", 1)]
)
# Peek at the second document in that sort order by indexing the cursor
docs[1]

['chemistry', 'literature', 'medicine', 'peace', 'physics']


{'year': '2018', 'category': 'economics'}

In [17]:
# Specify an index model for compound sorting: category ascending, then year descending
index_model = [("category", 1), ("year", -1)]
# Create the index on the prizes collection; the call's return value is displayed as the cell output
db.prizes.create_index(index_model)

'category_1_year_-1'

In [18]:
# Collect the most recent single-laureate year for each category
report_lines = []
for category in sorted(db.prizes.distinct("category")):
    doc = db.prizes.find_one(
        {"category": category, "laureates.share": "1"},
        sort=[("year", -1)]
    )
    # find_one returns None when a category has no prize with an undivided
    # share; report that explicitly instead of failing while formatting.
    year = doc["year"] if doc is not None else "none"
    report_lines.append("{category}: {year}\n".format(category=category, year=year))

# One "<category>: <year>" line per category, each terminated by a newline
report = "".join(report_lines)

print(report)

chemistry: 2011
economics: 2017
literature: 2017
medicine: 2016
peace: 2017
physics: 1992



In [19]:
from collections import Counter

# Ensure an index on country of birth
db.laureates.create_index([("bornCountry", 1)])

# For each distinct country of birth, count the laureates who were born there
# AND whose prizes list an affiliation in that same country.
# Note: this issues one count_documents query per distinct country.
n_born_and_affiliated = {
    country: db.laureates.count_documents({
        "bornCountry": country,
        "prizes.affiliations.country": country
    })
    for country in db.laureates.distinct("bornCountry")
}

# The five countries with the highest such counts, as (country, count) pairs
five_most_common = Counter(n_born_and_affiliated).most_common(5)
print(five_most_common)

[('USA', 241), ('United Kingdom', 56), ('France', 26), ('Germany', 19), ('Japan', 17)]


In [20]:
# Economics prizes, projecting only the year, sorted by year (ascending by default).
# Two limits are chained here; judging by the five documents this cell displays,
# the later limit(5) replaces the earlier limit(3) rather than combining with it.
list(db.prizes.find({"category": "economics"},
                    {"year": 1, "_id": 0})
     .sort("year")
     .limit(3)
     .limit(5))

[{'year': '1969'},
 {'year': '1970'},
 {'year': '1971'},
 {'year': '1972'},
 {'year': '1973'}]

In [28]:
from pprint import pprint
# Match prizes in which at least one laureate holds a quarter share
filter_ = {"laureates.share": "4"}

# Field names to return, given in list form
projection = ["category", "year", "laureates.motivation"]

# Cursor over the five earliest matching prizes; ordering and limit are
# handed to find() directly instead of being chained on the cursor
cursor = db.prizes.find(filter_, projection, sort=[("year", 1)], limit=5)
pprint(list(cursor))

[{'_id': ObjectId('613e30001d9ba63fef1decdc'),
  'category': 'physics',
  'laureates': [{'motivation': '"in recognition of the extraordinary services '
                               'he has rendered by his discovery of '
                               'spontaneous radioactivity"'},
                {'motivation': '"in recognition of the extraordinary services '
                               'they have rendered by their joint researches '
                               'on the radiation phenomena discovered by '
                               'Professor Henri Becquerel"'},
                {'motivation': '"in recognition of the extraordinary services '
                               'they have rendered by their joint researches '
                               'on the radiation phenomena discovered by '
                               'Professor Henri Becquerel"'}],
  'year': '1903'},
 {'_id': ObjectId('613e30001d9ba63fef1dec83'),
  'category': 'chemistry',
  'laureates': [{'motivation':

In [29]:
from pprint import pprint

# Write a function to retrieve a page of data
def get_particle_laureates(page_number=1, page_size=3):
    """Return one page of laureates whose prize motivation mentions "particle".

    Matching laureates are ordered by prize year and then surname (both
    ascending). Each returned document carries "_id", "firstname", "surname"
    and "prizes".

    Args:
        page_number: 1-based page index; must be an int >= 1.
        page_size: Maximum number of documents on a page.

    Returns:
        A list with at most ``page_size`` laureate documents.

    Raises:
        ValueError: If ``page_number`` is not an int or is smaller than 1.
    """
    # Check the type first: comparing a non-numeric value with `< 1` would
    # raise TypeError before the intended ValueError could be reported.
    if not isinstance(page_number, int) or page_number < 1:
        raise ValueError("Pages are natural numbers (starting from 1).")
    cursor = (
        db.laureates.find(
            {"prizes.motivation": {"$regex": "particle"}},
            ["firstname", "surname", "prizes"])
        .sort([("prizes.year", 1), ("surname", 1)])
        # Skip the documents that belong to the preceding pages
        .skip(page_size * (page_number - 1))
        .limit(page_size)
    )
    return list(cursor)

# Collect and save the first nine pages (range's stop is exclusive, hence 10)
pages = [get_particle_laureates(page_number=page) for page in range(1, 10)]
pprint(pages[0])

[{'_id': ObjectId('613e315452d9d355d2bc1812'),
  'firstname': 'Charles Thomson Rees',
  'prizes': [{'affiliations': [{'city': 'Cambridge',
                                'country': 'United Kingdom',
                                'name': 'University of Cambridge'}],
              'category': 'physics',
              'motivation': '"for his method of making the paths of '
                            'electrically charged particles visible by '
                            'condensation of vapour"',
              'share': '2',
              'year': '1927'}],
  'surname': 'Wilson'},
 {'_id': ObjectId('613e315452d9d355d2bc1827'),
  'firstname': 'Sir John Douglas',
  'prizes': [{'affiliations': [{'city': 'Harwell, Berkshire',
                                'country': 'United Kingdom',
                                'name': 'Atomic Energy Research '
                                        'Establishment'}],
              'category': 'physics',
              'motivation': '"for their pione