In [1]:
from pymongo import MongoClient

In [2]:
# Open a MongoDB client on port 27017; no host is passed, so the driver's default host applies.
client = MongoClient(port=27017)

In [3]:
# Handle to the "nobel" database used by every query below.
db = client.nobel

In [5]:
# Fetch a single document from the "prizes" collection to inspect its structure.
prize = db.prizes.find_one()

In [6]:
# Fetch a single document from the "laureates" collection to inspect its structure.
laureate = db.laureates.find_one()

In [7]:
# Display the sample prize document.
prize

{'_id': ObjectId('613e30001d9ba63fef1deae7'),
 'year': '2018',
 'category': 'physics',
 'overallMotivation': '“for groundbreaking inventions in the field of laser physics”',
 'laureates': [{'id': '960',
   'firstname': 'Arthur',
   'surname': 'Ashkin',
   'motivation': '"for the optical tweezers and their application to biological systems"',
   'share': '2'},
  {'id': '961',
   'firstname': 'Gérard',
   'surname': 'Mourou',
   'motivation': '"for their method of generating high-intensity, ultra-short optical pulses"',
   'share': '4'},
  {'id': '962',
   'firstname': 'Donna',
   'surname': 'Strickland',
   'motivation': '"for their method of generating high-intensity, ultra-short optical pulses"',
   'share': '4'}]}

In [8]:
# Display the sample laureate document.
laureate

{'_id': ObjectId('613e315452d9d355d2bc17b6'),
 'id': '102',
 'firstname': 'Aage Niels',
 'surname': 'Bohr',
 'born': '1922-06-19',
 'died': '2009-09-08',
 'bornCountry': 'Denmark',
 'bornCountryCode': 'DK',
 'bornCity': 'Copenhagen',
 'diedCountry': 'Denmark',
 'diedCountryCode': 'DK',
 'diedCity': 'Copenhagen',
 'gender': 'male',
 'prizes': [{'year': '1975',
   'category': 'physics',
   'share': '3',
   'motivation': '"for the discovery of the connection between collective motion and particle motion in atomic nuclei and the development of the theory of the structure of the atomic nucleus based on this connection"',
   'affiliations': [{'name': 'Niels Bohr Institute',
     'city': 'Copenhagen',
     'country': 'Denmark'}]}]}

In [11]:
# Top-level field names of the sample prize document.
list(prize.keys())

['_id', 'year', 'category', 'overallMotivation', 'laureates']

In [12]:
# Top-level field names of the sample laureate document.
list(laureate.keys())

['_id',
 'id',
 'firstname',
 'surname',
 'born',
 'died',
 'bornCountry',
 'bornCountryCode',
 'bornCity',
 'diedCountry',
 'diedCountryCode',
 'diedCity',
 'gender',
 'prizes']

In [13]:
# Distinct values of the "category" field across the prizes collection.
db.prizes.distinct("category")

['chemistry', 'economics', 'literature', 'medicine', 'peace', 'physics']

In [14]:
# Distinct prize categories as recorded inside each laureate's "prizes" list (dot notation reaches into the list items).
db.laureates.distinct("prizes.category")

['chemistry', 'economics', 'literature', 'medicine', 'peace', 'physics']

In [18]:
# Check that both collections record exactly the same set of prize categories.
prize_categories = set(db.prizes.distinct("category"))
laureate_categories = set(db.laureates.distinct("prizes.category"))
prize_categories == laureate_categories

True

In [20]:
# Countries that appear as some laureate's country of death but never as a country of birth
died_in = set(db.laureates.distinct("diedCountry"))
born_in = set(db.laureates.distinct("bornCountry"))
countries = died_in - born_in
countries

{'Barbados',
 'Czechoslovakia',
 'East Germany',
 'Gabon',
 'Greece',
 'Israel',
 'Jamaica',
 'Northern Rhodesia (now Zambia)',
 'Philippines',
 'Puerto Rico',
 'Tunisia',
 'USSR',
 'Yugoslavia (now Serbia)'}

In [22]:
# How many distinct countries appear among the affiliations attached to laureates' prizes
affiliation_countries = db.laureates.distinct("prizes.affiliations.country")
count = len(affiliation_countries)
count

29

In [26]:
# Distinct affiliation countries, restricted to laureates whose "bornCountry" is "USA".
db.laureates.distinct("prizes.affiliations.country", {"bornCountry": "USA"})

['Australia', 'Denmark', 'USA', 'United Kingdom']

In [25]:
# Show one example laureate document whose "bornCountry" is "USA".
db.laureates.find_one({"bornCountry": "USA"})

{'_id': ObjectId('613e315452d9d355d2bc17b7'),
 'id': '95',
 'firstname': 'Leon Neil',
 'surname': 'Cooper',
 'born': '1930-02-28',
 'died': '0000-00-00',
 'bornCountry': 'USA',
 'bornCountryCode': 'US',
 'bornCity': 'New York, NY',
 'gender': 'male',
 'prizes': [{'year': '1972',
   'category': 'physics',
   'share': '3',
   'motivation': '"for their jointly developed theory of superconductivity, usually called the BCS-theory"',
   'affiliations': [{'name': 'Brown University',
     'city': 'Providence, RI',
     'country': 'USA'}]}]}

In [30]:
# A prize document that has an element at index 2 of "laureates" has three or more laureates
criteria = {"laureates.2": {"$exists": True}}

# Distinct categories among the prize documents matching that filter
triple_play_categories = set(db.prizes.distinct("category", criteria))

# Literature should be the only category that never matches the filter.
all_categories = set(db.prizes.distinct("category"))
all_categories.difference(triple_play_categories) == {"literature"}

True

In [33]:
# For reference: count laureates who won an unshared (share "1") prize in physics in or after 1945.
# $elemMatch requires a single prize entry to satisfy all three conditions at once.
db.laureates.count_documents({
    "prizes": {"$elemMatch": {
        "category": "physics",
        "share": "1",
        "year": {"$gte": "1945"}}}})

18

In [32]:
# Number of laureates who won a shared prize in physics in or after 1945 (computed in the next cell)

In [34]:
# Count laureates holding a physics prize from 1945 onward whose share is not "1" (a shared prize).
shared_physics_prize = {
    "category": "physics",
    "share": {"$ne": "1"},
    "year": {"$gte": "1945"},
}
db.laureates.count_documents({"prizes": {"$elemMatch": shared_physics_prize}})

143

In [35]:
# Ratio of unshared to shared physics laureates since 1945; the literals are the two counts displayed above (18 and 143).
18/143

0.1258741258741259

In [45]:
# Filter for laureates with an unshared prize (share "1") won in or after 1945
# in a category other than physics, chemistry, or medicine
unshared = {
    "prizes": {"$elemMatch": {
        "category": {"$nin": ["physics", "chemistry", "medicine"]},
        "share": "1",
        "year": {"$gte": "1945"},
    }}}

# Same category and year restrictions, but the share is anything other than "1" (a shared prize)
shared = {
    "prizes": {"$elemMatch": {
        "category": {"$nin": ["physics", "chemistry", "medicine"]},
        "share": {"$ne": "1"},
        "year": {"$gte": "1945"},
    }}}

# Run each count exactly once and keep the result: a bare call in the middle
# of a cell is neither displayed nor stored, so it only costs a round trip.
n_unshared = db.laureates.count_documents(unshared)
n_shared = db.laureates.count_documents(shared)

# Ratio of laureates with unshared prizes to laureates with shared prizes
ratio = n_unshared / n_shared
print(ratio)

1.3653846153846154


In [46]:
# Distinct values of the "gender" field; the result shows organizations are recorded as "org".
db.laureates.distinct("gender")

['female', 'male', 'org']

In [50]:
# Filter for organization laureates (gender "org") with a prize year before 1945
before = {
    "gender": "org",
    "prizes.year": {"$lt": "1945"},
    }

# Filter for organization laureates with a prize year in or after 1945
in_or_after = {
    "gender": "org",
    "prizes.year": {"$gte": "1945"},
    }

# NOTE(review): an organization holding prizes on both sides of 1945 would
# presumably satisfy both filters and be counted in each total — confirm that
# this double counting is acceptable for the ratio below.
n_before = db.laureates.count_documents(before)
n_in_or_after = db.laureates.count_documents(in_or_after)

# Share of the two counts that falls in or after 1945
ratio = n_in_or_after / (n_in_or_after + n_before)
print(ratio)

0.84


In [53]:
# Count laureates whose first name begins with "G" and whose surname begins with "S".
from bson import Regex

initials_filter = {
    "firstname": Regex("^G"),
    "surname": Regex("^S"),
}
db.laureates.count_documents(initials_filter)

9

In [55]:
# Match every "bornCountry" value that contains "Germany" anywhere in the string
criteria = {"bornCountry": Regex("Germany")}
germany_anywhere = set(db.laureates.distinct("bornCountry", criteria))
print(germany_anywhere)

{'Mecklenburg (now Germany)', 'West Germany (now Germany)', 'Germany (now Russia)', 'W&uuml;rttemberg (now Germany)', 'East Friesland (now Germany)', 'Germany', 'Hesse-Kassel (now Germany)', 'Germany (now Poland)', 'Schleswig (now Germany)', 'Prussia (now Germany)', 'Bavaria (now Germany)', 'Germany (now France)'}


In [56]:
# Match only "bornCountry" values that begin with "Germany" (the pattern is anchored with ^)
criteria = {"bornCountry": Regex("^Germany")}
germany_prefix = set(db.laureates.distinct("bornCountry", criteria))
print(germany_prefix)

{'Germany', 'Germany (now Poland)', 'Germany (now Russia)', 'Germany (now France)'}


In [61]:
# Filter for "bornCountry" values that start with "Germany (now".
# The pattern is a raw string so that "\(" reaches the regex engine as an
# escaped literal parenthesis rather than being an invalid Python string escape.
criteria = {"bornCountry": Regex(r"^Germany \(now")}
print(set(db.laureates.distinct("bornCountry", criteria)))

{'Germany (now Poland)', 'Germany (now Russia)', 'Germany (now France)'}


In [64]:
# Filter for "bornCountry" values that end with "now Germany)", i.e. birthplaces recorded as now being Germany.
# The pattern is a raw string so that "\)" reaches the regex engine as an
# escaped literal parenthesis rather than being an invalid Python string escape.
criteria = {"bornCountry": Regex(r"now Germany\)$")}
print(set(db.laureates.distinct("bornCountry", criteria)))

{'Mecklenburg (now Germany)', 'West Germany (now Germany)', 'W&uuml;rttemberg (now Germany)', 'East Friesland (now Germany)', 'Hesse-Kassel (now Germany)', 'Schleswig (now Germany)', 'Prussia (now Germany)', 'Bavaria (now Germany)'}


In [65]:
# Laureates with a prize motivation that contains the substring "transistor"
criteria = {"prizes.motivation": Regex("transistor")}

# Field names holding a laureate's first name and last name
first, last = "firstname", "surname"

# Collect (first name, surname) pairs for every matching laureate, then print them
matching_names = [(doc[first], doc[last]) for doc in db.laureates.find(criteria)]
print(matching_names)

[('William Bradford', 'Shockley'), ('John', 'Bardeen'), ('Walter Houser', 'Brattain')]
