In [1]:
pip install pymongo

Note: you may need to restart the kernel to use updated packages.


In [2]:
# Install the pymongo Python Package 
# !pip3 install pymongo

from pymongo import MongoClient
from pprint import pprint
import requests

In [3]:
# Client connects to "localhost" by default 
client = MongoClient("127.0.0.1:27017")

In [4]:
# Create local "bipm" database on the fly 
db = client['bipm']

In [5]:
# When we rerun the whole notebook, we start from scratch 
# by dropping the colection "courses"
db.courses.drop()

In [6]:
# Create a Python Dictonary
courses = [
    {'title': 'Data Science',
     'lecturer': {
         'name': 'Markus Löcher',
         'department': 'Math',
         'status': 'Professor'
     },
     'semester': 1},
    {'title': 'Data Warehousing',
     'lecturer': {
         'name': 'Roland M. Mueller',
         'department': 'Information Systems',
         'status': 'Professor'
     },
     'semester': 1},
    {'title': 'Business Process Management',
     'lecturer': {
         'name': 'Frank Habermann',
         'department': 'Information Systems',
         'status': 'Professor'
     },
     'semester': 1},
    {'title': 'Stratigic Issues of IT',
     'lecturer': {
         'name': 'Sven Pohland',
         'department': 'Information Systems',
         'status': 'Professor'
     },
     'semester': 1},
    {'title': 'Text, Web and Social Media Analytics Lab',
     'lecturer': {
         'name': 'Markus Löcher',
         'department': 'Math',
         'status': 'Professor'
     },
     'semester': 2},
    {'title': 'Enterprise Architectures for Big Data',
     'lecturer': {
         'name': 'Roland M. Mueller',
         'department': 'Information Systems',
         'status': 'Professor'
     },
     'semester': 2},
    {'title': 'Business Process Integration Lab',
     'lecturer': {
         'name': 'Frank Habermann',
         'department': 'Information Systems',
         'status': 'Professor'
     },
     'semester': 2},
    {'title': 'IT-Security and Privacy',
     'lecturer': {
         'name': 'Dennis Uckel',
         'department': 'Information Systems',
         'status': 'External'
     },
     'semester': 2},
    {'title': 'Research Methods',
     'lecturer': {
         'name': 'Marcus Birkenkrahe',
         'department': 'Information Systems',
         'status': 'Professor'
     },
     'semester': 2},
]

In [7]:
pprint(courses)

[{'lecturer': {'department': 'Math',
               'name': 'Markus Löcher',
               'status': 'Professor'},
  'semester': 1,
  'title': 'Data Science'},
 {'lecturer': {'department': 'Information Systems',
               'name': 'Roland M. Mueller',
               'status': 'Professor'},
  'semester': 1,
  'title': 'Data Warehousing'},
 {'lecturer': {'department': 'Information Systems',
               'name': 'Frank Habermann',
               'status': 'Professor'},
  'semester': 1,
  'title': 'Business Process Management'},
 {'lecturer': {'department': 'Information Systems',
               'name': 'Sven Pohland',
               'status': 'Professor'},
  'semester': 1,
  'title': 'Stratigic Issues of IT'},
 {'lecturer': {'department': 'Math',
               'name': 'Markus Löcher',
               'status': 'Professor'},
  'semester': 2,
  'title': 'Text, Web and Social Media Analytics Lab'},
 {'lecturer': {'department': 'Information Systems',
               'name': 'Roland M. Mu

In [8]:
db.courses.insert_many(courses)

<pymongo.results.InsertManyResult at 0x214ee33f7c8>

In [9]:
# TODO: Print all courses
courses = db.courses.find()

for document in courses:
    pprint(document)

{'_id': ObjectId('5ed1698b4bc6f46a03137c1c'),
 'lecturer': {'department': 'Math',
              'name': 'Markus Löcher',
              'status': 'Professor'},
 'semester': 1,
 'title': 'Data Science'}
{'_id': ObjectId('5ed1698b4bc6f46a03137c1d'),
 'lecturer': {'department': 'Information Systems',
              'name': 'Roland M. Mueller',
              'status': 'Professor'},
 'semester': 1,
 'title': 'Data Warehousing'}
{'_id': ObjectId('5ed1698b4bc6f46a03137c1e'),
 'lecturer': {'department': 'Information Systems',
              'name': 'Frank Habermann',
              'status': 'Professor'},
 'semester': 1,
 'title': 'Business Process Management'}
{'_id': ObjectId('5ed1698b4bc6f46a03137c1f'),
 'lecturer': {'department': 'Information Systems',
              'name': 'Sven Pohland',
              'status': 'Professor'},
 'semester': 1,
 'title': 'Stratigic Issues of IT'}
{'_id': ObjectId('5ed1698b4bc6f46a03137c20'),
 'lecturer': {'department': 'Math',
              'name': 'Markus Löche

In [10]:
import json
my_json = '{"title": "Master Thesis", "semester": 3}'
another_course = json.loads(my_json)
another_course

{'title': 'Master Thesis', 'semester': 3}

In [11]:
# TODO: Store `another_course` as another course:
db.courses.insert_one(another_course)

<pymongo.results.InsertOneResult at 0x214ee325888>

find_one() and find()

find_one() returns the first match. find()returns all matches.

The query condition for find_one() and find() for an equality match on fields has the following form:

{ <field1>: <value1>, <field2>: <value2>, ... }

The following operation finds the first documents whose name field equals "Manhattan".

cursor = db.restaurants.find_one({"name": "Manhattan"})

In [12]:
# TODO: Find the course with the title "Data Science" 
# save the result in a varibale result
# and pprint the result.

a = db.courses.find({'title':'Data Science'})
for result in a:
    pprint(result)

{'_id': ObjectId('5ed1698b4bc6f46a03137c1c'),
 'lecturer': {'department': 'Math',
              'name': 'Markus Löcher',
              'status': 'Professor'},
 'semester': 1,
 'title': 'Data Science'}


In [13]:
print(result["_id"])
print(result["lecturer"]["name"])

5ed1698b4bc6f46a03137c1c
Markus Löcher


In [14]:
# TODO: Find the first course (one course) in the second semester
# and print it
firstcourse= db.courses.find_one({'semester':2})
for course in firstcourse:
    pprint(course)

'_id'
'title'
'lecturer'
'semester'


In [15]:
# TODO: Find all courses in the second semester
# and print the course titles
coursetitle = db.courses.find({'semester':2})
for titles in coursetitle: 
    pprint (titles['title'])

'Text, Web and Social Media Analytics Lab'
'Enterprise Architectures for Big Data'
'Business Process Integration Lab'
'IT-Security and Privacy'
'Research Methods'


In [16]:
# TODO: Find all courses in the second semester
# and print the lecturers names
coursetitle = db.courses.find({'semester': 2})
for lecturenames in coursetitle:
    pprint(lecturenames['lecturer']['name'])

'Markus Löcher'
'Roland M. Mueller'
'Frank Habermann'
'Dennis Uckel'
'Marcus Birkenkrahe'


Subelements

Sometimes documents contains embedded documents as its elements. To specify a condition on a field in these documents, use the dot notation. Dot notation requires quotes around the whole dotted field name. The following queries for documents whose grades array contains an embedded document with a field grade equal to "B".

cursor = db.restaurants.find({"grades.grade": "B"})

In [17]:
# TODO: Find all courses of Frank Habermann
# and print the title and the semester
Habercourses= db.courses.find({'lecturer.name': 'Frank Habermann'})
for s in Habercourses:
    pprint(s['title'])
    pprint(s['semester'])

'Business Process Management'
1
'Business Process Integration Lab'
2


Logical AND

You can specify a logical conjunction (AND) for a list of query conditions by separating the conditions with a comma in the conditions document.

cursor = db.restaurants.find({"cuisine": "Italian", "address.zipcode": "10075"})

In [18]:
# TODO: Find all courses from Frank Habermann in the second semester
# and print the title and the semester
Habercourses = db.courses.find({'lecturer.name': 'Frank Habermann', 'semester': 2})
for ts in Habercourses: 
    pprint(s['title'])
    pprint(s['semester'])

'Business Process Integration Lab'
2


Logical OR

You can specify a logical disjunction (OR) for a list of query conditions by using the $or query operator.

cursor = db.restaurants.find({"$or": [{"cuisine": "Italian"}, {"address.zipcode": "10075"}]})

In [19]:
# TODO: Find all courses from Frank Habermann or Markus Löcher
# and print the title and the semester
coursesHM = db.courses.find({'$or':[{'lecturer.name':'Frank Habermann'},{'lecturer.name': 'Marcus Löcher'}]})
for s in coursesHM: 
    pprint(s['title'])
    pprint(s['semester'])

'Business Process Management'
1
'Business Process Integration Lab'
2


Greater than, Less than

MongoDB provides operators to specify query conditions, such as comparison operators. Query conditions using operators generally have the following form:

{ <field1>: { <operator1>: <value1> } }

Greater Than Operator ($gt). Query for documents whose grades array contains an embedded document with a field score greater than 30.

cursor = db.restaurants.find({"grades.score": {"$gt": 30}})

Less Than Operator ($lt). Query for documents whose grades array contains an embedded document with a field score less than 10.

cursor = db.restaurants.find({"grades.score": {"$lt": 10}})

In [20]:
# TODO: Find all courses in semester greater than 1
# and print the title and the semester
secsem = db.courses.find({'semester':{'$gt':1}})
for course in secsem:
    pprint(course['title'])
    pprint(course['semester'])

'Text, Web and Social Media Analytics Lab'
2
'Enterprise Architectures for Big Data'
2
'Business Process Integration Lab'
2
'IT-Security and Privacy'
2
'Research Methods'
2
'Master Thesis'
3


Counting

count_documents() works like find() but returns the number of matched documents-

In [21]:
# TODO: How many courses are in the second semester?
# TODO: How many courses are in the second semester?
print(db.courses.count_documents({'semester':2}))

5


Downloading Nobel Prize Winners with an API and storing them in MongoDB¶

In [22]:
#Create local "nobel" database on the fly 
db = client["nobel"]
db.prizes.drop()
db.laureates.drop()
# API documented at https://nobelprize.readme.io/docs/prize 
for collection_name in ["prizes", "laureates"]:
    singular = collection_name[:-1] # the API uses singular
    response = requests.get( "http://api.nobelprize.org/v1/{}.json".format(singular)) 
    documents = response.json()[collection_name] 
    # Create collections on the fly 
    db[collection_name].insert_many(documents)

In [23]:
pprint(db.laureates.find_one())

{'_id': ObjectId('5ed1698b4bc6f46a03137eac'),
 'born': '1845-03-27',
 'bornCity': 'Lennep (now Remscheid)',
 'bornCountry': 'Prussia (now Germany)',
 'bornCountryCode': 'DE',
 'died': '1923-02-10',
 'diedCity': 'Munich',
 'diedCountry': 'Germany',
 'diedCountryCode': 'DE',
 'firstname': 'Wilhelm Conrad',
 'gender': 'male',
 'id': '1',
 'prizes': [{'affiliations': [{'city': 'Munich',
                               'country': 'Germany',
                               'name': 'Munich University'}],
             'category': 'physics',
             'motivation': '"in recognition of the extraordinary services he '
                           'has rendered by the discovery of the remarkable '
                           'rays subsequently named after him"',
             'share': '1',
             'year': '1901'}],
 'surname': 'Röntgen'}


In [24]:
# TODO: Print the first name of the first document
first = db.laureates.find({'id':'1'})
for firstname in first:
    pprint(firstname['firstname'])

'Wilhelm Conrad'


In [25]:
# How many female laureates exists?
fem = db.laureates.count_documents({'gender':'female'})
print(fem)

53


In [26]:
db.laureates.distinct("bornCountry", {"bornCountry": {"$regex": "Germany"}})

['Bavaria (now Germany)',
 'East Friesland (now Germany)',
 'Germany',
 'Germany (now France)',
 'Germany (now Poland)',
 'Germany (now Russia)',
 'Hesse-Kassel (now Germany)',
 'Mecklenburg (now Germany)',
 'Prussia (now Germany)',
 'Schleswig (now Germany)',
 'West Germany (now Germany)',
 'Württemberg (now Germany)']

In [27]:
# TODO: How many laureates are from Germany?
c = db.laureates.distinct("bornCountry", {"bornCountry": {"$regex": "Germany"}})
print(len(c))

12


In [28]:
# TODO: Find all physics nobel laureates that are from Germany
# print the year of the first prize, the first name, and surename
ger = db.laureates.find({'bornCountryCode':'DE', 'prizes.category':'physics'})
for info in ger:
    pprint(info['prizes'][0]['year'])
    pprint(info['firstname'])
    pprint(info['surname'])

'1901'
'Wilhelm Conrad'
'Röntgen'
'1909'
'Ferdinand'
'Braun'
'1914'
'Max'
'von Laue'
'1918'
'Max'
'Planck'
'1919'
'Johannes'
'Stark'
'1921'
'Albert'
'Einstein'
'1925'
'James'
'Franck'
'1925'
'Gustav'
'Hertz'
'1932'
'Werner'
'Heisenberg'
'1954'
'Walther'
'Bothe'
'1955'
'Polykarp'
'Kusch'
'1961'
'Rudolf'
'Mössbauer'
'1963'
'J. Hans D.'
'Jensen'
'1978'
'Arno'
'Penzias'
'1986'
'Ernst'
'Ruska'
'1986'
'Gerd'
'Binnig'
'1987'
'J. Georg'
'Bednorz'
'1988'
'Jack'
'Steinberger'
'1989'
'Hans G.'
'Dehmelt'
'1989'
'Wolfgang'
'Paul'
'1998'
'Horst L.'
'Störmer'
'2000'
'Herbert'
'Kroemer'
'2001'
'Wolfgang'
'Ketterle'
'2005'
'Theodor W.'
'Hänsch'
'2017'
'Rainer'
'Weiss'


In [29]:
# TODO: find and print the document for "Malala" (firstname)
malala = db.laureates.find({'firstname':'Malala'})
for name in malala:
    pprint(name)

{'_id': ObjectId('5ed1698b4bc6f46a0313821c'),
 'born': '1997-07-12',
 'bornCity': 'Mingora',
 'bornCountry': 'Pakistan',
 'bornCountryCode': 'PK',
 'died': '0000-00-00',
 'firstname': 'Malala',
 'gender': 'female',
 'id': '914',
 'prizes': [{'affiliations': [[]],
             'category': 'peace',
             'motivation': '"for their struggle against the suppression of '
                           'children and young people and for the right of all '
                           'children to education"',
             'share': '2',
             'year': '2014'}],
 'surname': 'Yousafzai'}
