# MongoDB with Python

* See: https://www.w3schools.com/python/python_mongodb_query.asp
* MongoDB stores data in JSON-like BSON documents
* You can download and install a free local MongoDB database at https://www.mongodb.com.
* Or get started right away with a MongoDB cloud service at https://www.mongodb.com/cloud/atlas
* Python needs a MongoDB driver to access the MongoDB database
    -  The ```pymongo``` package is a native Python driver for MongoDB
    - ```python -m pip install pymongo``` or ```conda install -c anaconda pymongo```

* The following code requires you to start ```mongod``` first
* Note that the first cell deletes the database so that we get a clean start whenever this notebook is re-run from the top

In [1]:
import pymongo

# Connect to the MongoDB server on this machine (mongod must already be running)
my_client = pymongo.MongoClient("mongodb://localhost:27017/")

# Drop 'mydatabase' in case a previous run of this notebook left it behind,
# so that running the notebook again from the top starts clean
my_client.drop_database("mydatabase")

# Show which databases exist: only the built-in ones should be listed now
# (if 'mydatabase' shows here, then execute last cell)
existing_databases = my_client.list_database_names()
print(existing_databases)

['admin', 'config', 'local']


In [2]:
# Get a handle on the database named 'mydatabase'.
# The client opened in the first cell is reused here: it already points at
# mongodb://localhost:27017/, and opening a second MongoClient for the same
# server would only leave the first one's connections open and unused.
my_database = my_client["mydatabase"]

# See existing databases to check whether 'mydatabase' is there now.
# It is not: MongoDB only creates a database when data is first stored in it.
print(my_client.list_database_names())

['admin', 'config', 'local']


In [3]:
# Get a handle on the 'customers' collection of the database
my_collection = my_database["customers"]

# List the collections that currently exist in the database
collection_names = my_database.list_collection_names()
print(collection_names)

# List the databases that currently exist on the server
database_names = my_client.list_database_names()
print(database_names)

[]
['admin', 'config', 'local']


In [4]:
# Build a single document and store it in the collection
my_document = dict(name_first="Sally", name_last="Jones", age=37)
ior = my_collection.insert_one(my_document)  # insert_one() returns an InsertOneResult object

# The result object carries the _id field of the inserted document
new_id = ior.inserted_id
print(new_id)

# List the databases again: 'mydatabase' shows now that we inserted data into it
print(my_client.list_database_names())

5f3c3dcec36d2642359e8b80
['admin', 'config', 'local', 'mydatabase']


In [5]:
# Build several documents (first name, last name, age) and insert them in one call
new_customers = (("John", "Smith", 22), ("Jane", "McGill", 34))
my_documents = [
    {"name_first": first, "name_last": last, "age": age}
    for first, last, age in new_customers
]

# insert_many() returns an InsertManyResult object
imr = my_collection.insert_many(my_documents)
print(imr)

# inserted_ids is the list of the _id values of the inserted documents
print(imr.inserted_ids)

<pymongo.results.InsertManyResult object at 0x0000026CE1AD1088>
[ObjectId('5f3c3dd0c36d2642359e8b81'), ObjectId('5f3c3dd0c36d2642359e8b82')]


In [6]:
# find the first document in the collection and print it
first_document = my_collection.find_one() # returns the first document in collection
print(first_document)

{'_id': ObjectId('5f3c3dcec36d2642359e8b80'), 'name_first': 'Sally', 'name_last': 'Jones', 'age': 37}


In [7]:
# find all documents in collection and print each one in full
# the first parameter of find() is a query object and the second is a projection object;
# both are omitted here, so the defaults apply
all_documents = my_collection.find() # returns all documents in collection (similar to SELECT * in MySQL)
for document in all_documents:
    print(document)

{'_id': ObjectId('5f3c3dcec36d2642359e8b80'), 'name_first': 'Sally', 'name_last': 'Jones', 'age': 37}
{'_id': ObjectId('5f3c3dd0c36d2642359e8b81'), 'name_first': 'John', 'name_last': 'Smith', 'age': 22}
{'_id': ObjectId('5f3c3dd0c36d2642359e8b82'), 'name_first': 'Jane', 'name_last': 'McGill', 'age': 34}


In [8]:
# Return every document, but only some of its fields (projection only):
# the empty query matches all documents, and the projection keeps
# name_last and age, so there is no name_first in the results
match_everything = {}
selected_fields = {"name_last": 1, "age": 1}
all_documents_with_some_fields = my_collection.find(match_everything, selected_fields)
for document in all_documents_with_some_fields:
    print(document)

{'_id': ObjectId('5f3c3dcec36d2642359e8b80'), 'name_last': 'Jones', 'age': 37}
{'_id': ObjectId('5f3c3dd0c36d2642359e8b81'), 'name_last': 'Smith', 'age': 22}
{'_id': ObjectId('5f3c3dd0c36d2642359e8b82'), 'name_last': 'McGill', 'age': 34}


In [9]:
# Return some documents with some fields (filtering and projection):
# only documents whose age is 37 are matched, and only name_last and age
# (plus _id) are returned for them
age_is_37 = {"age": 37}
selected_fields = {"name_last": 1, "age": 1}
some_documents_with_some_fields = my_collection.find(age_is_37, selected_fields)
for document in some_documents_with_some_fields:
    print(document)

{'_id': ObjectId('5f3c3dcec36d2642359e8b80'), 'name_last': 'Jones', 'age': 37}


In [10]:
# A more complex query: use the $gt operator to match ages greater than 30
my_query = {"age": {"$gt": 30}}

# John Smith is skipped because his age is 22
complex_query_result = my_collection.find(filter=my_query)
for result in complex_query_result:
    print(result)

{'_id': ObjectId('5f3c3dcec36d2642359e8b80'), 'name_first': 'Sally', 'name_last': 'Jones', 'age': 37}
{'_id': ObjectId('5f3c3dd0c36d2642359e8b82'), 'name_first': 'Jane', 'name_last': 'McGill', 'age': 34}
