In [1]:
import pymongo # this gives a security warning in some cases

In [8]:
db_name = 'my_mongo_db'
client = pymongo.MongoClient('mongodb://mongo:27017/') # connect to DS
print(client.list_database_names()) # show names
if db_name in client.list_database_names(): # if data_science already exists, drop
    client.drop_database(db_name)

['admin', 'config', 'local']


In [9]:
db = client.my_mongo_db
db

Database(MongoClient(host=['mongo:27017'], document_class=dict, tz_aware=False, connect=True), 'my_mongo_db')

In [10]:
import datetime

hw =   {"author": "Etienne",
        "hw"    : "Word Count",
        "cmd"   : "line.lower()",
        "tags"  : ["MapReduce", "word", "count", "normalization"],
        "date"  : datetime.datetime.utcnow()}
hw

{'author': 'Etienne',
 'cmd': 'line.lower()',
 'date': datetime.datetime(2018, 11, 20, 16, 10, 25, 403901),
 'hw': 'Word Count',
 'tags': ['MapReduce', 'word', 'count', 'normalization']}

In [11]:
# create a collection
hws = db.hws
# delete everything from hws
db.hws.delete_many({})
# insert one and get ID
hw_id = hws.insert_one(hw).inserted_id
hw_id

ObjectId('5bf431f456eaa9001e666645')

In [14]:
# get list of collections 
db.list_collection_names()

['hws']

In [15]:
# find any
hws.find_one()

{'_id': ObjectId('5bf431f456eaa9001e666645'),
 'author': 'Etienne',
 'cmd': 'line.lower()',
 'date': datetime.datetime(2018, 11, 20, 16, 10, 25, 403000),
 'hw': 'Word Count',
 'tags': ['MapReduce', 'word', 'count', 'normalization']}

In [16]:
# these two lines find the same doc
r = hws.find_one({'_id': hw_id})
print(r)

{'hw': 'Word Count', 'date': datetime.datetime(2018, 11, 20, 16, 10, 25, 403000), 'author': 'Etienne', 'tags': ['MapReduce', 'word', 'count', 'normalization'], '_id': ObjectId('5bf431f456eaa9001e666645'), 'cmd': 'line.lower()'}


In [17]:
print(hws.find_one({'tags': 'MapReduce'}))

{'hw': 'Word Count', 'date': datetime.datetime(2018, 11, 20, 16, 10, 25, 403000), 'author': 'Etienne', 'tags': ['MapReduce', 'word', 'count', 'normalization'], '_id': ObjectId('5bf431f456eaa9001e666645'), 'cmd': 'line.lower()'}


In [18]:
# create a list of dictionaries
student_weekend_hws = [ {"author": "Simone",
        "hw"    : "Word Count",
        "cmd"   : "line.lower()",
        "tags"  : ["MapReduce", "word", "count", "normalization"],
        "date"  : datetime.datetime.utcnow()},
        
       {"author": "Joseph",
        "hw"     : "MongoDB",
        "hrs"    : 1,
        "cmd"    : [ "student_weekend_hws = []", "hws.insert_many(student_weekend_hws)" ],
        "tags"   : ["big", "data", "nosql"],
        "date"   : datetime.datetime.utcnow()} ]

In [19]:
hws.insert_many(student_weekend_hws).inserted_ids

[ObjectId('5bf4321256eaa9001e666646'), ObjectId('5bf4321256eaa9001e666647')]

In [24]:
for hw in hws.find():
    print(hw['author'], hw['date'])
    
print(hws.count_documents({})) # notice the empty filter

Etienne 2018-11-20 16:10:25.403000
Simone 2018-11-20 16:10:56.541000
Joseph 2018-11-20 16:10:56.541000
3


In [25]:
# you need to change the following date time for proper querying (depending on
# when the above was run)
d = datetime.datetime(2018, 2, 19, 11, 49, 51, 0)
for h in hws.find({"date": {"$gt": d}}).sort("author"):
    print(h)

{'hw': 'Word Count', 'date': datetime.datetime(2018, 11, 20, 16, 10, 25, 403000), 'author': 'Etienne', 'tags': ['MapReduce', 'word', 'count', 'normalization'], '_id': ObjectId('5bf431f456eaa9001e666645'), 'cmd': 'line.lower()'}
{'hw': 'MongoDB', 'date': datetime.datetime(2018, 11, 20, 16, 10, 56, 541000), 'author': 'Joseph', 'hrs': 1, 'tags': ['big', 'data', 'nosql'], '_id': ObjectId('5bf4321256eaa9001e666647'), 'cmd': ['student_weekend_hws = []', 'hws.insert_many(student_weekend_hws)']}
{'hw': 'Word Count', 'date': datetime.datetime(2018, 11, 20, 16, 10, 56, 541000), 'author': 'Simone', 'tags': ['MapReduce', 'word', 'count', 'normalization'], '_id': ObjectId('5bf4321256eaa9001e666646'), 'cmd': 'line.lower()'}


In [27]:
# can use remove in the same way
print(db.hws.count_documents({}))
id = db.hws.find_one()['_id']
db.hws.delete_one({'_id': id})
print(db.hws.count_documents({}))

2
1


In [28]:
# create index information
db.profiles.delete_many({})
result = db.profiles.create_index([('user_id', pymongo.ASCENDING)], unique=True)
list(db.profiles.index_information())

['user_id_1', '_id_']

In [29]:
user_profiles = [ {'user_id': 1, 'name': 'Brandon'},
                  {'user_id': 2, 'name': 'Luke'},
                  {'user_id': 3, 'name': 'Ivan'}]
result = db.profiles.insert_many(user_profiles)

In [31]:
db.profiles.count_documents({})

3

In [32]:
new_profile = {'user_id': 4, 'name': 'Chris'}
duplicate_profile = {'user_id': 1, 'name': 'Michael'}
result = db.profiles.insert_one(new_profile)  # This is fine.
result = db.profiles.insert_one(duplicate_profile) # this creates error

DuplicateKeyError: E11000 duplicate key error collection: my_mongo_db.profiles index: user_id_1 dup key: { : 1 }

In [33]:
# see all user profiles
for p in db.profiles.find():
    print(p)

{'_id': ObjectId('5bf4329856eaa9001e666648'), 'user_id': 1, 'name': 'Brandon'}
{'_id': ObjectId('5bf4329856eaa9001e666649'), 'user_id': 2, 'name': 'Luke'}
{'_id': ObjectId('5bf4329856eaa9001e66664a'), 'user_id': 3, 'name': 'Ivan'}
{'_id': ObjectId('5bf432a356eaa9001e66664b'), 'user_id': 4, 'name': 'Chris'}


In [34]:
print([ x for x in db.profiles.find({"name": "Ivan"}) ])

[{'_id': ObjectId('5bf4329856eaa9001e66664a'), 'user_id': 3, 'name': 'Ivan'}]
