In [1]:
import pymongo # this gives a security warning in some cases

In [2]:
# Name of the scratch database this notebook creates and works in.
db_name = 'my_mongo_db'
client = pymongo.MongoClient('mongodb://mongo:27017/')  # connect to the MongoDB server
# Ask the server for the database names once and reuse the answer for both
# the display and the existence check.
existing_db_names = client.list_database_names()
print(existing_db_names)  # show names
# Start from a clean slate: drop the scratch database if an earlier run left it behind.
if db_name in existing_db_names:
    client.drop_database(db_name)

['admin', 'config', 'local']


In [3]:
# Look the database up through db_name (instead of a hard-coded attribute) so
# the database used here is always the one that was checked/dropped above.
db = client[db_name]
db

Database(MongoClient(host=['mongo:27017'], document_class=dict, tz_aware=False, connect=True), 'my_mongo_db')

In [4]:
import datetime

# A sample homework document: plain strings, a list of tags and a timestamp.
hw = {
    "author": "Etienne",
    "hw": "Word Count",
    "cmd": "line.lower()",
    "tags": ["MapReduce", "word", "count", "normalization"],
    # datetime.utcnow() is deprecated since Python 3.12 and returns a naive
    # value; an explicit timezone-aware UTC timestamp is unambiguous.
    "date": datetime.datetime.now(datetime.timezone.utc),
}
hw

{'author': 'Etienne',
 'hw': 'Word Count',
 'cmd': 'line.lower()',
 'tags': ['MapReduce', 'word', 'count', 'normalization'],
 'date': datetime.datetime(2022, 11, 10, 10, 5, 52, 669465)}

In [5]:
# Handle to the "hws" collection of the working database.
hws = db.hws
# Empty the collection so that re-running this cell does not pile up documents.
hws.delete_many({})
# Store the single homework document and keep the _id it was stored under.
insert_result = hws.insert_one(hw)
hw_id = insert_result.inserted_id
hw_id

ObjectId('636ccd000c3e64e237e4061b')

In [6]:
# List the names of the collections that currently exist in the working database.
db.list_collection_names()

['hws']

In [7]:
# With no filter, find_one() returns a single document from the collection.
hws.find_one()

{'_id': ObjectId('636ccd000c3e64e237e4061b'),
 'author': 'Etienne',
 'hw': 'Word Count',
 'cmd': 'line.lower()',
 'tags': ['MapReduce', 'word', 'count', 'normalization'],
 'date': datetime.datetime(2022, 11, 10, 10, 5, 52, 669000)}

In [8]:
# Look the document up by the _id returned from insert_one(); the tag query in
# the following cell finds the same document.
r = hws.find_one( {'_id': hw_id} )
print(r)

{'_id': ObjectId('636ccd000c3e64e237e4061b'), 'author': 'Etienne', 'hw': 'Word Count', 'cmd': 'line.lower()', 'tags': ['MapReduce', 'word', 'count', 'normalization'], 'date': datetime.datetime(2022, 11, 10, 10, 5, 52, 669000)}


In [9]:
# 'tags' holds a list in the stored documents; filtering on a single value
# matches a document whose list contains that value.
print(hws.find_one({'tags': 'MapReduce'}))

{'_id': ObjectId('636ccd000c3e64e237e4061b'), 'author': 'Etienne', 'hw': 'Word Count', 'cmd': 'line.lower()', 'tags': ['MapReduce', 'word', 'count', 'normalization'], 'date': datetime.datetime(2022, 11, 10, 10, 5, 52, 669000)}


In [10]:
# Two more homework documents to insert in bulk. They deliberately do not share
# a schema: the second one has an extra "hrs" field and a list-valued "cmd".
# Timestamps use timezone-aware UTC because datetime.utcnow() is deprecated
# since Python 3.12.
student_weekend_hws = [
    {
        "author": "Simone",
        "hw": "Word Count",
        "cmd": "line.lower()",
        "tags": ["MapReduce", "word", "count", "normalization"],
        "date": datetime.datetime.now(datetime.timezone.utc),
    },
    {
        "author": "Joseph",
        "hw": "MongoDB",
        "hrs": 1,
        "cmd": ["student_weekend_hws = []", "hws.insert_many(student_weekend_hws)"],
        "tags": ["big", "data", "nosql"],
        "date": datetime.datetime.now(datetime.timezone.utc),
    },
]

In [11]:
# Insert both documents in one call and display the _ids they were stored under.
hws.insert_many(student_weekend_hws).inserted_ids

[ObjectId('636ccd000c3e64e237e4061c'), ObjectId('636ccd000c3e64e237e4061d')]

In [12]:
# The loop variable is `doc`, not `hw`: reusing `hw` would overwrite the
# homework document defined earlier in the notebook, and re-running the insert
# cell afterwards would then store the wrong document.
for doc in hws.find():
    print(doc['author'], doc['date'])

print(hws.count_documents({}))  # notice the empty filter

Etienne 2022-11-10 10:05:52.669000
Simone 2022-11-10 10:05:52.753000
Joseph 2022-11-10 10:05:52.753000
3


In [13]:
# The cutoff is hard-coded: adjust it relative to when the insert cells above
# were run, otherwise the "$gt" filter may match everything or nothing.
d = datetime.datetime(year=2021, month=7, day=26, hour=12, minute=48, second=26)
# Documents whose "date" is strictly later than the cutoff, ordered by author.
date_filter = {"date": {"$gt": d}}
for h in hws.find(date_filter).sort("author"):
    print(h)

{'_id': ObjectId('636ccd000c3e64e237e4061b'), 'author': 'Etienne', 'hw': 'Word Count', 'cmd': 'line.lower()', 'tags': ['MapReduce', 'word', 'count', 'normalization'], 'date': datetime.datetime(2022, 11, 10, 10, 5, 52, 669000)}
{'_id': ObjectId('636ccd000c3e64e237e4061d'), 'author': 'Joseph', 'hw': 'MongoDB', 'hrs': 1, 'cmd': ['student_weekend_hws = []', 'hws.insert_many(student_weekend_hws)'], 'tags': ['big', 'data', 'nosql'], 'date': datetime.datetime(2022, 11, 10, 10, 5, 52, 753000)}
{'_id': ObjectId('636ccd000c3e64e237e4061c'), 'author': 'Simone', 'hw': 'Word Count', 'cmd': 'line.lower()', 'tags': ['MapReduce', 'word', 'count', 'normalization'], 'date': datetime.datetime(2022, 11, 10, 10, 5, 52, 753000)}


In [14]:
# Delete a single document by its _id and show the count before and after.
# (The legacy Collection.remove() no longer exists in PyMongo 4; delete_one /
# delete_many are the supported calls.)
print(db.hws.count_documents({}))
# Named `first_id` rather than `id` so the builtin id() is not shadowed for the
# rest of the kernel session.
first_id = db.hws.find_one()['_id']
db.hws.delete_one({'_id': first_id})
print(db.hws.count_documents({}))

3
2


In [15]:
# Start from an empty "profiles" collection so the cell can be re-run.
db.profiles.delete_many({})
# Ascending index on user_id, declared unique.
index_spec = [('user_id', pymongo.ASCENDING)]
result = db.profiles.create_index(index_spec, unique=True)
# Display the names of the indexes that now exist on the collection.
[index_name for index_name in db.profiles.index_information()]

['_id_', 'user_id_1']

In [16]:
# (user_id, name) pairs for the initial profiles; every user_id is distinct.
profile_rows = [(1, 'Brandon'), (2, 'Luke'), (3, 'Ivan')]
user_profiles = [{'user_id': uid, 'name': name} for uid, name in profile_rows]
result = db.profiles.insert_many(user_profiles)

In [17]:
# Count every document in "profiles" (the empty filter matches all of them).
db.profiles.count_documents({})

3

In [18]:
new_profile = {'user_id': 4, 'name': 'Chris'}
duplicate_profile = {'user_id': 1, 'name': 'Michael'}
result = db.profiles.insert_one(new_profile)  # This is fine.
# user_id 1 is already taken, so the unique index rejects this insert. The
# error is the point of the demonstration, so it is caught and shown instead of
# aborting a "Restart & Run All".
try:
    result = db.profiles.insert_one(duplicate_profile)
except pymongo.errors.DuplicateKeyError as err:
    print("DuplicateKeyError:", err)

DuplicateKeyError: E11000 duplicate key error collection: my_mongo_db.profiles index: user_id_1 dup key: { user_id: 1 }, full error: {'index': 0, 'code': 11000, 'errmsg': 'E11000 duplicate key error collection: my_mongo_db.profiles index: user_id_1 dup key: { user_id: 1 }', 'keyPattern': {'user_id': 1}, 'keyValue': {'user_id': 1}}

In [19]:
# Print every stored profile, one document per line; the rejected duplicate
# (user_id 1, 'Michael') is not among them.
for p in db.profiles.find():
    print(p)

{'_id': ObjectId('636ccd000c3e64e237e4061e'), 'user_id': 1, 'name': 'Brandon'}
{'_id': ObjectId('636ccd000c3e64e237e4061f'), 'user_id': 2, 'name': 'Luke'}
{'_id': ObjectId('636ccd000c3e64e237e40620'), 'user_id': 3, 'name': 'Ivan'}
{'_id': ObjectId('636ccd000c3e64e237e40621'), 'user_id': 4, 'name': 'Chris'}


In [20]:
# Materialise the cursor into a list to print all profiles named "Ivan" at once.
print(list(db.profiles.find({"name": "Ivan"})))

[{'_id': ObjectId('636ccd000c3e64e237e40620'), 'user_id': 3, 'name': 'Ivan'}]
