In [1]:
import pymongo # this gives a security warning in some cases

In [3]:
client = pymongo.MongoClient('mongodb://mongo:27017/') # connect to DS
client.database_names() # show names
if 'big_data_processing' in client.database_names(): # if data_science already exists, drop
    client.drop_database('big_data_processing')

In [4]:
db = client.big_data_processing
db

Database(MongoClient(host=['mongo:27017'], document_class=dict, tz_aware=False, connect=True), 'big_data_processing')

In [6]:
import datetime

hw =   {"author": "Etienne",
        "hw"    : "Word Count",
        "cmd"   : "line.lower()",
        "tags"  : ["MapReduce", "word", "count", "normalization"],
        "date"  : datetime.datetime.utcnow()}
hw

{'author': 'Etienne',
 'cmd': 'line.lower()',
 'date': datetime.datetime(2018, 2, 19, 11, 48, 50, 764581),
 'hw': 'Word Count',
 'tags': ['MapReduce', 'word', 'count', 'normalization']}

In [9]:
# create a collection
hws = db.hws
# delete everything from hws
db.hws.delete_many({})
# insert one and get ID
hw_id = hws.insert_one(hw).inserted_id
hw_id

ObjectId('5a8ab9a67a9cc3002974c9f1')

In [10]:
# get list of collections 
db.collection_names(include_system_collections=False)

['hws']

In [11]:
# find any
hws.find_one()

{'_id': ObjectId('5a8ab9a67a9cc3002974c9f1'),
 'author': 'Etienne',
 'cmd': 'line.lower()',
 'date': datetime.datetime(2018, 2, 19, 11, 48, 50, 764000),
 'hw': 'Word Count',
 'tags': ['MapReduce', 'word', 'count', 'normalization']}

In [14]:
# these two lines find the same doc
r = hws.find_one({'_id': hw_id})
print(r)

{'_id': ObjectId('5a8ab9a67a9cc3002974c9f1'), 'author': 'Etienne', 'tags': ['MapReduce', 'word', 'count', 'normalization'], 'date': datetime.datetime(2018, 2, 19, 11, 48, 50, 764000), 'hw': 'Word Count', 'cmd': 'line.lower()'}


In [15]:
print(hws.find_one({'tags': 'MapReduce'}))

{'_id': ObjectId('5a8ab9a67a9cc3002974c9f1'), 'author': 'Etienne', 'tags': ['MapReduce', 'word', 'count', 'normalization'], 'date': datetime.datetime(2018, 2, 19, 11, 48, 50, 764000), 'hw': 'Word Count', 'cmd': 'line.lower()'}


In [16]:
# create a list of dictionaries
student_weekend_hws = [ {"author": "Simone",
        "hw"    : "Word Count",
        "cmd"   : "line.lower()",
        "tags"  : ["MapReduce", "word", "count", "normalization"],
        "date"  : datetime.datetime.utcnow()},
        
       {"author": "Joseph",
        "hw"     : "MongoDB",
        "hrs"    : 1,
        "cmd"    : [ "student_weekend_hws = []", "hws.insert_many(student_weekend_hws)" ],
        "tags"   : ["big", "data", "nosql"],
        "date"   : datetime.datetime.utcnow()} ]

In [17]:
hws.insert_many(student_weekend_hws).inserted_ids

[ObjectId('5a8aba047a9cc3002974c9f2'), ObjectId('5a8aba047a9cc3002974c9f3')]

In [18]:
for hw in hws.find():
    print(hw['author'], hw['date'])
    
print(hws.count())

Etienne 2018-02-19 11:48:50.764000
Simone 2018-02-19 11:50:27.429000
Joseph 2018-02-19 11:50:27.429000
3


In [20]:
# you need to change the following date time for proper querying (depending on
# when the above was run)
d = datetime.datetime(2018, 2, 19, 11, 49, 51, 0)
for h in hws.find({"date": {"$gt": d}}).sort("author"):
    print(h)

{'_id': ObjectId('5a8aba047a9cc3002974c9f3'), 'hrs': 1, 'author': 'Joseph', 'tags': ['big', 'data', 'nosql'], 'date': datetime.datetime(2018, 2, 19, 11, 50, 27, 429000), 'hw': 'MongoDB', 'cmd': ['student_weekend_hws = []', 'hws.insert_many(student_weekend_hws)']}
{'_id': ObjectId('5a8aba047a9cc3002974c9f2'), 'author': 'Simone', 'tags': ['MapReduce', 'word', 'count', 'normalization'], 'date': datetime.datetime(2018, 2, 19, 11, 50, 27, 429000), 'hw': 'Word Count', 'cmd': 'line.lower()'}


In [22]:
# can use remove in the same way
print(db.hws.count())
id = db.hws.find_one()['_id']
db.hws.delete_one({'_id': id})
print(db.hws.count())

2
1


In [24]:
# create index information
db.profiles.delete_many({})
result = db.profiles.create_index([('user_id', pymongo.ASCENDING)], unique=True)
list(db.profiles.index_information())

['_id_', 'user_id_1']

In [25]:
user_profiles = [ {'user_id': 1, 'name': 'Brandon'},
                  {'user_id': 2, 'name': 'Luke'},
                  {'user_id': 3, 'name': 'Ivan'}]
result = db.profiles.insert_many(user_profiles)

In [26]:
db.profiles.count()

3

In [27]:
new_profile = {'user_id': 4, 'name': 'Chris'}
duplicate_profile = {'user_id': 1, 'name': 'Michael'}
result = db.profiles.insert_one(new_profile)  # This is fine.
result = db.profiles.insert_one(duplicate_profile) # this creates error

DuplicateKeyError: E11000 duplicate key error collection: big_data_processing.profiles index: user_id_1 dup key: { : 1 }

In [28]:
# see all user profiles
for p in db.profiles.find():
    print(p)

{'_id': ObjectId('5a8aba9d7a9cc3002974c9f4'), 'user_id': 1, 'name': 'Brandon'}
{'_id': ObjectId('5a8aba9d7a9cc3002974c9f5'), 'user_id': 2, 'name': 'Luke'}
{'_id': ObjectId('5a8aba9d7a9cc3002974c9f6'), 'user_id': 3, 'name': 'Ivan'}
{'_id': ObjectId('5a8abac47a9cc3002974c9f7'), 'user_id': 4, 'name': 'Chris'}


In [29]:
print([ x for x in db.profiles.find({"name": "Ivan"}) ])

[{'_id': ObjectId('5a8aba9d7a9cc3002974c9f6'), 'user_id': 3, 'name': 'Ivan'}]
