In [1]:
# install PyMongo with pip
!pip install PyMongo names

You should consider upgrading via the '/Users/whw/.pyenv/versions/3.8.7/bin/python3.8 -m pip install --upgrade pip' command.[0m


In [2]:
# import libs
from datetime import datetime
from pprint import pprint
import random
import time
import uuid

from pymongo import MongoClient
import names
import bson

In [3]:
# Connect to MongoDB; with no arguments MongoClient targets 127.0.0.1:27017.
client = MongoClient()

# Handle to the 'registration' database (attribute access is equivalent to client['registration']).
db = client.registration

# Collection handles. Obtaining a handle does not create the collection;
# it only comes into existence on its first insertion.
students_coll = db.students
classes_coll = db.classes

In [4]:
# Predefined constants used by the mock data generator
DEPARTMENTS = ['CS', 'EE', 'CE']  # department codes a major/minor is drawn from
AGES = [*range(17, 30)]           # candidate ages: 17 through 29 inclusive

def generate_student_data(_id=None):
    """Build one mock student document ready to be inserted.

    Args:
        _id: Value to use as the document's ``_id``. When ``None`` (the
            default) a fresh ``bson.ObjectId`` is generated instead.

    Returns:
        dict with the keys ``_id``, ``student_id``, ``name``, ``major``,
        ``minor``, ``age`` and ``graduated``.
    """
    return {
        # Compare against None explicitly so that a falsy caller-supplied id
        # (e.g. 0) is kept instead of being silently replaced.
        '_id': bson.ObjectId() if _id is None else _id,
        'student_id': str(uuid.uuid1()),
        'name': names.get_full_name(),
        'major': random.choice(DEPARTMENTS),
        # About 5% of students get a minor; it is drawn independently of the
        # major, so the two may coincide.
        'minor': None if random.random() > 0.05 else random.choice(DEPARTMENTS),
        'age': random.choice(AGES),
        'graduated': False
    }


### Basic CRUD operations

In [5]:
# Create
# --- insert a single document ---
print('# insertOne')
student_data = generate_student_data()
pprint(student_data)
# Keep the generated student_id: later cells look this student up by it.
student_id = student_data['student_id']
print(f"student_id: {student_id}")
insert_one_result = students_coll.insert_one(student_data)
pprint(insert_one_result.inserted_id)

# --- insert several documents with one call ---
print('\n# insertMany')
new_students = [generate_student_data() for _ in range(5)]
insert_many_result = students_coll.insert_many(new_students)
pprint(insert_many_result.inserted_ids)

# insertOne
{'_id': ObjectId('619716a3676ddf3503c4bf03'),
 'age': 22,
 'graduated': False,
 'major': 'CE',
 'minor': None,
 'name': 'James Carter',
 'student_id': 'd7ae9242-48e6-11ec-9460-784f435de619'}
student_id: d7ae9242-48e6-11ec-9460-784f435de619
ObjectId('619716a3676ddf3503c4bf03')

# insertMany
[ObjectId('619716a3676ddf3503c4bf04'),
 ObjectId('619716a4676ddf3503c4bf05'),
 ObjectId('619716a4676ddf3503c4bf06'),
 ObjectId('619716a4676ddf3503c4bf07'),
 ObjectId('619716a4676ddf3503c4bf08')]


In [6]:
# Read
# --- fetch a single document by its student_id ---
print('\n# findOne')
by_student_id = {'student_id': student_id}
pprint(students_coll.find_one(by_student_id))

# Same lookup with a projection: return only name and major, and hide _id.
name_and_major = {'_id': 0, 'name': 1, 'major': 1}
pprint(students_coll.find_one(by_student_id, name_and_major))

# --- fetch several documents ---
print('\n# findMany')
cs_projection = {'_id': 0, 'name': 1, 'major': 1, 'graduated': 1}
cs_cursor = students_coll.find({'major': 'CS'}, cs_projection).limit(2)
pprint(cs_cursor)        # find() hands back a Cursor object, not the documents
pprint(list(cs_cursor))  # materialize the cursor: at most 2 CS students



# findOne
{'_id': ObjectId('619716a3676ddf3503c4bf03'),
 'age': 22,
 'graduated': False,
 'major': 'CE',
 'minor': None,
 'name': 'James Carter',
 'student_id': 'd7ae9242-48e6-11ec-9460-784f435de619'}
{'major': 'CE', 'name': 'James Carter'}

# findMany
<pymongo.cursor.Cursor object at 0x1094537c0>
[{'graduated': True, 'major': 'CS', 'name': 'Elida Maldonado'},
 {'graduated': True, 'major': 'CS', 'name': 'Leslie Kuehn'}]


In [7]:
# Update
# --- modify a single document ---
print('\n# updateOne')
target_student = {'student_id': student_id}
# Rename the student and bump the age by 3 in one update document.
changes = {'$set': {'name': 'Matt Wang'}, '$inc': {'age': 3}}
update_one_result = students_coll.update_one(target_student, changes)
pprint(update_one_result.raw_result)
pprint(students_coll.find_one(target_student))

# --- modify every document matching a filter ---
print('\n# updateMany')
older_than_20 = {'age': {'$gt': 20}}
mark_graduated = {'$set': {'graduated': True}}
update_many_result = students_coll.update_many(older_than_20, mark_graduated)
pprint(update_many_result.raw_result)
# Show a small sample (3 documents) of the matching students.
pprint(list(students_coll.find(older_than_20).limit(3)))



# updateOne
{'n': 1, 'nModified': 1, 'ok': 1.0, 'updatedExisting': True}
{'_id': ObjectId('619716a3676ddf3503c4bf03'),
 'age': 25,
 'graduated': False,
 'major': 'CE',
 'minor': None,
 'name': 'Matt Wang',
 'student_id': 'd7ae9242-48e6-11ec-9460-784f435de619'}

# updateMany
{'n': 90942, 'nModified': 4, 'ok': 1.0, 'updatedExisting': True}
[{'_id': ObjectId('6196f7987dbe6980a1d13875'),
  'age': 27,
  'graduated': True,
  'major': 'EE',
  'minor': None,
  'name': 'Richard Broadwell',
  'student_id': '566f3d10-48d4-11ec-92d8-acde48001122'},
 {'_id': ObjectId('6196f7987dbe6980a1d13877'),
  'age': 24,
  'graduated': True,
  'major': 'EE',
  'minor': None,
  'name': 'Raymond Cox',
  'student_id': '5672aa4a-48d4-11ec-92d8-acde48001122'},
 {'_id': ObjectId('6196f7987dbe6980a1d13878'),
  'age': 23,
  'graduated': True,
  'major': 'CS',
  'minor': None,
  'name': 'Leslie Kuehn',
  'student_id': '5672b968-48d4-11ec-92d8-acde48001122'}]


In [8]:
# Delete
# --- remove a single document ---
print('\n# deleteOne')
by_name = {'name': 'Matt Wang'}
delete_one_result = students_coll.delete_one(by_name)
pprint(delete_one_result.raw_result)
# NOTE: 'name' is not a unique key. delete_one removes only one match, so this
# lookup can still return a document if several students share the name.
pprint(students_coll.find_one(by_name))

# --- remove every document matching a filter ---
print('\n# deleteMany')
aged_18_or_under = {'age': {'$lte': 18}}
delete_many_result = students_coll.delete_many(aged_18_or_under)
pprint(delete_many_result.raw_result)
pprint(students_coll.find_one(aged_18_or_under))



# deleteOne
{'n': 1, 'ok': 1.0}
{'_id': ObjectId('619716a3676ddf3503c4bf03'),
 'age': 25,
 'graduated': True,
 'major': 'CE',
 'minor': None,
 'name': 'Matt Wang',
 'student_id': 'd7ae9242-48e6-11ec-9460-784f435de619'}

# deleteMany
{'n': 1, 'ok': 1.0}
None


### `bulkWrite()` in PyMongo

[PyMongo bulk write operations documentation](https://pymongo.readthedocs.io/en/stable/examples/bulk.html)

In [9]:
# bulkwrite
from pymongo import InsertOne, UpdateOne, UpdateMany, DeleteOne, DeleteMany # note that there's no InsertMany
from pymongo.errors import BulkWriteError

# The operations are sent to the server in a single bulk_write call.
ops = [
    InsertOne({'name': 'whatever1'}),
    InsertOne({'name': 'whatever2', 'age': -4}),
    UpdateOne({'name': 'whatever2'}, {'$set': {'age': 100}}),
    # Anchored regex so only the 'whatever…' demo documents are removed.
    # The previous pattern "what*" was a glob, not a regex: as a regex it
    # matches any name that merely contains "wha".
    DeleteMany({'name': {'$regex': r'^whatever'}})
]

try:
    bulk_result = students_coll.bulk_write(ops)
except BulkWriteError as bwe:
    pprint(bwe.details)
else:
    # Show the per-operation counts, like the raw_result output of the CRUD cells.
    pprint(bulk_result.bulk_api_result)

### Brief Comparison with and without MongoDB Indexes

In [10]:
# create bulk data in a thread
import threading

def create_bulk_data(batches=100, batch_size=10**4):
    """Insert mock students into ``students_coll`` in batches.

    Args:
        batches: Number of ``insert_many`` calls to issue (default 100).
        batch_size: Number of generated documents per call (default 10**4).
            With the defaults, 100 * 10**4 documents are inserted in total.
    """
    if batch_size < 1:
        # Nothing to insert; avoid calling insert_many with an empty list.
        return
    for _ in range(batches):
        data = [generate_student_data() for _ in range(batch_size)]
        students_coll.insert_many(data)

# Run the inserts in a background thread so the notebook stays usable.
# The thread is started but not joined here, so the cells that follow can
# execute while inserts are still in progress.
t = threading.Thread(target=create_bulk_data)
t.start()

In [11]:
# without index
# time.perf_counter() is a monotonic, high-resolution clock intended for
# measuring elapsed time; wall-clock datetime.now() can jump if the system
# clock is adjusted.
start = time.perf_counter()
print(students_coll.find_one({'student_id': student_id}))
print(f"time lapsed: {time.perf_counter() - start:.6f}\n")


{'_id': ObjectId('619716a3676ddf3503c4bf03'), 'student_id': 'd7ae9242-48e6-11ec-9460-784f435de619', 'name': 'Matt Wang', 'major': 'CE', 'minor': None, 'age': 25, 'graduated': True}
time lapsed: 0.117956



In [12]:
# with index
# create_index returns the name of the index; keep it so the same index can be
# dropped below without repeating the key specification.
index_name = students_coll.create_index([ ("student_id", 1) ])
# Pause kept from the original cell; presumably lets the server settle after
# the index build — TODO confirm it is needed.
time.sleep(3)

# time.perf_counter() is a monotonic, high-resolution clock intended for
# measuring elapsed time, unlike wall-clock datetime.now().
start = time.perf_counter()
print(students_coll.find_one({'student_id': student_id}))
print(f"time lapsed: {time.perf_counter() - start:.6f}\n")

# delete index
students_coll.drop_index(index_name)

{'_id': ObjectId('619716a3676ddf3503c4bf03'), 'student_id': 'd7ae9242-48e6-11ec-9460-784f435de619', 'name': 'Matt Wang', 'major': 'CE', 'minor': None, 'age': 25, 'graduated': True}
time lapsed: 0.014705



### Clean up

In [13]:
# clean up db data
# client.drop_database('registration')

In [14]:
# clean up dependencies
# !pip uninstall PyMongo names -y