This is the best reference site for learning pymongo // mongodb

https://pymongo.readthedocs.io/en/stable/tutorial.html

In [1]:
import pymongo.mongo_client
import pymongo.server_api
import os
from pprint import pprint
from datetime import datetime, timezone
import bson 

Use dotenv to load a .env file with the environment variables for jupter notebooks in VSCODE. Be sure to add .env in your gitignore file.

In [2]:
from dotenv import load_dotenv

load_dotenv()

True

Create the DB Connection

In [3]:
mongodb_uri=os.environ['MONGODB_URI']
myclient = pymongo.MongoClient(mongodb_uri, server_api=pymongo.server_api.ServerApi('1'))

In [4]:
try: 
  myclient.admin.command('ping')
  print("Pinged")
except Exception as e:
  print(e)

Pinged


#Create a DB - Databases and Collections aren't created until you insert a document into them.  They are created lazily.

In [5]:
jupdb = myclient["jup_db"]

In [6]:
try: 
  jupdb.admin.command('ping')
  print("Pinged")
except Exception as e:
  print(e)

'Collection' object is not callable. If you meant to call the 'command' method on a 'Collection' object it is failing because no such method exists.


In [7]:
#Create an db entry - a post // document
post = {
    "author": "Mike",
    "text": "My first blog post!",
    "tags": ["mongodb", "python", "pymongo"],
    "date": datetime.now(tz=timezone.utc),
}

Running the print of collections and databases shows that they are not yet created

In [8]:
#Print name of collections
my_collections = jupdb.list_collection_names()
for collection in my_collections:
   print(collection)

# print list of databases
print(myclient.list_database_names())

posts
profiles
['jup_db', 'mydatabase', 'sample_mflix', 'admin', 'local']


Once you insert the post into the db collection everything is created.

In [9]:
#Insert post // create db and collection// document into collection called posts in mydb
posts = jupdb.posts
post_id = posts.insert_one(post).inserted_id

In [10]:
#Print name of collections
print(list(jupdb.list_collection_names()))

# print list of databases
print(myclient.list_database_names())

['posts', 'profiles']
['jup_db', 'mydatabase', 'sample_mflix', 'admin', 'local']


In [11]:
#OR print with a for loop
for collection in jupdb.list_collection_names():
   print(collection)

posts
profiles


Inserting multiple posts / or documents -- records in an SQL db
If you run this multiple times it will keep reinserting the same record again.

In [12]:
new_posts = [
    {
        "author": "Mike",
        "text": "Another post!",
        "tags": ["bulk", "insert"],
        "date": datetime(2009, 11, 12, 11, 14),
    },
    {
        "author": "Eliot",
        "title": "MongoDB is fun",
        "text": "and pretty easy too!",
        "date": datetime(2009, 11, 10, 10, 45),
    },
]
result = posts.insert_many(new_posts)

Printing returns a cursor.  Use a list or a for loop to print each posts.

In [13]:
#If you don't use list it returns a cursor. You need to use a for loop to print each one or a list :)
pprint(list(jupdb.posts.find()))
print(jupdb.posts.count_documents({}))
print(jupdb.posts.count_documents({"author": "Eliot"}))

[{'_id': ObjectId('661f24b8a7ada2974d210f68'),
  'author': 'Mike',
  'date': datetime.datetime(2024, 4, 17, 1, 21, 13, 558000),
  'tags': ['mongodb', 'python', 'pymongo'],
  'text': 'My first blog post!'},
 {'_id': ObjectId('661f250ca7ada2974d210f69'),
  'author': 'Mike',
  'date': datetime.datetime(2009, 11, 12, 11, 14),
  'tags': ['bulk', 'insert'],
  'text': 'Another post!'},
 {'_id': ObjectId('66258cb347a32a614aff4e8a'),
  'author': 'Mike',
  'date': datetime.datetime(2024, 4, 21, 22, 1, 22, 785000),
  'tags': ['mongodb', 'python', 'pymongo'],
  'text': 'My first blog post!'},
 {'_id': ObjectId('66258cb447a32a614aff4e8b'),
  'author': 'Mike',
  'date': datetime.datetime(2009, 11, 12, 11, 14),
  'tags': ['bulk', 'insert'],
  'text': 'Another post!'},
 {'_id': ObjectId('66258cb447a32a614aff4e8c'),
  'author': 'Eliot',
  'date': datetime.datetime(2009, 11, 10, 10, 45),
  'text': 'and pretty easy too!',
  'title': 'MongoDB is fun'}]
5
1


In [14]:
#Delete documents
jupdb.posts.delete_many({"author": "Eliot",})

DeleteResult({'n': 1, 'electionId': ObjectId('7fffffff0000000000000104'), 'opTime': {'ts': Timestamp(1713736885, 89), 't': 260}, 'ok': 1.0, '$clusterTime': {'clusterTime': Timestamp(1713736885, 89), 'signature': {'hash': b'1\x01swL\xca\x8f}\xb424\xf3_\xe7I$\x03{O`', 'keyId': 7304398726519324674}}, 'operationTime': Timestamp(1713736885, 89)}, acknowledged=True)

In [15]:
pprint(list(jupdb.posts.find()))
print(jupdb.posts.count_documents({}))
print(jupdb.posts.count_documents({"author": "Eliot"}))

[{'_id': ObjectId('661f24b8a7ada2974d210f68'),
  'author': 'Mike',
  'date': datetime.datetime(2024, 4, 17, 1, 21, 13, 558000),
  'tags': ['mongodb', 'python', 'pymongo'],
  'text': 'My first blog post!'},
 {'_id': ObjectId('661f250ca7ada2974d210f69'),
  'author': 'Mike',
  'date': datetime.datetime(2009, 11, 12, 11, 14),
  'tags': ['bulk', 'insert'],
  'text': 'Another post!'},
 {'_id': ObjectId('66258cb347a32a614aff4e8a'),
  'author': 'Mike',
  'date': datetime.datetime(2024, 4, 21, 22, 1, 22, 785000),
  'tags': ['mongodb', 'python', 'pymongo'],
  'text': 'My first blog post!'},
 {'_id': ObjectId('66258cb447a32a614aff4e8b'),
  'author': 'Mike',
  'date': datetime.datetime(2009, 11, 12, 11, 14),
  'tags': ['bulk', 'insert'],
  'text': 'Another post!'}]
4
0


In [16]:
print(posts.count_documents({}))

4


query & sort
$lt
$gt

In [17]:
d = datetime(2009, 11, 12, 12)
for post in posts.find({"date": {"$gt": d}}).sort("author"):
    pprint(post)

{'_id': ObjectId('661f24b8a7ada2974d210f68'),
 'author': 'Mike',
 'date': datetime.datetime(2024, 4, 17, 1, 21, 13, 558000),
 'tags': ['mongodb', 'python', 'pymongo'],
 'text': 'My first blog post!'}
{'_id': ObjectId('66258cb347a32a614aff4e8a'),
 'author': 'Mike',
 'date': datetime.datetime(2024, 4, 21, 22, 1, 22, 785000),
 'tags': ['mongodb', 'python', 'pymongo'],
 'text': 'My first blog post!'}


In [18]:
for post in posts.find({"date": {"$gt": datetime(2009, 11, 12, 12)}}).sort("author"):
    pprint(post)

{'_id': ObjectId('661f24b8a7ada2974d210f68'),
 'author': 'Mike',
 'date': datetime.datetime(2024, 4, 17, 1, 21, 13, 558000),
 'tags': ['mongodb', 'python', 'pymongo'],
 'text': 'My first blog post!'}
{'_id': ObjectId('66258cb347a32a614aff4e8a'),
 'author': 'Mike',
 'date': datetime.datetime(2024, 4, 21, 22, 1, 22, 785000),
 'tags': ['mongodb', 'python', 'pymongo'],
 'text': 'My first blog post!'}


How to create an index

In [19]:
result = jupdb.profiles.create_index([("user_id", pymongo.ASCENDING)], unique=True)
sorted(list(jupdb.profiles.index_information()))

['_id_', 'user_id_1']

In [22]:
#INSERT PROFILES (This will fail if rerun - duplicate ids)
user_profiles = [{"user_id": 200, "name": "Luke"}, {"user_id": 201, "name": "Ziltoid"}]
result = jupdb.profiles.insert_many(user_profiles)

In [23]:
new_profile = {"user_id": 202, "name": "Drew"}
result = jupdb.profiles.insert_one(new_profile)  # This is fine.

In [24]:
#INSERT DUPLICATE KEY - ERROR
duplicate_profile = {"user_id": 212, "name": "Tommy"}
result = jupdb.profiles.insert_one(duplicate_profile)

DuplicateKeyError: E11000 duplicate key error collection: jup_db.profiles index: user_id_1 dup key: { user_id: 212 }, full error: {'index': 0, 'code': 11000, 'errmsg': 'E11000 duplicate key error collection: jup_db.profiles index: user_id_1 dup key: { user_id: 212 }', 'keyPattern': {'user_id': 1}, 'keyValue': {'user_id': 212}}

In [21]:
print(jupdb.posts.find_one({'_id': bson.ObjectId('661f24b8a7ada2974d210f68')}))

{'_id': ObjectId('661f24b8a7ada2974d210f68'), 'author': 'Mike', 'text': 'My first blog post!', 'tags': ['mongodb', 'python', 'pymongo'], 'date': datetime.datetime(2024, 4, 17, 1, 21, 13, 558000)}
