In [2]:
## Check whether MongoDB is installed and report its version.
## The leading "!" hands the rest of the line to the system shell.

!mongod --version

db version v5.0.6
Build Info: {
    "version": "5.0.6",
    "gitVersion": "212a8dbb47f07427dae194a9c75baec1d81d9259",
    "modules": [],
    "allocator": "system",
    "environment": {
        "distarch": "x86_64",
        "target_arch": "x86_64"
    }
}


In [4]:
## Install PyMongo

!pip install pymongo

Collecting pymongo
  Using cached pymongo-4.7.2-cp39-cp39-macosx_10_9_x86_64.whl (486 kB)
Collecting dnspython<3.0.0,>=1.16.0
  Downloading dnspython-2.6.1-py3-none-any.whl (307 kB)
[K     |████████████████████████████████| 307 kB 81 kB/s eta 0:00:01
[?25hInstalling collected packages: dnspython, pymongo
Successfully installed dnspython-2.6.1 pymongo-4.7.2


In [4]:
## Connecting Python to MongoDB: Basic Connection

from pymongo import MongoClient
from pymongo.errors import PyMongoError

# Create a MongoClient for the mongod instance expected on localhost:27017.
client = MongoClient("mongodb://localhost:27017/")

# server_info() issues a real command, which forces a connection attempt.
try:
    server_info = client.server_info()
    print("Connected to MongoDB version:", server_info['version'])
except PyMongoError as e:
    # Only driver/server errors (connection refused, server selection timeout, ...)
    # are reported here; unrelated programming errors are not swallowed.
    print("Failed to connect to MongoDB:", e)

Failed to connect to MongoDB: localhost:27017: [Errno 61] Connection refused (configured timeouts: socketTimeoutMS: 20000.0ms, connectTimeoutMS: 20000.0ms), Timeout: 30s, Topology Description: <TopologyDescription id: 6644d2c7a2bed58b497cb3d4, topology_type: Unknown, servers: [<ServerDescription ('localhost', 27017) server_type: Unknown, rtt: None, error=AutoReconnect('localhost:27017: [Errno 61] Connection refused (configured timeouts: socketTimeoutMS: 20000.0ms, connectTimeoutMS: 20000.0ms)')>]>


Note for the cell below:

In MongoDB, you don't need to explicitly create a database or a collection before using them. When you refer to a database and a collection by name, MongoDB creates them lazily: if they do not already exist, they are created automatically the first time you insert data into the collection.

In [8]:
## Access Database and Collection
## Both handles are looked up by name on the existing client.

db = client.get_database("sampledb")
collection = db.get_collection("users")
print("Accessed Database and Collections")

Accessed Database and Collections


In [9]:
## Simple Insert
## Insert one document and print the _id reported for it.

user_data = dict(name="John Doe", email="john@example.com")
result = collection.insert_one(user_data)
inserted_id = result.inserted_id
print("Inserted User ID:", inserted_id)

ServerSelectionTimeoutError: localhost:27017: [Errno 61] Connection refused (configured timeouts: socketTimeoutMS: 20000.0ms, connectTimeoutMS: 20000.0ms), Timeout: 30s, Topology Description: <TopologyDescription id: 6644bf9620b6ccb90ff1a3f5, topology_type: Unknown, servers: [<ServerDescription ('localhost', 27017) server_type: Unknown, rtt: None, error=AutoReconnect('localhost:27017: [Errno 61] Connection refused (configured timeouts: socketTimeoutMS: 20000.0ms, connectTimeoutMS: 20000.0ms)')>]>

In [None]:
## Insert with custom primary key

user_data = {
    "_id": "custom_id_12345",  # Your custom primary key
    "name": "Jane Doe",
    "email": "jane@example.com"
}
# NOTE: _id values must be unique within a collection, so re-running this cell
# against the same data raises pymongo.errors.DuplicateKeyError.
result = collection.insert_one(user_data)
print("Inserted User ID:", result.inserted_id)

In [None]:
## Inserting Multiple Documents
## Builds the list of user documents, inserts them, and prints the resulting ids.

users_data = [
    {"name": name, "email": email}
    for name, email in (
        ("Alice", "alice@example.com"),
        ("Bob", "bob@example.com"),
    )
]
results = collection.insert_many(users_data)
print("Inserted IDs:", results.inserted_ids)

In [None]:
## Finding a Single Document
## Looks up one document by exact name and prints whatever find_one returns.

query = dict(name="Jane Doe")
user = collection.find_one(query)
print("Found User:", user)

In [None]:
## Finding Multiple Documents with Conditions
## Prints every document whose email ends with "@example.com".

# The dot is escaped so it matches a literal "." rather than any character
# (unescaped, the pattern would also accept e.g. "bob@exampleXcom").
query = {"email": {"$regex": r"@example\.com$"}}
users = collection.find(query)
for user in users:
    print(user)

In [None]:
## Updating a Single Document
## Sets a new email on a single document whose name is "Jane Doe".

query = dict(name="Jane Doe")
new_values = {"$set": dict(email="jane.doe@example.com")}
collection.update_one(filter=query, update=new_values)

In [None]:
## Updating Multiple Documents
## Updates every document whose email matches the regular expression r"@example\.com$",
## i.e. whose email ends with "@example.com". The dot is escaped so that it only matches
## a literal "." (an unescaped dot would match any character).
## The update operation sets the field subscriber to True for all matching documents.

query = {"email": {"$regex": r"@example\.com$"}}
new_values = {"$set": {"subscriber": True}}
collection.update_many(query, new_values)

In [None]:
## Deleting a Single Document
## Removes a single document whose name is "Bob".

query = dict(name="Bob")
collection.delete_one(filter=query)

In [None]:
## Deleting Multiple Documents
## Removes every document whose subscriber field is True and reports how many were deleted.

query = dict(subscriber=True)
result = collection.delete_many(filter=query)
deleted = result.deleted_count
print("Documents deleted:", deleted)

In [None]:
## Complete Update Example with print confirmation
## Self-contained: connects, selects the collection, flags matching users as
## subscribers, and prints how many documents were matched and modified.

from pymongo import MongoClient

# Setup the connection and select the database and collection
client = MongoClient("mongodb://localhost:27017/")
db = client["sampledb"]
collection = db["users"]

# Define the query and update.
# The dot in the pattern is escaped so it matches a literal "." only;
# unescaped, it would match any character (e.g. "@exampleXcom").
query = {"email": {"$regex": r"@example\.com$"}}
new_values = {"$set": {"subscriber": True}}

# Perform the update operation
result = collection.update_many(query, new_values)

# Print the results
print("Matched documents:", result.matched_count)
print("Modified documents:", result.modified_count)

Note for the cell below:

{"age": {"$gt": 21}} searches for documents where the age field has a value greater than 21.
{"email": {"$regex": "@example.com$"}} looks for documents where the email field matches the regular expression @example.com$. The $ at the end of the pattern anchors the match to the end of the string, so only email addresses that end with the domain are selected. Note that an unescaped "." in a regular expression matches any character; write the pattern as @example\.com$ if the dot must match literally and the domain match must be exact.
The $and operator is used here to combine these two conditions, ensuring that both conditions must be met for a document to be selected.

In [None]:
## Querying with Conditions
## $and requires both sub-conditions to hold: age greater than 21 and an
## email ending in "@example.com".

query = {
    "$and": [
        {"age": {"$gt": 21}},
        # Escaped dot: match a literal "." instead of any character.
        {"email": {"$regex": r"@example\.com$"}},
    ]
}
results = collection.find(query)
for result in results:
    print(result)

In [None]:
## Projection in Queries:
## Selects documents with age greater than 21 and returns only the name and
## email fields; _id is switched off explicitly because it is included by default.

query = {"age": {"$gt": 21}}
projection = dict(name=1, email=1, _id=0)
results = collection.find(filter=query, projection=projection)
for result in results:
    print(result)

In [None]:
## Sorting Results
## Selects documents with age greater than 21, ordered by name ascending.

from pymongo import ASCENDING, DESCENDING

query = {"age": {"$gt": 21}}
sort_spec = [("name", ASCENDING)]
results = collection.find(query, sort=sort_spec)
for result in results:
    print(result)

In [None]:
## Limiting the Number of Documents
## Prints at most five of the documents with age greater than 21.

query = {"age": {"$gt": 21}}
results = collection.find(query, limit=5)
for result in results:
    print(result)

In [None]:
## Introduction to the Aggregation Pipeline
## The pipeline is assembled from three named stages, applied in order:
##   1. $match - keep only documents where age is greater than 18.
##   2. $group - group by gender and compute averageAge with $avg.
##   3. $sort  - order the groups by averageAge, descending (-1).

match_stage = {"$match": {"age": {"$gt": 18}}}
group_stage = {"$group": {"_id": "$gender", "averageAge": {"$avg": "$age"}}}
sort_stage = {"$sort": {"averageAge": -1}}

pipeline = [match_stage, group_stage, sort_stage]
results = collection.aggregate(pipeline)
for result in results:
    print(result)

In [None]:
## Single field index
## Creates an ascending index on the email field.

# Imported here so the cell does not depend on a module-level `import pymongo`,
# which this notebook never performs.
from pymongo import ASCENDING

collection.create_index([("email", ASCENDING)])

In [None]:
## Compound index
## Creates one index over lastname then firstname, both ascending.

# Imported here so the cell does not depend on a module-level `import pymongo`,
# which this notebook never performs.
from pymongo import ASCENDING

collection.create_index([("lastname", ASCENDING), ("firstname", ASCENDING)])

In [None]:
## Verifying index usage with explain()
## Asks the server to explain the query on lastname/firstname and prints the
## returned explain document so index usage can be checked.

query = dict(lastname="Doe", firstname="John")
cursor = collection.find(query)
result = cursor.explain()
print(result)