1. Set Up MongoDB Cluster and Create Connection
Programmatically create a connection to your MongoDB cluster.
Create a new database called OrderManagement.
2. Create Collections and Insert Data
Create the following collections:

Customers
Orders
Deliveries
Supplies
Products
Populate each collection with data from the provided CSV files.

3. Query the Database
Query the Orders collection to get all orders placed by a specific customer (e.g., CustomerID = 1).
Query the Deliveries collection to check the delivery status of a specific order.
Query the Supplies collection to list all products supplied by a specific supplier.

In [1]:
pip install pymongo

Collecting pymongo
  Using cached pymongo-4.8.0-cp310-cp310-macosx_11_0_arm64.whl (592 kB)
Installing collected packages: pymongo
Successfully installed pymongo-4.8.0

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.0.1[0m[39;49m -> [0m[32;49m24.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip3 install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [50]:
from pymongo.mongo_client import MongoClient
from pymongo.server_api import ServerApi
import pandas as pd
import pymongo
import certifi
import numpy as np
from pandas.api.types import is_int64_dtype

In [72]:
uri = "mongodb+srv://blanccoa17:Xw13971697807@cluster-de.sia6f.mongodb.net/?retryWrites=true&w=majority&appName=Cluster-de"


# Create a new client and connect to the server
myclient = MongoClient(uri, tlsCAFile = certifi.where())

# myclient = MongoClient(uri, server_api=ServerApi('1'))
# Send a ping to confirm a successful connection
try:
    myclient.admin.command('ping')
    print("Pinged your deployment. You successfully connected to MongoDB!")
except Exception as e:
    print(e)

Pinged your deployment. You successfully connected to MongoDB!


In [22]:
mydb = myclient["order-management"]
print(mydb)

Database(MongoClient(host=['cluster-de-shard-00-02.sia6f.mongodb.net:27017', 'cluster-de-shard-00-01.sia6f.mongodb.net:27017', 'cluster-de-shard-00-00.sia6f.mongodb.net:27017'], document_class=dict, tz_aware=False, connect=True, retrywrites=True, w='majority', appname='Cluster-de', authsource='admin', replicaset='atlas-cr8t3y-shard-0', tls=True, tlscafile='/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/certifi/cacert.pem'), 'order-management')


In [24]:
dblist = myclient.list_database_names()
dblist

['order_management', 'sample_mflix', 'admin', 'local']

### Create Collections and Insert Data:

In [60]:
customers = pd.read_csv('Customers.csv')
customers.name = 'customers'
deliveries = pd.read_csv('Deliveries.csv')
deliveries.name = 'deliveries'
orders = pd.read_csv('Orders.csv')
orders.name = 'orders'
products = pd.read_csv('Products.csv')
products.name = 'products'
supplies = pd.read_csv('Supplies.csv')
supplies.name = 'supplies'

In [61]:
def df_to_ls_of_dict(df:pd.DataFrame):
  items = []
  for i in range(df.shape[0]):
      temp_d = dict(df.iloc[i])
      for key in temp_d.keys():
        if type(temp_d[key]) == np.int64:
          temp_d[key] = int(temp_d[key])
      items.append(temp_d)
  return items

In [66]:
df_ls = [customers, deliveries, orders, products, supplies]

for df in df_ls:
    if df.name not in mydb.list_collection_names():
        mycol = mydb[df.name]
        
        try: 
            result = mycol.insert_many(df_to_ls_of_dict(df))

            # return a friendly error if the operation fails
        except pymongo.errors.OperationFailure:
            print("An authentication error was received. Are you sure your database user is authorized to perform write operations?")
            sys.exit(1)
            
        else:
            inserted_count = len(result.inserted_ids)
            print(f"I inserted {inserted_count} documents.")
    else:
        print('You have created and updated collection', df.name, ' in previous actions.')

You have created collection customers
I inserted 10 documents.
I inserted 10 documents.
I inserted 10 documents.
I inserted 10 documents.


### Querying Data:

In [88]:
customers = mydb['customers']
cust_data = customers.find()
df_cust = pd.DataFrame(cust_data)

deliveries = mydb['deliveries']
deliveries_data = deliveries.find()
df_deliveries = pd.DataFrame(deliveries_data)

orders = mydb['orders']
orders_data = orders.find()
df_orders = pd.DataFrame(orders_data)

products = mydb['products']
products_data = products.find()
df_products = pd.DataFrame(products_data)

supplies = mydb['supplies']
supplies_data = supplies.find()
df_supplies = pd.DataFrame(supplies_data)
df_supplies

Unnamed: 0,_id,SupplyID,ProductID,Supplier,StockAvailable
0,66bb92baa0dd693178a808f2,3001,101,ABC Ltd,100
1,66bb92baa0dd693178a808f3,3002,102,XYZ Corp,200
2,66bb92baa0dd693178a808f4,3003,103,LMN Inc,150
3,66bb92baa0dd693178a808f5,3004,104,OPQ Ltd,120
4,66bb92baa0dd693178a808f6,3005,105,RST Corp,130
5,66bb92baa0dd693178a808f7,3006,106,UVW Inc,110
6,66bb92baa0dd693178a808f8,3007,107,XYZ Corp,90
7,66bb92baa0dd693178a808f9,3008,108,ABC Ltd,200
8,66bb92baa0dd693178a808fa,3009,109,LMN Inc,140
9,66bb92baa0dd693178a808fb,3010,110,OPQ Ltd,160


In [89]:
df_orders

Unnamed: 0,_id,OrderID,CustomerID,ProductID,Quantity,OrderDate
0,66bb92b9a0dd693178a808de,1001,1,101,2,01-08-2024
1,66bb92b9a0dd693178a808df,1002,2,102,1,02-08-2024
2,66bb92b9a0dd693178a808e0,1003,3,103,3,03-08-2024
3,66bb92b9a0dd693178a808e1,1004,4,104,2,04-08-2024
4,66bb92b9a0dd693178a808e2,1005,5,105,5,05-08-2024
5,66bb92b9a0dd693178a808e3,1006,6,106,4,06-08-2024
6,66bb92b9a0dd693178a808e4,1007,7,107,1,07-08-2024
7,66bb92b9a0dd693178a808e5,1008,8,108,2,08-08-2024
8,66bb92b9a0dd693178a808e6,1009,9,109,3,09-08-2024
9,66bb92b9a0dd693178a808e7,1010,10,110,1,10-08-2024


In [90]:
# Query the Orders collection to get all orders placed by a specific customer (e.g., CustomerID = 1).
order_id1 = orders.find({"CustomerID" : 1})
for order in order_id1:
    print(order)


{'_id': ObjectId('66bb92b9a0dd693178a808de'), 'OrderID': 1001, 'CustomerID': 1, 'ProductID': 101, 'Quantity': 2, 'OrderDate': '01-08-2024'}


In [91]:
# Query the Deliveries collection to check the delivery status of a specific order.
deliver_id1 = deliveries.find({"OrderID" : 1001})
for delivery in deliver_id1:
    print(delivery)

{'_id': ObjectId('66bb92b9a0dd693178a808d4'), 'DeliveryID': 2001, 'OrderID': 1001, 'DeliveryDate': '2024-08-03', 'Status': 'Delivered'}


In [92]:
#  Query the Supplies collection to list all products supplied by a specific supplier.
supply_id1 = supplies.find({"Supplier" : "XYZ Corp"})
for i in supply_id1:
    print(i)

{'_id': ObjectId('66bb92baa0dd693178a808f3'), 'SupplyID': 3002, 'ProductID': 102, 'Supplier': 'XYZ Corp', 'StockAvailable': 200}
{'_id': ObjectId('66bb92baa0dd693178a808f8'), 'SupplyID': 3007, 'ProductID': 107, 'Supplier': 'XYZ Corp', 'StockAvailable': 90}


In [94]:
orders

Collection(Database(MongoClient(host=['cluster-de-shard-00-02.sia6f.mongodb.net:27017', 'cluster-de-shard-00-01.sia6f.mongodb.net:27017', 'cluster-de-shard-00-00.sia6f.mongodb.net:27017'], document_class=dict, tz_aware=False, connect=True, retrywrites=True, w='majority', appname='Cluster-de', authsource='admin', replicaset='atlas-cr8t3y-shard-0', tls=True, tlscafile='/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/certifi/cacert.pem'), 'order-management'), 'orders')

In [98]:
# Write a script to calculate the total revenue from all orders by summing up the product prices multiplied by their quantities. 
order_sum = 0
for i in orders.find():
    product_id = i['ProductID']
    quantity = i['Quantity']
    price_ls = products.find({'ProductID': product_id})
    for item in price_ls:
        price = item['Price']
        print('ProductID', product_id, 'has price of', price)
    order_sum += quantity * price
order_sum

ProductID 101 has price of 19.99
ProductID 102 has price of 24.99
ProductID 103 has price of 29.99
ProductID 104 has price of 34.99
ProductID 105 has price of 39.99
ProductID 106 has price of 44.99
ProductID 107 has price of 49.99
ProductID 108 has price of 54.99
ProductID 109 has price of 59.99
ProductID 110 has price of 64.99


1009.7600000000001

In [107]:
# Find Customers Who Have Placed More Than Two Orders.
# Hint: Use the list of following queries: [
#     {"$group": {"_id": "$CustomerID", "orderCount": {"$sum": 1}}},
#     {"$match": {"orderCount": {"$gt": 2}}}
# ]
# Use the aggregate function.

order = orders.aggregate([
    {
        "$group": {
            "_id": "$CustomerID",
            "orderCount": { "$sum": 1 }
        }
    },
    {
        "$match": {
            "orderCount": { "$gt": 2 }
        }
    }
])
for i in order:
    print(i)

In [113]:
# Find orders that contain a specific product (e.g., ProductID = 101) and where the quantity ordered is greater than a given value (e.g., 3)
# Hint: Use the following query: {
#     "$and": [
#         {"ProductID": 101},
#         {"Quantity": {"$gt": 3}}
#     ]
# })
# Use the find function

order = orders.find({
    "$and" : [
    {"ProductID": 101},
    {"Quantity":{"$gt": 1}}
]})
for i in order:
    print(i)

{'_id': ObjectId('66bb92b9a0dd693178a808de'), 'OrderID': 1001, 'CustomerID': 1, 'ProductID': 101, 'Quantity': 2, 'OrderDate': '01-08-2024'}


In [115]:
# Calculate the Total Quantity of Products Ordered by Each Customer.
# Hint: Use the query: [
#     {"$group": {"_id": "$CustomerID", "totalQuantity": {"$sum": "$Quantity"}}}
# ]
# Use the aggregate function

order = orders.aggregate([
    {"$group" : {"_id": "$CustomerID", "totalQuantity": {"$sum": "$Quantity"}}}
])
for i in order:
    print(i)

{'_id': 10, 'totalQuantity': 1}
{'_id': 3, 'totalQuantity': 3}
{'_id': 1, 'totalQuantity': 2}
{'_id': 7, 'totalQuantity': 1}
{'_id': 8, 'totalQuantity': 2}
{'_id': 6, 'totalQuantity': 4}
{'_id': 4, 'totalQuantity': 2}
{'_id': 5, 'totalQuantity': 5}
{'_id': 9, 'totalQuantity': 3}
{'_id': 2, 'totalQuantity': 1}


In [143]:
# Find Deliveries That Are Either "Delivered" or "In Transit"
# Hint: Use the query :{
#     "$or": [
#         {"Status": "Delivered"},
#         {"Status": "In Transit"}
#     ]
# }
# Use the find function

delivery = deliveries.find({
    '$or': [
        {"Status": "Delivered"},
        {'Status': "In Transit"}
    ]
})

for i in delivery:
    print(i)

{'_id': ObjectId('66bb92b9a0dd693178a808d4'), 'DeliveryID': 2001, 'OrderID': 1001, 'DeliveryDate': '2024-08-03', 'Status': 'Delivered'}
{'_id': ObjectId('66bb92b9a0dd693178a808d5'), 'DeliveryID': 2002, 'OrderID': 1002, 'DeliveryDate': '2024-08-04', 'Status': 'In Transit'}
{'_id': ObjectId('66bb92b9a0dd693178a808d6'), 'DeliveryID': 2003, 'OrderID': 1003, 'DeliveryDate': '2024-08-05', 'Status': 'Delivered'}
{'_id': ObjectId('66bb92b9a0dd693178a808d7'), 'DeliveryID': 2004, 'OrderID': 1004, 'DeliveryDate': '2024-08-06', 'Status': 'In Transit'}
{'_id': ObjectId('66bb92b9a0dd693178a808d8'), 'DeliveryID': 2005, 'OrderID': 1005, 'DeliveryDate': '2024-08-07', 'Status': 'Delivered'}
{'_id': ObjectId('66bb92b9a0dd693178a808d9'), 'DeliveryID': 2006, 'OrderID': 1006, 'DeliveryDate': '2024-08-08', 'Status': 'In Transit'}
{'_id': ObjectId('66bb92b9a0dd693178a808da'), 'DeliveryID': 2007, 'OrderID': 1007, 'DeliveryDate': '2024-08-09', 'Status': 'Delivered'}
{'_id': ObjectId('66bb92b9a0dd693178a808db'),

In [135]:
# Identify Customers Who Have Not Placed Any Orders
# Hint: Use the following query:[
#     {
#         "$lookup": {
#             "from": "Orders",
#             "localField": "CustomerID",
#             "foreignField": "CustomerID",
#             "as": "orders"
#         }
#     },
#     {"$match": {"orders": {"$size": 0}}}
# ]
# Use aggregate function

cust = customers.aggregate([
    {
        "$lookup": {
            "from": "orders",
            "localField": "CustomerID",
            "foreignField": "CustomerID",
            "as": "order_num"
        }
    },
    {"$match": {"order_num": {"$size": 0}}}
])

for c in cust:
    print(c)

In [137]:
#  Calculate the Average Order Quantity per Product
# [
#     {"$group": {"_id": "$ProductID", "averageQuantity": {"$avg": "$Quantity"}}}
# ]

order = orders.aggregate([
    {"$group": {"_id": "$ProductID", "averageQuantity": {"$avg": "$Quantity"}}}
])

for o in order:
    print(o)

{'_id': 103, 'averageQuantity': 3.0}
{'_id': 105, 'averageQuantity': 5.0}
{'_id': 101, 'averageQuantity': 2.0}
{'_id': 108, 'averageQuantity': 2.0}
{'_id': 109, 'averageQuantity': 3.0}
{'_id': 110, 'averageQuantity': 1.0}
{'_id': 102, 'averageQuantity': 1.0}
{'_id': 107, 'averageQuantity': 1.0}
{'_id': 106, 'averageQuantity': 4.0}
{'_id': 104, 'averageQuantity': 2.0}


In [140]:
# Retrieve Orders Placed Within a Specific Date Range
# {
#     "OrderDate": {
#         "$gte": "2024-08-01",
#         "$lte": "2024-08-07"
#     }
# })

order = orders.find({
    "OrderDate": {
        "$gte": "01-08-2024",
        "$lte": "07-08-2024"
    }
})

for o in order:
    print(o)

{'_id': ObjectId('66bb92b9a0dd693178a808de'), 'OrderID': 1001, 'CustomerID': 1, 'ProductID': 101, 'Quantity': 2, 'OrderDate': '01-08-2024'}
{'_id': ObjectId('66bb92b9a0dd693178a808df'), 'OrderID': 1002, 'CustomerID': 2, 'ProductID': 102, 'Quantity': 1, 'OrderDate': '02-08-2024'}
{'_id': ObjectId('66bb92b9a0dd693178a808e0'), 'OrderID': 1003, 'CustomerID': 3, 'ProductID': 103, 'Quantity': 3, 'OrderDate': '03-08-2024'}
{'_id': ObjectId('66bb92b9a0dd693178a808e1'), 'OrderID': 1004, 'CustomerID': 4, 'ProductID': 104, 'Quantity': 2, 'OrderDate': '04-08-2024'}
{'_id': ObjectId('66bb92b9a0dd693178a808e2'), 'OrderID': 1005, 'CustomerID': 5, 'ProductID': 105, 'Quantity': 5, 'OrderDate': '05-08-2024'}
{'_id': ObjectId('66bb92b9a0dd693178a808e3'), 'OrderID': 1006, 'CustomerID': 6, 'ProductID': 106, 'Quantity': 4, 'OrderDate': '06-08-2024'}
{'_id': ObjectId('66bb92b9a0dd693178a808e4'), 'OrderID': 1007, 'CustomerID': 7, 'ProductID': 107, 'Quantity': 1, 'OrderDate': '07-08-2024'}


In [144]:
# Update the delivery status of an order (e.g., change the status from "In Transit" to "Delivered"). Use order id to update specific order.
update_query = { "DeliveryID": 2002}
newvalues = { "$set": { "Status": "Delivered" } }

deliveries.update_one(update_query, newvalues)

UpdateResult({'n': 1, 'electionId': ObjectId('7fffffff00000000000000b3'), 'opTime': {'ts': Timestamp(1723592723, 29), 't': 179}, 'nModified': 1, 'ok': 1.0, '$clusterTime': {'clusterTime': Timestamp(1723592723, 30), 'signature': {'hash': b'3\x19\x95\xd5>6t\xb0\x82\xf3\xec\xff\x82X\x15\xaf?\xe4\xb9?', 'keyId': 7348064550916194325}}, 'operationTime': Timestamp(1723592723, 29), 'updatedExisting': True}, acknowledged=True)

In [146]:
# Delete a product from the Products collection. Use product id to delete specific product
delete_query = { "ProductID": 110}
products.delete_one(delete_query)

for i in products.find():
    print(i)

{'_id': ObjectId('66bb92baa0dd693178a808e8'), 'ProductID': 101, 'Name': 'Widget A', 'Category': 'Gadgets', 'Price': 19.99}
{'_id': ObjectId('66bb92baa0dd693178a808e9'), 'ProductID': 102, 'Name': 'Widget B', 'Category': 'Gadgets', 'Price': 24.99}
{'_id': ObjectId('66bb92baa0dd693178a808ea'), 'ProductID': 103, 'Name': 'Widget C', 'Category': 'Tools', 'Price': 29.99}
{'_id': ObjectId('66bb92baa0dd693178a808eb'), 'ProductID': 104, 'Name': 'Widget D', 'Category': 'Tools', 'Price': 34.99}
{'_id': ObjectId('66bb92baa0dd693178a808ec'), 'ProductID': 105, 'Name': 'Widget E', 'Category': 'Electronics', 'Price': 39.99}
{'_id': ObjectId('66bb92baa0dd693178a808ed'), 'ProductID': 106, 'Name': 'Widget F', 'Category': 'Electronics', 'Price': 44.99}
{'_id': ObjectId('66bb92baa0dd693178a808ee'), 'ProductID': 107, 'Name': 'Widget G', 'Category': 'Gadgets', 'Price': 49.99}
{'_id': ObjectId('66bb92baa0dd693178a808ef'), 'ProductID': 108, 'Name': 'Widget H', 'Category': 'Tools', 'Price': 54.99}
{'_id': Object