**Step 1: Connect to ecommerce database**

In [20]:
from pymongo import MongoClient
from datetime import datetime

connection_string = "mongodb+srv://dboukmoussa:Moussa123@clusteraai634o.ee4fv.mongodb.net/?retryWrites=true&w=majority&appName=ClusterAAI634O"

# Connect to the MongoDB Atlas cluster
client = MongoClient(connection_string)

#Access the database and create the orders collection
db = client['ecommerce']
orders_collection = db['orders']

**Step 2: Insert Product Data**

In [21]:
#Define sample orders
sample_orders = [
    {
        "order_id": 1,
        "customer_name": "John Doe",
        "total": 750,
        "order_date": datetime(2023, 1, 15),
        "products": [
            {"product_name": "Smartphone", "quantity": 1, "price": 700},
            {"product_name": "Wireless Charger", "quantity": 1, "price": 50}
        ]
    },
    {
        "order_id": 2,
        "customer_name": "Jane Smith",
        "total": 1200,
        "order_date": datetime(2023, 1, 16),
        "products": [
            {"product_name": "Laptop", "quantity": 1, "price": 1200}
        ]
    },
    {
        "order_id": 3,
        "customer_name": "John Doe",
        "total": 450,
        "order_date": datetime(2023, 1, 17),
        "products": [
            {"product_name": "Office Chair", "quantity": 1, "price": 150},
            {"product_name": "Desk", "quantity": 1, "price": 300}
        ]
    },
    {
        "order_id": 4,
        "customer_name": "Alice Johnson",
        "total": 600,
        "order_date": datetime(2023, 1, 18),
        "products": [
            {"product_name": "Bookshelf", "quantity": 1, "price": 600}
        ]
    },
    {
        "order_id": 5,
        "customer_name": "John Doe",
        "total": 500,
        "order_date": datetime(2023, 1, 19),
        "products": [
            {"product_name": "Headphones", "quantity": 2, "price": 250}
        ]
    }
]

# Insert the sample orders into the collection
result = orders_collection.insert_many(sample_orders)

# Confirm insertion
print(f"Inserted {len(result.inserted_ids)} orders into the 'orders' collection.")


Inserted 5 orders into the 'orders' collection.


**Task 1: Filtering Data**

In [22]:
#Define the query
query = {
    "customer_name": "John Doe",  # Filter by customer name
    "total": {"$gt": 500}        # Total greater than 500
}

# Define the projection to include only specific fields
projection = {
    "order_id": 1,  # Include order_id
    "total": 1,     # Include total
    "_id": 0        # Exclude the default _id field
}

# Execute the query and display results
filtered_orders = orders_collection.find(query, projection)
print("Orders placed by 'John Doe' with total > $500:")
for order in filtered_orders:
    print(order)


Orders placed by 'John Doe' with total > $500:
{'order_id': 1, 'total': 750}
{'order_id': 1, 'total': 750}


**Task 2: Sorting Data**

In [23]:
# Define the sort order
# First by order_date descending (-1), then by total ascending (1)
sort_order = [("order_date", -1), ("total", 1)]

# Define the projection to include specific fields
projection = {
    "order_id": 1,       # Include order_id
    "order_date": 1,     # Include order_date
    "total": 1,          # Include total
    "_id": 0             # Exclude the default _id field
}

# Execute the query with sorting and display results
sorted_orders = orders_collection.find({}, projection).sort(sort_order)
print("Orders sorted by order_date (desc) and total (asc):")
for order in sorted_orders:
    print(order)


Orders sorted by order_date (desc) and total (asc):
{'order_id': 5, 'total': 500, 'order_date': datetime.datetime(2023, 1, 19, 0, 0)}
{'order_id': 5, 'total': 500, 'order_date': datetime.datetime(2023, 1, 19, 0, 0)}
{'order_id': 4, 'total': 600, 'order_date': datetime.datetime(2023, 1, 18, 0, 0)}
{'order_id': 4, 'total': 600, 'order_date': datetime.datetime(2023, 1, 18, 0, 0)}
{'order_id': 3, 'total': 450, 'order_date': datetime.datetime(2023, 1, 17, 0, 0)}
{'order_id': 3, 'total': 450, 'order_date': datetime.datetime(2023, 1, 17, 0, 0)}
{'order_id': 2, 'total': 1200, 'order_date': datetime.datetime(2023, 1, 16, 0, 0)}
{'order_id': 2, 'total': 1200, 'order_date': datetime.datetime(2023, 1, 16, 0, 0)}
{'order_id': 1, 'total': 750, 'order_date': datetime.datetime(2023, 1, 15, 0, 0)}
{'order_id': 1, 'total': 750, 'order_date': datetime.datetime(2023, 1, 15, 0, 0)}


**Task 3: Aggregation - Total Sales per Product**

In [24]:
# Define the aggregation pipeline
pipeline = [
    # Unwind the products array to process each product separately
    {"$unwind": "$products"},
    
    # Group by product_name and calculate total sales
    {
        "$group": {
            "_id": "$products.product_name",  # Group by product_name
            "totalSales": {"$sum": {"$multiply": ["$products.price", "$products.quantity"]}}
        }
    },
    
    # Rename _id to product_name for clarity
    {"$project": {"_id": 0, "product_name": "$_id", "totalSales": 1}},
    
    # Sort by totalSales in descending order
    {"$sort": {"totalSales": -1}}
]

# Execute the aggregation pipeline and display results
total_sales_per_product = orders_collection.aggregate(pipeline)
print("Total sales per product:")
for product in total_sales_per_product:
    print(product)


Total sales per product:
{'totalSales': 2400, 'product_name': 'Laptop'}
{'totalSales': 1400, 'product_name': 'Smartphone'}
{'totalSales': 1200, 'product_name': 'Bookshelf'}
{'totalSales': 1000, 'product_name': 'Headphones'}
{'totalSales': 600, 'product_name': 'Desk'}
{'totalSales': 300, 'product_name': 'Office Chair'}
{'totalSales': 100, 'product_name': 'Wireless Charger'}


**Task 4: Aggregation - Average Order Value per Customer**

In [25]:
# Define the aggregation pipeline
pipeline = [
    # Group by customer_name and calculate the average order value
    {
        "$group": {
            "_id": "$customer_name",  # Group by customer_name
            "averageOrderValue": {"$avg": "$total"}  # Calculate the average of the total field
        }
    },
    
    # Rename _id to customer_name for clarity
    {"$project": {"_id": 0, "customer_name": "$_id", "averageOrderValue": 1}},
    
    # Sort by averageOrderValue in descending order
    {"$sort": {"averageOrderValue": -1}}
]

# Execute the aggregation pipeline and display results
average_order_value_per_customer = orders_collection.aggregate(pipeline)
print("Average order value per customer:")
for customer in average_order_value_per_customer:
    print(customer)


Average order value per customer:
{'averageOrderValue': 1200.0, 'customer_name': 'Jane Smith'}
{'averageOrderValue': 600.0, 'customer_name': 'Alice Johnson'}
{'averageOrderValue': 566.6666666666666, 'customer_name': 'John Doe'}


**Task 5: Advanced Aggregation - Top 5 Products by Quantity Sold**

In [26]:
# Define the aggregation pipeline
pipeline = [
    # Unwind the products array to process each product separately
    {"$unwind": "$products"},
    
    # Group by product_name and calculate total quantity sold
    {
        "$group": {
            "_id": "$products.product_name",  # Group by product_name
            "quantitySold": {"$sum": "$products.quantity"}  # Sum the quantity for each product
        }
    },
    
    # Rename _id to product_name for clarity
    {"$project": {"_id": 0, "product_name": "$_id", "quantitySold": 1}},
    
    # Sort by quantitySold in descending order
    {"$sort": {"quantitySold": -1}},
    
    # Limit the output to the top 5 products
    {"$limit": 5}
]

# Execute the aggregation pipeline and display results
top_products = orders_collection.aggregate(pipeline)
print("Top 5 products by quantity sold:")
for product in top_products:
    print(product)


Top 5 products by quantity sold:
{'quantitySold': 4, 'product_name': 'Headphones'}
{'quantitySold': 2, 'product_name': 'Desk'}
{'quantitySold': 2, 'product_name': 'Bookshelf'}
{'quantitySold': 2, 'product_name': 'Laptop'}
{'quantitySold': 2, 'product_name': 'Smartphone'}
