Installing PyMongo

In [1]:
!pip install pymongo


Defaulting to user installation because normal site-packages is not writeable
Collecting pymongo
  Downloading pymongo-4.6.1-cp39-cp39-macosx_10_9_universal2.whl (534 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 534.5/534.5 kB 14.0 MB/s eta 0:00:00
Collecting dnspython<3.0.0,>=1.16.0
  Downloading dnspython-2.5.0-py3-none-any.whl (305 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 305.4/305.4 kB 14.5 MB/s eta 0:00:00
Installing collected packages: dnspython, pymongo
Successfully installed dnspython-2.5.0 pymongo-4.6.1

[notice] A new release of pip available: 22.2.2 -> 23.3.2
[notice] To update, run: /Library/Developer/CommandLineTools/usr/bin/python3 -m pip install --upgrade pip


Connecting to MongoDB

In [2]:
from pymongo import MongoClient
from datetime import datetime

# Open a client against the local MongoDB server
# (swap in your own connection URI when the server is not local)
client = MongoClient("mongodb://localhost:27017/")

# Handles to the database and to the invoice collection queried below
db = client.get_database("WSDA_Music")
invoice_collection = db.get_collection("Invoice")

## Challenge 1

General queries that begin to give you some high-level context.

### Q1. How many transactions took place between the years 2011 and 2012?

In [16]:
# Inclusive bounds of the reporting window, written as strings
# (presumably InvoiceDate is stored as 'YYYY-MM-DD HH:MM:SS' text — verify against the collection)
start_date, end_date = '2011-01-01 00:00:00', '2012-12-31 23:59:59'

# Filter selecting invoices whose InvoiceDate lies inside the window
invoice_date_filter = {"InvoiceDate": {"$gte": start_date, "$lte": end_date}}

# Count the documents that satisfy the filter
number_of_transactions = invoice_collection.count_documents(invoice_date_filter)

# Report the count
print(f"{number_of_transactions} Transactions took place between 2011 and 2012")

167 Transactions took place between 2011 and 2012


### Q2. How much money did WSDA Music make during the same period?

In [33]:
# Define the date range in the string format
# (presumably InvoiceDate is stored as 'YYYY-MM-DD HH:MM:SS' text — verify against the collection)
start_date = '2011-01-01 00:00:00'
end_date = '2012-12-31 23:59:59'

# Aggregation pipeline: keep invoices inside the date range, then collapse them
# into a single document holding the sum of their "Total" field.
pipeline = [
    {
        "$match": {
            "InvoiceDate": {
                "$gte": start_date,
                "$lte": end_date
            }
        }
    },
    {
        "$group": {
            "_id": None,  # a single group covering every matched invoice
            "Total Sales": {"$sum": "$Total"}
        }
    }
]

# Run the pipeline on the invoice collection and materialise the cursor as a list.
total_sales_list = list(invoice_collection.aggregate(pipeline))

# $group emits no document at all when $match selects nothing, so indexing [0]
# unconditionally would raise IndexError; report 0 for an empty period instead.
total_sales = total_sales_list[0]['Total Sales'] if total_sales_list else 0

print("Total Sales made between 2011 and 2012 is", total_sales)


Total Sales made between 2011 and 2012 is 1947.97


## Challenge 2

### Q1. Get a list of customers who made purchases between 2011 and 2012.

In [3]:
# Define the date range in the string format
# (presumably InvoiceDate is stored as 'YYYY-MM-DD HH:MM:SS' text — verify against the collection)
start_date = '2011-01-01 00:00:00'
end_date = '2012-12-31 23:59:59'

# Aggregation pipeline: list each distinct customer with an invoice in the date range.
pipeline = [
    # Filter on the invoice's own field first, so that only in-range invoices
    # are joined against the Customer collection.
    {
        "$match": {
            "InvoiceDate": {
                "$gte": start_date,
                "$lte": end_date
            }
        }
    },
    # Attach the matching Customer document(s) to every remaining invoice.
    {
        "$lookup": {
            "from": "Customer",
            "localField": "CustomerId",
            "foreignField": "CustomerId",
            "as": "customerDetails"
        }
    },
    # Flatten the joined array; invoices without a matching customer are dropped.
    {
        "$unwind": "$customerDetails"
    },
    # One output document per distinct customer (name, city, state).
    {
        "$group": {
            "_id": {
                "FirstName": "$customerDetails.FirstName",
                "LastName": "$customerDetails.LastName",
                "City": "$customerDetails.City",
                "State": "$customerDetails.State"
            },
            "Email": {"$first": "$customerDetails.Email"},
            "Address": {"$first": "$customerDetails.Address"}
        }
    },
    # Sort by first name, with last name as a tie-breaker so customers sharing
    # a first name come back in a stable order on every run.
    {
        "$sort": {"_id.FirstName": 1, "_id.LastName": 1}
    }
]

# Execute the aggregation pipeline on the same collection handle used by the earlier cells
customers = list(invoice_collection.aggregate(pipeline))

# Print the results
for customer in customers:
    print(customer)

{'_id': {'FirstName': 'Aaron', 'LastName': 'Mitchell', 'City': 'Winnipeg', 'State': 'MB'}, 'Email': 'aaronmitchell@yahoo.ca', 'Address': '696 Osborne Street'}
{'_id': {'FirstName': 'Alexandre', 'LastName': 'Rocha', 'City': 'São Paulo', 'State': 'SP'}, 'Email': 'alero@uol.com.br', 'Address': 'Av. Paulista, 2022'}
{'_id': {'FirstName': 'Astrid', 'LastName': 'Gruber', 'City': 'Vienne'}, 'Email': 'astrid.gruber@apple.at', 'Address': 'Rotenturmstraße 4, 1010 Innere Stadt'}
{'_id': {'FirstName': 'Bjørn', 'LastName': 'Hansen', 'City': 'Oslo'}, 'Email': 'bjorn.hansen@yahoo.no', 'Address': 'Ullevålsveien 14'}
{'_id': {'FirstName': 'Camille', 'LastName': 'Bernard', 'City': 'Paris'}, 'Email': 'camille.bernard@yahoo.fr', 'Address': '4, Rue Milton'}
{'_id': {'FirstName': 'Daan', 'LastName': 'Peeters', 'City': 'Brussels'}, 'Email': 'daan_peeters@apple.be', 'Address': 'Grétrystraat 63'}
{'_id': {'FirstName': 'Dan', 'LastName': 'Miller', 'City': 'Mountain View', 'State': 'CA'}, 'Email': 'dmiller@comca