# Users Database

## Create a MongoDB database called usersdb and a collection called users.

In [None]:
from pymongo import MongoClient
from bson.objectid import ObjectId
from bson import json_util


# Open a client with PyMongo's default connection settings, then pick the
# "usersdb" database and its "users" collection by name.
client = MongoClient()
db = client["usersdb"]
users = db["users"]

## Insert Documents
Insert the documents in example.json into the users collection.

In [None]:
# Load the sample documents from example.json.
# json_util.loads is used instead of the stdlib json module so that any
# MongoDB Extended JSON values in the file (e.g. {"$date": ...}) are decoded
# into native Python/BSON types.
# The context manager closes the file deterministically, and the explicit
# encoding avoids depending on the platform's default locale.
with open("./example.json", encoding="utf-8") as example_file:
    docs = json_util.loads(example_file.read())
# Trailing expression: shows the parsed documents as the cell output.
docs

In [None]:
# Bulk-insert the documents loaded into `docs` into the users collection.
# The InsertManyResult is shown as the cell output.
users.insert_many(docs)

## Read Documents: Retrieve all documents from the users collection.

In [None]:
from pprint import pprint
# An empty filter matches every document in the collection; pretty-print each.
for user_doc in users.find({}):
    pprint(user_doc)

## Read Documents: Retrieve a specific user by their email address (e.g., 'sophia.rodriguez@example.com').


In [None]:
# Fetch a single user by exact email match; find_one returns None when no
# document matches.
users.find_one({"email": "sophia.rodriguez@example.com"})

## Update the birth date field of the user with the email address 'sophia.rodriguez@example.com'.

Then check that the update was successful.

In [None]:
import datetime
# Overwrite the dateOfBirth field of the user identified by this email.
# "$set" changes only the named field and leaves the rest of the document as is.
users.update_one(
    {"email": "sophia.rodriguez@example.com"},
    {"$set": {"dateOfBirth": datetime.datetime(year=1994, month=11, day=5)}},
)

In [None]:
# Re-read the same user to confirm that dateOfBirth now holds the new value.
users.find_one({"email": "sophia.rodriguez@example.com"})

## Update the email field of another user document.
E.g., "james.hernandez@example.com" to "james.hernandez@gmail.com"

In [None]:
# Replace one user's email address: match on the old value, "$set" the new one.
old_email = "james.hernandez@example.com"
new_email = "james.hernandez@gmail.com"
users.update_one({"email": old_email}, {"$set": {"email": new_email}})


## Query users who are over 30 years old.

In [None]:
# A user is over 30 when they were born before "now minus 30 calendar years".
# Subtracting 30*365 days would ignore leap days and put the cutoff about a
# week too late, so the year is shifted directly instead.
# A timezone-aware UTC timestamp replaces the deprecated datetime.utcnow();
# PyMongo converts aware datetimes to UTC when encoding the query.
now = datetime.datetime.now(datetime.timezone.utc)
try:
    cutoff = now.replace(year=now.year - 30)
except ValueError:
    # Today is Feb 29 and the target year is not a leap year: fall back to
    # Feb 28 so nobody younger than 30 is included.
    cutoff = now.replace(year=now.year - 30, day=28)

query = {"dateOfBirth": {"$lt": cutoff}}
for doc in users.find(query):
    pprint(doc)


## Query users whose name starts with a specific letter, e.g., 'J'.

In [None]:
# "^J" anchors the pattern at the start of the string, so only names that
# begin with an uppercase "J" match (the regex is case-sensitive).
list(users.find({"name": {"$regex": "^J"}}))


## Query users whose email contains a specific domain (e.g., 'gmail.com').


In [None]:
# Match addresses whose domain is exactly gmail.com.
# The dot is escaped because an unescaped "." matches any character
# (e.g. "@gmailxcom"), and "$" anchors the pattern at the end of the string
# so that "user@gmail.com.example.org" is not returned.
list(
    users.find({"email": {"$regex": r"@gmail\.com$"}})
)

## Delete a user document based on their email address, like 'sophia.rodriguez@example.com'.


In [None]:
# Remove at most one document whose email matches exactly; the DeleteResult
# is shown as the cell output.
users.delete_one({"email": "sophia.rodriguez@example.com"})

## Delete all users over a certain age.

# Weather Data Exercise
Consider the data inserted into the weatherdb database by the following code.

In [None]:
import pymongo

# Connect to the local MongoDB server and select weatherdb.weather.
client = pymongo.MongoClient("mongodb://localhost:27017/")
db = client["weatherdb"]
weather = db["weather"]

# Sample data: ten readings, one per city, all taken on the same date.
observation_date = "2024-02-26"
# (location, temperature, humidity, wind_speed)
readings = [
    ("New York", 20, 60, 5),
    ("Los Angeles", 25, 55, 7),
    ("Chicago", 18, 70, 4),
    ("Houston", 22, 50, 6),
    ("Miami", 23, 45, 8),
    ("San Francisco", 19, 65, 3),
    ("Seattle", 21, 58, 6),
    ("Dallas", 24, 52, 7),
    ("Boston", 17, 68, 4),
    ("Phoenix", 20, 62, 5),
]
weather_data = [
    {
        "date": observation_date,
        "temperature": temperature,
        "humidity": humidity,
        "wind_speed": wind_speed,
        "location": location,
    }
    for location, temperature, humidity, wind_speed in readings
]

# Insert all readings into the collection in one call.
weather.insert_many(weather_data)

## Display all documents in the collection

In [None]:
# An empty filter matches every document; pretty-print each reading.
for reading in weather.find({}):
    pprint(reading)


## Query documents for a specific location (e.g., "New York")

In [None]:
# Exact-match filter on the location field.
new_york_readings = weather.find({"location": "New York"})
for reading in new_york_readings:
    pprint(reading)


## Update the document for "Phoenix" so that humidity is 65

In [None]:
# Set humidity to 65 on the first document whose location is "Phoenix".
# The UpdateResult is kept so the outcome can be inspected afterwards.
update_result = weather.update_one(
    {"location": "Phoenix"},
    {"$set": {"humidity": 65}}
)

In [None]:
# Number of documents actually changed by the update above (0 if the stored
# value was already the same or nothing matched).
update_result.modified_count

## Find the maximum temperature

In [None]:
# Sort by temperature, highest first, and return the first document:
# that is the reading with the maximum temperature.
weather.find_one(sort=[("temperature", pymongo.DESCENDING)])

In [None]:
# Same result through a cursor: sort descending by temperature and keep one row.
hottest_first = weather.find().sort("temperature", pymongo.DESCENDING)
list(hottest_first.limit(1))

## Find the minimum humidity

In [None]:
# Sort by humidity, lowest first, and return the first document:
# that is the reading with the minimum humidity.
weather.find_one(sort=[("humidity", pymongo.ASCENDING)])

## Find the average temperature
See the `aggregate` method and the [aggregation operator reference](https://www.mongodb.com/docs/manual/reference/operator/aggregation/).

In [None]:
# Grouping on a constant _id (None) puts every document into a single bucket,
# so "$avg" is computed over the whole collection.
average_stage = {
    "$group": {
        "_id": None,
        "avg_temp": {"$avg": "$temperature"},
    }
}
list(weather.aggregate([average_stage]))

## Delete the document relative to "Boston"

In [None]:
# Remove at most one document whose location is "Boston"; keep the
# DeleteResult so the outcome can be inspected afterwards.
res = weather.delete_one({"location": "Boston"})

In [None]:
# Number of documents removed by the delete above (0 or 1 for delete_one).
res.deleted_count

# An embedded document case

Consider the following data, where each location document embeds an array of daily readings.

In [None]:
import pymongo

# Connect to MongoDB
client = pymongo.MongoClient("mongodb://localhost:27017/")
db = client.weatherdb
# A separate collection in the same database, so the flat per-city documents
# in `weather` are left untouched.
weather2 = db.weather2

# Sample data - list of documents with weather information for multiple dates and locations
# Each document has one location and a "dates" array of embedded readings
# (date, temperature, humidity, wind_speed).
# Note: this rebinds the name weather_data used for the flat sample above.
weather_data = [
    {"location": "New York", "dates": [
        {"date": "2024-02-26", "temperature": 20, "humidity": 60, "wind_speed": 5},
        {"date": "2024-02-27", "temperature": 22, "humidity": 65, "wind_speed": 6},
        {"date": "2024-02-28", "temperature": 18, "humidity": 55, "wind_speed": 4}
    ]},
    {"location": "Los Angeles", "dates": [
        {"date": "2024-02-26", "temperature": 25, "humidity": 55, "wind_speed": 7},
        {"date": "2024-02-27", "temperature": 27, "humidity": 50, "wind_speed": 6},
        {"date": "2024-02-28", "temperature": 24, "humidity": 58, "wind_speed": 5}
    ]},
    {"location": "Chicago", "dates": [
        {"date": "2024-02-26", "temperature": 18, "humidity": 70, "wind_speed": 4},
        {"date": "2024-02-27", "temperature": 20, "humidity": 68, "wind_speed": 3},
        {"date": "2024-02-28", "temperature": 16, "humidity": 72, "wind_speed": 5}
    ]},
    {"location": "Houston", "dates": [
        {"date": "2024-02-26", "temperature": 22, "humidity": 50, "wind_speed": 6},
        {"date": "2024-02-27", "temperature": 24, "humidity": 48, "wind_speed": 7},
        {"date": "2024-02-28", "temperature": 21, "humidity": 52, "wind_speed": 5}
    ]},
    # Add more locations with multiple dates and weather data
]

# Insert all location documents into weather2 in one call.
weather2.insert_many(weather_data)

## Find the average humidity for each location

See the `aggregate` method and the `$project` stage, which lets you specify which fields (including computed ones) to return.

In [None]:
# "$dates.humidity" resolves to the array of humidity values taken from the
# embedded readings; "$avg" inside $project averages that array per document.
humidity_projection = {
    "$project": {
        "location": 1,
        "list_humidity_values": "$dates.humidity",
        "avg_humidity": {"$avg": "$dates.humidity"},
    }
}
cursor = weather2.aggregate([humidity_projection])

for doc in cursor:
    pprint(doc)


## Find the mean humidity for each date

Suggestion: see the `$unwind` stage, which deconstructs an array field from the input documents and outputs one document for each element of the array. Each output document is a copy of the input document with the array field replaced by that element. This stage is particularly useful for documents that contain an array of subdocuments or values, because it lets you operate on each element of the array as if it were a separate document.

In [None]:
# Emit one document per embedded reading: after this stage "dates" holds a
# single sub-document instead of the whole array.
unwind_dates = {"$unwind": "$dates"}
cursor = weather2.aggregate([unwind_dates])

for doc in cursor:
    pprint(doc)

In [None]:
# Flatten the embedded readings first, then bucket them by date and average
# the humidity across all locations for each date.
humidity_by_date_pipeline = [
    {"$unwind": "$dates"},
    {
        "$group": {
            "_id": "$dates.date",
            "mean_humidity": {"$avg": "$dates.humidity"},
        }
    },
]
cursor = weather2.aggregate(humidity_by_date_pipeline)

for doc in cursor:
    pprint(doc)

## Find the maximum temperature for each location

In [None]:
# Flatten the embedded readings, then bucket them by location and keep the
# highest temperature seen for each one.
max_temp_pipeline = [
    {"$unwind": "$dates"},
    {
        "$group": {
            "_id": "$location",
            "max_temp": {"$max": "$dates.temperature"},
        }
    },
]
cursor = weather2.aggregate(max_temp_pipeline)

for doc in cursor:
    pprint(doc)