# NavigatorGPT - MongoDB Processing

In [144]:
import json
import pymongo
from pymongo import MongoClient, UpdateOne
from bson.objectid import ObjectId

# Load the MongoDB connection string from the JSON secrets file.
# The file is expected to contain a "connection_string" key; a missing key
# raises KeyError from the lookup below.
PATH_TO_SECRET_JSON = '/home/jovyan/keys/mongodb_key.json'
with open(PATH_TO_SECRET_JSON) as f:
    MONGODB_URI = json.load(f)['connection_string']

# Validate the connection string BEFORE using it. Previously a blank value
# only printed a failure message and the cell then crashed further down with
# a NameError because `client` had never been created.
if not MONGODB_URI:
    raise ValueError(
        "Failed to initialize the MongoDB client: "
        f"'connection_string' in {PATH_TO_SECRET_JSON} is empty."
    )

# Extract the database name from the connection string: it is the path
# component that follows the host list, up to any '?options' suffix.
# Parsing the part after '://' avoids mistaking the host for the database
# name when the URI has no path at all.
db_name = MONGODB_URI.split('://', 1)[-1].partition('/')[2].split('?')[0]
if not db_name:
    raise ValueError(
        "The MongoDB connection string does not include a database name "
        "(expected the form mongodb://host/<database>?options)."
    )

# Create a MongoDB client using the connection string
client = MongoClient(MONGODB_URI)
print("The MongoDB client has been initialized.")

# Print the version of the pymongo package
pymongo_version = pymongo.__version__
print(f"The version of the pymongo package is {pymongo_version}")

# Connect to the specified MongoDB database and collection
db = client[db_name]
collection = db.attractions

The MongoDB client has been initialized.
The version of the pymongo package is 4.4.1


In [146]:
# Look up a single attraction in the 'attractions' collection by its ObjectId
lookup_filter = {"_id": ObjectId("6450c55c3879cf70e53ff044")}
document = collection.find_one(lookup_filter)

# Show the retrieved document (None is printed when nothing matches)
print(document)


{'_id': ObjectId('6450c55c3879cf70e53ff044'), 'title': 'Schermerhorn Building', 'key': 'SchermerhornBuilding', 'featureKey': 'landmark', 'catalog': 'nycwayfinding', 'loc': {'lat': 40.72761, 'lon': -73.99378, 'location': '376-380 Lafayette Street', 'neighborhood': 'Greenwich Village', 'postalCode': '10012', 'city': 'New York', 'state': 'NY'}, 'map': {'center': {'latitude': 40.72761, 'longitude': -73.99378}, 'zoom': 17}, 'machineTags': [{'tag': 'nycwayfinding:lp=00193'}], 'photo': {'_id': ObjectId('649340baf86d4f4d60e5f631'), 'photoId': 52920417345, 'title': 'Schermerhorn Building', 'url': 'https://live.staticflickr.com/65535/52920417345_6a30663600_t.jpg', 'width': 100, 'height': 75}, 'inventory': {'styles': ['N/A'], 'architects': ['Henry J. Hardenbergh']}, 'aliases': ['376-380 Lafayette Street Building']}


**Description:**
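We have a dataset of attractions whose location fields need to be updated in the database. Each record uses dotted field paths (for example `loc.park`) for the values to set, plus an `id` holding the target document's ObjectId as a hex string. Below is the dataset: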

In [157]:
# Attractions grouped by park: each entry pairs the location fields shared by
# every attraction in that park with the hex ObjectId strings of those
# attractions. Group order and id order match the order of the final records.
park_batches = [
    (
        {"loc.boroughCode": "QN", "loc.neighborhood": "Astoria-Long Island City", "loc.city": "Queens", "loc.postalCode": "11102", "loc.park": "Athens Square", "loc.parkId": "Q436"},
        [
            "56c162906e78364115ed877c",
            "56bfcc5acd15882db4ad126a",
            "56bfb26dcd15881fe4bbc059",
        ],
    ),
    (
        {"loc.boroughCode": "MN", "loc.neighborhood": "Central Park", "loc.city": "New York", "loc.postalCode": "10028", "loc.park": "Central Park", "loc.parkId": "M010"},
        [
            "556aba26083cd88a44265d9a",
            "556ab0ab083cd88a44265d95",
            "556ab330083cd88a44265d96",
            "556ab480083cd88a44265d97",
            "556ab5e7083cd88a44265d98",
            "556ab827083cd88a44265d99",
        ],
    ),
    (
        {"loc.boroughCode": "MN", "loc.neighborhood": "Battery Park", "loc.city": "New York", "loc.postalCode": "10004", "loc.park": "The Battery", "loc.parkId": "M005"},
        [
            "560f1d8df89701aedbaebc16",
            "560f498cf89701aedbaebc34",
            "5606c3cff89701aedbaebb79",
        ],
    ),
    (
        {"loc.boroughCode": "SI", "loc.neighborhood": "New Brighton", "loc.city": "Staten Island", "loc.postalCode": "10301", "loc.park": "Snug Harbor Cultural Center", "loc.parkId": "R116"},
        [
            "56bf6c4bcd15881494a0f17a",
            "56bf982ecd158827a80aef2b",
            "56bf6c4bcd15881494a0f17c",
            "56bf7b77cd15882bf4d3c14d",
        ],
    ),
    (
        {"loc.boroughCode": "MN", "loc.neighborhood": "Tribeca", "loc.city": "New York", "loc.postalCode": "10007", "loc.park": "City Hall Park", "loc.parkId": "M013"},
        [
            "56b6d261cd1588295803c3a0",
            "56b2d697cd158828b067f32e",
            "56b7a32ecd15882aa09d717d",
            "56b5ac0dcd158817d04f8b82",
            "56b2d697cd158828b067f32f",
            "56b2d697cd158828b067f330",
            "56b6dd63cd15881694403c0d",
            "56b2d697cd158828b067f331",
            "56b79b58cd15882c90a3af05",
            "56b2d697cd158828b067f332",
            "56b570e2cd15880120dc35c3",
            "56b560bdcd158825d839fb9a",
            "644a1410e7abb0665474a28c",
        ],
    ),
]

# Expand the groups into one flat record per attraction: the shared location
# fields first, followed by the attraction's own "id".
data = [
    {**location_fields, "id": attraction_id}
    for location_fields, attraction_ids in park_batches
    for attraction_id in attraction_ids
]

**Description:**
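Now, we'll loop through the dataset, build one `UpdateOne` operation per record (matching the document whose `_id` equals the record's `id` and applying the fields with `$set`), and send all operations to the MongoDB collection in a single `bulk_write` call. Because the operations use `upsert=True`, a record with no matching document is inserted rather than skipped.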

In [158]:
# Initialize counters and operations list
totalRecords = len(data)
recordsUpdated = 0
operations = []

# Prepare one bulk update operation per record in data
for item in data:
    update_fields = {}

    # Build the $set payload from the record's fields
    for key, value in item.items():
        # "id" only identifies the target document (it becomes the `_id`
        # filter below); writing it through $set would add a redundant
        # string field named "id" to every document.
        if key == "id":
            continue

        # A nested dictionary is flattened one level into dotted paths so
        # only the listed sub-fields are set, not the whole sub-document.
        if isinstance(value, dict):
            for sub_key, sub_value in value.items():
                update_fields[f"{key}.{sub_key}"] = sub_value
        else:
            update_fields[key] = value

    # Nothing to change for this record; an empty $set is pointless and is
    # rejected by some MongoDB server versions.
    if not update_fields:
        continue

    operations.append(
        UpdateOne(
            {"_id": ObjectId(item['id'])},  # match on the document's ObjectId
            {"$set": update_fields},
            # NOTE(review): upsert=True inserts a new document when the id
            # matches nothing; confirm this is wanted for an update-only task.
            upsert=True
        )
    )

# Execute the update operations in bulk. bulk_write raises on an empty list
# of operations, so only call it when there is work to do.
result = collection.bulk_write(operations) if operations else None

# Calculate the total records updated (modified in place or newly upserted)
if result is not None:
    recordsUpdated = result.modified_count + len(result.upserted_ids)

# Output the results
print(f"Total records: {totalRecords}")
print(f"Records updated: {recordsUpdated}")


Total records: 29
Records updated: 29


**Description:**
Print out the results, showing the total number of records processed and the number of records updated.

In [159]:
# Report how many records were processed and how many were written
print(
    f"Total records: {totalRecords}",
    f"Records updated: {recordsUpdated}",
    sep="\n",
)

Total records: 29
Records updated: 29
