# NavigatorGPT - MongoDB Processing

In [None]:
pip install openpyxl

In [57]:
import json
import pymongo
import pandas as pd
import pprint
import os
from pymongo import MongoClient, UpdateOne
from bson.objectid import ObjectId
from datetime import datetime

# Load the MongoDB connection string from the JSON key file.
# The location can be overridden with the MONGODB_KEY_PATH environment
# variable; when it is unset the original default path is used.
PATH_TO_SECRET_JSON = os.environ.get(
    'MONGODB_KEY_PATH', '/home/jovyan/keys/mongodb_key.json'
)
with open(PATH_TO_SECRET_JSON) as f:
    MONGODB_URI = json.load(f)['connection_string']

# Fail fast with a clear message: continuing without a client would only
# surface later as a NameError on `client`.
if not MONGODB_URI:
    raise ValueError(
        f"Failed to initialize the MongoDB client: 'connection_string' in "
        f"{PATH_TO_SECRET_JSON} is empty."
    )

# Create a MongoDB client using the connection string.
# MongoClient connects lazily, so this does not prove the server is reachable.
client = MongoClient(MONGODB_URI)
print("The MongoDB client has been initialized.")

# Print the version of the pymongo package
pymongo_version = pymongo.__version__
print(f"The version of the pymongo package is {pymongo_version}")

# Connect to the database named in the connection string and to its
# 'attractions' collection. get_default_database() lets the driver parse the
# URI and raises pymongo.errors.ConfigurationError when the URI names no
# database, instead of silently producing a wrong or empty name.
db = client.get_default_database()
db_name = db.name
collection = db.attractions

The MongoDB client has been initialized.
The version of the pymongo package is 4.4.1


In [None]:
# Corners of the rectangular search area. MongoDB documents $box as
# [bottom-left, upper-right], so the smaller value of each coordinate goes in
# the first corner and the larger in the second.
# NOTE(review): values are presumably [longitude, latitude] pairs - confirm
# against how "loc.geo" is stored.
BOX_BOTTOM_LEFT = [-74.011941, 40.709226]
BOX_UPPER_RIGHT = [-74.000941, 40.719226]

# Find every attraction whose "loc.geo" lies inside the box, returning only
# the _id and title of each match.
cursor = collection.find(
    {"loc.geo": {"$geoWithin": {"$box": [BOX_BOTTOM_LEFT, BOX_UPPER_RIGHT]}}},
    {"_id": 1, "title": 1},  # This is the projection parameter
)

# Materialise the cursor once; the list feeds the DataFrame below.
results = list(cursor)

# Create a DataFrame (also consumed by the CSV export cell)
df = pd.DataFrame(results)

# Render as a Markdown table; pandas needs the optional `tabulate` package
# for to_markdown().
markdown_table = df.to_markdown()

print(markdown_table)

### CSV Processing

In [59]:
# Build a timestamped CSV path under the downloads folder.
output_directory = "../downloads/csv/"
current_datetime = datetime.now().strftime("%Y_%m_%d_%H_%M_%S")
csv_filename = f"attractions_{current_datetime}.csv"
csv_filepath = os.path.join(output_directory, csv_filename)

# Make sure the target directory exists (no error if it already does).
os.makedirs(output_directory, exist_ok=True)

# Write the DataFrame to the CSV file, omitting the index column.
df.to_csv(csv_filepath, index=False)

In [None]:
# Look up a single document in the 'attractions' collection by its ObjectId.
attraction_id = ObjectId("6450c55c3879cf70e53ff044")
document = collection.find_one({"_id": attraction_id})

# Pretty-print the result (None is printed when no document matches).
pprint.pprint(document)


**Description:**
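The dataset below lists the attractions to update. Each record identifies a document by its `id` and gives the landmark fields (`landmark.lpNumber`, `landmark.landmarkType`, `landmark.designationDate`) to set on it: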

In [161]:
# Landmark updates to apply to the 'attractions' collection.
# Each record holds:
#   "id"                        - hex string that the update cell wraps in ObjectId
#                                 to match the document's _id
#   "landmark.lpNumber"         - dotted field path and the value to set
#   "landmark.landmarkType"     - dotted field path and the value to set
#   "landmark.designationDate"  - {"$date": <ISO-8601 string>} wrapper that the
#                                 update cell converts with datetime.fromisoformat
# The date strings carry no UTC offset, so the parsed datetimes are naive.
data = [
    {
        "id": "6455bad54673c82a34f4eccc",
        "landmark.lpNumber": "LP-00160",
        "landmark.landmarkType": "Individual Landmark",
        "landmark.designationDate": {
            "$date": "1968-11-12T00:00:00"
        }
    },
    {
        "id": "6470e61c9e866c1d68eb587f",
        "landmark.lpNumber": "LP-00161",
        "landmark.landmarkType": "Individual Landmark",
        "landmark.designationDate": {
            "$date": "1968-03-19T00:00:00"
        }
    },
    {
        "id": "64583f814673c82a34f4ecf6",
        "landmark.lpNumber": "LP-00162",
        "landmark.landmarkType": "Individual Landmark",
        "landmark.designationDate": {
            "$date": "1969-11-19T00:00:00"
        }
    },
    {
        "id": "6470d6d49e866c1d68eb587c",
        "landmark.lpNumber": "LP-00163",
        "landmark.landmarkType": "Individual Landmark",
        "landmark.designationDate": {
            "$date": "2016-08-10T00:00:00"
        }
    },
    {
        "id": "64582fe34673c82a34f4ece9",
        "landmark.lpNumber": "LP-00164",
        "landmark.landmarkType": "Individual Landmark",
        "landmark.designationDate": {
            "$date": "1966-05-17T00:00:00"
        }
    },
    {
        "id": "645830e24673c82a34f4ecea",
        "landmark.lpNumber": "LP-00165",
        "landmark.landmarkType": "Individual Landmark",
        "landmark.designationDate": {
            "$date": "1966-03-15T00:00:00"
        }
    },
    {
        "id": "560f3cfef89701aedbaebc31",
        "landmark.lpNumber": "LP-00167",
        "landmark.landmarkType": "Individual Landmark",
        "landmark.designationDate": {
            "$date": "1966-10-19T00:00:00"
        }
    },
    {
        "id": "647046569e866c1d68eb5870",
        "landmark.lpNumber": "LP-00168",
        "landmark.landmarkType": "Individual Landmark",
        "landmark.designationDate": {
            "$date": "1968-03-19T00:00:00"
        }
    },
    {
        "id": "6455a9f14673c82a34f4ecbc",
        "landmark.lpNumber": "LP-00169",
        "landmark.landmarkType": "Individual Landmark",
        "landmark.designationDate": {
            "$date": "1966-07-19T00:00:00"
        }
    },
    {
        "id": "6455ca704673c82a34f4ecce",
        "landmark.lpNumber": "LP-00170",
        "landmark.landmarkType": "Individual Landmark",
        "landmark.designationDate": {
            "$date": "1966-03-15T00:00:00"
        }
    },
    {
        "id": "6455cb4d4673c82a34f4eccf",
        "landmark.lpNumber": "LP-00170E",
        "landmark.landmarkType": "Individual Landmark",
        "landmark.designationDate": {
            "$date": "1979-01-09T00:00:00"
        }
    },
    {
        "id": "64963e6313f51e5a4696f276",
        "landmark.lpNumber": "LP-00171",
        "landmark.landmarkType": "Individual Landmark",
        "landmark.designationDate": {
            "$date": "1966-03-15T00:00:00"
        }
    },
    {
        "id": "560df230f89701aedbaebbf4",
        "landmark.lpNumber": "LP-00172",
        "landmark.landmarkType": "Individual Landmark",
        "landmark.designationDate": {
            "$date": "1966-03-15T00:00:00"
        }
    },
    {
        "id": "6444d47be7abb0665474a20b",
        "landmark.lpNumber": "LP-00173",
        "landmark.landmarkType": "Individual Landmark",
        "landmark.designationDate": {
            "$date": "1968-01-17T00:00:00"
        }
    },
    {
        "id": "64ad97a0fc95c50bc98dd02e",
        "landmark.lpNumber": "LP-00175",
        "landmark.landmarkType": "Individual Landmark",
        "landmark.designationDate": {
            "$date": "1969-02-11T00:00:00"
        }
    },
    {
        "id": "6455eec44673c82a34f4ecd3",
        "landmark.lpNumber": "LP-00176",
        "landmark.landmarkType": "Individual Landmark",
        "landmark.designationDate": {
            "$date": "1969-11-19T00:00:00"
        }
    },
    {
        "id": "6455edd04673c82a34f4ecd2",
        "landmark.lpNumber": "LP-00177",
        "landmark.landmarkType": "Individual Landmark",
        "landmark.designationDate": {
            "$date": "1966-07-19T00:00:00"
        }
    },
    {
        "id": "5604c7a2f89701aedbaebb66",
        "landmark.lpNumber": "LP-00179",
        "landmark.landmarkType": "Individual Landmark",
        "landmark.designationDate": {
            "$date": "1966-09-20T00:00:00"
        }
    },
    {
        "id": "646c311c9e866c1d68eb57c1",
        "landmark.lpNumber": "LP-00181",
        "landmark.landmarkType": "Individual Landmark",
        "landmark.designationDate": {
            "$date": "1966-04-19T00:00:00"
        }
    },
    {
        "id": "6450c7d33879cf70e53ff046",
        "landmark.lpNumber": "LP-00183",
        "landmark.landmarkType": "Individual Landmark",
        "landmark.designationDate": {
            "$date": "1969-11-19T00:00:00"
        }
    },
    {
        "id": "561f1decf89701aedbaebc95",
        "landmark.lpNumber": "LP-00184",
        "landmark.landmarkType": "Individual Landmark",
        "landmark.designationDate": {
            "$date": "1966-04-19T00:00:00"
        }
    },
    {
        "id": "646af3da9e866c1d68eb57a5",
        "landmark.lpNumber": "LP-00186",
        "landmark.landmarkType": "Individual Landmark",
        "landmark.designationDate": {
            "$date": "1966-06-21T00:00:00"
        }
    },
    {
        "id": "644ccafd3879cf70e53ff02b",
        "landmark.lpNumber": "LP-00187",
        "landmark.landmarkType": "Individual Landmark",
        "landmark.designationDate": {
            "$date": "1966-06-21T00:00:00"
        }
    },
    {
        "id": "646dc5dc9e866c1d68eb5825",
        "landmark.lpNumber": "LP-00192",
        "landmark.landmarkType": "Individual Landmark",
        "landmark.designationDate": {
            "$date": "1967-01-11T00:00:00"
        }
    },
    {
        "id": "6450c55c3879cf70e53ff044",
        "landmark.lpNumber": "LP-00193",
        "landmark.landmarkType": "Individual Landmark",
        "landmark.designationDate": {
            "$date": "1966-05-17T00:00:00"
        }
    },
    {
        "id": "6444d560e7abb0665474a20c",
        "landmark.lpNumber": "LP-00196",
        "landmark.landmarkType": "Individual Landmark",
        "landmark.designationDate": {
            "$date": "1966-05-17T00:00:00"
        }
    }
]


**Description:**
Now, we'll loop through the dataset and update the records in the MongoDB collection.

In [162]:


def _parse_iso_datetime(iso_string):
    """Convert an ISO-8601 string into a datetime.

    A trailing "Z" is rewritten to "+00:00" first, because
    datetime.fromisoformat only accepts the "Z" suffix from Python 3.11 on.
    """
    if iso_string.endswith("Z"):
        iso_string = iso_string[:-1] + "+00:00"
    return datetime.fromisoformat(iso_string)


def _flatten_update_fields(record, prefix=""):
    """Flatten a (possibly nested) record into a dict of dotted paths for $set.

    - A value of the form {"$date": "<ISO string>"} becomes a datetime stored
      under the parent path.
    - Any other dict is expanded recursively into "parent.child" paths, so
      sibling sub-keys no longer overwrite one another.
    - Every other value is kept unchanged.
    """
    fields = {}
    for key, value in record.items():
        path = f"{prefix}.{key}" if prefix else key
        if isinstance(value, dict):
            if set(value) == {"$date"}:
                fields[path] = _parse_iso_datetime(value["$date"])
            else:
                fields.update(_flatten_update_fields(value, path))
        else:
            fields[path] = value
    return fields


# Initialize counters and operations list
totalRecords = len(data)
recordsUpdated = 0
operations = []
result = None

# Prepare bulk update operations for each record in data
for item in data:
    # Everything except 'id' is a field to set; 'id' selects the document.
    update_fields = _flatten_update_fields(
        {key: value for key, value in item.items() if key != 'id'}
    )

    # A record with nothing to set would produce an empty $set; skip it.
    if not update_fields:
        continue

    # Create the update operation.
    # NOTE(review): upsert=True inserts a new document containing only these
    # fields when no document has this _id - confirm that is intended.
    operations.append(
        UpdateOne(
            {"_id": ObjectId(item['id'])},
            {"$set": update_fields},
            upsert=True
        )
    )

# Execute the update operations in bulk.
# bulk_write raises InvalidOperation for an empty list, so only call it when
# there is something to send.
if operations:
    result = collection.bulk_write(operations)

    # modified_count excludes documents that matched but already held the
    # same values, so this total can be lower than totalRecords.
    recordsUpdated = result.modified_count + len(result.upserted_ids)

# Output the results
print(f"Total records: {totalRecords}")
print(f"Records updated: {recordsUpdated}")


Total records: 26
Records updated: 21


**Description:**
Print out the results, showing the total number of records processed and the number of records updated.

In [163]:
# Re-display the counters computed by the bulk update cell.
print("Total records:", totalRecords)
print("Records updated:", recordsUpdated)

Total records: 26
Records updated: 21
