## MongoDB - Geolocation

In [416]:
import json
import pymongo
import pandas as pd
import pprint
import os
from pymongo import MongoClient, UpdateOne
from bson.objectid import ObjectId
from datetime import datetime

# Load the MongoDB connection string from the JSON secret file so the
# credentials never appear in the notebook itself.
PATH_TO_SECRET_JSON = '/home/jovyan/keys/mongodb_key.json'
with open(PATH_TO_SECRET_JSON, encoding='utf-8') as f:
    MONGODB_URI = json.load(f)['connection_string']

# Fail fast on an empty connection string: merely printing a message would let
# the cell continue and crash with a NameError on the undefined `client`.
if not MONGODB_URI:
    raise ValueError(
        "Failed to initialize the MongoDB client: "
        f"'connection_string' in {PATH_TO_SECRET_JSON} is empty."
    )

# Create a MongoDB client using the connection string
client = MongoClient(MONGODB_URI)
print("The MongoDB client has been initialized.")

# Print the version of the pymongo package
pymongo_version = pymongo.__version__
print(f"The version of the pymongo package is {pymongo_version}")

# Connect to the database named in the connection string and its collection.
# pymongo parses the URI itself, so option values containing '/' cannot be
# mistaken for the database name; a URI without a database raises
# pymongo.errors.ConfigurationError instead of silently using a wrong name.
db = client.get_default_database()
db_name = db.name  # kept for any later cell that refers to the name
collection = db.attractions

The MongoDB client has been initialized.
The version of the pymongo package is 4.4.1


In [424]:
# Corners of the search rectangle, in the order the $box operator documents:
# bottom-left first, then upper-right. Same rectangle as before, only the
# corner order is normalized.
# NOTE(review): pairs are presumably [longitude, latitude] - confirm against
# how "loc.geo" is stored.
BOX_BOTTOM_LEFT = [-74.011941, 40.709226]
BOX_UPPER_RIGHT = [-74.000941, 40.719226]

cursor = collection.find(
    {"loc.geo": {"$geoWithin": {"$box": [BOX_BOTTOM_LEFT, BOX_UPPER_RIGHT]}}},
    {"_id": 1, "title": 1},  # projection: return only these two fields
)

# Drain the cursor in one step
results = list(cursor)

# Naming the columns keeps the frame's schema stable even when nothing matches
df = pd.DataFrame(results, columns=["_id", "title"])

df.head(5)

Unnamed: 0,_id,title
0,53668a995c959622000890a9,Balloon Flower (Red)
1,53668a995c959622000890b1,Dreaming of Far Away Places: The Ships Come to...
2,53668a995c959622000890b7,Five in One
3,54fbf93293a2b1e1e52db607,Double Check
4,5520244d610bf1c45d848a37,New York in Its Infancy


In [425]:
# Build the CSV file name from the current local date and time
current_datetime = format(datetime.now(), "%Y_%m_%d_%H_%M_%S")
csv_filename = f"attractions_{current_datetime}.csv"

# Make sure the export directory exists (no error if it is already there)
output_directory = "../downloads/csv/"
os.makedirs(output_directory, exist_ok=True)

# Write the DataFrame to the CSV file, leaving out the row index
csv_filepath = os.path.join(output_directory, csv_filename)
df.to_csv(path_or_buf=csv_filepath, index=False)