## Database and Jupyter Notebook Set Up

In [1]:
# Import dependencies
from pymongo import MongoClient
from pprint import pprint
import json
import os

In [None]:
# Connect to MongoDB
client = MongoClient('mongodb://localhost:27017/')

# Drop the 'national_parks_db' database
client.drop_database('national_parks_db')

print("Database 'national_parks_db' has been deleted.")

In [3]:
# Connect to MongoDB
client = MongoClient(port=27017)
db = client['national_parks_db']

In [4]:
# Function to load JSON data into MongoDB
def load_json_to_mongo(collection_name, json_file):
    with open(json_file) as file:
        data = json.load(file)
        collection = db[collection_name]
        if isinstance(data, list):
            collection.insert_many(data)
        else:
            collection.insert_one(data)

In [5]:
# Define JSON files and their corresponding collections with folder paths
json_files = {
    'activities_parks': 'NPS_Activities_Fees/activities_parks.json',
    'activities': 'NPS_Activities_Fees/activities.json',
    'feespasses': 'NPS_Activities_Fees/feespasses.json',
    'amenities': 'NPS Amenities Info/nps_amenities_data.json',
    'amenities_places': 'NPS Amenities Info/nps_amen_place_data.json',
    'parks_data': 'NPS Amenities Info/nps_parks_data.json'
}

# Verify that the JSON files exist
for json_file in json_files.values():
    if os.path.exists(json_file):
        print(f"{json_file} exists.")
    else:
        print(f"{json_file} does not exist.")

NPS_Activities_Fees/activities_parks.json exists.
NPS_Activities_Fees/activities.json exists.
NPS_Activities_Fees/feespasses.json exists.
NPS Amenities Info/nps_amenities_data.json exists.
NPS Amenities Info/nps_amen_place_data.json exists.
NPS Amenities Info/nps_parks_data.json exists.


In [6]:
# Load JSON files into MongoDB collections
for collection_name, json_file in json_files.items():
    if os.path.exists(json_file):
        load_json_to_mongo(collection_name, json_file)

print("Data loaded successfully!")

Data loaded successfully!


In [7]:
# List all collections in the database
collections = db.list_collection_names()
print("Collections in the database:")
for collection in collections:
    print(collection)


Collections in the database:
feespasses
amenities_places
activities_parks
parks_data
amenities
activities


In [8]:
# review a document in the amenities collection
db.amenities.find_one()

{'_id': ObjectId('6699e3aa152a83f053781347'),
 'amenity_id': 'A1B0AD01-740C-41E7-8412-FBBEDD5F1443',
 'amenity_name': 'ATM/Cash Machine',
 'amenity_category': 'Convenience, Souvenirs and Supplies'}

In [9]:
# review a document in the amenities_places collection
db.amenities_places.find_one()

{'_id': ObjectId('6699e3aa152a83f053781379'),
 'amenity_id': 'A1B0AD01-740C-41E7-8412-FBBEDD5F1443',
 'amenity_name': 'ATM/Cash Machine',
 'park_code': 'badl',
 'park_name': 'Badlands National Park',
 'park_states': 'SD',
 'park_designation': 'National Park',
 'park_url': 'http://www.nps.gov/badl/'}

In [10]:
# Find all documents that don't have the designation of "National Park"
non_national_park_amenities = db['amenities_places'].find({'park_designation': {'$ne': 'National Park'}})

# Store the IDs of the deleted documents
non_national_park_amenity_ids = [doc['amenity_id'] for doc in non_national_park_amenities]

# Delete the documents from the amenities_places collection
result = db['amenities_places'].delete_many({'park_designation': {'$ne': 'National Park'}})
print(f"Deleted {result.deleted_count} documents from amenities_places collection.")

Deleted 1989 documents from amenities_places collection.


In [12]:
# Delete the corresponding documents in the amenities collection
if non_national_park_amenity_ids:
    result = db['amenities'].delete_many({'amenity_id': {'$in': non_national_park_amenity_ids}})
    print(f"Deleted {result.deleted_count} documents from amenities collection.")
else:
    print("No documents to delete from amenities collection.")


Deleted 47 documents from amenities collection.


In [14]:
# Count documents with the designation of "National Park" in the amenities_places collection
national_park_count = db['amenities_places'].count_documents({'park_designation': 'National Park'})
print(f"Number of documents with the designation 'National Park' in amenities_places collection: {national_park_count}")


Number of documents with the designation 'National Park' in amenities_places collection: 592
