## Database and Jupyter Notebook Set Up

In [22]:
# Import dependencies
from pymongo import MongoClient
from pprint import pprint
import json
import os

In [23]:
# Connect to MongoDB
client = MongoClient(port=27017)
db = client['national_parks_db']

In [24]:
# Function to load JSON data into MongoDB
def load_json_to_mongo(collection_name, json_file):
    with open(json_file) as file:
        data = json.load(file)
        collection = db[collection_name]
        if isinstance(data, list):
            collection.insert_many(data)
        else:
            collection.insert_one(data)

In [25]:
# Define JSON files and their corresponding collections with folder paths
json_files = {
    'activities_parks': 'NPS_Activities_Fees/activities_parks.json',
    'activities': 'NPS_Activities_Fees/activities.json',
    'feespasses': 'NPS_Activities_Fees/feespasses.json',
    'amenities': 'NPS Amenities Info/nps_amenities_data.json',
    'amenities_places': 'NPS Amenities Info/nps_amen_place_data.json',
    'parks_data': 'NPS Amenities Info/nps_parks_data.json'
}

# Verify that the JSON files exist
for json_file in json_files.values():
    if os.path.exists(json_file):
        print(f"{json_file} exists.")
    else:
        print(f"{json_file} does not exist.")

NPS_Activities_Fees/activities_parks.json exists.
NPS_Activities_Fees/activities.json exists.
NPS_Activities_Fees/feespasses.json exists.
NPS Amenities Info/nps_amenities_data.json exists.
NPS Amenities Info/nps_amen_place_data.json exists.
NPS Amenities Info/nps_parks_data.json exists.


In [26]:
# Load JSON files into MongoDB collections
for collection_name, json_file in json_files.items():
    if os.path.exists(json_file):
        load_json_to_mongo(collection_name, json_file)

print("Data loaded successfully!")

Data loaded successfully!


In [27]:
# List all collections in the database
collections = db.list_collection_names()
print("Collections in the database:")
for collection in collections:
    print(collection)


Collections in the database:
parks_data
activities_parks
activities
amenities_places
amenities
feespasses


In [28]:
# review a document in the amenities collection
db.amenities.find_one()

{'_id': ObjectId('6699cff3fad2f62aef761634'),
 'amenity_id': '0E2B9B01-9AC3-4AAF-A801-8027314BE863',
 'amenity_name': 'Airboat Launch',
 'amenity_category': 'Boating'}

In [29]:
# review a document in the amenities_places collection
db.amenities_places.find_one()

{'_id': ObjectId('6699cff3fad2f62aef761663'),
 'amenity_id': 'A1B0AD01-740C-41E7-8412-FBBEDD5F1443',
 'amenity_name': 'ATM/Cash Machine',
 'park_code': 'badl',
 'park_name': 'Badlands National Park',
 'park_states': 'SD',
 'park_designation': 'National Park',
 'park_url': 'http://www.nps.gov/badl/'}

In [30]:
# review a document in the parks_data collection
db.parks_data.find_one()

{'_id': ObjectId('6699cff3fad2f62aef762078'),
 'park_id': '77E0D7F0-1942-494A-ACE2-9004D2BDC59E',
 'park_url': 'https://www.nps.gov/abli/index.htm',
 'park_name': 'Abraham Lincoln Birthplace National Historical Park',
 'park_code': 'abli',
 'park_latitude': '37.5858662',
 'park_longitude': '-85.67330523',
 'park_designation': 'National Historical Park'}

In [31]:
# Find all documents that don't have the designation of "National Park" in the amenities_places collection
non_national_park_docs_amenities = db['amenities_places'].find({'park_designation': {'$ne': 'National Park'}})

# Store the IDs of the deleted documents from amenities_places collection
non_national_park_ids_amenities = [doc['amenity_id'] for doc in non_national_park_docs_amenities]

# Delete the documents from the amenities_places collection
result_amenities = db['amenities_places'].delete_many({'park_designation': {'$ne': 'National Park'}})
print(f"Deleted {result_amenities.deleted_count} documents from amenities_places collection.")

# Find all documents that don't have the designation of "National Park" in the parks_data collection
non_national_park_docs_parks = db['parks_data'].find({'park_designation': {'$ne': 'National Park'}})

# Store the IDs of the deleted documents from parks_data collection
non_national_park_ids_parks = [doc['park_id'] for doc in non_national_park_docs_parks]

# Delete the documents from the parks_data collection
result_parks = db['parks_data'].delete_many({'park_designation': {'$ne': 'National Park'}})
print(f"Deleted {result_parks.deleted_count} documents from parks_data collection.")

Deleted 1989 documents from amenities_places collection.
Deleted 88 documents from parks_data collection.


In [32]:
# Delete the corresponding documents in the amenities collection
if non_national_park_ids:
    result = db['amenities'].delete_many({'amenity_id': {'$in': non_national_park_ids}})
    print(f"Deleted {result.deleted_count} documents from amenities collection.")
else:
    print("No documents to delete from amenities collection.")


Deleted 47 documents from amenities collection.


In [36]:
# Count the documents with the designation of "National Park" present in all three collections: amenities, amenities_places, and parks_data
count_amenities = db['amenities'].count_documents({})
count_amenities_places = db['amenities_places'].count_documents({'park_designation': 'National Park'})
count_parks_data = db['parks_data'].count_documents({'park_designation': 'National Park'})

print(f"Number of documents in the amenities collection: {count_amenities}")
print(f"Number of documents with the designation 'National Park' in the amenities_places collection: {count_amenities_places}")
print(f"Number of documents with the designation 'National Park' in the parks_data collection: {count_parks_data}")

Number of documents in the amenities collection: 6
Number of documents with the designation 'National Park' in the amenities_places collection: 1184
Number of documents with the designation 'National Park' in the parks_data collection: 12
