## Database and Jupyter Notebook Set Up

In [36]:
# Import dependencies
from pymongo import MongoClient
from pprint import pprint
import json
import os

In [37]:
# Connect to MongoDB
client = MongoClient(port=27017)
db = client['national_parks_db']

In [39]:
# Drop the 'national_parks_db' database 
client.drop_database('national_parks_db') 

print("Database 'national_parks_db' has been deleted.")

Database 'national_parks_db' has been deleted.


In [40]:
# Connect to MongoDB
client = MongoClient(port=27017)
db = client['national_parks_db']

In [41]:
# Function to load JSON data into MongoDB
def load_json_to_mongo(collection_name, json_file):
    with open(json_file) as file:
        data = json.load(file)
        collection = db[collection_name]
        if isinstance(data, list):
            collection.insert_many(data)
        else:
            collection.insert_one(data)

In [42]:
# Define JSON files and their corresponding collections with folder paths
json_files = {
    'activities_parks': 'NPS_Activities_Fees/activities_parks.json',
    'activities': 'NPS_Activities_Fees/activities.json',
    'feespasses': 'NPS_Activities_Fees/feespasses.json',
    'amenities': 'NPS Amenities Info/nps_amenities_data.json',
    'amenities_places': 'NPS Amenities Info/nps_amen_place_data.json',
    'parks_data': 'NPS Amenities Info/nps_parks_data.json',
    'places_data': 'NPS Amenities Info/nps_places_data.json'
}

# Verify that the JSON files exist
for json_file in json_files.values():
    if os.path.exists(json_file):
        print(f"{json_file} exists.")
    else:
        print(f"{json_file} does not exist.")

NPS_Activities_Fees/activities_parks.json exists.
NPS_Activities_Fees/activities.json exists.
NPS_Activities_Fees/feespasses.json exists.
NPS Amenities Info/nps_amenities_data.json exists.
NPS Amenities Info/nps_amen_place_data.json exists.
NPS Amenities Info/nps_parks_data.json exists.
NPS Amenities Info/nps_places_data.json exists.


In [43]:
# Load JSON files into MongoDB collections
for collection_name, json_file in json_files.items():
    if os.path.exists(json_file):
        load_json_to_mongo(collection_name, json_file)

print("Data loaded successfully!")

Data loaded successfully!


In [44]:
# List all collections in the database
collections = db.list_collection_names()
print("Collections in the database:")
for collection in collections:
    print(collection)


Collections in the database:
parks_data
amenities
places_data
feespasses
activities
activities_parks
amenities_places


In [45]:
# review a document in the amenities collection
db.amenities.find_one()

{'_id': ObjectId('6699e9885c978184f6de2366'),
 'amenity_id': 'A1B0AD01-740C-41E7-8412-FBBEDD5F1443',
 'amenity_name': 'ATM/Cash Machine',
 'amenity_category': 'Convenience, Souvenirs and Supplies'}

In [46]:
# review a document in the amenities_places collection
db.amenities_places.find_one()

{'_id': ObjectId('6699e9885c978184f6de2398'),
 'amenity_id': 'A1B0AD01-740C-41E7-8412-FBBEDD5F1443',
 'amenity_name': 'ATM/Cash Machine',
 'park_code': 'badl',
 'park_name': 'Badlands National Park',
 'park_states': 'SD',
 'park_designation': 'National Park',
 'park_url': 'http://www.nps.gov/badl/'}

In [47]:
# review a document in the parks_data collection
db.parks_data.find_one()

{'_id': ObjectId('6699e9885c978184f6de2dad'),
 'park_id': '77E0D7F0-1942-494A-ACE2-9004D2BDC59E',
 'park_url': 'https://www.nps.gov/abli/index.htm',
 'park_name': 'Abraham Lincoln Birthplace National Historical Park',
 'park_code': 'abli',
 'park_latitude': '37.5858662',
 'park_longitude': '-85.67330523',
 'park_designation': 'National Historical Park'}

In [48]:
# review a document in the parks_data collection
db.places_data.find_one()

{'_id': ObjectId('6699e9885c978184f6de2ddf'),
 'place_id': '869014EF-C785-42B5-9F7E-891AACBE7ED4',
 'park_url': 'https://www.nps.gov/bost/index.htm',
 'park_name': 'Boston National Historical Park',
 'park_code': 'bost',
 'park_latitude': '42.37399765017269',
 'park_longitude': '-71.05610489845276',
 'park_designation': 'National Historical Park'}

In [49]:
# Find all documents that don't have the designation of "National Park" in the amenities_places collection
non_national_park_docs_amenities = db['amenities_places'].find({'park_designation': {'$ne': 'National Park'}})

# Store the IDs of the deleted documents from amenities_places collection
non_national_park_ids_amenities = [doc['amenity_id'] for doc in non_national_park_docs_amenities]

# Delete the documents from the amenities_places collection
result_amenities = db['amenities_places'].delete_many({'park_designation': {'$ne': 'National Park'}})
print(f"Deleted {result_amenities.deleted_count} documents from amenities_places collection.")

Deleted 1989 documents from amenities_places collection.


In [50]:
# Find all documents that don't have the designation of "National Park" in the parks_data collection
non_national_park_docs_parks = db['parks_data'].find({'park_designation': {'$ne': 'National Park'}})

# Store the IDs of the deleted documents from parks_data collection
non_national_park_ids_parks = [doc['park_id'] for doc in non_national_park_docs_parks]

# Delete the documents from the parks_data collection
result_parks = db['parks_data'].delete_many({'park_designation': {'$ne': 'National Park'}})
print(f"Deleted {result_parks.deleted_count} documents from parks_data collection.")

Deleted 44 documents from parks_data collection.


In [51]:
# Find all documents that don't have the designation of "National Park" in the places collection
non_national_park_docs_places = db['places_data'].find({'park_designation': {'$ne': 'National Park'}})

# Store the IDs of the deleted documents from places collection
non_national_park_ids_places = [doc.get('place_id') for doc in non_national_park_docs_places if 'place_id' in doc]

# Delete the documents from the places collection
result_places = db['places_data'].delete_many({'park_designation': {'$ne': 'National Park'}})
print(f"Deleted {result_places.deleted_count} documents from places_data collection.")

Deleted 38 documents from places_data collection.


In [53]:
# Delete the corresponding documents in the amenities collection
if non_national_park_ids_amenities:
    result = db['amenities'].delete_many({'amenity_id': {'$in': non_national_park_ids_amenities}})
    print(f"Deleted {result.deleted_count} documents from amenities collection.")
else:
    print("No documents to delete from amenities collection.")


Deleted 47 documents from amenities collection.


In [54]:
# Count the documents with the designation of "National Park" present in all three collections: amenities, amenities_places, and parks_data
count_amenities = db['amenities'].count_documents({})
count_amenities_places = db['amenities_places'].count_documents({'park_designation': 'National Park'})
count_parks_data = db['parks_data'].count_documents({'park_designation': 'National Park'})
count_places_data = db['places_data'].count_documents({'park_designation': 'National Park'})

print(f"Number of documents in the amenities collection: {count_amenities}")
print(f"Number of documents with the designation 'National Park' in the amenities_places collection: {count_amenities_places}")
print(f"Number of documents with the designation 'National Park' in the parks_data collection: {count_parks_data}")
print(f"Number of documents with the designation 'National Park' in the places_data collection: {count_places_data}")

Number of documents in the amenities collection: 3
Number of documents with the designation 'National Park' in the amenities_places collection: 592
Number of documents with the designation 'National Park' in the parks_data collection: 6
Number of documents with the designation 'National Park' in the places_data collection: 12
