## Database and Jupyter Notebook Setup

In [14]:
# Import dependencies
from pymongo import MongoClient
import pandas as pd
from dotenv import load_dotenv
from pathlib import Path
import os

# Read environment variables via python-dotenv; load_dotenv() hands back a
# flag, which is echoed below as a quick sanity check
dotenv_loaded = load_dotenv()
print("dotenv loaded:", dotenv_loaded)

dotenv loaded: True


In [15]:
# Connect before dropping: on a fresh kernel (Restart & Run All) no `client`
# exists yet when this cell runs, so using it here would raise NameError.
client = MongoClient(port=27017)

# Drop any previously existing 'national_parks_db' database so the load
# that follows starts from a clean slate
client.drop_database('national_parks_db')

print("Database 'national_parks_db' has been deleted.")

Database 'national_parks_db' has been deleted.


In [16]:
# Open a client on port 27017 (host left at PyMongo's default) and take a
# handle to the national parks database
client = MongoClient(port=27017)
db = client.get_database('national_parks_db')

In [17]:
# Define the directory path for the CSV files that are loaded into MongoDB.
# The path is relative, so it is resolved against the notebook's working
# directory at run time.
directory_path = Path("../Transform/separate_nps_csv_files")

# Function to read CSV and insert into MongoDB
def update_collection_from_csv(db, collection_name, csv_path):
    """Replace the contents of a MongoDB collection with the rows of a CSV file.

    Parameters
    ----------
    db
        Database handle; only ``db[collection_name]`` is used.
    collection_name : str
        Name of the collection to refresh.
    csv_path : str or path-like
        Location of the CSV file, passed straight to ``pandas.read_csv``.

    Returns
    -------
    None

    Notes
    -----
    The CSV is read *before* the collection is cleared, so a missing or
    unreadable file leaves the existing documents untouched. If the CSV has
    a header but no data rows, the collection is simply left empty.
    """
    df = pd.read_csv(csv_path)
    records = df.to_dict('records')

    collection = db[collection_name]
    collection.delete_many({})  # Clear existing data

    # PyMongo's insert_many rejects an empty document list, so skip the
    # insert entirely when the CSV contributed no rows.
    if records:
        collection.insert_many(records)


In [18]:
# Refresh every collection from its CSV export (collection name -> file name);
# dict order keeps the loads in the same sequence as before
csv_by_collection = {
    'Amenities_Collection': 'nps_separate_amenities_data.csv',
    'Activities_Collection': 'nps_separate_activities_data.csv',
    'Activities_and_Amenities_Collection': 'nps_separate_amenities_activities_data.csv',
    'Entrance_Fees_Collection': 'nps_separate_entrance_fees.csv',
}
for target_collection, csv_filename in csv_by_collection.items():
    update_collection_from_csv(db, target_collection, directory_path / csv_filename)


In [19]:
# Sanity check: show one document from each collection that was just loaded
for loaded_name in (
    'Amenities_Collection',
    'Activities_Collection',
    'Activities_and_Amenities_Collection',
    'Entrance_Fees_Collection',
):
    print(f"First document in {loaded_name}:", db[loaded_name].find_one())


First document in Amenities_Collection: {'_id': ObjectId('66a05eae9fe5260c453ff856'), 'amenity_id': 'A1B0AD01-740C-41E7-8412-FBBEDD5F1443', 'amenity_name': 'ATM/Cash Machine', 'park_code': 'badl', 'park_states': 'SD', 'park_id': 'B170CCF7-7AB9-48FF-950E-31815FD4DBB2', 'park_latitude': 43.68584846, 'park_longitude': -102.482942, 'park_name': 'Badlands National Park', 'park_designation': 'National Park', 'park_url': 'http://www.nps.gov/badl/', 'min_summer_temp': 56.2, 'max_summer_temp': 79.2, 'min_winter_temp': 18.0, 'max_winter_temp': 25.9, 'min_spring_temp': 12.5, 'max_spring_temp': 41.4, 'min_fall_temp': 46.9, 'max_fall_temp': 60.8}
First document in Activities_Collection: {'_id': ObjectId('66a05eae9fe5260c453fffa3'), 'activity_id': '09DF0950-D319-4557-A57E-04CD2F63FF42', 'activity_name': 'Arts and Culture', 'park_code': 'acad', 'park_states': 'ME', 'park_id': '6DA17C86-088E-4B4D-B862-7C1BD5CF236B', 'park_latitude': 44.409286, 'park_longitude': -68.247501, 'park_designation': 'Nationa

In [20]:
# Ask the server which collections now exist and print them one per line
collections = db.list_collection_names()
print("Collections in the database:")
for collection_name in collections:
    print(collection_name)

# Closing status message for the load step
print("MongoDB collections have been updated with the new data.")


Collections in the database:
Entrance_Fees_Collection
Amenities_Collection
Activities_Collection
Activities_and_Amenities_Collection
MongoDB collections have been updated with the new data.
