## Database and Jupyter Notebook Setup

In [9]:
# Import dependencies
from pymongo import MongoClient
import csv

In [10]:
# Open a client on port 27017 (no host is passed, so pymongo's default host applies)
# and take a handle to the project database.
client = MongoClient(port=27017)
db = client.get_database('national_parks_db')

In [11]:
# Start from a clean slate: remove 'national_parks_db' if an earlier run left it behind,
# then report that the drop call completed.
client.drop_database(name_or_database='national_parks_db')
print("Database 'national_parks_db' has been deleted.")

Database 'national_parks_db' has been deleted.


In [12]:
# Take a fresh handle to 'national_parks_db' now that the old copy has been dropped.
# The existing `client` is reused instead of constructing a second MongoClient:
# a new client would leave the first one's connections open without ever closing them.
# MongoDB creates the database itself when the first document is inserted.
db = client['national_parks_db']

In [13]:
# Handles for the two collections this notebook populates.
# MongoDB creates a collection lazily on its first insert, so nothing is written here.
nps_amenities_activities = db.get_collection('Activities_and_Amenities_Collection')
nps_entrance_fees = db.get_collection('Entrance_Fees_Collection')


In [14]:
# Load the amenities/activities CSV into its collection.
# newline='' is the mode the csv module documentation requires for files handed to a
# reader, so that line breaks embedded in quoted fields are parsed correctly.
# NOTE(review): no explicit encoding is set, so the platform default is used — confirm
# the CSV's encoding if it contains non-ASCII text.
with open('../Transform/transformed_nps_csv_files/nps_amenities_activities_data.csv', 'r', newline='') as file:
    # DictReader yields one dict per row keyed by the header line; every value is a string.
    rows = list(csv.DictReader(file))

# One bulk insert instead of a server round-trip per row. insert_many rejects an
# empty list, so skip the call when the CSV has no data rows.
if rows:
    nps_amenities_activities.insert_many(rows)

# Confirm the data has been inserted by printing the first document
print(nps_amenities_activities.find_one())

{'_id': ObjectId('669f10e8236df02381add5bb'), 'park_code': 'acad', 'park_states': 'ME', 'park_id': '6DA17C86-088E-4B4D-B862-7C1BD5CF236B', 'park_name': 'Acadia National Park', 'amenity_id': '4E4D076A-6866-46C8-A28B-A129E2B8F3DB', 'amenity_name': 'Accessible Rooms', 'activity_id': '', 'activity_name': '', 'park_designation': 'National Park', 'park_url': 'http://www.nps.gov/acad/', 'park_latitude': '44.409286', 'park_longitude': '-68.247501', 'min_summer_temp': '51.1', 'max_summer_temp': '72.9', 'min_winter_temp': '17.6', 'max_winter_temp': '29.2', 'min_spring_temp': '8.6', 'max_spring_temp': '27.8', 'min_fall_temp': '46.5', 'max_fall_temp': '62.2'}


In [15]:
# Load the entrance-fees CSV into its collection.
# newline='' is the mode the csv module documentation requires for files handed to a
# reader, so that line breaks embedded in quoted fields (e.g. long descriptions) are
# parsed correctly.
# NOTE(review): no explicit encoding is set, so the platform default is used — confirm
# the CSV's encoding if it contains non-ASCII text.
with open('../Transform/transformed_nps_csv_files/nps_entrance_fees.csv', 'r', newline='') as file:
    # DictReader yields one dict per row keyed by the header line; every value is a string.
    rows = list(csv.DictReader(file))

# One bulk insert instead of a server round-trip per row. insert_many rejects an
# empty list, so skip the call when the CSV has no data rows.
if rows:
    nps_entrance_fees.insert_many(rows)

# Confirm the data has been inserted by printing the first document
print(nps_entrance_fees.find_one())

{'_id': ObjectId('669f10eb236df02381ade00a'), 'park_code': 'acad', 'total_fees': '91.0', 'average_fees': '13.0', 'pass_fee': '70.0', 'free_park': 'False', 'entrance_pass_description': 'A park entrance pass is required year-round. All vehicles must display a pass clearly visible through the windshield. Display federal lands passes, such as annual and military passes, on your dashboard with the signature and expiration date facing up. Passes are non-transferable and passholder must be present.', 'entrance_fee_description': 'A park entrance pass is required year-round at Acadia National Park. All vehicles must display a pass clearly visible through the windshield. Display federal lands passes, such as annual and military passes, on your dashboard with the signature and expiration date facing up. Passes are non-transferable and passholder must be present. A separate vehicle reservation is also required to drive Cadillac Summit Road from mid-May into October. Vehicle reservations are only s

In [16]:
# Show the name of every collection currently in the database, one per line,
# under a header line. The names are fetched first so a failed lookup prints nothing.
collections = db.list_collection_names()
print("Collections in the database:", *collections, sep="\n")


Collections in the database:
Entrance_Fees_Collection
Activities_and_Amenities_Collection


In [17]:
# Display one document from the activities/amenities collection as a sanity check
db['Activities_and_Amenities_Collection'].find_one()

{'_id': ObjectId('669f10e8236df02381add5bb'),
 'park_code': 'acad',
 'park_states': 'ME',
 'park_id': '6DA17C86-088E-4B4D-B862-7C1BD5CF236B',
 'park_name': 'Acadia National Park',
 'amenity_id': '4E4D076A-6866-46C8-A28B-A129E2B8F3DB',
 'amenity_name': 'Accessible Rooms',
 'activity_id': '',
 'activity_name': '',
 'park_designation': 'National Park',
 'park_url': 'http://www.nps.gov/acad/',
 'park_latitude': '44.409286',
 'park_longitude': '-68.247501',
 'min_summer_temp': '51.1',
 'max_summer_temp': '72.9',
 'min_winter_temp': '17.6',
 'max_winter_temp': '29.2',
 'min_spring_temp': '8.6',
 'max_spring_temp': '27.8',
 'min_fall_temp': '46.5',
 'max_fall_temp': '62.2'}

In [18]:
# Display one document from the entrance-fees collection as a sanity check
db['Entrance_Fees_Collection'].find_one()

{'_id': ObjectId('669f10eb236df02381ade00a'),
 'park_code': 'acad',
 'total_fees': '91.0',
 'average_fees': '13.0',
 'pass_fee': '70.0',
 'free_park': 'False',
 'entrance_pass_description': 'A park entrance pass is required year-round. All vehicles must display a pass clearly visible through the windshield. Display federal lands passes, such as annual and military passes, on your dashboard with the signature and expiration date facing up. Passes are non-transferable and passholder must be present.',
 'entrance_fee_description': 'A park entrance pass is required year-round at Acadia National Park. All vehicles must display a pass clearly visible through the windshield. Display federal lands passes, such as annual and military passes, on your dashboard with the signature and expiration date facing up. Passes are non-transferable and passholder must be present. A separate vehicle reservation is also required to drive Cadillac Summit Road from mid-May into October. Vehicle reservations are

In [19]:
# Count every document in each of the two collections.
# The empty filter {} matches all documents, so these are collection totals;
# no filtering by park designation happens here.
count_amen_act_data = db['Activities_and_Amenities_Collection'].count_documents({})
count_entrance_fee_data = db['Entrance_Fees_Collection'].count_documents({})

# Label each total with the collection it was actually counted from.
print(f"Number of documents in the Activities_and_Amenities_Collection: {count_amen_act_data}")
print(f"Number of documents in the Entrance_Fees_Collection: {count_entrance_fee_data}")


Number of documents in the national parks collection: 2639
Number of documents with the designation 'National Park' in the national parks collection: 51


### Time to add some data analysis. The tutor suggested using the Seaborn library in place of Matplotlib (since we need a library we haven't used before in class) and said the two work nearly identically. The plan is to use it to generate descriptive-statistics visualizations (pie charts, bar graphs, etc.). Thoughts?