In [59]:
# Import dependencies.
import ast
import json
from pathlib import Path
from pprint import pprint

import pandas as pd
import requests

In [60]:
# path to source files
activities = Path("Resources/Uncleaned/activities_uncleaned.csv")
fees_passes = Path("Resources/Uncleaned/fees_passes_uncleaned.csv")
park_activities = Path("Resources/Uncleaned/park_activities_uncleaned.csv")
parks_list = Path("Resources/Uncleaned/parks_list_uncleaned.csv")
visitors_center = Path("Resources/Uncleaned/visitor_centers_uncleaned.csv")

# read CSVs into DataFrames
activities_df = pd.read_csv(activities)
fees_passes_df = pd.read_csv(fees_passes)
park_activities_df = pd.read_csv(park_activities)
parks_list_df = pd.read_csv(parks_list)
visitors_center_csv = pd.read_csv(visitors_center)

----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

# Visitor's Center Dataset

## After deliberating as a group, we decided not to include locations of Visitors' Centers in our visualization, as we wanted to keep our scope small given the amount of time we had.

In [61]:
# Preview the first rows of the visitor-centers data (a DataFrame, despite the "_csv" suffix).
visitors_center_csv.head()

Unnamed: 0,id,url,name,parkCode,description,latitude,longitude,latLong,audioDescription,isPassportStampLocation,...,directionsInfo,directionsUrl,operatingHours,addresses,images,multimedia,relevanceScore,lastIndexedDate,contacts.phoneNumbers,contacts.emailAddresses
0,6DC6F87E-5D33-469F-8C9C-6D821186633A,,A.G. Gaston Motel,bicr,The A.G. Gaston Motel served as the headquarte...,33.51391,-86.815488,"{lat:33.51391047876354, lng:-86.81548833847046}",,1,...,Birmingham Civil Rights National Monument enco...,,"[{'exceptions': [], 'description': 'The Nation...",[],[{'credit': 'Alabama Tourism Department / Chri...,[],1.0,,[],"[{'description': '', 'emailAddress': 'bicr_inf..."
1,CFBC5D4E-1ADE-42DD-ACC1-D142BB2BD595,http://www.nps.gov/sapu/planyourvisit/hours.htm,Abó,sapu,The Abó Visitor Center includes a bookstore an...,34.450141,-106.374484,"{lat:34.450141091, lng:-106.374483505}","When you drive up NM 513 for 0.75 miles, you w...",1,...,"Located 9 miles west of Mountainair, off Highw...",,[{'exceptions': [{'exceptionHours': {'wednesda...,"[{'postalCode': '87036', 'city': 'Mountainair'...",[{'credit': 'NPS Photo by Park Guide Alex Arno...,[],1.0,,"[{'phoneNumber': '5058472400', 'description': ...","[{'description': '', 'emailAddress': 'sapu_int..."
2,179B5CF5-0117-4AEA-A50D-BB49BACC7AD8,https://www.nps.gov/jela/planyourvisit/new-aca...,Acadian Cultural Center,jela,"Permanent and special exhibits, an award-winni...",30.213396,-91.994157,"{lat:30.21339638288027, lng:-91.99415688831328}",The visitor center at the Acadian Cultural Cen...,1,...,The Acadian Cultural Center is located south o...,http://www.nps.gov/jela/planyourvisit/maps.htm,[{'exceptions': [{'exceptionHours': {'wednesda...,"[{'postalCode': '70508', 'city': 'Lafayette', ...","[{'credit': 'NPS', 'crops': [], 'title': 'ACC ...",[],1.0,,"[{'phoneNumber': '3372320789', 'description': ...","[{'description': '', 'emailAddress': 'jela_int..."
3,8BCDFDF2-1757-4E6D-9616-868AAAB34289,https://www.nps.gov/adam/planyourvisit/visitor...,Adams National Historical Park Visitor Center,adam,The Adams National Historical Park Visitor Cen...,42.252183,-71.003598,"{lat:42.252183001, lng:-71.003597999}",The Adams National Historical Park Visitor Cen...,1,...,"Traveling on U.S. Interstate 93, take exit 7 -...",http://www.nps.gov/adam/planyourvisit/directio...,"[{'exceptions': [{'exceptionHours': {}, 'start...","[{'postalCode': '02169', 'city': 'Quincy', 'st...","[{'credit': 'NPS Image', 'crops': [], 'title':...",[],1.0,,"[{'phoneNumber': '6177701175', 'description': ...","[{'description': '', 'emailAddress': 'adam_vis..."
4,D0B53518-9750-4722-9D6D-E5320C21CB5C,,Administration Clock Tower Building Visitor Ce...,pull,Start your visit at the Administration Clock T...,41.693768,-87.607447,"{lat:41.69376838696189, lng:-87.6074468156807}",An all red brick building about four stories t...,1,...,The Administration Clock Tower Building Visito...,,[{'exceptions': [{'exceptionHours': {'wednesda...,"[{'postalCode': '60628', 'city': 'Chicago', 's...","[{'credit': 'NPS Photo', 'crops': [], 'title':...",[],1.0,,"[{'phoneNumber': '(773) 468-9310', 'descriptio...",[{'description': 'Email will be answered withi...


---

# Parks Activities Dataset

In [62]:
# Preview the first rows: one row per activity, with a "parks" column listing the parks that offer it.
park_activities_df.head()

Unnamed: 0,id,name,parks
0,09DF0950-D319-4557-A57E-04CD2F63FF42,Arts and Culture,"[{'states': 'ME', 'parkCode': 'acad', 'designa..."
1,13A57703-BB1A-41A2-94B8-53B692EB7238,Astronomy,"[{'states': 'KY', 'parkCode': 'abli', 'designa..."
2,5F723BAD-7359-48FC-98FA-631592256E35,Auto and ATV,"[{'states': 'HI', 'parkCode': 'alka', 'designa..."
3,7CE6E935-F839-4FEC-A63E-052B1DEF39D2,Biking,"[{'states': 'ME', 'parkCode': 'acad', 'designa..."
4,071BA73C-1D3C-46D4-A53C-00D5602F7F0E,Boating,"[{'states': 'ME', 'parkCode': 'acad', 'designa..."


### During the beginning of our EDA process, we realized this dataset was not necessary for our purposes. While the "Parks List" dataset is a comprehensive document containing a wealth of information about all national parks in the U.S., the "Park Activities" file has each of the available activities, followed by a list of every single park that has those activities. We found this less than helpful. 

----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

# Activities Dataset

In [63]:
# Display the whole activities DataFrame (not just .head()) on purpose, so the
# complete official list of NPS activity names and ids is visible for reference.
activities_df

Unnamed: 0,id,name
0,09DF0950-D319-4557-A57E-04CD2F63FF42,Arts and Culture
1,13A57703-BB1A-41A2-94B8-53B692EB7238,Astronomy
2,5F723BAD-7359-48FC-98FA-631592256E35,Auto and ATV
3,7CE6E935-F839-4FEC-A63E-052B1DEF39D2,Biking
4,071BA73C-1D3C-46D4-A53C-00D5602F7F0E,Boating
5,A59947B7-3376-49B4-AD02-C0423E08C5F7,Camping
6,07CBCA6A-46B8-413F-8B6C-ABEDEBF9853E,Canyoneering
7,BA316D0F-92AE-4E00-8C80-DBD605DC58C3,Caving
8,B12FAAB9-713F-4B38-83E4-A273F5A43C77,Climbing
9,C11D3746-5063-4BD0-B245-7178D1AD866C,Compass and GPS


### We will most likely be selecting just a top few activities for inclusion on our main dashboard.

### The top activities we are considering right now are: Hiking, Climbing, Camping, Wildlife Watching, Playground, Museum Exhibits, Astronomy, Biking, Boating, Paddling, Canyoneering

----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

# Fees and Passes Dataset

In [64]:
# Preview the first rows of the fees/passes data to decide which columns are worth keeping.
fees_passes_df.head()

Unnamed: 0,parkCode,isFeeFreePark,isInteragencyPassAccepted,cashless,feesAtWorkUrl,entranceFeeDescription,entrancePassDescription,isParkingFeePossible,parkingDetailsUrl,fees,passes
0,abli,True,False,No,,Abraham Lincoln Birthplace National Historical...,Abraham Lincoln Birthplace National Historical...,False,,[],[]
1,acad,False,True,Depends on Location,https://www.nps.gov/acad/planyourvisit/fees.htm,A park entrance pass is required year-round at...,A park entrance pass is required year-round. A...,False,,[{'entranceFeeType': 'Timed Entry Reservation ...,"[{'exceptions': '', 'payGovPurchaseUrl': '', '..."
2,adam,False,True,Yes,https://www.nps.gov/adam/learn/management/your...,Entrance into the historic homes at Adams Nati...,Adams National Historical Park has a digital a...,False,,[{'entranceFeeType': 'Timed Entry Reservation ...,"[{'exceptions': '', 'payGovPurchaseUrl': '', '..."
3,afam,True,False,Depends on Location,,,,True,https://www.nps.gov/nama/planyourvisit/parking...,[],[]
4,afbg,True,False,,,,,False,,[],[]


### For this dataset, we decided to drop everything except the first three columns. The first contains our primary key, `parkCode`, which we will use to join our DataFrames. The other two contain Boolean values that tell us whether or not a park is fee-free, and whether or not it accepts an Interagency Pass.

In [65]:
# Keep only the join key (parkCode) and the two Boolean fee/pass flags.
fee_columns = ['parkCode', 'isFeeFreePark', 'isInteragencyPassAccepted']
fee_passes_cleaned_df = fees_passes_df.loc[:, fee_columns]
fee_passes_cleaned_df.head()

Unnamed: 0,parkCode,isFeeFreePark,isInteragencyPassAccepted
0,abli,True,False
1,acad,False,True
2,adam,False,True
3,afam,True,False
4,afbg,True,False


### Fees DataFrame will be merged with the Parks List DataFrame

----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

# Parks List Dataset

### List columns in the dataset for easy reference, since we are dropping many

In [66]:
# List every column name in the raw parks list before choosing which ones to drop.
parks_list_df.columns

Index(['id', 'url', 'fullName', 'parkCode', 'description', 'latitude',
       'longitude', 'latLong', 'activities', 'topics', 'states',
       'entranceFees', 'entrancePasses', 'fees', 'directionsInfo',
       'directionsUrl', 'operatingHours', 'addresses', 'images', 'weatherInfo',
       'name', 'designation', 'relevanceScore', 'contacts.phoneNumbers',
       'contacts.emailAddresses'],
      dtype='object')

### For parks_list: drop 'contacts.phoneNumbers', 'contacts.emailAddresses', 'relevanceScore', 'name', 'designation', 'weatherInfo', 'operatingHours', 'directionsUrl', 'directionsInfo', 'fees', 'entrancePasses', 'entranceFees', 'topics', 'latLong', and 'id'. The 'states' column is kept.

In [67]:
# Columns removed from the parks list; every column not named here is kept.
unused_columns = [
    'contacts.phoneNumbers',
    'contacts.emailAddresses',
    'relevanceScore',
    'name',
    'designation',
    'weatherInfo',
    'operatingHours',
    'directionsUrl',
    'directionsInfo',
    'fees',
    'entranceFees',
    'entrancePasses',
    'topics',
    'latLong',
    'id',
]
parks_list_cleaned_df = parks_list_df.drop(columns=unused_columns)
parks_list_cleaned_df.columns

Index(['url', 'fullName', 'parkCode', 'description', 'latitude', 'longitude',
       'activities', 'states', 'addresses', 'images'],
      dtype='object')

In [68]:
# Preview the trimmed parks list; "activities" still holds the raw stringified list of records here.
parks_list_cleaned_df.head()

Unnamed: 0,url,fullName,parkCode,description,latitude,longitude,activities,states,addresses,images
0,https://www.nps.gov/abli/index.htm,Abraham Lincoln Birthplace National Historical...,abli,For over a century people from around the worl...,37.585866,-85.673305,[{'id': '13A57703-BB1A-41A2-94B8-53B692EB7238'...,KY,"[{'postalCode': '42748', 'city': 'Hodgenville'...","[{'credit': 'NPS Photo', 'title': 'The Memoria..."
1,https://www.nps.gov/acad/index.htm,Acadia National Park,acad,Acadia National Park protects the natural beau...,44.409286,-68.247501,[{'id': '09DF0950-D319-4557-A57E-04CD2F63FF42'...,ME,"[{'postalCode': '04609', 'city': 'Bar Harbor',...","[{'credit': 'NPS / Kristi Rugg', 'title': ""Aca..."
2,https://www.nps.gov/adam/index.htm,Adams National Historical Park,adam,From the sweet little farm at the foot of Penn...,42.255396,-71.011604,[{'id': 'B33DC9B6-0B7D-4322-BAD7-A13A34C584A3'...,MA,"[{'postalCode': '02169', 'city': 'Quincy', 'st...","[{'credit': 'NPS Photo', 'title': 'The John an..."
3,https://www.nps.gov/afam/index.htm,African American Civil War Memorial,afam,"Over 200,000 African-American soldiers and sai...",38.9166,-77.026,[{'id': 'B33DC9B6-0B7D-4322-BAD7-A13A34C584A3'...,DC,"[{'postalCode': '20001', 'city': 'Washington',...","[{'credit': 'NPS Photo', 'title': 'African Ame..."
4,https://www.nps.gov/afbg/index.htm,African Burial Ground National Monument,afbg,African Burial Ground is the oldest and larges...,40.714527,-74.004474,[{'id': '09DF0950-D319-4557-A57E-04CD2F63FF42'...,NY,"[{'postalCode': '10007', 'city': 'New York', '...","[{'credit': 'NPS Photo', 'title': 'African Bur..."


### Parse out activity names from "activities" column.

In [69]:
# Extract the names of each activity and place them back in the same cell
parks_list_cleaned_df['activities'] = parks_list_cleaned_df['activities'].apply(lambda x: ', '.join([activity['name'] for activity in json.loads(x.replace("'", "\""))]) if isinstance(x, str) else x)

parks_list_cleaned_df.head(10)

Unnamed: 0,url,fullName,parkCode,description,latitude,longitude,activities,states,addresses,images
0,https://www.nps.gov/abli/index.htm,Abraham Lincoln Birthplace National Historical...,abli,For over a century people from around the worl...,37.585866,-85.673305,"Astronomy, Stargazing, Food, Picnicking, Guide...",KY,"[{'postalCode': '42748', 'city': 'Hodgenville'...","[{'credit': 'NPS Photo', 'title': 'The Memoria..."
1,https://www.nps.gov/acad/index.htm,Acadia National Park,acad,Acadia National Park protects the natural beau...,44.409286,-68.247501,"Arts and Culture, Cultural Demonstrations, Ast...",ME,"[{'postalCode': '04609', 'city': 'Bar Harbor',...","[{'credit': 'NPS / Kristi Rugg', 'title': ""Aca..."
2,https://www.nps.gov/adam/index.htm,Adams National Historical Park,adam,From the sweet little farm at the foot of Penn...,42.255396,-71.011604,"Guided Tours, Living History, First Person Int...",MA,"[{'postalCode': '02169', 'city': 'Quincy', 'st...","[{'credit': 'NPS Photo', 'title': 'The John an..."
3,https://www.nps.gov/afam/index.htm,African American Civil War Memorial,afam,"Over 200,000 African-American soldiers and sai...",38.9166,-77.026,"Guided Tours, Self-Guided Tours - Walking",DC,"[{'postalCode': '20001', 'city': 'Washington',...","[{'credit': 'NPS Photo', 'title': 'African Ame..."
4,https://www.nps.gov/afbg/index.htm,African Burial Ground National Monument,afbg,African Burial Ground is the oldest and larges...,40.714527,-74.004474,"Arts and Culture, Guided Tours, Junior Ranger ...",NY,"[{'postalCode': '10007', 'city': 'New York', '...","[{'credit': 'NPS Photo', 'title': 'African Bur..."
5,https://www.nps.gov/agfo/index.htm,Agate Fossil Beds National Monument,agfo,"In the early 1900s, paleontologists unearthed ...",42.421704,-103.753886,"Arts and Culture, Cultural Demonstrations, Ast...",NE,"[{'postalCode': '69346', 'city': 'Harrison', '...","[{'credit': 'NPS Photo', 'title': 'Visitor Cen..."
6,https://www.nps.gov/alka/index.htm,Ala Kahakai National Historic Trail,alka,"Established in 2000 to preserve, protect and i...",19.144668,-155.890734,"Arts and Culture, Cultural Demonstrations, Aut...",HI,"[{'postalCode': '96740', 'city': 'Kailua-Kona'...","[{'credit': 'NPS Photo', 'title': 'Waiulaula B..."
7,https://www.nps.gov/alag/index.htm,Alagnak Wild River,alag,The headwaters of Alagnak Wild River lie withi...,59.051802,-156.112002,"Boating, Camping, Backcountry Camping, Fishing...",AK,"[{'postalCode': '99613', 'city': 'King Salmon'...","[{'credit': 'NPS/R. Wood', 'title': 'salmon in..."
8,https://www.nps.gov/anch/index.htm,Alaska Public Lands,anch,"Alaska’s parks, forests, and refuges are rich ...",61.2188,-149.894536,,AK,"[{'postalCode': '99501', 'city': 'Anchorage', ...","[{'credit': 'NPS/Josh Spice', 'title': 'Hikers..."
9,https://www.nps.gov/alca/index.htm,Alcatraz Island,alca,Alcatraz reveals stories of American incarcera...,37.826762,-122.423021,"Food, Wildlife Watching, Birdwatching, Shoppin...",CA,"[{'postalCode': '94133', 'city': 'San Francisc...","[{'credit': 'NPS', 'title': 'Alcatraz Island',..."


### Drop parks that are blank in "activities" column.

In [70]:
# Keep only the rows whose "activities" value is not the empty string.
# Missing values (NaN) compare as "not equal" and are therefore kept.
has_activities = parks_list_cleaned_df['activities'].ne('')
parks_list_cleaned_df = parks_list_cleaned_df[has_activities]

print(len(parks_list_cleaned_df))

463


### Finally, merge the two datasets into what will be our main dataset for passing data to our database, and then into JavaScript.

In [71]:
# Attach each park's fee/pass flags by joining on the shared parkCode key.
# An inner join keeps only the parks present in both DataFrames.
final_parks_df = parks_list_cleaned_df.merge(
    fee_passes_cleaned_df,
    on="parkCode",
    how='inner',
)

final_parks_df.head()

Unnamed: 0,url,fullName,parkCode,description,latitude,longitude,activities,states,addresses,images,isFeeFreePark,isInteragencyPassAccepted
0,https://www.nps.gov/abli/index.htm,Abraham Lincoln Birthplace National Historical...,abli,For over a century people from around the worl...,37.585866,-85.673305,"Astronomy, Stargazing, Food, Picnicking, Guide...",KY,"[{'postalCode': '42748', 'city': 'Hodgenville'...","[{'credit': 'NPS Photo', 'title': 'The Memoria...",True,False
1,https://www.nps.gov/acad/index.htm,Acadia National Park,acad,Acadia National Park protects the natural beau...,44.409286,-68.247501,"Arts and Culture, Cultural Demonstrations, Ast...",ME,"[{'postalCode': '04609', 'city': 'Bar Harbor',...","[{'credit': 'NPS / Kristi Rugg', 'title': ""Aca...",False,True
2,https://www.nps.gov/adam/index.htm,Adams National Historical Park,adam,From the sweet little farm at the foot of Penn...,42.255396,-71.011604,"Guided Tours, Living History, First Person Int...",MA,"[{'postalCode': '02169', 'city': 'Quincy', 'st...","[{'credit': 'NPS Photo', 'title': 'The John an...",False,True
3,https://www.nps.gov/afam/index.htm,African American Civil War Memorial,afam,"Over 200,000 African-American soldiers and sai...",38.9166,-77.026,"Guided Tours, Self-Guided Tours - Walking",DC,"[{'postalCode': '20001', 'city': 'Washington',...","[{'credit': 'NPS Photo', 'title': 'African Ame...",True,False
4,https://www.nps.gov/afbg/index.htm,African Burial Ground National Monument,afbg,African Burial Ground is the oldest and larges...,40.714527,-74.004474,"Arts and Culture, Guided Tours, Junior Ranger ...",NY,"[{'postalCode': '10007', 'city': 'New York', '...","[{'credit': 'NPS Photo', 'title': 'African Bur...",True,False


----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

# Convert final DataFrames to our cleaned CSVs

In [72]:
# Write the cleaned outputs under Resources/Cleaned.
# The paths are built with Path and forward slashes, like the input paths:
# the earlier backslash string literals contained invalid escape sequences
# ('\C', '\p'), and a backslash is only a path separator on Windows, so on
# other systems the files ended up in the working directory with backslashes
# in their names.
cleaned_dir = Path("Resources/Cleaned")
cleaned_dir.mkdir(parents=True, exist_ok=True)  # create the output folder if it is missing

final_parks_df.to_csv(cleaned_dir / "parks_list_cleaned.csv", index=False, encoding='utf-8')
activities_df.to_csv(cleaned_dir / "activities.csv", index=False, encoding='utf-8')