In [19]:
# Import Splinter and BeautifulSoup
from splinter import Browser
from bs4 import BeautifulSoup as soup
import pandas as pd
import time
import json

In [2]:
# Set up Splinter
browser = Browser('chrome')

In [3]:
#Read the visitor data (which we got from NPS API) into a Pandas DataFrame
park_info_df = pd.read_csv('../Output/park_info.csv')
park_info_df.head()

Unnamed: 0,Name,Park ID,Park Code,Designation,Latitude,Longitude,Entrance Fee,Entrance Fee Desc
0,Abraham Lincoln Birthplace National Historical...,77E0D7F0-1942-494A-ACE2-9004D2BDC59E,abli,National Historical Park,37.585866,-85.673305,Free,
1,Acadia National Park,6DA17C86-088E-4B4D-B862-7C1BD5CF236B,acad,National Park,44.409286,-68.247501,6.00,Vehicle reservations are not required for any ...
2,Adams National Historical Park,E4C7784E-66A0-4D44-87D0-3E072F5FEF43,adam,National Historical Park,42.255396,-71.011604,15.00,Access into the historic homes is by a tickete...
3,African American Civil War Memorial,1A47416F-DAA3-4137-9F30-14AF86B4E547,afam,,38.9166,-77.026,Free,
4,African Burial Ground National Monument,E6E1D22A-7A89-47F8-813C-B611059A8CF9,afbg,National Monument,40.714527,-74.004474,Free,


In [4]:
#The URL relies on these park codes, so we'll extract those:
park_codes = park_info_df["Park Code"].str.upper().tolist()
park_codes

['ABLI',
 'ACAD',
 'ADAM',
 'AFAM',
 'AFBG',
 'AGFO',
 'ALKA',
 'ALAG',
 'ANCH',
 'ALCA',
 'ALEU',
 'ALFL',
 'ALPO',
 'AMCH',
 'AMME',
 'AMIS',
 'ANAC',
 'ANDE',
 'ANJO',
 'ANIA',
 'ANTI',
 'APIS',
 'APPA',
 'APCO',
 'ARCH',
 'ARPO',
 'ARHO',
 'ASIS',
 'AZRU',
 'BADL',
 'BAWA',
 'BAND',
 'BEPA',
 'BEOL',
 'BELA',
 'BIBE',
 'BICY',
 'BIHO',
 'BISO',
 'BITH',
 'BICA',
 'BICR',
 'BISC',
 'BLCA',
 'BLRV',
 'BLSC',
 'BLRI',
 'BLUE',
 'BOWA',
 'BOAF',
 'BOHA',
 'BOST',
 'BRCR',
 'BRVB',
 'BRCA',
 'BUIS',
 'BUFF',
 'CABR',
 'CALI',
 'CANE',
 'CANA',
 'CARI',
 'CACH',
 'CANY',
 'CACO',
 'CAHA',
 'CAME',
 'CAKR',
 'CALO',
 'CAHI',
 'CARE',
 'CAJO',
 'CAVO',
 'CARL',
 'CAVE',
 'CAWO',
 'CAGR',
 'CASA',
 'CACL',
 'CAMO',
 'CATO',
 'CEBR',
 'CEBE',
 'CHCU',
 'CHAM',
 'CHIS',
 'CHPI',
 'CHYO',
 'CHAT',
 'CHOH',
 'CBPO',
 'CHCH',
 'CHIC',
 'CHIR',
 'CHRI',
 'CIRO',
 'CWDW',
 'CLBA',
 'COLO',
 'COLM',
 'COLT',
 'CONG',
 'COGA',
 'CORO',
 'COWP',
 'CRLA',
 'CRMO',
 'CUGA',
 'CUIS',
 'CURE',
 'CUVA',
 

In [13]:
#Define the base url:
url = 'https://irma.nps.gov/Stats/MvcReportViewer.aspx?_id=183bc6ee-a60e-4f4b-97e6-eef7184e5b0d&_m=Remote&_r=%2fNPS.Stats.Reports%2fPark+Specific+Reports%2fRecreation+Visitors+By+Month+(1979+-+Last+Calendar+Year)&_15=True&_16=True&_18=True&_19=True&_34=False&_35=False&_39=880px&Park='

In [16]:
#Initalize an empty list that will eventually contain all park info, with each park being represented by a dictionary
all_park_info = []

#Each item in this list will be a key for a key-value pair in each park's dictionary
months_list = ["Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec", "Total"]

#Loop through all codes in the park
for code in park_codes:
    
    try:
        #Append the code to the base url
        query_url = url + code
        #And visit the page for that url
        browser.visit(query_url)

        #Store the webpage information as html
        html = browser.html
        html_soup = soup(html, 'html.parser')

        #Extract the table_class from the webpage (since this changes each page)
        #Ashwin helped us a lot with this section
        table = html_soup.find(id='ReportViewer_fixedTable')
        table_class = table.find('table', cols='14')['class'][0]

        #Extract the table data using the class we just found
        table_data = table.find('table', class_=table_class)

        #From the table, extract each row:    
        all_row_data =table_data.find_all('td')

        #The data we are interested in is only the second row, which is <td> 44 to 57
        data = all_row_data[43:56]

        #Initalize an empty dictionary that will contain only this park's data
        single_park_dict = {}

        #Append the park code value to the "Code" key
        single_park_dict["Code"] = code

        #Initalize an empty list to store the data extracted from the website
        visits_list = []

        #Initalize an empty dictionary that will store each park's monthly visitors
        month_dict = {}

        #Loop through the data that we are interested in
        for i in range(13):

            #Add a key for the month name with a value of the month's visitors
            month_dict[months_list[i]] = data[i].text

        #Add the monthly visitor dictionary to the visits_list list
        visits_list.append(month_dict)

        #Add the visits_list to the park's whole dictionary (which now contains the park code and all the park's
        # monthly visitors in 2022)
        single_park_dict["Visits by Month"] = visits_list

        #Finally, add each single_park_dict to the whole all_park_info list:
        all_park_info.append(single_park_dict)
        
        print(f"Dictionary for {code} added")
        
        #wait until next request
        time.sleep(5)
    
    except (KeyError, IndexError):
        #If the webpage returns empty html (i.e. the URL doesn't work for that park code)
        print(f"No data available for {code}")
        
        #wait until next request
        time.sleep(5)

Dictionary for ABLI added
Dictionary for ACAD added
Dictionary for ADAM added


In [17]:
all_park_info

[{'Code': 'ABLI',
  'Visits by Month': [{'Jan': '5,833',
    'Feb': '10,174',
    'Mar': '16,447',
    'Apr': '25,848',
    'May': '30,360',
    'Jun': '36,056',
    'Jul': '36,369',
    'Aug': '32,346',
    'Sep': '31,130',
    'Oct': '21,857',
    'Nov': '12,503',
    'Dec': '6,784',
    'Total': '265,707'}]},
 {'Code': 'ACAD',
  'Visits by Month': [{'Jan': '10,411',
    'Feb': '13,045',
    'Mar': '22,476',
    'Apr': '105,126',
    'May': '327,466',
    'Jun': '603,023',
    'Jul': '791,358',
    'Aug': '785,236',
    'Sep': '697,292',
    'Oct': '521,646',
    'Nov': '70,675',
    'Dec': '22,506',
    'Total': '3,970,260'}]},
 {'Code': 'ADAM',
  'Visits by Month': [{'Jan': '84',
    'Feb': '125',
    'Mar': '204',
    'Apr': '262',
    'May': '1,343',
    'Jun': '2,109',
    'Jul': '2,041',
    'Aug': '1,876',
    'Sep': '1,811',
    'Oct': '2,207',
    'Nov': '425',
    'Dec': '361',
    'Total': '12,848'}]}]

In [18]:
# Quit the browsing session
browser.quit()

In [21]:
with open("../Output/monthly_visitors_2022.json", "w") as outfile:
   json.dump(all_park_info, outfile)