In [16]:
# Dependencies
import pandas as pd
import requests
import json
import os
import datetime
import matplotlib.pyplot as plt

# Google API Key (maybe later)
from config import gkey


In [4]:
# Paths to the Excel workbooks used as inputs
attendance_raw = 'Resources/national_park_attendance_Prev3Y.xlsx'
park_info = 'Resources/national_park_locations.xlsx'
#wildfires = 'Resources/California_Fire_Incidents.csv'

# Load each workbook into its own dataframe (attendance first, then park info)
attendance_data, park_info_data = (
    pd.read_excel(workbook) for workbook in (attendance_raw, park_info)
)
#wildfire_data = pd.read_csv(wildfires)


In [6]:
# Limit dataframe to national parks in California & Florida
national_parks = park_info_data.loc[park_info_data['ParkType'] == 'National Park']

# Build the state mask from national_parks itself so the indexer and the frame
# share the same index.  .copy() yields an independent frame, so adding
# columns below does not raise SettingWithCopyWarning.  The original row
# labels from park_info_data are kept.
focus_parks = national_parks.loc[national_parks['State'].isin(['CA', 'FL'])].copy()

# Create 'FullName' by expanding the trailing "NP" abbreviation, e.g.
# "Biscayne NP" -> "Biscayne National Park".  Every row here has ParkType
# 'National Park' (see the filter above).  Anchoring on a trailing "NP"
# leaves names without the abbreviation untouched instead of chopping off
# their last two characters.
focus_parks['FullName'] = focus_parks['Park'].str.replace(
    r'\s*\bNP$', ' National Park', regex=True
)

focus_parks.head(20)

Unnamed: 0,Park,UnitCode,ParkType,Region,State,FullName
30,Biscayne NP,BISC,National Park,Southeast,FL,Biscayne National Park
63,Channel Islands NP,CHIS,National Park,Pacific West,CA,Channel Islands National Park
87,Death Valley NP,DEVA,National Park,Pacific West,CA,Death Valley National Park
93,Dry Tortugas NP,DRTO,National Park,Southeast,FL,Dry Tortugas National Park
101,Everglades NP,EVER,National Park,Southeast,FL,Everglades National Park
191,Joshua Tree NP,JOTR,National Park,Pacific West,CA,Joshua Tree National Park
198,Kings Canyon NP,KICA,National Park,Pacific West,CA,Kings Canyon National Park
210,Lassen Volcanic NP,LAVO,National Park,Pacific West,CA,Lassen Volcanic National Park
281,Pinnacles NP,PINN,National Park,Pacific West,CA,Pinnacles National Park
293,Redwood NP,REDW,National Park,Pacific West,CA,Redwood National Park


In [7]:
# Find the geocoordinates (latitude and longitude) of a single park
park = "Yellowstone"

params = {"address": park, "key": gkey}

# Google Maps Geocoding API endpoint
base_url = "https://maps.googleapis.com/maps/api/geocode/json"

# Run request.  The timeout keeps the cell from hanging on a stalled
# connection, and raise_for_status() surfaces HTTP-level failures immediately.
# Avoid printing response.url in public GitHub repos: it contains the API key.
response = requests.get(base_url, params=params, timeout=10)
response.raise_for_status()

# Convert to JSON
park_geo = response.json()

# Fail with a readable message when the response carries no matches, rather
# than an IndexError/KeyError from the lookups below.
if not park_geo.get("results"):
    raise ValueError(
        f"Geocoding {park!r} returned no results (status: {park_geo.get('status')})"
    )

# Extract lat/lng of the first match
location = park_geo["results"][0]["geometry"]["location"]
lat = location["lat"]
lng = location["lng"]

# Print results
print(f"{park}: {lat}, {lng}")

Yellowstone: 44.427963, -110.588455


In [8]:
# Geocode every focus park with the Google Maps Geocoding API

lats = []
longs = []
cost = []  # not populated in this cell

base_url = "https://maps.googleapis.com/maps/api/geocode/json"

for park_name in focus_parks['FullName']:
    params = {"address": park_name, "key": gkey}
    # Timeout prevents one stalled request from hanging the whole loop;
    # raise_for_status() stops on HTTP-level failures.
    response = requests.get(base_url, params=params, timeout=10)
    response.raise_for_status()
    park_geo = response.json()

    results = park_geo.get("results")
    if results:
        location = results[0]["geometry"]["location"]
        lats.append(location["lat"])
        longs.append(location["lng"])
    else:
        # Record a placeholder so lats/longs stay the same length as
        # focus_parks and one unmatched park does not abort the loop.
        print(f"No geocoding result for {park_name} (status: {park_geo.get('status')})")
        lats.append(None)
        longs.append(None)
    

In [9]:
# Attach the collected coordinates to the parks dataframe as two new columns
focus_parks = focus_parks.assign(Latitude=lats, Longitude=longs)
focus_parks.head()

Unnamed: 0,Park,UnitCode,ParkType,Region,State,FullName,Latitude,Longitude
30,Biscayne NP,BISC,National Park,Southeast,FL,Biscayne National Park,25.482423,-80.20831
63,Channel Islands NP,CHIS,National Park,Pacific West,CA,Channel Islands National Park,34.006936,-119.778533
87,Death Valley NP,DEVA,National Park,Pacific West,CA,Death Valley National Park,36.505389,-117.079408
93,Dry Tortugas NP,DRTO,National Park,Southeast,FL,Dry Tortugas National Park,24.633333,-82.92
101,Everglades NP,EVER,National Park,Southeast,FL,Everglades National Park,25.745929,-80.554956


In [14]:
# Combine park details and attendance records into a single dataframe.
# The left join keeps every row of focus_parks; the join key 'Park' is
# named once (listing it twice added nothing).
park_data = pd.merge(focus_parks, attendance_data, how="left", on="Park")
park_data.head()

Unnamed: 0,Park,UnitCode,ParkType,Region,State,FullName,Latitude,Longitude,Code,Month,Visitors
0,Biscayne NP,BISC,National Park,Southeast,FL,Biscayne National Park,25.482423,-80.20831,BISC,2020-01-01 00:00:00,79041.0
1,Biscayne NP,BISC,National Park,Southeast,FL,Biscayne National Park,25.482423,-80.20831,BISC,2020-02-01 00:00:00,91655.0
2,Biscayne NP,BISC,National Park,Southeast,FL,Biscayne National Park,25.482423,-80.20831,BISC,2020-03-01 00:00:00,10962.0
3,Biscayne NP,BISC,National Park,Southeast,FL,Biscayne National Park,25.482423,-80.20831,BISC,2020-04-01 00:00:00,2358.0
4,Biscayne NP,BISC,National Park,Southeast,FL,Biscayne National Park,25.482423,-80.20831,BISC,2020-05-01 00:00:00,43692.0


In [29]:
# Derive calendar year and month columns from the 'Month' values,
# converting them to a DatetimeIndex once and reusing it for both fields
month_stamps = pd.DatetimeIndex(park_data['Month'])
park_data['Calendar Year'] = month_stamps.year
park_data['Calendar Month'] = month_stamps.month
park_data.head(50)

Unnamed: 0,Park,UnitCode,ParkType,Region,State,FullName,Latitude,Longitude,Code,Month,Visitors,Year,Calandar Year,Calendar Month,Calendar Year
0,Biscayne NP,BISC,National Park,Southeast,FL,Biscayne National Park,25.482423,-80.20831,BISC,2020-01-01 00:00:00,79041.0,2020,2020,1,2020
1,Biscayne NP,BISC,National Park,Southeast,FL,Biscayne National Park,25.482423,-80.20831,BISC,2020-02-01 00:00:00,91655.0,2020,2020,2,2020
2,Biscayne NP,BISC,National Park,Southeast,FL,Biscayne National Park,25.482423,-80.20831,BISC,2020-03-01 00:00:00,10962.0,2020,2020,3,2020
3,Biscayne NP,BISC,National Park,Southeast,FL,Biscayne National Park,25.482423,-80.20831,BISC,2020-04-01 00:00:00,2358.0,2020,2020,4,2020
4,Biscayne NP,BISC,National Park,Southeast,FL,Biscayne National Park,25.482423,-80.20831,BISC,2020-05-01 00:00:00,43692.0,2020,2020,5,2020
5,Biscayne NP,BISC,National Park,Southeast,FL,Biscayne National Park,25.482423,-80.20831,BISC,2020-06-01 00:00:00,32646.0,2020,2020,6,2020
6,Biscayne NP,BISC,National Park,Southeast,FL,Biscayne National Park,25.482423,-80.20831,BISC,2020-07-01 00:00:00,28770.0,2020,2020,7,2020
7,Biscayne NP,BISC,National Park,Southeast,FL,Biscayne National Park,25.482423,-80.20831,BISC,2020-08-01 00:00:00,34416.0,2020,2020,8,2020
8,Biscayne NP,BISC,National Park,Southeast,FL,Biscayne National Park,25.482423,-80.20831,BISC,2020-09-01 00:00:00,18276.0,2020,2020,9,2020
9,Biscayne NP,BISC,National Park,Southeast,FL,Biscayne National Park,25.482423,-80.20831,BISC,2020-10-01 00:00:00,17436.0,2020,2020,10,2020


In [39]:
# Split up our data into groups based upon 'Calendar Year' and 'Calendar Month'
year_groups = park_data.groupby(['Calendar Year', 'Calendar Month'])

# Create a new variable that holds the sum of the numeric columns per group.
# numeric_only=True is explicit because pandas >= 2.0 no longer drops
# text/datetime columns silently and raises on them instead.
# NOTE: only 'Visitors' is a meaningful total here; the summed
# Latitude/Longitude values are just a by-product of summing every column.
sum_it_up = year_groups.sum(numeric_only=True)
sum_it_up.head(50)

Unnamed: 0_level_0,Unnamed: 1_level_0,Latitude,Longitude,Visitors,Year,Calandar Year
Calendar Year,Calendar Month,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2017,1,409.688152,-1319.709203,766308.0,24204,24204
2017,2,409.688152,-1319.709203,713317.0,24204,24204
2017,3,409.688152,-1319.709203,1016798.0,24204,24204
2017,4,409.688152,-1319.709203,1161838.0,24204,24204
2017,5,409.688152,-1319.709203,1284854.0,24204,24204
2017,6,409.688152,-1319.709203,1353126.0,24204,24204
2017,7,409.688152,-1319.709203,1652030.0,24204,24204
2017,8,409.688152,-1319.709203,1583956.0,24204,24204
2017,9,409.688152,-1319.709203,1296326.0,24204,24204
2017,10,409.688152,-1319.709203,1082981.0,24204,24204


In [27]:
# Generate a summary statistics table of mean, median, variance, standard
# deviation, and SEM of the visitors for each calendar month.

# Select 'Visitors' before aggregating so each statistic is computed on that
# one column only, instead of on every column of the frame (which is wasteful
# and raises on text/datetime columns in pandas >= 2.0).
visitors_by_month = park_data.groupby('Calendar Month')['Visitors']

mean = visitors_by_month.mean()
median = visitors_by_month.median()
variance = visitors_by_month.var()
std_dev = visitors_by_month.std()
SEM = visitors_by_month.sem()

# Assemble the resulting series into a single summary dataframe.
park_stats = pd.DataFrame({'Mean': mean,
                           'Median': median,
                           'Variance': variance,
                           'Standard Deviation': std_dev,
                           'SEM': SEM})
park_stats.head(12)

Unnamed: 0_level_0,Mean,Median,Variance,Standard Deviation,SEM
Calendar Month,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1,61393.125,26234.0,5287437000.0,72714.764692,10495.472242
2,67641.604167,26092.0,6314422000.0,79463.338589,11469.544981
3,78687.0,30803.0,11757510000.0,108432.049317,15650.818215
4,74192.625,34330.5,10860690000.0,104214.648238,15042.088803
5,82997.354167,41381.0,11579480000.0,107608.006993,15531.877951
6,102189.479167,66044.5,15721990000.0,125387.345128,18098.104366
7,125212.833333,71268.5,24009650000.0,154950.4723,22365.174223
8,118096.958333,82618.0,20834800000.0,144342.638131,20834.065245
9,94679.0,49345.5,17421040000.0,131988.775982,19050.938836
10,88696.916667,42311.0,11985760000.0,109479.512797,15802.006546


In [None]:
# Summary statistics (mean, median, variance, standard deviation, SEM) of the
# visitors for each park, produced with a single aggregation call
visitor_stats = ['mean', 'median', 'var', 'std', 'sem']
summary_stats = park_data.groupby('Park').agg({'Visitors': visitor_stats})
summary_stats