# National Park Analysis

In [57]:
#import libraries keys and setup data direcotry
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
from datetime import date
import os
import sys
import random
from scipy.stats import linregress
from scipy.stats import normaltest
from citipy import citipy
import json
import gmaps
from math import nan


#Include api directory in path
sys.path.append("api")

# Import API key
from config import g_key
from config import np_key

#National Park Endpoints
np_endpoint_parks = "https://developer.nps.gov/api/v1/parks?"


#DataFrame Descriptions
#attendance_master_detail - National Visit - Location code, activity count - One record per park per year.
#Final_Budget_df - Clean National Park Budget Data - One record per park per year. 
#Max_Year_df - Max Year Visits With location codes -  One record per park. Representing visits from most recent year.
#Budget_Attendance_final_df - Merge of visit, budget (including adjusted for cpi), location codes,state, activity count. One record per year per park.
#National Park activities - One record per activity per national park.  (Just in case, but most likely won't use.)



## Import Park Attendance Data (Ana, Kevin-support)

In [None]:
#Import in park atendnce excel/csv  

In [3]:
# Import the park atendence csv file
attendance_df = pd.read_csv("data/Visists 1979-2019 v2.csv")
attendance_df

Unnamed: 0,Park Name,Year,Visits
0,Abraham Lincoln Birthplace NHP,2019,238226
1,Abraham Lincoln Birthplace NHP,1979,271231
2,Abraham Lincoln Birthplace NHP,1980,239074
3,Abraham Lincoln Birthplace NHP,1981,252245
4,Abraham Lincoln Birthplace NHP,1982,318707
...,...,...,...
13738,Zion NP,2014,3189696
13739,Zion NP,2015,3648846
13740,Zion NP,2016,4295127
13741,Zion NP,2017,4504812


In [4]:
#Data cleanse.
attendance_df.columns

Index(['Park Name', 'Year', 'Visits'], dtype='object')

In [5]:
#Checking the data types in Data Frame 
attendance_df.dtypes

Park Name    object
Year          int64
Visits        int64
dtype: object

In [6]:
attendance_df["Park Name"] = attendance_df["Park Name"].astype("str")
attendance_df.dtypes

Park Name    object
Year          int64
Visits        int64
dtype: object

In [7]:
# Filter through our attendence DataFrame to fibd only our National Parks
national_park_df = attendance_df[attendance_df["Park Name"].str.contains("NP")]
national_park_df

Unnamed: 0,Park Name,Year,Visits
41,Acadia NP,2019,3437286
42,Acadia NP,1979,2787366
43,Acadia NP,1980,2779666
44,Acadia NP,1981,2997972
45,Acadia NP,1982,3572114
...,...,...,...
13738,Zion NP,2014,3189696
13739,Zion NP,2015,3648846
13740,Zion NP,2016,4295127
13741,Zion NP,2017,4504812


## Import Operating Cost Data (Kevin)

In [8]:
#Import operating cost into data frame #Import operating cost into data frame 
#Budget2006_df = pd.read_excel("data/Nation_Park_Budget/National_Park_Budget_2006.xlsx")
Budget2007_df = pd.read_excel("data/Nation_Park_Budget/National_Park_Budget_2007.xlsx")
Budget2008_df = pd.read_excel("data/Nation_Park_Budget/National_Park_Budget_2008.xlsx")
Budget2009_df = pd.read_excel("data/Nation_Park_Budget/National_Park_Budget_2009.xlsx")
Budget2010_df = pd.read_excel("data/Nation_Park_Budget/National_Park_Budget_2010.xlsx")
Budget2011_df = pd.read_excel("data/Nation_Park_Budget/National_Park_Budget_2011.xlsx")
Budget2012_df = pd.read_excel("data/Nation_Park_Budget/National_Park_Budget_2012.xlsx")
Budget2013_df = pd.read_excel("data/Nation_Park_Budget/National_Park_Budget_2013.xlsx")
Budget2014_df = pd.read_excel("data/Nation_Park_Budget/National_Park_Budget_2014.xlsx")
Budget2015_df = pd.read_excel("data/Nation_Park_Budget/National_Park_Budget_2015.xlsx")
Budget2017_df = pd.read_excel("data/Nation_Park_Budget/National_Park_Budget_2017.xlsx")
Budget2018_df = pd.read_excel("data/Nation_Park_Budget/National_Park_Budget_2018.xlsx")
Budget2019_df = pd.read_excel("data/Nation_Park_Budget/National_Park_Budget_2019.xlsx")
Budget2020_df = pd.read_excel("data/Nation_Park_Budget/National_Park_Budget_2020.xlsx")
Budget2021_df = pd.read_excel("data/Nation_Park_Budget/National_Park_Budget_2021.xlsx")


In [9]:
#Data cleansing
Clean_Budget2007_df = Budget2007_df.loc[:,["OPERATION OF THE NAT'L PARK SYSTEM","FY 2007 Request"]]
Clean_Budget2008_df = Budget2008_df.loc[:,["OPERATION OF THE NAT'L PARK SYSTEM","FY 2006 Presiden's Request"]]
Clean_Budget2009_df = Budget2009_df.loc[:,["Operation of National Park System","FY 2009 Presidents Request"]]
Clean_Budget2010_df = Budget2010_df.loc[:,["Operation of Nat Park","FY 2010 Preisdent Request"]]
Clean_Budget2011_df = Budget2011_df.loc[:,["Operation of The National Park System","FY 2011 President's Request"]]
Clean_Budget2012_df = Budget2012_df.loc[:,["OPERATION OF THE NATIONAL PARK SYSTEM","2012 Presidents Requests"]]
Clean_Budget2013_df = Budget2013_df.loc[:,["OPERATION OF THE NATIONAL PARK SYSTEM","FY 2013 Presidents Request"]]
Clean_Budget2014_df = Budget2014_df.loc[:,["OPERATION OF THE NATIONAL PARK SYSTEM","2014 Requested"]]
Clean_Budget2015_df = Budget2015_df.loc[:,["OPERATION OF THE NATIONAL PARK SYSTEM","2015 Reques t"]]
Clean_Budget2017_df = Budget2017_df.loc[:,["OPERATION OF THE NATIONAL PARK SYSTEM","FY 2017 Request"]]
Clean_Budget2018_df = Budget2018_df.loc[:,["OPERATION OF THE NATIONAL PARK SYSTEM\nORGANIZATIONS","FY 2018\nRequest"]]
Clean_Budget2019_df = Budget2019_df.loc[:,["OPERATION OF THE NATIONAL PARK SYSTEM ORGANIZATIONS","FY 2019\nRequest9"]]
Clean_Budget2021_df = Budget2021_df.loc[:,["OPERATION OF THE NATIONAL PARK SYSTEM ORGANIZATIONS", " FY 2021\nRequest"]]
Clean_Budget2020_df = Budget2020_df.loc[:,["OPERATION OF THE NATIONAL PARK SYSTEM ORGANIZATIONS ", "FY 2020 Request "]]

In [10]:
# Adding New Column For Year
Clean_Budget2007_df["Year"]=2007
Clean_Budget2008_df["Year"]=2008
Clean_Budget2009_df["Year"]=2009
Clean_Budget2010_df["Year"]=2010
Clean_Budget2011_df["Year"]=2011
Clean_Budget2012_df["Year"]=2012
Clean_Budget2013_df["Year"]=2013
Clean_Budget2014_df["Year"]=2014
Clean_Budget2015_df["Year"]=2015
Clean_Budget2017_df["Year"]=2017
Clean_Budget2018_df["Year"]=2018
Clean_Budget2019_df["Year"]=2019
Clean_Budget2020_df["Year"]=2020
Clean_Budget2021_df["Year"]=2021

In [11]:
# Making the Same name for the columns from different years
Clean_Budget2007_df = Clean_Budget2007_df.rename(columns={"OPERATION OF THE NAT'L PARK SYSTEM":"Park Name","FY 2007 Request":"Budget Request"})
Clean_Budget2008_df = Clean_Budget2008_df.rename(columns={"OPERATION OF THE NAT'L PARK SYSTEM":"Park Name","FY 2006 Presiden's Request":"Budget Request"})
Clean_Budget2009_df = Clean_Budget2009_df.rename(columns={"Operation of National Park System":"Park Name","FY 2009 Presidents Request":"Budget Request"})
Clean_Budget2010_df = Clean_Budget2010_df.rename(columns={"Operation of Nat Park":"Park Name","FY 2010 Preisdent Request":"Budget Request"})
Clean_Budget2011_df = Clean_Budget2011_df.rename(columns={"Operation of The National Park System":"Park Name","FY 2011 President's Request":"Budget Request"})
Clean_Budget2012_df = Clean_Budget2012_df.rename(columns={"OPERATION OF THE NATIONAL PARK SYSTEM":"Park Name","2012 Presidents Requests":"Budget Request"})
Clean_Budget2013_df = Clean_Budget2013_df.rename(columns={"OPERATION OF THE NATIONAL PARK SYSTEM":"Park Name","FY 2013 Presidents Request":"Budget Request"})
Clean_Budget2014_df = Clean_Budget2014_df.rename(columns={"OPERATION OF THE NATIONAL PARK SYSTEM":"Park Name","2014 Requested":"Budget Request"})
Clean_Budget2015_df = Clean_Budget2015_df.rename(columns={"OPERATION OF THE NATIONAL PARK SYSTEM":"Park Name","2015 Reques t":"Budget Request"})
Clean_Budget2017_df = Clean_Budget2017_df.rename(columns={"OPERATION OF THE NATIONAL PARK SYSTEM":"Park Name","FY 2017 Request":"Budget Request"})
Clean_Budget2018_df = Clean_Budget2018_df.rename(columns={"OPERATION OF THE NATIONAL PARK SYSTEM\nORGANIZATIONS":"Park Name","FY 2018\nRequest":"Budget Request"})
Clean_Budget2019_df = Clean_Budget2019_df.rename(columns={"OPERATION OF THE NATIONAL PARK SYSTEM ORGANIZATIONS":"Park Name","FY 2019\nRequest9":"Budget Request"})
Clean_Budget2021_df = Clean_Budget2021_df.rename(columns={"OPERATION OF THE NATIONAL PARK SYSTEM ORGANIZATIONS":"Park Name", " FY 2021\nRequest":"Budget Request"})
Clean_Budget2020_df = Clean_Budget2020_df.rename(columns={"OPERATION OF THE NATIONAL PARK SYSTEM ORGANIZATIONS ":"Park Name", "FY 2020 Request ":"Budget Request"})

In [21]:
# Merge, drop NA, then look for only NP
Frames  = [Clean_Budget2007_df, Clean_Budget2008_df,Clean_Budget2009_df, Clean_Budget2010_df,Clean_Budget2011_df, Clean_Budget2012_df,Clean_Budget2013_df, Clean_Budget2014_df,Clean_Budget2015_df, Clean_Budget2021_df,Clean_Budget2017_df, Clean_Budget2018_df,Clean_Budget2019_df, Clean_Budget2020_df]
Total_Budget_df= pd.concat(Frames)
Total_Budget_df.dropna( inplace=True )
Final_Budget_df = Total_Budget_df[Total_Budget_df["Park Name"].str.contains("NP")]
#Added by Mike to change from object type to int
Final_Budget_df["Budget Request"] = Final_Budget_df["Budget Request"].astype("int64")
Final_Budget_df


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  Final_Budget_df["Budget Request"] = Final_Budget_df["Budget Request"].astype("int64")


Unnamed: 0,Park Name,Budget Request,Year
1,Acadia NP,6545,2007
12,Arches NP,1251,2007
17,Badlands NP,3607,2007
21,Big Bend NP,5759,2007
27,Biscayne NP,3648,2007
...,...,...,...
353,Wrangell-Saint Elias NP&Pres,5634,2020
354,Yellowstone NP,34410,2020
355,Yosemite NP,30032,2020
357,Yukon-Charley Rivers NPres,1768,2020


In [22]:
Final_Budget_df.dtypes

Park Name         object
Budget Request     int64
Year               int64
dtype: object

## Import CPI table

In [26]:
cpi_df = pd.read_excel("data/cpi_table.xlsx")

In [27]:
cpi_df


Unnamed: 0,YEAR,CPI
0,2020,258.811
1,2019,255.657
2,2018,251.107
3,2017,245.12
4,2016,240.008
5,2015,237.017
6,2014,236.736
7,2013,232.957
8,2012,229.594
9,2011,224.939


## CPI Function

In [28]:

#The following function will return adjusted prior year abount based on CPI of current year and prior year.

def cpi_fnc ( current_year,prior_year, prior_amount ):
    
    ret_val = 0
    if type(prior_year) != 'int' or  type(current_year) != 'int':
        try:
            prior_year = int(prior_year)
            current_year = int(current_year)
        except:
            print(f"Type Error: Make sure to pass int values or strings that will convert to int")
        if len(cpi_df[cpi_df["YEAR"] == prior_year]) == 0:
                  print(f"Year {str(prior_year)} does not exist in CPI table")
        elif len(cpi_df[cpi_df["YEAR"] == current_year]) == 0:
                  print(f"Year {str(current_year)} does not exist in CPI table")
        else:
            v_curr_year_cpi = cpi_df[cpi_df["YEAR"] == current_year]["CPI"].values[0]
            v_prior_year_cpi = cpi_df[cpi_df["YEAR"] == prior_year]["CPI"].values[0]          
            adjusted_amount = prior_amount * float((v_curr_year_cpi/v_prior_year_cpi))
            ret_val = round(adjusted_amount,2)
    return ret_val
                

In [29]:
#CPI Test Statement

x = cpi_fnc(2020,2000,14888023)
print(str(x))


22376214.41


## Add CPI Adjusted Budget Amount columns to budget tables (Mike - Efrain)

In [32]:
#Add Column Adjusted Budget Column for 2020 CPI

Final_Budget_df["AdjBudg"] = 0

for index, row in Final_Budget_df.iterrows():
    Final_Budget_df.loc[index,"AdjBudg"] = cpi_fnc ( 2020, row["Year"], row["Budget Request"])

    

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  Final_Budget_df["AdjBudg"] = 0
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the ca

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in th

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in th

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in th

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in th

Year 2021 does not exist in CPI table
Year 2021 does not exist in CPI table
Year 2021 does not exist in CPI table
Year 2021 does not exist in CPI table
Year 2021 does not exist in CPI table
Year 2021 does not exist in CPI table
Year 2021 does not exist in CPI table
Year 2021 does not exist in CPI table
Year 2021 does not exist in CPI table
Year 2021 does not exist in CPI table
Year 2021 does not exist in CPI table
Year 2021 does not exist in CPI table
Year 2021 does not exist in CPI table
Year 2021 does not exist in CPI table
Year 2021 does not exist in CPI table
Year 2021 does not exist in CPI table
Year 2021 does not exist in CPI table
Year 2021 does not exist in CPI table
Year 2021 does not exist in CPI table
Year 2021 does not exist in CPI table
Year 2021 does not exist in CPI table
Year 2021 does not exist in CPI table
Year 2021 does not exist in CPI table
Year 2021 does not exist in CPI table
Year 2021 does not exist in CPI table
Year 2021 does not exist in CPI table
Year 2021 do

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in th

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in th

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in th

In [33]:
Final_Budget_df

Unnamed: 0,Park Name,Budget Request,Year,AdjBudg
1,Acadia NP,6545,2007,7920.00
12,Arches NP,1251,2007,1561.54
17,Badlands NP,3607,2007,4371.00
21,Big Bend NP,5759,2007,6668.00
27,Biscayne NP,3648,2007,4196.00
...,...,...,...,...
353,Wrangell-Saint Elias NP&Pres,5634,2020,5634.00
354,Yellowstone NP,34410,2020,34410.00
355,Yosemite NP,30032,2020,30032.00
357,Yukon-Charley Rivers NPres,1768,2020,1768.00


## Geo Code Park Attendance Data Frame(Mike, Anna-support)

In [34]:
#Geocode work.  Utilizing existing Attendance 
np_master_df =pd.read_csv("data/national_park_master.csv")
np_master_df 

Unnamed: 0,parkCode,fullName,lat,lon,Park Name
0,acad,Acadia National Park,44.409286,-68.247501,Acadia NP
1,arch,Arches National Park,38.722618,-109.586367,Arches NP
2,badl,Badlands National Park,43.685848,-102.482942,Badlands NP
3,bibe,Big Bend National Park,29.298178,-103.22979,Big Bend NP
4,bisc,Biscayne National Park,25.490587,-80.210239,Biscayne NP
5,blca,Black Canyon Of The Gunnison National Park,38.577799,-107.724276,Black Canyon of the Gunnison NP
6,brca,Bryce Canyon National Park,37.583991,-112.182669,Bryce Canyon NP
7,cany,Canyonlands National Park,38.245558,-109.880162,Canyonlands NP
8,care,Capitol Reef National Park,38.282165,-111.247048,Capitol Reef NP
9,cave,Carlsbad Caverns National Park,32.140895,-104.552969,Carlsbad Caverns NP


In [35]:
#Connect to NP API parks endpoint to get park state and activities
parkCode_list = []
activities_list = []
np_master_df["State"] = " "
np_master_df["Activity Count"] = 0

for index, row in np_master_df.iterrows():
    params = { "api_key": np_key,
               "parkCode":row["parkCode"]
             }
    response = requests.get(np_endpoint_parks,params).json()
    if len(response["data"][0]["addresses"]) > 0:
        np_master_df.loc[index,"State"] = response["data"][0]["addresses"][0]["stateCode"]
    np_master_df.loc[index,"Activity Count"] = len(response["data"][0]["activities"])
    for activity in response["data"][0]["activities"]:
        parkCode_list.append(row["parkCode"])
        activities_list.append(activity["name"])
        
    

    



In [36]:
#Build NP Activities List
np_activities = pd.DataFrame ({"parkCode":parkCode_list, "Activity":activities_list})

#Build Park Activies DataFrame
np_activities_df = pd.DataFrame ({"parkCode":parkCode_list, "Activity":activities_list})
np_activities_df["Cnt"] = 1

#Pivot on parkCode Not working
#np_activities_pivot_df = pd.pivot(np_activities_df,index="parkCode", columns = "Activity", values = "Cnt").reset_index()



In [37]:
np_activities

Unnamed: 0,parkCode,Activity
0,acad,Arts and Culture
1,acad,Cultural Demonstrations
2,acad,Astronomy
3,acad,Stargazing
4,acad,Biking
...,...,...
1541,zion,Birdwatching
1542,zion,Park Film
1543,zion,Museum Exhibits
1544,zion,Shopping


In [38]:
np_master_df

Unnamed: 0,parkCode,fullName,lat,lon,Park Name,State,Activity Count
0,acad,Acadia National Park,44.409286,-68.247501,Acadia NP,ME,46
1,arch,Arches National Park,38.722618,-109.586367,Arches NP,UT,20
2,badl,Badlands National Park,43.685848,-102.482942,Badlands NP,SD,21
3,bibe,Big Bend National Park,29.298178,-103.22979,Big Bend NP,TX,19
4,bisc,Biscayne National Park,25.490587,-80.210239,Biscayne NP,FL,13
5,blca,Black Canyon Of The Gunnison National Park,38.577799,-107.724276,Black Canyon of the Gunnison NP,CO,13
6,brca,Bryce Canyon National Park,37.583991,-112.182669,Bryce Canyon NP,UT,28
7,cany,Canyonlands National Park,38.245558,-109.880162,Canyonlands NP,UT,23
8,care,Capitol Reef National Park,38.282165,-111.247048,Capitol Reef NP,UT,47
9,cave,Carlsbad Caverns National Park,32.140895,-104.552969,Carlsbad Caverns NP,NM,21


In [53]:
#creating data frame joining master table and detail visists
#attendance_master_detail = pd.concat([national_park_df,np_master_df],join="inner",keys=["Park Name"],axis=1)

attendance_master_detail = pd.merge(Final_Budget_df, np_master_df, how="inner", on=["Park Name"])

In [54]:
attendance_master_detail.head(-10)

Unnamed: 0,Park Name,Budget Request,Year,AdjBudg,parkCode,fullName,lat,lon,State,Activity Count
0,Acadia NP,6545,2007,7920.00,acad,Acadia National Park,44.409286,-68.247501,ME,46
1,Acadia NP,7185,2008,7920.00,acad,Acadia National Park,44.409286,-68.247501,ME,46
2,Acadia NP,7716,2009,7920.00,acad,Acadia National Park,44.409286,-68.247501,ME,46
3,Acadia NP,8454,2010,7920.00,acad,Acadia National Park,44.409286,-68.247501,ME,46
4,Acadia NP,8189,2011,7920.00,acad,Acadia National Park,44.409286,-68.247501,ME,46
...,...,...,...,...,...,...,...,...,...,...
513,Zion NP,8214,2011,32915.20,zion,Zion National Park,37.298393,-113.026514,UT,23
514,Zion NP,8132,2012,30058.76,zion,Zion National Park,37.298393,-113.026514,UT,23
515,Zion NP,7776,2013,8638.99,zion,Zion National Park,37.298393,-113.026514,UT,23
516,Zion NP,7707,2014,7034.73,zion,Zion National Park,37.298393,-113.026514,UT,23


In [None]:
#Max_Year_attendance_df = attendence_df[attendence_df["Park Name"].str.contains("NP")]
#Max_Year_attendance_df

Max_Year_attendance_grp = attendance_master_detail.groupby(['Park Name'])['Year'].max()
Max_Year_attendance_stg_df = Max_Year_attendance_grp.reset_index()
#attendance_master_detail[attendance_master_detail['Year']==Max_Year_attendance_stg_df['Year']
                        
#Max_Year_attendance_df


In [None]:
Max_Year_attendance_stg_df

In [None]:

Max_Year_stg_df = pd.merge(Max_Year_attendance_stg_df, np_master_df, how="inner", on=["Park Name"])

Max_Year_df = pd.merge(Max_Year_stg_df,national_park_df,how="inner", on=["Park Name","Year"])
Max_Year_df

In [None]:
## Merge Data add Custom fields (Efrain, Kevin, Ana)

In [None]:
# Merge data frames add 

In [None]:
 #Create any custom fields needed to partition data for analysis

In [None]:
#Duplicate all monetary fieldfs adjusting for inflation

# # Join Attendance Detail to Budget

In [49]:
#Merge 
Budget_Attendance_final_df = pd.merge(Final_Budget_df, national_park_df, how="inner", on=["Park Name","Year"])
Budget_Attendance_final_df = pd.merge(Budget_Attendance_final_df,np_master_df,how="inner", on=["Park Name"])

In [52]:
Budget_Attendance_final_df

Unnamed: 0,Park Name,Budget Request,Year,AdjBudg,Visits,parkCode,fullName,lat,lon,State,Activity Count
0,Acadia NP,6545,2007,7920.00,2202228,acad,Acadia National Park,44.409286,-68.247501,ME,46
1,Acadia NP,7185,2008,7920.00,2075857,acad,Acadia National Park,44.409286,-68.247501,ME,46
2,Acadia NP,7716,2009,7920.00,2227698,acad,Acadia National Park,44.409286,-68.247501,ME,46
3,Acadia NP,8454,2010,7920.00,2504208,acad,Acadia National Park,44.409286,-68.247501,ME,46
4,Acadia NP,8189,2011,7920.00,2374645,acad,Acadia National Park,44.409286,-68.247501,ME,46
...,...,...,...,...,...,...,...,...,...,...,...
523,Rocky Mountain NP,12884,2017,3376.14,4437215,romo,Rocky Mountain National Park,40.355692,-105.697288,CO,34
524,Rocky Mountain NP,12042,2018,12411.45,4590493,romo,Rocky Mountain National Park,40.355692,-105.697288,CO,34
525,Rocky Mountain NP,11479,2019,11620.61,4670053,romo,Rocky Mountain National Park,40.355692,-105.697288,CO,34
526,Pinnacles NP,3479,2014,3803.41,196635,pinn,Pinnacles National Park,36.490292,-121.181361,CA,26


## Create Current Year Summary Attendance Statistics (Ana - Mike Support)

In [None]:
# Top 10 by visit 2019

#Max_Year_df.groupby(["Park Name","Year","Visits"]).size().sort_values(ascending=False)

#Max_Year_df.sort_values('Visits', ascending=False).head(10)

Max_Year_df[["Park Name","Year","Visits"]].sort_values(by=["Visits"],ascending=False).head(10)

In [None]:
#Top 10 least visited Parks for 2019
Max_Year_df[["Park Name","Year","Visits"]].sort_values(by=["Visits"]).head(10)

In [None]:
Max_Year_df
locations= Max_Year_df[['lat', 'lon']].astype(float)
locations

In [None]:
#Heat Map All National parks parks.  Intensity = Visits
#attendance_master_detail.head()



gmaps.configure(api_key=g_key)
# Store 'Lat' and 'Lng' into  locations 
locations= Max_Year_df[['lat', 'lon']].astype(float)
#weights=Max_Year_df['Visits'].astype(float)
# Convert Poverty Rate to float and store
# HINT: be sure to handle NaN values
#census_data_complete = census_data_complete.dropna()
visits_all = Max_Year_df["Visits"].astype(float)
max_visits = Max_Year_df["Visits"].max() 
#max_visits= Max_Year_df["Visits"].max()

# Create a poverty Heatmap layer
fig = gmaps.figure()

heat_layer = gmaps.heatmap_layer(locations, weights=visits_all, 
                                 dissipating=False, max_intensity=max_visits,
                                 point_radius = 1)

fig.add_layer(heat_layer)

fig

## Create Heat Maps to Visualize Cost and Attendance (Kevin -  Mike support)

In [None]:
## Heat Map By Operating Costs

In [None]:
##Heat Map by Attendance

## Scater/Line Plots Operating Costs versus Attendance (Ana and Efrain)  Be creative 

## Trend Analysis (Compare Productivity Year Over Year) Mike and Kevin