# National Park Analysis

In [1]:
#import libraries keys and setup data direcotry
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
from datetime import date
import os
import sys
import random
from scipy.stats import linregress
from scipy.stats import normaltest
from citipy import citipy
import json
import gmaps
from math import nan


#Include api directory in path
sys.path.append("api")

# Import API key
from config import g_key
from config import np_key

#National Park Endpoints
np_endpoint_parks = "https://developer.nps.gov/api/v1/parks?"


#DataFrame Descriptions
#attendance_master_detail - National Visit - Location code, activity count - One record per park per year.
#Final_Budget_df - Clean National Park Budget Data - One record per park per year. 
#Max_Year_df - Max Year Visits With location codes -  One record per park. Representing visits from most recent year.
#Budget_Attendance_final_df - Merge of visit, budget (including adjusted for cpi), location codes,state, activity count. One record per year per park.
#National Park activities - One record per activity per national park.  (Just in case, but most likely won't use.)



## Import Park Attendance Data (Ana, Kevin-support)

In [2]:
#Import in park atendnce excel/csv  

In [3]:
# Import the park atendence csv file
attendance_df = pd.read_csv("data/Visists 1979-2019 v2.csv")
attendance_df

Unnamed: 0,Park Name,Year,Visits
0,Abraham Lincoln Birthplace NHP,2019,238226
1,Abraham Lincoln Birthplace NHP,1979,271231
2,Abraham Lincoln Birthplace NHP,1980,239074
3,Abraham Lincoln Birthplace NHP,1981,252245
4,Abraham Lincoln Birthplace NHP,1982,318707
...,...,...,...
13738,Zion NP,2014,3189696
13739,Zion NP,2015,3648846
13740,Zion NP,2016,4295127
13741,Zion NP,2017,4504812


In [4]:
#Data cleanse.
attendance_df.columns

Index(['Park Name', 'Year', 'Visits'], dtype='object')

In [5]:
#Checking the data types in Data Frame 
attendance_df.dtypes

Park Name    object
Year          int64
Visits        int64
dtype: object

In [6]:
attendance_df["Park Name"] = attendance_df["Park Name"].astype("str")
attendance_df.dtypes

Park Name    object
Year          int64
Visits        int64
dtype: object

In [7]:
# Filter through our attendence DataFrame to fibd only our National Parks
national_park_df = attendance_df[attendance_df["Park Name"].str.contains("NP")]
national_park_df

Unnamed: 0,Park Name,Year,Visits
41,Acadia NP,2019,3437286
42,Acadia NP,1979,2787366
43,Acadia NP,1980,2779666
44,Acadia NP,1981,2997972
45,Acadia NP,1982,3572114
...,...,...,...
13738,Zion NP,2014,3189696
13739,Zion NP,2015,3648846
13740,Zion NP,2016,4295127
13741,Zion NP,2017,4504812


## Import Operating Cost Data (Kevin)

In [8]:
#Import operating cost into data frame #Import operating cost into data frame 
Budget2006_df = pd.read_excel("data/Nation_Park_Budget/National_Park_Budget_2006.xlsx")
Budget2007_df = pd.read_excel("data/Nation_Park_Budget/National_Park_Budget_2007.xlsx")
Budget2008_df = pd.read_excel("data/Nation_Park_Budget/National_Park_Budget_2008.xlsx")
Budget2009_df = pd.read_excel("data/Nation_Park_Budget/National_Park_Budget_2009.xlsx")
Budget2010_df = pd.read_excel("data/Nation_Park_Budget/National_Park_Budget_2010.xlsx")
Budget2011_df = pd.read_excel("data/Nation_Park_Budget/National_Park_Budget_2011.xlsx")
Budget2012_df = pd.read_excel("data/Nation_Park_Budget/National_Park_Budget_2012.xlsx")
Budget2013_df = pd.read_excel("data/Nation_Park_Budget/National_Park_Budget_2013.xlsx")
Budget2014_df = pd.read_excel("data/Nation_Park_Budget/National_Park_Budget_2014.xlsx")
Budget2015_df = pd.read_excel("data/Nation_Park_Budget/National_Park_Budget_2015.xlsx")
Budget2017_df = pd.read_excel("data/Nation_Park_Budget/National_Park_Budget_2017.xlsx")
Budget2018_df = pd.read_excel("data/Nation_Park_Budget/National_Park_Budget_2018.xlsx")
Budget2019_df = pd.read_excel("data/Nation_Park_Budget/National_Park_Budget_2019.xlsx")
Budget2020_df = pd.read_excel("data/Nation_Park_Budget/National_Park_Budget_2020.xlsx")
Budget2021_df = pd.read_excel("data/Nation_Park_Budget/National_Park_Budget_2021.xlsx")


In [14]:
#Import operating cost into data frame 
Budget_2002_df = pd.read_excel("data/Budget_Data/2002_Budget.xlsx")
# Budget_2002_df.head()
clean_budget_2002_df = Budget_2002_df[Budget_2002_df["National Park Name"].str.contains("NP")]
# clean_budget_2002_df.head()
estimated_budget_2002_df = clean_budget_2002_df.loc[:,("National Park Name","Estimated Budget")]
estimated_budget_2002_df["Year"] = 2002
estimated_budget_2002_df.head()
#Import operating cost into data frame 
Budget_2003_df = pd.read_excel("data/Budget_Data/2003_Budget.xlsx")
clean_budget_2003_df = Budget_2003_df[Budget_2003_df["National Park Name"].str.contains("NP")]
# clean_budget_2003_df.head()
estimated_budget_2003_df = clean_budget_2003_df.loc[:,("National Park Name","Estimated Budget")]
estimated_budget_2003_df["Year"] = 2003
estimated_budget_2003_df.head()
#Import operating cost into data frame 
Budget_2004_df = pd.read_excel("data/Budget_Data/2004_Budget.xlsx")
clean_budget_2004_df = Budget_2004_df[Budget_2004_df["National Park Name"].str.contains("NP")]
# clean_budget_2004_df.head()
estimated_budget_2004_df = clean_budget_2004_df.loc[:,("National Park Name","Estimated Budget")]
estimated_budget_2004_df["Year"] = 2004
estimated_budget_2004_df.head()
#Import operating cost into data frame 
Budget_2005_df = pd.read_excel("data/Budget_Data/2005_Budget.xlsx")
clean_budget_2005_df = Budget_2005_df[Budget_2005_df["National Park Name"].str.contains("NP")]
# clean_budget_2005_df.head()
estimated_budget_2005_df = clean_budget_2005_df.loc[:,("National Park Name","Estimated Budget")]
estimated_budget_2005_df["Year"] = 2005
estimated_budget_2005_df.head()
#Import operating cost into data frame 
Budget_2006_df = pd.read_excel("data/Budget_Data/2006_Budget.xlsx")
clean_budget_2006_df = Budget_2006_df[Budget_2006_df["National Park Name"].str.contains("NP")]
# clean_budget_2006_df.head()
estimated_budget_2006_df = clean_budget_2006_df.loc[:,("National Park Name","Estimated Budget")]
estimated_budget_2006_df["Year"] = 2006
estimated_budget_2006_df.head()
#Import operating cost into data frame 
Budget_2016_df = pd.read_excel("data/Budget_Data/2016_Budget.xlsx")
clean_budget_2016_df = Budget_2016_df[Budget_2016_df["National Park Name"].str.contains("NP")]
# clean_budget_2016_df.head()
estimated_budget_2016_df = clean_budget_2016_df.loc[:,("National Park Name","Estimated Budget")]
estimated_budget_2016_df["Year"] = 2016
estimated_budget_2016_df.head()




Unnamed: 0,National Park Name,Estimated Budget,Year
1,Acadia NP,8130,2016
13,Arches NP,1965,2016
17,Badlands NP,4412,2016
20,Big Bend NP,7258,2016
21,Big Cypress NPres,6835,2016


In [10]:
#Data cleansing
Clean_Budget2006_df = Budget2006_df.loc[:,["OPERATION OF THE NAT'L PARK SYSTEM","FY 2006 Presiden's Request"]]
Clean_Budget2007_df = Budget2007_df.loc[:,["OPERATION OF THE NAT'L PARK SYSTEM","FY 2007 Request"]]
Clean_Budget2008_df = Budget2008_df.loc[:,["OPERATION OF THE NAT'L PARK SYSTEM","FY 2008 President's Request"]]
Clean_Budget2009_df = Budget2009_df.loc[:,["Operation of National Park System","FY 2009 Presidents Request"]]
Clean_Budget2010_df = Budget2010_df.loc[:,["Operation of Nat Park","FY 2010 Preisdent Request"]]
Clean_Budget2011_df = Budget2011_df.loc[:,["Operation of The National Park System","FY 2011 President's Request"]]
Clean_Budget2012_df = Budget2012_df.loc[:,["OPERATION OF THE NATIONAL PARK SYSTEM","2012 Presidents Requests"]]
Clean_Budget2013_df = Budget2013_df.loc[:,["OPERATION OF THE NATIONAL PARK SYSTEM","FY 2013 Presidents Request"]]
Clean_Budget2014_df = Budget2014_df.loc[:,["OPERATION OF THE NATIONAL PARK SYSTEM","2014 Requested"]]
Clean_Budget2015_df = Budget2015_df.loc[:,["OPERATION OF THE NATIONAL PARK SYSTEM","2015 Reques t"]]
Clean_Budget2017_df = Budget2017_df.loc[:,["OPERATION OF THE NATIONAL PARK SYSTEM","FY 2017 Request"]]
Clean_Budget2018_df = Budget2018_df.loc[:,["OPERATION OF THE NATIONAL PARK SYSTEM\nORGANIZATIONS","FY 2018\nRequest"]]
Clean_Budget2019_df = Budget2019_df.loc[:,["OPERATION OF THE NATIONAL PARK SYSTEM ORGANIZATIONS","FY 2019\nRequest9"]]
Clean_Budget2021_df = Budget2021_df.loc[:,["OPERATION OF THE NATIONAL PARK SYSTEM ORGANIZATIONS", " FY 2021\nRequest"]]
Clean_Budget2020_df = Budget2020_df.loc[:,["OPERATION OF THE NATIONAL PARK SYSTEM ORGANIZATIONS ", "FY 2020 Request "]]

In [11]:
# Adding New Column For Year
Clean_Budget2006_df["Year"]=2006
Clean_Budget2007_df["Year"]=2007
Clean_Budget2008_df["Year"]=2008
Clean_Budget2009_df["Year"]=2009
Clean_Budget2010_df["Year"]=2010
Clean_Budget2011_df["Year"]=2011
Clean_Budget2012_df["Year"]=2012
Clean_Budget2013_df["Year"]=2013
Clean_Budget2014_df["Year"]=2014
Clean_Budget2015_df["Year"]=2015
Clean_Budget2017_df["Year"]=2017
Clean_Budget2018_df["Year"]=2018
Clean_Budget2019_df["Year"]=2019
Clean_Budget2020_df["Year"]=2020
Clean_Budget2021_df["Year"]=2021

In [12]:
# Making the Same name for the columns from different years
Clean_Budget2006_df = Clean_Budget2006_df.rename(columns={"OPERATION OF THE NAT'L PARK SYSTEM":"Park Name","FY 2006 Presiden's Request":"Budget Request(dollars in thousands)"})
Clean_Budget2007_df = Clean_Budget2007_df.rename(columns={"OPERATION OF THE NAT'L PARK SYSTEM":"Park Name","FY 2007 Request":"Budget Request(dollars in thousands)"})
Clean_Budget2008_df = Clean_Budget2008_df.rename(columns={"OPERATION OF THE NAT'L PARK SYSTEM":"Park Name","FY 2008 President's Request":"Budget Request(dollars in thousands)"})
Clean_Budget2009_df = Clean_Budget2009_df.rename(columns={"Operation of National Park System":"Park Name","FY 2009 Presidents Request":"Budget Request(dollars in thousands)"})
Clean_Budget2010_df = Clean_Budget2010_df.rename(columns={"Operation of Nat Park":"Park Name","FY 2010 Preisdent Request":"Budget Request(dollars in thousands)"})
Clean_Budget2011_df = Clean_Budget2011_df.rename(columns={"Operation of The National Park System":"Park Name","FY 2011 President's Request":"Budget Request(dollars in thousands)"})
Clean_Budget2012_df = Clean_Budget2012_df.rename(columns={"OPERATION OF THE NATIONAL PARK SYSTEM":"Park Name","2012 Presidents Requests":"Budget Request(dollars in thousands)"})
Clean_Budget2013_df = Clean_Budget2013_df.rename(columns={"OPERATION OF THE NATIONAL PARK SYSTEM":"Park Name","FY 2013 Presidents Request":"Budget Request(dollars in thousands)"})
Clean_Budget2014_df = Clean_Budget2014_df.rename(columns={"OPERATION OF THE NATIONAL PARK SYSTEM":"Park Name","2014 Requested":"Budget Request(dollars in thousands)"})
Clean_Budget2015_df = Clean_Budget2015_df.rename(columns={"OPERATION OF THE NATIONAL PARK SYSTEM":"Park Name","2015 Reques t":"Budget Request(dollars in thousands)"})
Clean_Budget2017_df = Clean_Budget2017_df.rename(columns={"OPERATION OF THE NATIONAL PARK SYSTEM":"Park Name","FY 2017 Request":"Budget Request(dollars in thousands)"})
Clean_Budget2018_df = Clean_Budget2018_df.rename(columns={"OPERATION OF THE NATIONAL PARK SYSTEM\nORGANIZATIONS":"Park Name","FY 2018\nRequest":"Budget Request(dollars in thousands)"})
Clean_Budget2019_df = Clean_Budget2019_df.rename(columns={"OPERATION OF THE NATIONAL PARK SYSTEM ORGANIZATIONS":"Park Name","FY 2019\nRequest9":"Budget Request(dollars in thousands)"})
Clean_Budget2021_df = Clean_Budget2021_df.rename(columns={"OPERATION OF THE NATIONAL PARK SYSTEM ORGANIZATIONS":"Park Name", " FY 2021\nRequest":"Budget Request(dollars in thousands)"})
Clean_Budget2020_df = Clean_Budget2020_df.rename(columns={"OPERATION OF THE NATIONAL PARK SYSTEM ORGANIZATIONS ":"Park Name", "FY 2020 Request ":"Budget Request(dollars in thousands)"})

In [20]:
budget_frames = [estimated_budget_2002_df,estimated_budget_2003_df,estimated_budget_2004_df,estimated_budget_2005_df,estimated_budget_2006_df,estimated_budget_2016_df]
merged_budget_df = pd.concat(budget_frames)
merged_budget_df

Unnamed: 0,National Park Name,Estimated Budget,Year
1,Acadia NP,4706000,2002
12,Arches NP,1020000,2002
17,Badlands NP,3072000,2002
21,Big Bend NP,4735000,2002
27,Biscayne NP,3480000,2002
...,...,...,...
388,Wrangell-Saint Elias NP&Pres,5767,2016
389,Yellowstone NP,35200,2016
390,Yosemite NP,30974,2016
392,Yukon-Charley Rivers NPres,1824,2016


In [21]:
# Merge, drop NA, then look for only NP

Frames  = [Clean_Budget2008_df,Clean_Budget2007_df,Clean_Budget2008_df,Clean_Budget2009_df, Clean_Budget2010_df,Clean_Budget2011_df, Clean_Budget2012_df,Clean_Budget2013_df, Clean_Budget2014_df,Clean_Budget2015_df, Clean_Budget2021_df,Clean_Budget2017_df, Clean_Budget2018_df,Clean_Budget2019_df, Clean_Budget2020_df]
Total_Budget_df= pd.concat(Frames)
Total_Budget_df.dropna( inplace=True )
Final_Budget_df = Total_Budget_df[Total_Budget_df["Park Name"].str.contains("NP")]
# Added by Mike to change from object type to int
Final_Budget_df["Budget Request(dollars in thousands)"] = Final_Budget_df["Budget Request(dollars in thousands)"].astype("int64")
Final_Budget_df



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  Final_Budget_df["Budget Request(dollars in thousands)"] = Final_Budget_df["Budget Request(dollars in thousands)"].astype("int64")


Unnamed: 0,Park Name,Budget Request(dollars in thousands),Year
1,Acadia NP,7185,2008
13,Arches NP,1712,2008
18,Badlands NP,4402,2008
22,Big Bend NP,6121,2008
28,Biscayne NP,3790,2008
...,...,...,...
353,Wrangell-Saint Elias NP&Pres,5634,2020
354,Yellowstone NP,34410,2020
355,Yosemite NP,30032,2020
357,Yukon-Charley Rivers NPres,1768,2020


In [None]:
Final_Budget_df.dtypes

## Import CPI table

In [None]:
cpi_df = pd.read_excel("data/cpi_table.xlsx")

In [None]:
cpi_df


## CPI Function

In [None]:

#The following function will return adjusted prior year abount based on CPI of current year and prior year.

def cpi_fnc ( current_year,prior_year, prior_amount ):
    
    ret_val = 0
    if type(prior_year) != 'int' or  type(current_year) != 'int':
        try:
            prior_year = int(prior_year)
            current_year = int(current_year)
        except:
            print(f"Type Error: Make sure to pass int values or strings that will convert to int")
        if len(cpi_df[cpi_df["YEAR"] == prior_year]) == 0:
                  print(f"Year {str(prior_year)} does not exist in CPI table")
        elif len(cpi_df[cpi_df["YEAR"] == current_year]) == 0:
                  print(f"Year {str(current_year)} does not exist in CPI table")
        else:
            v_curr_year_cpi = cpi_df[cpi_df["YEAR"] == current_year]["CPI"].values[0]
            v_prior_year_cpi = cpi_df[cpi_df["YEAR"] == prior_year]["CPI"].values[0]          
            adjusted_amount = prior_amount * float((v_curr_year_cpi/v_prior_year_cpi))
            ret_val = round(adjusted_amount,2)
    return ret_val
                

In [None]:
#CPI Test Statement

x = cpi_fnc(2020,2000,14888023)
print(str(x))


## Add CPI Adjusted Budget Amount columns to budget tables (Mike - Efrain)

In [None]:
#Add Column Adjusted Budget Column for 2020 CPI

Final_Budget_df["AdjBudg(dollars in thousands)"] = 0

for index, row in Final_Budget_df.iterrows():
    Final_Budget_df.loc[index,"AdjBudg(dollars in thousands)"] = cpi_fnc ( 2020, row["Year"], row["Budget Request(dollars in thousands)"])

    

## Geo Code Park Attendance Data Frame(Mike, Anna-support)

In [None]:
#Geocode work.  Utilizing existing Attendance 
np_master_df =pd.read_csv("data/national_park_master.csv")
np_master_df 

In [None]:
#Connect to NP API parks endpoint to get park state and activities
parkCode_list = []
activities_list = []
np_master_df["State"] = " "
np_master_df["Activity Count"] = 0

for index, row in np_master_df.iterrows():
    params = { "api_key": np_key,
               "parkCode":row["parkCode"]
             }
    response = requests.get(np_endpoint_parks,params).json()
    if len(response["data"][0]["addresses"]) > 0:
        np_master_df.loc[index,"State"] = response["data"][0]["addresses"][0]["stateCode"]
    np_master_df.loc[index,"Activity Count"] = len(response["data"][0]["activities"])
    for activity in response["data"][0]["activities"]:
        parkCode_list.append(row["parkCode"])
        activities_list.append(activity["name"])
        
    

    



In [None]:
#Build NP Activities List
np_activities = pd.DataFrame ({"parkCode":parkCode_list, "Activity":activities_list})

#Build Park Activies DataFrame
np_activities_df = pd.DataFrame ({"parkCode":parkCode_list, "Activity":activities_list})
np_activities_df["Cnt"] = 1

#Pivot on parkCode Not working
#np_activities_pivot_df = pd.pivot(np_activities_df,index="parkCode", columns = "Activity", values = "Cnt").reset_index()



In [None]:
np_activities

In [None]:
np_master_df

In [None]:
#creating data frame joining master table and detail visists
#attendance_master_detail = pd.concat([national_park_df,np_master_df],join="inner",keys=["Park Name"],axis=1)

attendance_master_detail = pd.merge(Final_Budget_df, np_master_df, how="inner", on=["Park Name"])

In [None]:
attendance_master_detail.head(-10)

In [None]:
#Max_Year_attendance_df = attendence_df[attendence_df["Park Name"].str.contains("NP")]
#Max_Year_attendance_df

Max_Year_attendance_grp = attendance_master_detail.groupby(['Park Name'])['Year'].max()
Max_Year_attendance_stg_df = Max_Year_attendance_grp.reset_index()
#attendance_master_detail[attendance_master_detail['Year']==Max_Year_attendance_stg_df['Year']
                        
#Max_Year_attendance_df


In [None]:
Max_Year_attendance_stg_df

In [None]:

Max_Year_stg_df = pd.merge(Max_Year_attendance_stg_df, np_master_df, how="inner", on=["Park Name"])

Max_Year_df = pd.merge(Max_Year_stg_df,national_park_df,how="inner", on=["Park Name","Year"])
Max_Year_df

In [None]:
## Merge Data add Custom fields (Efrain, Kevin, Ana)

In [None]:
# Merge data frames add 

In [None]:
 #Create any custom fields needed to partition data for analysis

In [None]:
#Duplicate all monetary fieldfs adjusting for inflation

# # Join Attendance Detail to Budget

In [None]:
#Merge 
Budget_Attendance_final_df = pd.merge(Final_Budget_df, national_park_df, how="inner", on=["Park Name","Year"])
Budget_Attendance_final_df = pd.merge(Budget_Attendance_final_df,np_master_df,how="inner", on=["Park Name"])

## Create Current Year Summary Attendance Statistics (Ana - Mike Support)

In [None]:
# Top 10 by visit 2019

#Max_Year_df.groupby(["Park Name","Year","Visits"]).size().sort_values(ascending=False)

#Max_Year_df.sort_values('Visits', ascending=False).head(10)

Max_Year_df[["Park Name","Year","Visits"]].sort_values(by=["Visits"],ascending=False).head(10)

In [None]:
#Top 10 least visited Parks for 2019
Max_Year_df[["Park Name","Year","Visits"]].sort_values(by=["Visits"]).head(10)

In [None]:
Max_Year_df
locations= Max_Year_df[['lat', 'lon']].astype(float)
locations

In [None]:
#Heat Map All National parks parks.  Intensity = Visits
#attendance_master_detail.head()



gmaps.configure(api_key=g_key)
# Store 'Lat' and 'Lng' into  locations 
locations= Max_Year_df[['lat', 'lon']].astype(float)
#weights=Max_Year_df['Visits'].astype(float)
# Convert Poverty Rate to float and store
# HINT: be sure to handle NaN values
#census_data_complete = census_data_complete.dropna()
visits_all = Max_Year_df["Visits"].astype(float)
max_visits = Max_Year_df["Visits"].max() 
#max_visits= Max_Year_df["Visits"].max()

# Create a poverty Heatmap layer
fig = gmaps.figure()

heat_layer = gmaps.heatmap_layer(locations, weights=visits_all, 
                                 dissipating=False, max_intensity=max_visits,
                                 point_radius = 1)

fig.add_layer(heat_layer)

fig

## Create Heat Maps to Visualize Cost and Attendance (Kevin -  Mike support)

In [None]:
## Heat Map By Operating Costs

In [None]:
##Heat Map by Attendance

## Scater/Line Plots Operating Costs versus Attendance (Ana and Efrain)  Be creative 

In [None]:
Analysis=Budget_Attendance_final_df.loc[:,["AdjBudg","Visits","Activity Count", "Park Name", "Year"]]
Analysis
Activity_Count = Analysis["Activity Count"].values.tolist()
Budget = Analysis["Activity Count"].values.tolist()
Visits = Analysis["Visits"].values.tolist()
Year=Analysis["Year"].values.tolist()
# Budget, =plt.plot(Year,Budget, color = "green", label= "Visits")
# Attendance, =plt.plot(Year,Visits, color = "red", label= "Visits")
Operation =plt.scatter(Budget,Visits, color = "blue", label= "Visits",edgecolors="black")
# plt.xlim(2005,2022)
# plt.ylim(5,11000)
plt.title("Operating Costs versus Attendance")
plt.xlabel("Budget (dollars in thousands)")
plt.ylabel("Visits")


In [None]:
Hist_data=Analysis.query('Year == "2019"')
Hist_data
group=Hist_data.groupby(["Park Name"]).head()
group
TopActivities=group.sort_values(by=["Activity Count"],ascending=False).head(10)
TopCost=group.sort_values(by=["AdjBudg"],ascending=False).head(10)
TopVisit=group.sort_values(by=["Visits"],ascending=False).head(10)
Histograms  = [TopVisit,TopCost,TopActivities]
Master_Top_10= pd.concat(Histograms)


## Trend Analysis (Compare Productivity Year Over Year) Mike and Kevin

In [None]:
#Graph Park Name vs Budget over time
Budget2021=Final_Budget_df.query('Year == "2021"')
Ten_Most_Expensive2021=Budget2021[["Park Name", "AdjBudg(dollars in thousands)", "Year"]].sort_values(by=["AdjBudg(dollars in thousands)"],ascending=False).head(10)
Ten_Most_Expensive2021

In [None]:
Scatter_df=Final_Budget_df.loc[:,["Park Name", "AdjBudg(dollars in thousands)", "Year"]]
Park_Name=Scatter_df["Park Name"]
Adjusted_Budget=Scatter_df["AdjBudg(dollars in thousands)"]
Year=Scatter_df["Year"]
plt.scatter(Year, Adjusted_Budget)
plt.grid()
plt.title("National Park Budget 2008 ~ 2020")
plt.xlabel("Year")
plt.ylabel("National Park Budget")
plt.legend()
plt.show()