In [1]:
import chardet
import numpy as np
import pandas as pd
import datetime
import codecs

import os
import glob

import progressbar
from time import sleep

import matplotlib.pyplot as plt

## Load and Format the Data
---

In [88]:
# specify filename to load for processing and aggregation
filename = './data/Raw-Data/reservations2014.csv'

In [89]:
# attempt to load the data using various codecs
types_of_encoding = ["utf8", "cp1252"]
for encoding_type in types_of_encoding:
    with codecs.open(filename, encoding = encoding_type, errors ='replace') as csvfile:
        rez = pd.read_csv(csvfile)

In [90]:
# function to format the imported data, drop all categories of reservations except campsites
def nps_site_format(df):
    # get only reservations from the National Park Service, OrdID of 128
    df = df[df['OrgID'] == 128]
    
    # get only site type entities
    df = df[df['EntityType'] == 'Site']
    
    # convert date columns to datetime, y-m-d
    df['EndDate'] = pd.to_datetime(df['EndDate'], format= '%Y-%m-%d')
    df['StartDate'] = pd.to_datetime(df['StartDate'], format= '%Y-%m-%d')
    df['OrderDate'] = pd.to_datetime(df['OrderDate'], format= '%Y-%m-%d')
    
    # calculate the stay length of the reservation
    df['StayLen'] = df['EndDate'] - df['StartDate']
    df['StayLen'] = df['StayLen'].dt.days + 1
    
    # calculate the days in advance the resveration was made
    # note: some reservations were recorded after the start date, those values are imputed as zero
    df['BookingHorizon'] = df['StartDate'] - df['OrderDate']
    df['BookingHorizon'] = df['BookingHorizon'].dt.days.clip(lower=0)
    
    # calculate the daily rate for each reservation
    df['DailyRate'] = df['TotalBeforeTax'] / df['StayLen']
    
    # fill nulls in fee columns with zeroes
    df[['UseFee', 'TranFee', 'AttrFee']] = df[['UseFee', 'TranFee', 'AttrFee']].fillna(0)
    
    # drop facility attribute columns
    df = df.drop(axis = 1, columns = ['FacilityState', 'FacilityLongitude', 'FacilityLatitude', 'UseType',
                                      'CustomerZIP', 'CustomerState', 'CustomerCountry', 'FacilityZIP', 'EntityID'])
    
    # drop categorical columns
    df = df.drop(axis = 1, columns = ['Tent', 'Popup', 'Trailer', 
                              'RVMotorhome', 'Boat', 'HorseTrailer', 'Car', 'FifthWheel', 
                              'Van', 'CanoeKayak', 'BoatTrailer', 'Motorcycle', 'Truck', 
                              'Bus', 'Bicycle', 'Snowmobile', 'OffRoadlAllTerrainVehicle', 
                              'PowerBoat', 'PickupCamper', 'LargeTentOver9x12', 'SmallTent', 'Marinaboat'])
    
    # drop nulls in important columns
    df = df.dropna(axis = 0, subset = ['FacilityID', 'StartDate', 'EndDate'])
    
    # drop duplicate order numbers
    df = df.drop_duplicates(['OrderNumber'])
    
    return df

In [91]:
# format imported data
rez = nps_site_format(rez)

In [92]:
rez.describe()

Unnamed: 0,HistoricalReservationID,OrgID,ParentLocationID,LegacyFacilityID,ProductID,FacilityID,Tax,UseFee,TranFee,AttrFee,TotalBeforeTax,TotalPaid,NumberOfPeople,StayLen,BookingHorizon,DailyRate
count,449046.0,449046.0,449046.0,449046.0,449046.0,449046.0,449046.0,449046.0,449046.0,449046.0,449046.0,449046.0,449046.0,449046.0,449046.0,449046.0
mean,1630043000.0,128.0,74317.484777,74063.764583,234459.237227,233331.200523,0.0,36.819264,1.818651,0.229326,39.475188,39.476579,4.397055,3.288342,64.608332,11.828373
std,73894840.0,0.0,19.967217,8965.146544,56021.873974,2873.918314,0.0,45.423301,4.249561,3.224915,44.624556,44.625904,8.727256,1.714692,65.521091,14.20719
min,1500638000.0,128.0,74266.0,70851.0,139983.0,232432.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
25%,1560651000.0,128.0,74324.0,70930.0,203077.0,232452.0,0.0,18.0,0.0,0.0,18.0,18.0,2.0,2.0,7.0,7.666667
50%,1629017000.0,128.0,74325.0,70970.0,205451.0,232489.0,0.0,25.0,0.0,0.0,29.0,29.0,3.0,3.0,39.0,10.0
75%,1691902000.0,128.0,74327.0,72159.0,281074.0,233241.0,0.0,45.0,0.0,0.0,46.0,46.0,5.0,4.0,122.0,13.333333
max,1759446000.0,128.0,74330.0,114702.0,372022.0,250796.0,0.0,1476.0,85.0,168.0,1476.0,1476.0,600.0,31.0,722.0,770.0


In [93]:
rez.isnull().sum()

HistoricalReservationID    0
OrderNumber                0
Agency                     0
OrgID                      0
CodeHierarchy              0
RegionCode                 0
RegionDescription          0
ParentLocationID           0
ParentLocation             0
LegacyFacilityID           0
Park                       0
SiteType                   0
ProductID                  0
EntityType                 0
FacilityID                 0
Tax                        0
UseFee                     0
TranFee                    0
AttrFee                    0
TotalBeforeTax             0
TotalPaid                  0
StartDate                  0
EndDate                    0
OrderDate                  0
NumberOfPeople             0
StayLen                    0
BookingHorizon             0
DailyRate                  0
dtype: int64

In [94]:
rez.dtypes

HistoricalReservationID             int64
OrderNumber                        object
Agency                             object
OrgID                               int64
CodeHierarchy                      object
RegionCode                         object
RegionDescription                  object
ParentLocationID                    int64
ParentLocation                     object
LegacyFacilityID                    int64
Park                               object
SiteType                           object
ProductID                           int64
EntityType                         object
FacilityID                        float64
Tax                               float64
UseFee                            float64
TranFee                           float64
AttrFee                           float64
TotalBeforeTax                    float64
TotalPaid                         float64
StartDate                  datetime64[ns]
EndDate                    datetime64[ns]
OrderDate                  datetim

In [95]:
for dtype in ['float','int','object']:
    selected_dtype = rez.select_dtypes(include=[dtype])
    mean_usage_b = selected_dtype.memory_usage(deep=True).mean()
    mean_usage_mb = mean_usage_b / 1024 ** 2
    print("Average memory usage for {} columns: {:03.2f} MB".format(dtype,mean_usage_mb))

Average memory usage for float columns: 3.43 MB
Average memory usage for int columns: 3.43 MB
Average memory usage for object columns: 28.17 MB


In [96]:
def mem_usage(pandas_obj):
    if isinstance(pandas_obj,pd.DataFrame):
        usage_b = pandas_obj.memory_usage(deep=True).sum()
    else: # we assume if not a df it's a series
        usage_b = pandas_obj.memory_usage(deep=True)
    usage_mb = usage_b / 1024 ** 2 # convert bytes to megabytes
    return "{:03.2f} MB".format(usage_mb)

In [97]:
rez_int = rez.select_dtypes(include=['int'])
converted_int = rez_int.apply(pd.to_numeric,downcast='unsigned')
print(mem_usage(rez_int))
print(mem_usage(converted_int))
compare_ints = pd.concat([rez_int.dtypes, converted_int.dtypes],axis=1)
compare_ints.columns = ['before','after']
compare_ints.apply(pd.Series.value_counts)

23.98 MB
11.56 MB


Unnamed: 0,before,after
uint8,,1.0
uint16,,1.0
uint32,,4.0
int64,6.0,


In [98]:
rez_float = rez.select_dtypes(include=['float'])
converted_float = rez_float.apply(pd.to_numeric,downcast='float')
print(mem_usage(rez_float))
print(mem_usage(converted_float))
compare_floats = pd.concat([rez_float.dtypes,converted_float.dtypes],axis=1)
compare_floats.columns = ['before','after']
compare_floats.apply(pd.Series.value_counts)

37.69 MB
20.56 MB


Unnamed: 0,before,after
float32,,10.0
float64,10.0,


In [99]:
optimized_rez = rez.copy()
optimized_rez[converted_int.columns] = converted_int
optimized_rez[converted_float.columns] = converted_float
print(mem_usage(rez))
print(mem_usage(optimized_rez))

346.79 MB
317.24 MB


In [100]:
rez.to_csv("./Data/Cleaned-Data/nps_res_2014.csv", index = False)
optimized_rez.to_csv("./Data/Cleaned-Data/nps_optimized_2014.csv", index = False)

In [101]:
optimized_rez.head()

Unnamed: 0,HistoricalReservationID,OrderNumber,Agency,OrgID,CodeHierarchy,RegionCode,RegionDescription,ParentLocationID,ParentLocation,LegacyFacilityID,...,AttrFee,TotalBeforeTax,TotalPaid,StartDate,EndDate,OrderDate,NumberOfPeople,StayLen,BookingHorizon,DailyRate
18,1722068135,2-29871861,NPS,128,|1|70904|74325|74283|70967|,GRSM-5460,Great Smoky Mountains National Park,74325,Southeast Region,70967,...,0.0,40.0,40.0,2014-08-06,2014-08-08,2014-08-07,1,3.0,0.0,13.333333
23,1722068468,2-29871865,NPS,128,|1|70904|74324|74297|70923|,ZION-1590,Zion National Park,74324,Intermountain Region,70923,...,0.0,48.0,48.0,2014-10-14,2014-10-17,2014-08-07,2,4.0,68.0,12.0
27,1722068799,2-29871866,NPS,128,|1|70904|74325|74283|70959|,GRSM-5460,Great Smoky Mountains National Park,74325,Southeast Region,70959,...,0.0,45.0,45.0,2014-10-11,2014-10-14,2014-08-07,18,4.0,65.0,11.25
29,1722069111,2-29871867,NPS,128,|1|70904|74324|72144|96669|,GRSA-1470,Great Sand Dunes National Park,74324,Intermountain Region,96669,...,0.0,160.0,160.0,2014-10-10,2014-10-12,2014-08-07,30,3.0,64.0,53.333332
33,1722069685,2-29871870,NPS,128,|1|70904|74268|74293|70851|,SHEN-4840,Shenandoah National Park,74268,Northeast Region,70851,...,0.0,30.0,30.0,2014-08-15,2014-08-17,2014-08-07,4,3.0,8.0,10.0


In [296]:
optimized_rez[optimized_rez['FacilityID'] == 232490]['ProductID'].nunique()

361

## Testing Fields
---

In [297]:
datelist = pd.date_range(optimized_rez['StartDate'].min(), optimized_rez['EndDate'].max()).tolist()
jan_1 = datelist[datelist.index(pd.Timestamp('2014-01-01'))]
print(jan_1.date())
jan_1_filter = (rez['StartDate'] <= jan_1) & (rez['EndDate'] >= jan_1)

2014-01-01


In [298]:
day = optimized_rez[jan_1_filter].groupby('FacilityID').agg({'StayLen': np.mean, 'BookingHorizon': np.mean,
                                                       'UseFee' : np.mean, 'NumberOfPeople' : np.sum, 
                                                       'DailyRate' : np.sum})

In [299]:
widgets=[progressbar.Bar('=', '[', ']'), ' ', progressbar.Percentage(),
               ' ', progressbar.ETA(),
               ' ', progressbar.AdaptiveETA()]

In [301]:
date_park_list = []
test_len = optimized_rez[jan_1_filter].groupby(['Park', 'RegionDescription']).mean().shape[0]
bar = progressbar.ProgressBar(maxval=test_len, widgets=widgets)

bar.start()
i = 0

for index, row in optimized_rez[jan_1_filter].groupby(['Park', 'RegionDescription', 'FacilityID']).nunique().iterrows():
    
    day_group = optimized_rez[jan_1_filter].groupby('FacilityID').agg({'StayLen': np.mean, 'BookingHorizon': np.mean,
                                                       'UseFee' : np.mean, 'NumberOfPeople' : np.sum, 
                                                       'DailyRate' : np.sum, 'ProductID': 'nunique'})
    
    stay_len = day_group.loc[index[2], 'StayLen']
    book_hor = day_group.loc[index[2], 'BookingHorizon']
    avg_fee = day_group.loc[index[2], 'UseFee']
    num_people = day_group.loc[index[2], 'NumberOfPeople']
    day_revenue = day_group.loc[index[2], 'DailyRate']
    sites_booked = day_group.loc[index[2], 'ProductID']
    
    
    date_park_list.append([jan_1, index[2], index[0], index[1], row['OrderNumber'], num_people, sites_booked, stay_len, book_hor, avg_fee, day_revenue])
    sleep(0.00001)
    
    i += 1
    bar.update(i)
bar.finish()



In [302]:
rez_test = pd.DataFrame(date_park_list, columns = ['Date', 'FacilityID', 'Site', 
                                        'Park', 'Reservations', 'NumberOfPeople', 'SitesBooked',
                                        'AvgStayLen', 'AvgBookingHorizon', 'AverageFee', 'DailyRevenue'])

rez_test.head()

Unnamed: 0,Date,FacilityID,Site,Park,Reservations,NumberOfPeople,SitesBooked,AvgStayLen,AvgBookingHorizon,AverageFee,DailyRevenue
0,2014-01-01,232473.0,BLACK ROCK CAMPGROUND,Joshua Tree National Park,114,397,84,3.561404,17.447369,31.052631,1024.482544
1,2014-01-01,234723.0,BLACK ROCK EQUESTRIAN CAMPGROUND,Joshua Tree National Park,1,6,1,2.0,4.0,15.0,7.5
2,2014-01-01,246889.0,Bear Island Campground,Big Cypress National Preserve,6,11,6,3.666667,6.666667,26.666666,43.333332
3,2014-01-01,246890.0,Burns Lake Campground,Big Cypress National Preserve,6,17,6,5.666667,12.666667,84.0,88.533333
4,2014-01-01,233309.0,CAMP GATEWAY- BROOKLYN NY,Gateway National Recreation Area,2,3,2,7.0,27.0,60.0,18.888889


## Site Aggregation for Modeling
---

In [108]:
# new function for aggregating the data by campsite and date
def nps_site_aggregator(df):
    
    # establish list for each site's daily stats
    site_list = []
    
    # create list of days to run through
    datelist = pd.date_range(df['StartDate'].min(), df['EndDate'].max()).tolist()
    
    # create progress bar object
    widgets=[progressbar.Bar('=', '[', ']'), ' ', progressbar.Percentage(),
               ' ', progressbar.ETA(),
               ' ', progressbar.AdaptiveETA()]
    
    bar = progressbar.ProgressBar(maxval=len(datelist), widgets=widgets)
    
    bar.start()
    count = 0
    
    # cycle through all days
    for date in datelist:
        
        #create a mask to find only reservations that inlcude the specified date
        date_mask = (df['StartDate'] <= date) & (df['EndDate'] >= date)
        
        # cycle through all resvervations, as grouped by the site and the park
        for index, row in df[date_mask].groupby(['Park', 'RegionDescription', 'FacilityID']).nunique().iterrows():
            
            # group the filtered dataframe by unique site ID, aggregate the columns appropriately
            day_group = df[date_mask].groupby('FacilityID').agg({'StayLen': np.mean, 'BookingHorizon': np.mean,
                                                       'UseFee' : np.mean, 'NumberOfPeople' : np.sum, 
                                                       'DailyRate' : np.sum, 'ProductID': 'nunique'})
            # assign variables for the daily 
            stay_len = day_group.loc[index[2], 'StayLen']
            book_hor = day_group.loc[index[2], 'BookingHorizon']
            avg_fee = day_group.loc[index[2], 'UseFee']
            num_people = day_group.loc[index[2], 'NumberOfPeople']
            day_revenue = day_group.loc[index[2], 'DailyRate']
            sites_booked = day_group.loc[index[2], 'ProductID']
            
            # add daily reservation information to the list
            site_list.append([date.date(), index[2], index[0], index[1], 
                              row['OrderNumber'], num_people, sites_booked, stay_len, 
                              book_hor, avg_fee, day_revenue])
            
        
        count += 1
        bar.update(count)
    
    bar.finish()
    return pd.DataFrame(site_list, columns = ['Date', 'FacilityID', 'Site', 'Park', 'Reservations', 
                                              'NumberOfPeople', 'SitesBooked', 'AvgStayLen', 'AvgBookingHorizon', 
                                              'AverageFee', 'DailyRevenue'])

In [109]:
# run site aggregator
# note: this may take some time depending on the size and parameters of the dataframe
rez_agg = nps_site_aggregator(optimized_rez)



In [110]:
rez_agg['Park'].unique()

array(['Yosemite National Park', 'Big South Fork National River',
       'Zion National Park', 'Great Smoky Mountains National Park',
       'Grand Canyon National Park', 'Point Reyes National Seashore',
       'Shenandoah National Park', 'Joshua Tree National Park',
       'Acadia National Park', 'Gulf Islands National Seashore',
       'Pinnacles National Park', 'Blue Ridge Parkway',
       'Big Bend National Park', 'Headquarters - Arches National Park',
       'Big Cypress National Preserve', 'Prince William Forest Park',
       'Chickasaw National Recreation Area',
       'Gateway National Recreation Area', 'Catoctin Mountain Park',
       'Cape Hatteras National Seashore', 'Channel Islands National Park',
       'Fort Hunt Park', 'Cape Lookout National Seashore',
       'Buffalo National River', 'Colorado National Monument',
       'Golden Gate National Recreation Area',
       'Death Valley National Park',
       'Headquarters - Mammoth Cave National Park',
       'Catoctin Natio

In [111]:
rez_agg.to_csv("./data/Aggregated-Data/nps_agg_2014.csv", index = False)
print(mem_usage(rez_agg))

8.25 MB


In [112]:
rez_agg.groupby('Site').mean()

Unnamed: 0_level_0,FacilityID,Reservations,NumberOfPeople,SitesBooked,AvgStayLen,AvgBookingHorizon,AverageFee,DailyRevenue
Site,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
AKERS,234442.0,2.120482,52.385542,2.036145,3.425703,72.660643,123.368674,71.843372
ALLEY SPRING,234046.0,20.817734,121.310345,18.935961,4.841102,105.635485,66.178435,293.371918
ANACAPA ISLAND,232502.0,4.527094,15.261084,1.566502,2.870586,31.572013,13.758518,32.770936
ANTHONY CREEK HORSE CAMP,232485.0,2.400000,8.550000,2.260000,4.548833,70.092500,59.516667,31.600000
APGAR GROUP SITES,234669.0,4.513333,53.720000,3.420000,4.502043,202.914390,193.593753,172.446668
APPALACHIAN CLUBHOUSE,233299.0,1.064516,80.677419,1.000000,1.112903,164.193548,401.612903,370.000000
ASPENGLEN CAMPGROUND,233187.0,85.366412,263.679389,50.633588,3.447117,71.772399,32.943319,873.587779
ASSATEAGUE ISLAND NATIONAL SEASHORE,232507.0,131.849123,549.242105,86.329825,4.332189,93.968253,69.711763,1911.192968
BANDY CREEK,232506.0,40.330986,127.246479,37.063380,4.900696,85.939355,83.378913,495.936622
BIG CREEK GROUP CAMP,232437.0,1.207792,20.474026,1.000000,4.343074,154.060606,116.428571,29.064935


In [306]:
rez_agg[rez_agg['FacilityID'] == 232490]

Unnamed: 0,Date,FacilityID,Site,Park,Reservations,NumberOfPeople,SitesBooked,AvgStayLen,AvgBookingHorizon,AverageFee,DailyRevenue
8,2013-10-02,232490.0,MATHER CAMPGROUND,Grand Canyon National Park,1,1,1,2.000000,1.000000,18.000000,9.000000
15,2013-10-03,232490.0,MATHER CAMPGROUND,Grand Canyon National Park,1,1,1,2.000000,1.000000,18.000000,9.000000
92,2013-10-12,232490.0,MATHER CAMPGROUND,Grand Canyon National Park,73,152,73,2.356164,0.000000,23.301371,696.000000
106,2013-10-13,232490.0,MATHER CAMPGROUND,Grand Canyon National Park,128,294,112,2.351562,0.007812,22.851562,1203.449951
119,2013-10-14,232490.0,MATHER CAMPGROUND,Grand Canyon National Park,151,388,129,2.490066,0.026490,24.437086,1424.164307
129,2013-10-15,232490.0,MATHER CAMPGROUND,Grand Canyon National Park,160,500,131,2.600000,0.050000,25.706249,1523.914307
136,2013-10-16,232490.0,MATHER CAMPGROUND,Grand Canyon National Park,173,580,149,2.658960,0.046243,26.115606,1642.664307
149,2013-10-17,232490.0,MATHER CAMPGROUND,Grand Canyon National Park,181,568,151,2.607735,0.049724,25.309393,1693.514282
173,2013-10-18,232490.0,MATHER CAMPGROUND,Grand Canyon National Park,200,545,164,2.625000,0.225000,26.745001,1966.064331
213,2013-10-19,232490.0,MATHER CAMPGROUND,Grand Canyon National Park,173,471,148,2.664740,0.416185,27.208092,1717.747559


## Combined Data EDA and Preprocessing
---

In [326]:
# set directory to collect aggregated csv files
os.chdir("./data/Aggregated-Data")

In [327]:
# get filenames of csv files in the aggregated data folder
extension = 'csv'

# create list of filenames
all_filenames = [i for i in glob.glob('*.{}'.format(extension))]
print(all_filenames)

['nps_agg_2016.csv', 'nps_agg_2017.csv', 'nps_agg_2015.csv', 'nps_agg_2014.csv', 'nps_agg_2018.csv']


In [328]:
#combine all files in the list
combined_rez = pd.concat([pd.read_csv(f) for f in all_filenames ])

# change directory back to repository base
os.chdir("../..")

#export to csv
combined_rez.to_csv( "./data/nps_combined_agg.csv", index=False, encoding='utf-8-sig')

In [329]:
# load aggregated dataframe
#combined_rez = pd.read_csv('./data/nps_combined_agg.csv')

In [330]:
# check size and shape of combined dataframe
print(mem_usage(combined_rez))
print(combined_rez.shape)

57.66 MB
(198042, 11)


In [331]:
# load in campsite data collected from RIDB
site_data = pd.read_csv('./data/nps_site_names.csv')

In [332]:
site_data.head()

Unnamed: 0,FacilityID,Site,NumberOfSites,Lat,Long
0,232432.0,Mathews Arm Campground (VA),166.0,38.766364,-78.298291
1,232433.0,Loft Mountain Campground (VA),207.0,38.248272,-78.673641
2,232434.0,Cataloochee Group Camp (NC),3.0,35.65113,-83.074725
3,232435.0,Deep Creek Picnic Pavilion (NC),2.0,35.460488,-83.438662
4,232436.0,Elkmont Group Camp (TN),4.0,35.659981,-83.586604


In [333]:
site_data[site_data['NumberOfSites'] == 0.0]

Unnamed: 0,FacilityID,Site,NumberOfSites,Lat,Long
60,232495.0,Flamingo (FL),0.0,,
99,233381.0,Circle X Ranch Group Campground,0.0,,
228,272243.0,SHEEP CAMP PRIMITIVE CAMPGROUND,0.0,,
231,272247.0,Crystal Creek Primitive Campground,0.0,,
233,272250.0,BRANDY CREEK PRIMITIVE CAMPGROUND,0.0,,


In [335]:
def site_data_format(df, sites):
    
    df['SitesAvailable'] = 0
    
    for index, row in sites.iterrows():
        if row['FacilityID'] in df['FacilityID'].values:
            df.loc[df['FacilityID'] == row['FacilityID'], 'Site'] = row['Site']
            df.loc[df['FacilityID'] == row['FacilityID'], 'SitesAvailable'] = row['NumberOfSites']
    
    df['SitesAvailable'] = df['SitesAvailable'].replace(0, 1)
    df['PercentBooked'] = df['SitesBooked'] / df['SitesAvailable']
    df['SitesAvailable'] = df['SitesAvailable'] - df['SitesBooked']
                  
    df.loc[df['PercentBooked'] > 1.0, 'PercentBooked'] = 1.0
           
    return df

In [336]:
test = site_data_format(combined_rez, site_data)

In [352]:
test['Site'].replace(regex=True, inplace=True, to_replace=r'\\\n', value=r'')

In [354]:
test[test['Site'].str.contains('Mather')]

Unnamed: 0,Date,FacilityID,Site,Park,Reservations,NumberOfPeople,SitesBooked,AvgStayLen,AvgBookingHorizon,AverageFee,DailyRevenue,SitesAvailable,PercentBooked
8,2015-09-30,232490.0,Mather Campground (AZ),Grand Canyon National Park,9,16.0,9,2.000000,0.000000,17.000000,76.500000,348.0,0.025210
19,2015-10-01,232490.0,Mather Campground (AZ),Grand Canyon National Park,41,79.0,40,2.073171,0.000000,16.317074,323.700012,317.0,0.112045
42,2015-10-02,232490.0,Mather Campground (AZ),Grand Canyon National Park,52,100.0,50,2.115385,0.153846,16.038462,395.200012,307.0,0.140056
74,2015-10-03,232490.0,Mather Campground (AZ),Grand Canyon National Park,45,79.0,39,2.266667,0.400000,17.000000,363.200012,318.0,0.109244
112,2015-10-04,232490.0,Mather Campground (AZ),Grand Canyon National Park,112,206.0,96,2.383929,0.839286,19.446428,986.783325,261.0,0.268908
153,2015-10-05,232490.0,Mather Campground (AZ),Grand Canyon National Park,161,313.0,139,2.403727,0.875776,20.683229,1422.266724,218.0,0.389356
193,2015-10-06,232490.0,Mather Campground (AZ),Grand Canyon National Park,205,444.0,157,2.507317,0.921951,22.985365,1900.566650,200.0,0.439776
238,2015-10-07,232490.0,Mather Campground (AZ),Grand Canyon National Park,203,443.0,157,2.546798,0.965517,23.699507,1893.858276,200.0,0.439776
290,2015-10-08,232490.0,Mather Campground (AZ),Grand Canyon National Park,190,401.0,156,2.478947,1.189474,22.610527,1721.275024,201.0,0.436975
355,2015-10-09,232490.0,Mather Campground (AZ),Grand Canyon National Park,148,328.0,127,2.506757,1.364865,20.756756,1244.400024,230.0,0.355742
