In [1]:
import chardet
import numpy as np
import pandas as pd
import codecs

In [2]:
# Relative path of the campsite CSV that is loaded into `sites` below.
# NOTE(review): this path uses '../Data' while the other paths in this notebook
# use '../data'; on a case-sensitive filesystem those are different
# directories - confirm which spelling is correct.
filename = '../Data/RIDB_Export/Campsites_API_v1.csv'

In [3]:
# Load the campsite CSV, trying each candidate encoding in order and keeping
# the first one that decodes the whole file cleanly.
#
# Decoding is strict inside the loop: with errors='replace' no encoding can
# ever fail, so every candidate would be parsed and the last one would always
# win regardless of the file's real encoding.
types_of_encoding = ["utf8", "cp1252"]
for encoding_type in types_of_encoding:
    try:
        with codecs.open(filename, encoding=encoding_type, errors='strict') as csvfile:
            sites = pd.read_csv(csvfile)
    except UnicodeDecodeError:
        # Wrong guess for this file; move on to the next candidate.
        continue
    break
else:
    # No candidate decoded cleanly. Fall back to the last encoding and
    # substitute undecodable bytes so the data can still be loaded.
    with codecs.open(filename, encoding=types_of_encoding[-1], errors='replace') as csvfile:
        sites = pd.read_csv(csvfile)

In [4]:
# Sanity check: (rows, columns) of the loaded campsite table.
sites.shape

(97441, 11)

In [5]:
# List the column labels available in the campsite table.
sites.columns

Index(['CampsiteID', 'FacilityID', 'CampsiteName', 'CampsiteType', 'TypeOfUse',
       'Loop', 'CampsiteAccessible', 'CampsiteLongitude', 'CampsiteLatitude',
       'CreatedDate', 'LastUpdatedDate'],
      dtype='object')

In [6]:
# Collapse the campsite rows to one row per FacilityID: the number of distinct
# campsite IDs plus a single longitude/latitude pair for the facility.
# NOTE(review): longitude takes the minimum while latitude takes the maximum;
# presumably this just picks one representative coordinate - confirm the
# asymmetry is intentional.
site_group = (
    sites
    .groupby('FacilityID')
    .agg({
        'CampsiteID': 'nunique',
        'CampsiteLongitude': 'min',
        'CampsiteLatitude': 'max',
    })
)

In [7]:
# Give the aggregated columns shorter names for the per-facility table.
site_group = site_group.rename(
    columns={
        'CampsiteID': 'NumberOfSites',
        'CampsiteLatitude': 'Lat',
        'CampsiteLongitude': 'Long',
    }
)

In [8]:
# Load the table of sites; later cells read its 'FacilityID' and 'Site' columns.
# NOTE(review): the final cell of this notebook writes `site_names` back to
# this same path, so the file read here is whatever a previous run saved
# (including any NumberOfSites/Lat/Long columns added by that run).
site_names = pd.read_csv('../data/nps_site_names.csv')

In [9]:
# Sanity check: (rows, columns) of the site-name table.
site_names.shape

(241, 5)

In [10]:
# Attach each site's campsite count and coordinates by looking its FacilityID
# up in `site_group` (which is indexed by FacilityID).
# Series.map performs the index lookup for the whole column at once instead of
# iterating rows and repeating `.loc` for every field.
facility_ids = site_names['FacilityID']

# Facilities with no campsite rows get a count of 0 ...
site_names['NumberOfSites'] = facility_ids.map(site_group['NumberOfSites']).fillna(0)
# ... and NaN coordinates (map leaves unmatched keys as NaN).
site_names['Lat'] = facility_ids.map(site_group['Lat'])
site_names['Long'] = facility_ids.map(site_group['Long'])

In [11]:
# Remove every "backslash followed by a newline" sequence from the site names
# (the regex r'\\\n' is an escaped backslash and then a newline character).
# The result is assigned back to the column: calling replace(inplace=True) on
# the `site_names['Site']` selection is chained assignment, which can modify a
# temporary object and leave `site_names` unchanged (it does so under pandas
# Copy-on-Write).
site_names['Site'] = site_names['Site'].replace(to_replace=r'\\\n', value=r'', regex=True)

In [12]:
# Preview the first rows to check the added NumberOfSites/Lat/Long columns.
site_names.head()

Unnamed: 0,FacilityID,Site,NumberOfSites,Lat,Long
0,232432.0,Mathews Arm Campground (VA),166.0,38.766364,-78.298291
1,232433.0,Loft Mountain Campground (VA),207.0,38.248272,-78.673641
2,232434.0,Cataloochee Group Camp (NC),3.0,35.65113,-83.074725
3,232435.0,Deep Creek Picnic Pavilion (NC),2.0,35.460488,-83.438662
4,232436.0,Elkmont Group Camp (TN),4.0,35.659981,-83.586604


In [13]:
# Save the per-facility aggregates; the FacilityID index is written as the
# first column because `index` is left at its default.
site_group.to_csv("../data/nps_site_data.csv")
# Save the enriched site table without its row index.
# NOTE(review): this is the same path `site_names` was read from earlier in
# the notebook, so each run overwrites its own input file.
site_names.to_csv("../data/nps_site_names.csv", index = False)