# Update campground status in Google Sheets

In [2]:
from bs4 import BeautifulSoup
import update_campstatus as uc
import requests
import re
import pandas as pd

In [3]:
# Camping index pages on fs.usda.gov, keyed by national forest.
FOREST_URLS = {
    'stanislaus': 'https://www.fs.usda.gov/activity/stanislaus/recreation/camping-cabins/?recid=14833&actid=29',
    'tahoe': 'https://www.fs.usda.gov/activity/tahoe/recreation/camping-cabins/?recid=55444&actid=29',
    'eldorado': 'https://www.fs.usda.gov/activity/eldorado/recreation/camping-cabins/?recid=71008&actid=29',
}

# Only one forest is scraped per run. Previously all three pages were
# downloaded and the first two responses were thrown away when `r` was
# overwritten; now only the selected page is requested.
FOREST = 'eldorado'

# A timeout keeps the notebook from hanging on an unresponsive server, and
# raise_for_status() surfaces an HTTP error here instead of silently
# producing an empty campground list further down.
r = requests.get(FOREST_URLS[FOREST], timeout=30)
r.raise_for_status()
soup = BeautifulSoup(r.text, 'html.parser')

In [4]:
def get_campground_urls(soup, url_pref='https://www.fs.usda.gov'):
    """Collect ``[name, url]`` pairs for the campgrounds listed on a page.

    Every heading tag whose contents include the exact text
    'Campground Camping Areas' is located, and each ``<ul>`` sibling that
    follows it is searched for links. A link is kept when its first child's
    text ends with 'Campground' and its href does not point at a PDF.

    Parameters
    ----------
    soup : parsed document exposing ``find_all`` (a BeautifulSoup object).
    url_pref : str, optional
        Prefix prepended to every href that is kept.

    Returns
    -------
    list
        One ``[name, url]`` list per campground, in document order.
    """
    urls = []
    for heading in soup.find_all(re.compile(r"h\d")):
        if 'Campground Camping Areas' not in heading.contents:
            continue
        for listing in heading.find_next_siblings('ul'):
            for link in listing.find_all('a'):
                href = link.get('href')
                # Anchors with no href or no child nodes cannot yield an
                # entry; check before touching them so they are skipped
                # instead of raising AttributeError / IndexError.
                if href is None or not link.contents:
                    continue
                name = link.contents[0].string
                if name is None or not name.endswith('Campground'):
                    continue
                if href.endswith('.pdf'):
                    continue
                urls.append([name, url_pref + href])
    return urls
# Build the list of [name, url] pairs from the forest page parsed into `soup`.
urls = get_campground_urls(soup)

In [5]:
# Peek at the first [name, url] pair to sanity-check the scrape.
urls[0]

[u'Pardoes Point Campground',
 u'https://www.fs.usda.gov/recarea/eldorado/recreation/camping-cabins/recarea/?recid=78518&actid=29']

In [6]:
def get_campground_data(url):
    """Scrape the 'At a Glance' table of one campground page into a DataFrame.

    The page is downloaded and every heading whose contents include the exact
    text 'At a Glance' is located. Each ``<tr>`` inside the ``<div>`` siblings
    that follow contributes one value, keyed by its ``<th>`` text with colons
    removed. 'Status' (from ``uc.get_campground_status``) and 'URL' are added
    as extra columns.

    Parameters
    ----------
    url : str
        Address of the campground page.

    Returns
    -------
    pandas.DataFrame
        One column per table header plus 'Status' and 'URL'. There is one row
        unless a header repeats on the page, in which case there are as many
        rows as the most-repeated header and shorter columns are padded with
        None.
    """
    # Without a timeout a single stalled request blocks the whole scrape.
    r = requests.get(url, timeout=30)
    soup = BeautifulSoup(r.text, 'html.parser')
    table_data = {}
    for heading in soup.find_all(re.compile(r"h\d")):
        if 'At a Glance' not in heading.contents:
            continue
        for block in heading.find_next_siblings('div'):
            for row in block.find_all('tr'):
                th = row.find('th')
                td = row.find('td')
                # Layout rows without a header/value pair carry no data.
                if th is None or td is None:
                    continue
                # `.string` is None when the <th> holds nested markup; fall
                # back to the concatenated text in that case.
                label = th.string if th.string is not None else th.get_text()
                header = label.replace(':', '')
                content = td.get_text().replace(u'\xa0', '').strip()
                table_data.setdefault(header, []).append(content)

    # pd.DataFrame rejects columns of unequal length and an all-scalar dict,
    # so pad every column to a common length and pass an explicit index.
    n_rows = max([len(values) for values in table_data.values()] or [1])
    for values in table_data.values():
        values.extend([None] * (n_rows - len(values)))

    table_data['Status'] = uc.get_campground_status(url)
    table_data['URL'] = url
    return pd.DataFrame(table_data, index=range(n_rows))
# Smoke-test the scraper on the first listed campground; the result is discarded.
get_campground_data(urls[0][1])
# NOTE(review): this rebinds `r` (previously the requests response) to a
# DataFrame that no later visible cell reads — looks like leftover debugging;
# confirm before removing.
r = get_campground_data('https://www.fs.usda.gov/recarea/eldorado/recreation/camping-cabins/recarea/?recid=71029&actid=29')

In [7]:
# Scrape every listed campground and tag each resulting frame with the
# campground's name so the rows stay identifiable after concatenation.
rows = []
for campground, camp_url in urls:
    rows.append(get_campground_data(camp_url).assign(Campground=campground))

In [8]:
# Stack the per-campground frames into one table with a fresh 0..n-1 index,
# then summarize which columns are populated.
df = pd.concat(rows, ignore_index=True)
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 38 entries, 0 to 37
Data columns (total 20 columns):
Area Amenities        23 non-null object
Best Season           22 non-null object
Busiest Season        22 non-null object
Campground            38 non-null object
Closest Towns         22 non-null object
Current Conditions    30 non-null object
Fees                  37 non-null object
Information Center    22 non-null object
Open Season           35 non-null object
Operated By           33 non-null object
Operational Hours     19 non-null object
Passes                16 non-null object
Rentals & Guides      16 non-null object
Reservations          24 non-null object
Restrictions          14 non-null object
Restroom              23 non-null object
Status                38 non-null object
URL                   38 non-null object
Usage                 26 non-null object
Water                 23 non-null object
dtypes: object(20)
memory usage: 6.0+ KB


In [None]:
# NOTE(review): called without a path, to_csv() returns the CSV text instead
# of writing a file — presumably a target filename was intended; confirm.
df.to_csv()

In [323]:
def munge_reservations(cell):
    """Normalize a free-text 'Reservations' value to a short label.

    Parameters
    ----------
    cell : str or missing value
        Raw text scraped from the campground page.

    Returns
    -------
    str
        '' for a missing value; 'seasonal' when the text mentions winter or
        summer; 'both', 'fcfs' or 'reservations only' depending on which
        booking modes the text indicates; otherwise the original text, which
        is also printed so unrecognized wording can be reviewed.
    """
    if pd.isnull(cell):
        return ''
    text = cell.lower()

    # re.search (rather than re.match with a leading '.*') also finds the
    # phrase when it appears after a line break in a multi-line cell.
    fcfs = (
        re.search('first.{1,4}come.{1,4}first.{1,4}serve', cell, re.IGNORECASE) is not None
        or 'no reservations' in text
        or text in ('no', 'none.')
    )
    res = (
        'for reservations' in text
        or 'recreation.gov' in text
        or text.startswith('yes')
        or text == 'recreation.pge.com'
    )

    # A seasonal mention takes precedence over the booking mode.
    if 'winter' in text or 'summer' in text:
        return 'seasonal'
    if fcfs and res:
        return 'both'
    if fcfs:
        return 'fcfs'
    if res:
        return 'reservations only'

    # Function-call form works on both Python 2 and Python 3.
    print(cell)
    return cell
    
# Overwrite the free-text 'Reservations' column with its normalized labels.
df.loc[:, 'Reservations'] = df['Reservations'].apply(munge_reservations)

In [324]:
def munge_fees(cell):
    """Reduce a free-text 'Fees' value to a single dollar amount.

    Parameters
    ----------
    cell : str or missing value
        Raw text scraped from the campground page.

    Returns
    -------
    str
        '' for a missing value; the first dollar amount found in the text
        (e.g. '$22' or '$10.50'); '$0' when the text says there is no fee,
        mentions a donation, or is just 'free' / 'none'; otherwise the
        original text, which is also printed for review.
    """
    if pd.isnull(cell):
        return ''

    # The decimal part is only captured when digits follow the dot, so a
    # sentence-ending period ("... is $22.") is not swallowed into the amount.
    fees = re.findall(r'\$\d+(?:\.\d+)?', cell)
    if fees:
        return fees[0]

    text = cell.lower()
    if 'no fee' in text or 'donation' in text or text in ('free', 'none'):
        return '$0'

    # Function-call form works on both Python 2 and Python 3.
    print(cell)
    return cell
    
# Overwrite the free-text 'Fees' column with the extracted amounts.
df.loc[:, 'Fees'] = df['Fees'].apply(munge_fees)

In [325]:
def munge_water(cell):
    """Decide from a free-text 'Water' value whether potable water exists.

    Returns '' for a missing value. Otherwise returns True when the text is
    exactly 'potable', 'potable water' or 'yes' (case-insensitive), mentions
    'piped water', or says water 'is available' without the word
    'untreated'; returns False in every other case.
    """
    if pd.isnull(cell):
        return ''

    text = cell.lower()
    if text in ('potable', 'potable water', 'yes'):
        return True
    if 'piped water' in text:
        return True
    return 'is available' in text and 'untreated' not in text
    

# Derive a new 'Potable Water' column; the original 'Water' text is kept.
df.loc[:, 'Potable Water'] = df['Water'].apply(munge_water)

In [326]:
def munge_restrooms(cell):
    """Normalize a free-text 'Restroom' value to a toilet type.

    Parameters
    ----------
    cell : str or missing value
        Raw text scraped from the campground page.

    Returns
    -------
    str
        '' for a missing value; 'Both', 'Vault' or 'Flush' according to the
        toilet types mentioned (a bare 'yes' counts as vault); 'None' when
        the text is just 'no'; otherwise the original text, which is also
        printed for review.
    """
    if pd.isnull(cell):
        return ''

    text = cell.lower()
    vault = 'vault' in text or text == 'yes'
    flush = 'flush' in text

    if vault and flush:
        return 'Both'
    if vault:
        return 'Vault'
    if flush:
        return 'Flush'
    if text == 'no':
        return 'None'

    # Function-call form works on both Python 2 and Python 3.
    print(cell)
    return cell
        
# Overwrite the free-text 'Restroom' column with the normalized toilet type.
df.loc[:, 'Restroom'] = df['Restroom'].apply(munge_restrooms)

In [331]:
# Blank out missing values so the table displays cleanly, then show only the
# columns of interest.
df = df.fillna('')
columns = [
    'Campground',
    'Status',
    'Fees',
    'Open Season',
    'Reservations',
    'Restroom',
    'Potable Water',
    'Usage',
    'Water',
    'URL',
]
df[columns]

Unnamed: 0,campground,status,Fees,Open Season,Reservations,Restroom,Potable Water,Usage,Water,url
0,Pardoes Point Campground,Open,$22,Memorial Weekend - September,,,,,,https://www.fs.usda.gov/recarea/eldorado/recre...
1,South Shore Campground,Open,$22,Memorial Weekend - September,,,,Medium,,https://www.fs.usda.gov/recarea/eldorado/recre...
2,Sugar Pine Point Campground,Open,$20,Memorial Weekend - September,,,,,,https://www.fs.usda.gov/recarea/eldorado/recre...
3,Middle Fork Cosumnes Campground,Open,$16,Memorial Weekend - November 1,,,,Light-Medium,,https://www.fs.usda.gov/recarea/eldorado/recre...
4,Pipi Campground,Open,$22,April - October,,,,Medium-Heavy,,https://www.fs.usda.gov/recarea/eldorado/recre...
5,Dru Barner Campground,Open,$8,year round,fcfs,Vault,True,Medium,Yes,https://www.fs.usda.gov/recarea/eldorado/recre...
6,Airport Flat Campground,Open,$0,Memorial Weekend - Early October,fcfs,Vault,False,Medium,no,https://www.fs.usda.gov/recarea/eldorado/recre...
7,Big Meadows Campground,Closed,$10,May - October,fcfs,Vault,True,Medium,Yes,https://www.fs.usda.gov/recarea/eldorado/recre...
8,Hell Hole Campground,Closed,$10,May - October,fcfs,Vault,False,Light-Medium,No,https://www.fs.usda.gov/recarea/eldorado/recre...
9,Lovers Leap Campground,Open,$10,June - November,fcfs,,,Light-Medium,,https://www.fs.usda.gov/recarea/eldorado/recre...


In [332]:
# Display the whole frame, every scraped column included.
df

Unnamed: 0,Area Amenities,Best Season,Busiest Season,Closest Towns,Current Conditions,Fees,Information Center,Open Season,Operated By,Operational Hours,...,Rentals & Guides,Reservations,Restrictions,Restroom,Usage,Water,campground,status,url,Potable Water
0,,,,,Water system not operational. Host will have b...,$22,,Memorial Weekend - September,Sierra Recreation Managers,,...,,,,,,,Pardoes Point Campground,Open,https://www.fs.usda.gov/recarea/eldorado/recre...,
1,,,,,Water system not operational. Host will have b...,$22,,Memorial Weekend - September,Sierra Recreation Managers,,...,,,,,Medium,,South Shore Campground,Open,https://www.fs.usda.gov/recarea/eldorado/recre...,
2,,,,,,$20,,Memorial Weekend - September,Sierra Recreation Managers,,...,,,,,,,Sugar Pine Point Campground,Open,https://www.fs.usda.gov/recarea/eldorado/recre...,
3,,,,,Campfire permits are required.,$16,,Memorial Weekend - November 1,,,...,,,,,Light-Medium,,Middle Fork Cosumnes Campground,Open,https://www.fs.usda.gov/recarea/eldorado/recre...,
4,,,,,Due to storm damage the boardwalk will remain ...,$22,,April - October,Sierra Recreation Managers,,...,,,,,Medium-Heavy,,Pipi Campground,Open,https://www.fs.usda.gov/recarea/eldorado/recre...,
5,"Tent camping,Camping trailer,Fee charged for s...",Summer,Summer,"Georgetown, Ca",,$8,"Georgetown Ranger Station, 530-333-4312\r\n\t7...",year round,US Forest Service,The campground is generally open year-round ho...,...,,fcfs,Please keep family pets on a leash at all time...,Vault,Medium,Yes,Dru Barner Campground,Open,https://www.fs.usda.gov/recarea/eldorado/recre...,True
6,"Accessible,Tent camping,Camping trailer,Toilets",Summer,Summer,"Pollock Pines, Ca",,$0,Information for the Crystal Basin Area is hand...,Memorial Weekend - Early October,,Check in time is 2:00 p.m. Check out time is 1...,...,,fcfs,There is no piped water at the campground. Wat...,Vault,Medium,no,Airport Flat Campground,Open,https://www.fs.usda.gov/recarea/eldorado/recre...,False
7,"Tent camping,Camping trailer,Fee charged for s...",Summer,Summer,"Georgetown, Ca",Big Meadows and Hell Hole Campground will NOT ...,$10,"Georgetown Ranger Station, 530-333-4312\r\n760...",May - October,USFS,The campground is generally open between May 1...,...,,fcfs,CAUTION: There are bears in the area. Please k...,Vault,Medium,Yes,Big Meadows Campground,Closed,https://www.fs.usda.gov/recarea/eldorado/recre...,True
8,,Summer,Summer,"Georgetown, Ca",Big Meadows and Hell Hole Campground will NOT ...,$10,"Georgetown Ranger Station, 530-333-4312\r\n760...",May - October,USFS,The campground is generally open between May 1...,...,,fcfs,Do not feed the animals.\nTrailers are not rec...,Vault,Light-Medium,No,Hell Hole Campground,Closed,https://www.fs.usda.gov/recarea/eldorado/recre...,False
9,"Tent camping,Fee charged for some activities,P...",,,,,$10,,June - November,,,...,,fcfs,,,Light-Medium,,Lovers Leap Campground,Open,https://www.fs.usda.gov/recarea/eldorado/recre...,
