# Scrape data on parks

In [88]:
import requests
from urllib.request import urlopen
from bs4 import BeautifulSoup
import pandas as pd

## Load data

In [89]:
cp_parks = pd.read_csv('./Data/College_Park_Parks.csv')

In [90]:
cp_parks.head(5)

Unnamed: 0,PARKID,PARKNAME,OWNER,ORIG_FID
0,N79,Indian Creek Stream Valley Park,M-NCPPC,3
1,N36,College Park Woods Park,M-NCPPC,6
2,N10,Berwyn Park,M-NCPPC,11
3,,James Adams Park,MUNI,24
4,N62,Acredale Park,M-NCPPC,25


### Build URL

In [91]:
def make_url(park):
    url_base = 'https://www.pgparks.com/parks_trails/'
    park_url = park.replace(' ','-').lower()
    park_url = url_base + park_url
    return park_url

In [92]:
for index, row in cp_parks.iterrows():
    cp_parks.loc[index, 'park_url'] = make_url(cp_parks.loc[index, 'PARKNAME'])

In [93]:
cp_parks.head(5)

Unnamed: 0,PARKID,PARKNAME,OWNER,ORIG_FID,park_url
0,N79,Indian Creek Stream Valley Park,M-NCPPC,3,https://www.pgparks.com/parks_trails/indian-cr...
1,N36,College Park Woods Park,M-NCPPC,6,https://www.pgparks.com/parks_trails/college-p...
2,N10,Berwyn Park,M-NCPPC,11,https://www.pgparks.com/parks_trails/berwyn-park
3,,James Adams Park,MUNI,24,https://www.pgparks.com/parks_trails/james-ada...
4,N62,Acredale Park,M-NCPPC,25,https://www.pgparks.com/parks_trails/acredale-...


In [94]:
amenities = ['Baseball/Softball Fields', 'Basketball Courts', 'Fitness Equipment', 'Playground Equipment', 'Street Hockey','Tennis Courts', 'Trail Access', 'Picnic Tables', 'BBQ Grills', 'Fields(multipurpose)', 'Parking Lot', 'Soccer/Futsal', 'Fishing']

In [95]:
for index, row in cp_parks.iterrows():
    url = cp_parks.loc[index, 'park_url']
    r = requests.get(url, timeout=100)
    if r.status_code == 404:
            for amenity in amenities:
                cp_parks.loc[index, amenity] = 'No'
    else:
        page = urlopen(url)
        html_bytes = page.read()
        html = html_bytes.decode("utf-8")
        for amenity in amenities:
            if amenity in html:
                cp_parks.loc[index, amenity] = 'Yes'
            else:
                cp_parks.loc[index, amenity] = 'No'
            

In [96]:
for index, row in cp_parks.iterrows():
    url = cp_parks.loc[index, 'park_url']
    print(url)

https://www.pgparks.com/parks_trails/indian-creek-stream-valley-park
https://www.pgparks.com/parks_trails/college-park-woods-park
https://www.pgparks.com/parks_trails/berwyn-park
https://www.pgparks.com/parks_trails/james-adams-park
https://www.pgparks.com/parks_trails/acredale-park
https://www.pgparks.com/parks_trails/college-park-community-center
https://www.pgparks.com/parks_trails/hollywood-park
https://www.pgparks.com/parks_trails/paint-branch-parkway-park
https://www.pgparks.com/parks_trails/cherry-hill-park
https://www.pgparks.com/parks_trails/calvert-park
https://www.pgparks.com/parks_trails/paint-branch-golf-complex
https://www.pgparks.com/parks_trails/lake-artemesia-park
https://www.pgparks.com/parks_trails/lakeland-park
https://www.pgparks.com/parks_trails/northgate-urban-park
https://www.pgparks.com/parks_trails/sunnyside-park
https://www.pgparks.com/parks_trails/hollywood-gateway-park


In [97]:
cp_parks.head(5)

Unnamed: 0,PARKID,PARKNAME,OWNER,ORIG_FID,park_url,Baseball/Softball Fields,Basketball Courts,Fitness Equipment,Playground Equipment,Street Hockey,Tennis Courts,Trail Access,Picnic Tables,BBQ Grills,Fields(multipurpose),Parking Lot,Soccer/Futsal,Fishing
0,N79,Indian Creek Stream Valley Park,M-NCPPC,3,https://www.pgparks.com/parks_trails/indian-cr...,No,No,No,No,Yes,No,Yes,No,No,No,No,No,No
1,N36,College Park Woods Park,M-NCPPC,6,https://www.pgparks.com/parks_trails/college-p...,Yes,Yes,Yes,Yes,No,Yes,Yes,No,No,No,No,No,No
2,N10,Berwyn Park,M-NCPPC,11,https://www.pgparks.com/parks_trails/berwyn-park,No,Yes,No,Yes,No,Yes,No,Yes,No,No,No,No,No
3,,James Adams Park,MUNI,24,https://www.pgparks.com/parks_trails/james-ada...,No,No,No,No,No,No,No,No,No,No,No,No,No
4,N62,Acredale Park,M-NCPPC,25,https://www.pgparks.com/parks_trails/acredale-...,Yes,No,Yes,Yes,No,No,No,Yes,Yes,Yes,Yes,Yes,No


# Build pop-up field

In [100]:
fields = range(6,19) # Fields containing the names to be used
cp_parks['Amenities'] = ''
for index, row in cp_parks.iterrows(): # For each index and values
    amenities = []
    amenity_text = ''
    for field in fields: # For each field input
        if cp_parks.iloc[index,field] == "Yes": # If the amenity is present for the park
            amenities.append(cp_parks.columns[field]) # Add its name to the amenities list
    for amenity in amenities: # For each copied amenity
         if amenity_text == '': # If the string object is empty
             amenity_text = amenity # Assign the amenity value
         else: # If it isn't empty
             amenity_text = amenity_text + '\n' + amenity # Concatenate the existing amenity and add a line break
    cp_parks.iloc[index, 18] = amenity_text # Add the amenities for the corresponding row
    

cp_parks.head(5)

Unnamed: 0,PARKID,PARKNAME,OWNER,ORIG_FID,park_url,Baseball/Softball Fields,Basketball Courts,Fitness Equipment,Playground Equipment,Street Hockey,Tennis Courts,Trail Access,Picnic Tables,BBQ Grills,Fields(multipurpose),Parking Lot,Soccer/Futsal,Fishing,Amenities
0,N79,Indian Creek Stream Valley Park,M-NCPPC,3,https://www.pgparks.com/parks_trails/indian-cr...,No,No,No,No,Yes,No,Yes,No,No,No,No,No,No,Street Hockey\nTrail Access
1,N36,College Park Woods Park,M-NCPPC,6,https://www.pgparks.com/parks_trails/college-p...,Yes,Yes,Yes,Yes,No,Yes,Yes,No,No,No,No,No,No,Basketball Courts\nFitness Equipment\nPlaygrou...
2,N10,Berwyn Park,M-NCPPC,11,https://www.pgparks.com/parks_trails/berwyn-park,No,Yes,No,Yes,No,Yes,No,Yes,No,No,No,No,No,Basketball Courts\nPlayground Equipment\nTenni...
3,,James Adams Park,MUNI,24,https://www.pgparks.com/parks_trails/james-ada...,No,No,No,No,No,No,No,No,No,No,No,No,No,
4,N62,Acredale Park,M-NCPPC,25,https://www.pgparks.com/parks_trails/acredale-...,Yes,No,Yes,Yes,No,No,No,Yes,Yes,Yes,Yes,Yes,No,Fitness Equipment\nPlayground Equipment\nPicni...


### Save file

In [101]:
cp_parks.to_csv('CP_Parks.csv')