# Project 2
## Team B, CA Beaches


In [2]:
# set environment
from bs4 import BeautifulSoup
from datetime import date
import pandas as pd
import ast
from splinter import Browser
from webdriver_manager.chrome import ChromeDriverManager

In [4]:
from sqlalchemy import create_engine, insert
from sqlalchemy.ext.automap import automap_base
from sqlalchemy.orm import Session
#from config import username
#from config import password

In [None]:
# connect to SQL database
engine = create_engine(f'postgresql://postgres:{password}@localhost:5432/ETL_IrvineCoApts_db')
connection = engine.connect()

In [None]:
# Reflect an existing database into a new model
Base = automap_base()
Base.prepare(engine, reflect=True)

In [None]:
# create references to our tables
Beach = Base.classes.Beach

In [None]:
# initiate a database session
session = Session(connection)

In [5]:
# initialize connection with Chrome Driver
executable_path = {'executable_path': ChromeDriverManager().install()}
browser = Browser('chrome', **executable_path, headless=True)

[WDM] - Current google-chrome version is 89.0.4389
[WDM] - Get LATEST driver version for 89.0.4389






[WDM] - Driver [C:\Users\kate_\.wdm\drivers\chromedriver\win32\89.0.4389.23\chromedriver.exe] found in cache


In [6]:
# define our base URL
base_url = "https://www.californiabeaches.com/beaches/"

In [7]:
# scrape the regions off the main beach page
browser.visit(base_url)
html = browser.html
soup = BeautifulSoup(html, "html.parser") 

cali_soup = soup.find(id="regions")


In [8]:
# initialize empty county URL list
county_urls = []    
county_url = ""

# scrape lists of counties in each region
region_soup = cali_soup.find_all("ul")


for region in region_soup:
    county_soup = region.find_all("a", href=True)


    for county in county_soup:
        if county:
            # we found one, so pull out the URL
            new_url = county["href"]
            
            # only save one copy of each url
            if new_url != county_url:
                county_url = new_url
                x = county_url.split("/")
                
                # split out the region and county
                # and clean out hyphens
                region = x[-3]
                cnty = x[-2]
                cnty = cnty.replace("-", " ")
                cnty = cnty.replace(" county", "")
                county_urls.append([region.title(), cnty.title(), county_url])
                

print(len(county_urls))


16


In [9]:
# initialize empty area list
area_urls = []    
area_url = ""

# loop through county URLs
for county in county_urls:

    # scrape the county webpage
    browser.visit(county[2])
    html = browser.html
    soup = BeautifulSoup(html, "html.parser")
    
    # scrape the list of beaches
    county_soup = soup.find(id="beach-list")
    
    # scrape the links
    area_soup = county_soup.find_all("a", href=True)
    
    for area in area_soup:
        if area:
            # we have one, so pull out the URL
            new_url = area["href"]

            if new_url != area_url:
                area_url = new_url
                x = area_url.split("/")
                
                # split out the area name
                # and clean out hyphens
                curr_area = x[-2]
                curr_area = curr_area.replace("-", " ")
                area_urls.append([county[0], county[1], curr_area.title(), area_url])
                
#print(area_urls)
print(len(area_urls))

112


In [10]:
# initialize empty beach list
beach_urls = []    
beach_url = ""

for area in area_urls:
    
    # scrape the area page
    browser.visit(area[3])
    html = browser.html
    soup = BeautifulSoup(html, "html.parser")
    
    # scrape the list of beaches
    area_soup = soup.find(id="beach-list")
    
    # scrape the links
    beach_soup = area_soup.find_all("a", href=True)
    
    for beach in beach_soup:
        if beach:

            # we found one, so pull out the URL
            new_url = beach["href"]

            if new_url != beach_url:
                beach_url = new_url
                x = beach_url.split("/")
                # split out the beach name and
                # clean out the hyphens
                curr_beach = x[-2]
                curr_beach = curr_beach.replace("-", " ")
                beach_urls.append([area[0], area[1], area[2], curr_beach.title(), beach_url])
                
print(beach_urls[0])
print(len(beach_urls))


['Southern', 'San Diego', 'Carlsbad', 'Carlsbad City Beach', 'https://www.californiabeaches.com/beach/carlsbad-city-beach/']
1014


In [11]:
# initialize empty list of data titles
title_list = []


for beach in beach_urls:
    
    # append an empty dictionary to each beach list
    # to hold beach info
    beach.append({})
    
    try:
        
        # scrape each beach page
        browser.visit(beach[4])
        html = browser.html
        soup = BeautifulSoup(html, "html.parser")

        # scrape the list of data
        data_soup = soup.find("dl")

        # scrape the titles and data values
        title_soup = data_soup.find_all("dt")
        value_soup = data_soup.find_all("dd")

        i = 0
        
        # save column titles and data
        for title in title_soup:
            
            if title.text == "Address":
                addr_str = str(value_soup[i])
                addr_br = addr_str.split("<br/>")
        
                addr1_br = addr_br[0].split(">")
                beach[5]["address"] = addr1_br[-1]
        
                addr2_br = addr_br[1].split("<")
                
                city_state = addr2_br[0].split()
                
                beach[5]["zip"] = city_state[-1]
                beach[5]["state"] = city_state[-2] 
                beach[5]["city"] = city_state[0].replace(",", "")              
                    
                gmap = ast.literal_eval(value_soup[i].find("span")["data-gmapping"])                
               
                beach[5]["latitude"] = gmap["latlng"]["lat"]
                beach[5]["longitude"] = gmap["latlng"]["lng"]
                
#                print(lat, lng)

                if "address" not in title_list:
                    title_list.append("address")
                    title_list.append("city")
                    title_list.append("state")
                    title_list.append("zip")
                    title_list.append("latitude")
                    title_list.append("longitude")
        
            elif title.text == "Owner":
                owner = value_soup[i].text
                beach[5]["owner"] = owner
                if "owner" not in title_list:
                    title_list.append("owner")
            
                if value_soup[i].a:
                    owner_url = value_soup[i].a["href"]
                    beach[5]["owner_url"] = owner_url
                    
                    if "owner_url" not in title_list:
                        title_list.append("owner_url")
        
            else:
                mod_title = title.text.replace(" ", "_").lower()
                beach[5][mod_title] = value_soup[i].text
                if mod_title not in title_list:
                    title_list.append(mod_title)
                                     
            i+=1


    except Exception as e:
        print(f"Error processing: {beach[4]}, {e}")
        
#    print(beach)

#print(title_list)    
#print(beach_urls[0])


Error processing: https://www.californiabeaches.com/beach/sands-beach/, 'NoneType' object has no attribute 'find_all'
Error processing: https://www.californiabeaches.com/beach/willow-creek-picnic-area/, 'NoneType' object has no attribute 'find_all'
Error processing: https://www.californiabeaches.com/beach/strawberry-beach-at-wilder-ranch-state-park/, 'NoneType' object has no attribute 'find_all'
Error processing: https://www.californiabeaches.com/beach/schoolhouse-beach/, 'NoneType' object has no attribute 'find_all'
['Southern', 'San Diego', 'Carlsbad', 'Carlsbad City Beach', 'https://www.californiabeaches.com/beach/carlsbad-city-beach/', {'address': 'Ocean Street and Grand Avenue', 'zip': '92008', 'state': 'CA', 'city': 'Carlsbad', 'latitude': 33.158185, 'longitude': -117.354085, 'park_name': 'Carlsbad State Beach', 'owner': 'State Park', 'owner_url': 'http://www.carlsbadca.gov/about/visitorinfo/Pages/about-carlsbad-beaches1.aspx', 'activities': 'Sunbathing, Beach Walking', 'amenitie

In [None]:
# connect to SQL database
engine = create_engine(f'postgresql://{username}:{password}@localhost:5432/ddh5sm9o0kv98b')
connection = engine.connect()

In [None]:
# Reflect an existing database into a new model
Base = automap_base()
Base.prepare(engine, reflect=True)

In [None]:
# create references to our tables
Beaches = Base.classes.beaches

In [None]:
# initiate a database session
session = Session(connection)

In [13]:
for title in title_list:
    if title not in beach[5]:   
         beach[5][title] = ""

# loop through all the beaches we scraped
for beach in beach_urls:
    
    # add data to appropriate lists
    new_beach = Beaches(region = beach[0], county = beach[1], area = beach[2], beach_name = beach[3], \
                        beach_url = beach[4], address = beach[5]["address"], city = beach[5]["city"], \
                        state = beach[5]["state"], zip = beach[5]["zip"], latitude = beach[5]["latitude"], \
                        longitude = beach[5]["longitude"], park_name = beach[5]["park_name"], \
                        owner = beach[5]["owner"], owner_url = beach[5]["owner_url"], \
                        activities = beach[5]["activities"], amenities = beach[5]["amenities"], \
                        pet_policy = beach[5]["pet_policy"], pets_allowed = "Y", fees = beach[5]["fees"], \
                        free_parking = "N", phone = beach[5]["phone"], other_names = beach[5]["other_names"])
    

    session.add(new_beach)
        
session.commit()

In [14]:
# dump data into dataframe
beach_df = pd.DataFrame(beach_data)

In [15]:
# display dataframe
beach_df

Unnamed: 0,region,county,area,beach_name,beach_url,address,city,state,zip,latitude,longitude,park_name,owner,owner_url,activities,amenities,pet_policy,fees,phone,other_names
0,Southern,San Diego,Carlsbad,Carlsbad City Beach,https://www.californiabeaches.com/beach/carlsb...,Ocean Street and Grand Avenue,Carlsbad,CA,92008,33.1582,-117.354,Carlsbad State Beach,State Park,http://www.carlsbadca.gov/about/visitorinfo/Pa...,"Sunbathing, Beach Walking",No Facilities,No dogs allowed on the beach or sea wall,Free street parking,,
1,Southern,San Diego,Carlsbad,Carlsbad Lagoon Agua Hedionda,https://www.californiabeaches.com/beach/carlsb...,4750 Bayshore Dr,Carlsbad,CA,92008,33.1421,-117.321,Public Lagoon,City Access,https://www.carlsbadca.gov/residents/fun/lagoo...,"Kayaking, Canoeing, Stand-Up Paddleboarding, B...","Lagoon, Trails, Rentals",Leashed dogs ok on land but not in the water,Free street parking,760-434-3089,
2,Southern,San Diego,Carlsbad,North Ponto Beach,https://www.californiabeaches.com/beach/north-...,Carlsbad Blvd and Island Way,Carlsbad,CA,92011,33.1137,-117.324,South Carlsbad State Beach,State Park,http://www.parks.ca.gov/?page_id=660,"Surfing, Swimming, Fishing, Scuba Diving, Sunb...","Restrooms, Lifeguard",No dogs allowed on the beach,Fee for parking,,
3,Southern,San Diego,Carlsbad,Robert Frazee State Beach,https://www.californiabeaches.com/beach/robert...,3150 Ocean Street,Carlsbad,CA,92008,33.1555,-117.352,Carlsbad State Beach,State Park,http://www.parks.ca.gov/?page_id=653,"Surfing, Swimming, Sunbathing, Fishing, Runnin...","Restrooms, Showers, Lifeguard, Grass Park, Ben...",No dogs allowed on the beach,,,
4,Southern,San Diego,Carlsbad,South Carlsbad State Beach,https://www.californiabeaches.com/beach/south-...,Carlsbad Blvd and Poinsettia Ln,Carlsbad,CA,92011,33.1004,-117.319,"South Carlsbad State Beach, Batiquitos Lagoon ...",State Park,http://www.parks.ca.gov/?page_id=660,"Camping, Surfing, Swimming, Scuba Diving, Hiki...","Campground, Fire Pits, Restrooms, Showers, Lif...",No dogs allowed on the beach,"Fee for camping, free day-use parking south of...",(760) 438-3143,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1009,Northern,Del Norte,Klamath,Lagoon Creek Beach,https://www.californiabeaches.com/beach/lagoon...,Redwood Hwy,Klamath,CA,95548,41.596,-124.102,"Redwood National Park, Lagoon Creek Picnic Area",National Park,http://www.nps.gov/redw/,"Hiking, Beachcombing, Birdwatching, Surfing, F...","Picnic Tables, Restrooms, Driftwood, Trails, W...","Dogs allowed on leash on beach, but not allowe...",Free parking,,False Klamath Cove
1010,Northern,Del Norte,Klamath,Wilson Creek Beach,https://www.californiabeaches.com/beach/wilson...,Redwood Hwy and Wilson Creek Rd,Klamath,CA,95548,41.6035,-124.101,Del Norte Coast Redwoods State Park,State Park,http://www.parks.ca.gov/?page_id=414,"Tidepooling, Birdwatching, Surfing, Beach Walking","Picnic Tables, BBQs, Tide Pools, Driftwood, Creek",Dogs allowed on leash,Free parking,,Wilson Beach
1011,Northern,Del Norte,Smith River,Clifford Kamph Memorial Park,https://www.californiabeaches.com/beach/cliffo...,15100 Hwy 101,Smith,CA,95567,41.9716,-124.206,Clifford Kamph Memorial Park,County,http://www.co.del-norte.ca.us/departments/parks,"Beachcombing, Hiking, Surfing, Fishing, Campin...","Campground, Restrooms, Picnic Tables, BBQs, Tr...",Dogs allowed on leash,"Fee for camping, free day-use parking",707-464-7230,
1012,Northern,Del Norte,Smith River,Pelican State Beach,https://www.californiabeaches.com/beach/pelica...,17200 Hwy 101,Smith,CA,95567,41.9925,-124.209,Pelican State Beach,State Park,http://www.parks.ca.gov/?page_id=412,"Beachcombing, Beach Walking, Fishing, Surfing,...","Driftwood, No Facilities",No dogs allowed on the beach,Free parking,,


In [16]:
beach_df["pets_allowed"] = ""

In [17]:
beach_df["free_parking"] = ""

In [18]:
beach_df

Unnamed: 0,region,county,area,beach_name,beach_url,address,city,state,zip,latitude,...,owner,owner_url,activities,amenities,pet_policy,fees,phone,other_names,pets_allowed,free_parking
0,Southern,San Diego,Carlsbad,Carlsbad City Beach,https://www.californiabeaches.com/beach/carlsb...,Ocean Street and Grand Avenue,Carlsbad,CA,92008,33.1582,...,State Park,http://www.carlsbadca.gov/about/visitorinfo/Pa...,"Sunbathing, Beach Walking",No Facilities,No dogs allowed on the beach or sea wall,Free street parking,,,,
1,Southern,San Diego,Carlsbad,Carlsbad Lagoon Agua Hedionda,https://www.californiabeaches.com/beach/carlsb...,4750 Bayshore Dr,Carlsbad,CA,92008,33.1421,...,City Access,https://www.carlsbadca.gov/residents/fun/lagoo...,"Kayaking, Canoeing, Stand-Up Paddleboarding, B...","Lagoon, Trails, Rentals",Leashed dogs ok on land but not in the water,Free street parking,760-434-3089,,,
2,Southern,San Diego,Carlsbad,North Ponto Beach,https://www.californiabeaches.com/beach/north-...,Carlsbad Blvd and Island Way,Carlsbad,CA,92011,33.1137,...,State Park,http://www.parks.ca.gov/?page_id=660,"Surfing, Swimming, Fishing, Scuba Diving, Sunb...","Restrooms, Lifeguard",No dogs allowed on the beach,Fee for parking,,,,
3,Southern,San Diego,Carlsbad,Robert Frazee State Beach,https://www.californiabeaches.com/beach/robert...,3150 Ocean Street,Carlsbad,CA,92008,33.1555,...,State Park,http://www.parks.ca.gov/?page_id=653,"Surfing, Swimming, Sunbathing, Fishing, Runnin...","Restrooms, Showers, Lifeguard, Grass Park, Ben...",No dogs allowed on the beach,,,,,
4,Southern,San Diego,Carlsbad,South Carlsbad State Beach,https://www.californiabeaches.com/beach/south-...,Carlsbad Blvd and Poinsettia Ln,Carlsbad,CA,92011,33.1004,...,State Park,http://www.parks.ca.gov/?page_id=660,"Camping, Surfing, Swimming, Scuba Diving, Hiki...","Campground, Fire Pits, Restrooms, Showers, Lif...",No dogs allowed on the beach,"Fee for camping, free day-use parking south of...",(760) 438-3143,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1009,Northern,Del Norte,Klamath,Lagoon Creek Beach,https://www.californiabeaches.com/beach/lagoon...,Redwood Hwy,Klamath,CA,95548,41.596,...,National Park,http://www.nps.gov/redw/,"Hiking, Beachcombing, Birdwatching, Surfing, F...","Picnic Tables, Restrooms, Driftwood, Trails, W...","Dogs allowed on leash on beach, but not allowe...",Free parking,,False Klamath Cove,,
1010,Northern,Del Norte,Klamath,Wilson Creek Beach,https://www.californiabeaches.com/beach/wilson...,Redwood Hwy and Wilson Creek Rd,Klamath,CA,95548,41.6035,...,State Park,http://www.parks.ca.gov/?page_id=414,"Tidepooling, Birdwatching, Surfing, Beach Walking","Picnic Tables, BBQs, Tide Pools, Driftwood, Creek",Dogs allowed on leash,Free parking,,Wilson Beach,,
1011,Northern,Del Norte,Smith River,Clifford Kamph Memorial Park,https://www.californiabeaches.com/beach/cliffo...,15100 Hwy 101,Smith,CA,95567,41.9716,...,County,http://www.co.del-norte.ca.us/departments/parks,"Beachcombing, Hiking, Surfing, Fishing, Campin...","Campground, Restrooms, Picnic Tables, BBQs, Tr...",Dogs allowed on leash,"Fee for camping, free day-use parking",707-464-7230,,,
1012,Northern,Del Norte,Smith River,Pelican State Beach,https://www.californiabeaches.com/beach/pelica...,17200 Hwy 101,Smith,CA,95567,41.9925,...,State Park,http://www.parks.ca.gov/?page_id=412,"Beachcombing, Beach Walking, Fishing, Surfing,...","Driftwood, No Facilities",No dogs allowed on the beach,Free parking,,,,


In [19]:
# write dataframe to a CSV file
beach_df.to_csv("data/beach_info.csv")