# Collect addresses

In this notebook we're going to collect every Buc-ee's address from [their company directory](https://buc-ees.com/locations/).

In [1]:
# We import requests and BeautifulSoup to scrape addresses from Buc-ee's website
import requests
from bs4 import BeautifulSoup

# For data manipulation and analysis we import pandas
import pandas as pd

# To display data in a table we import itables
from itables import show

We're going to scrape the Buc-ee's directory to get all the locations. Their site is slow and unreliable, so instead of requesting the page live we'll parse a copy of the HTML that was saved locally at `../data/locations.html`.

In [2]:
buc_ees_list = []

# Parse the locally saved copy of the directory page. The encoding is passed
# explicitly because the page contains non-ASCII characters (curly apostrophes,
# en dashes) and open() otherwise falls back to a platform-dependent default.
with open('../data/locations.html', encoding='utf-8') as f:
    soup = BeautifulSoup(f, 'html.parser')

# Find all the divs with the class 'bucees-location' (one per store)
locations = soup.find_all('div', class_='bucees-location')

# Build one record (name, address, directions link) per location
for location in locations:

    # Find the address. It is in a div with the class 'bucees-location-address'.
    # Note: find() searches all descendants of the location div, not only its
    # direct children.
    address_div = location.find('div', class_='bucees-location-address')

    # Replace <br /> with a space so the address lines don't run together
    for br in address_div.find_all('br'):
        br.replace_with(' ')

    # Remove text that's in <small> tags
    for small in address_div.find_all('small'):
        small.decompose()

    # Collapse newlines and any run of whitespace into single spaces, and trim
    # the ends. split() with no arguments splits on every kind of whitespace.
    address = ' '.join(address_div.get_text().split())

    # Get the href located inside the div with a class of bucees-location-directions
    directions = location.find('div', class_='bucees-location-directions').find('a')['href']

    buc_ees_list.append(
        {
            'name': location.find('h4').text,
            'address': address,
            'directions': directions,
        }
    )

# Create a DataFrame from the list of dictionaries
df = pd.DataFrame(buc_ees_list)

# Save the directory so later steps can reuse it without re-parsing the HTML
df.to_csv('../output/buc-ees_directory.csv', index=False)

# Display the DataFrame
df

Unnamed: 0,name,address,directions
0,"#57 – Athens, AL","2328 Lindsay Lane South Athens, AL 35613",https://www.google.com/maps/search/2328 Lindsa...
1,"#43 – Leeds, AL","6900 Buc-ee’s Blvd. Leeds, Alabama 35094",https://www.google.com/maps/search/6900 Buc-ee...
2,"#42 – Loxley, AL","20403 County Rd. 68 Robertsdale, Alabama 36567",https://www.google.com/maps/search/20403 Count...
3,"#47 – Daytona Beach, FL","2330 Gateway North Drive Daytona Beach, FL 32117",https://www.google.com/maps/search/2330 Gatewa...
4,"#46 – Saint Augustine, FL","200 World Commerce Pkwy Saint Augustine, Flori...",https://www.google.com/maps/search/200 World C...
5,"#52 – Calhoun, GA","601 Union Grove Rd. SE Adairsville, GA 30103",https://www.google.com/maps/search/601 Union G...
6,"#51 – Warner Robins, GA","7001 Russell Parkway Fort Valley, Georgia 31030",https://www.google.com/maps/search/7001 Russel...
7,"#55 – Richmond, KY","1013 Buc-ee's Boulevard Richmond, Kentucky 40475",https://www.google.com/maps/search/1013 Buc-ee...
8,"#53 – Florence, SC","3390 North Williston Road Florence, South Caro...",https://www.google.com/maps/search/3390 North ...
9,"#50 Crossville, TN","2045 Genesis Road Crossville, Tennessee 38555",https://www.google.com/maps/search/2045 Genesi...
