# Get Buc-ees locations

#### Load Python tools, Jupyter config, variables

In [1]:
import os
import us
import json
import requests
import pandas as pd
import jupyter_black
import altair as alt
import geopandas as gpd
from bs4 import BeautifulSoup
from vega_datasets import data
from tqdm.notebook import tqdm, trange

In [2]:
# Turn on the jupyter_black extension for this notebook session.
jupyter_black.load()

# Loosen pandas' display limits so wide frames and long strings (the
# directions URLs, for instance) are shown in full.
for option_name, option_value in (
    ("display.max_columns", 100),
    ("display.max_rows", 1000),
    ("display.max_colwidth", None),
):
    pd.set_option(option_name, option_value)

In [3]:
# Google Geocoding API key, read from the environment; None when it is unset.
api_key = os.getenv("GOOGLE_GEOCODER")

In [4]:
# `place` is the slug used in export file names, `place_formal` is the display
# name used in chart titles and the `brand` column, and `color` is the hex
# color of the map markers.
place, place_formal, color = "buc-ees", "Buc-ees", "#D31145"

---

## Scrape

#### Headers for the request

In [5]:
# Request headers carrying a desktop Chrome user-agent string.
headers = {
    "user-agent": (
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/120.0.0.0 Safari/537.36"
    ),
}

In [6]:
# Fetch the locations page and parse it. The timeout keeps a stalled
# connection from hanging the notebook indefinitely, and raise_for_status()
# stops here on an HTTP error instead of parsing an error page into an empty
# list of stores.
response = requests.get("https://buc-ees.com/locations/", headers=headers, timeout=30)
response.raise_for_status()
soup = BeautifulSoup(response.text, "html.parser")

#### Get the store headings (number and city) and collect them in a list

In [7]:
# Collect the trimmed text of every <h4> element on the page.
store_details = [heading.text.strip() for heading in tqdm(soup.find_all("h4"))]

  0%|          | 0/50 [00:00<?, ?it/s]

#### Get 'directions' URLs and store in a list

In [8]:
# Pull the href of the first link inside each "directions" <div>.
directions_list = [
    block.find("a")["href"]
    for block in tqdm(soup.find_all("div", class_="bucees-location-directions"))
]

  0%|          | 0/50 [00:00<?, ?it/s]

---

## Store/clean

#### Let's get these two lists into a dataframe

In [9]:
# The two lists were scraped independently and are paired purely by position.
# If their lengths differ, URLs would be attached to the wrong stores or rows
# would be padded with NaN without any warning, so fail loudly instead.
if len(store_details) != len(directions_list):
    raise ValueError(
        f"Scraped {len(store_details)} store headings but "
        f"{len(directions_list)} directions links; cannot pair them by position."
    )

# One row per store: the raw heading text and its Google Maps directions URL.
src = pd.DataFrame({"store_details": store_details, "url": directions_list})

#### Split out the store numbers

In [10]:
# The heading text is "<store number> – <city/state>" (spaced en dash);
# split it into its two halves.
heading_parts = src["store_details"].str.split(" – ", expand=True)
src[["store_number", "city_state"]] = heading_parts

#### Split the Google Maps URLs to parse the address

In [11]:
# Break each URL on "/" and keep the sixth segment (index 5), which is the
# address portion of the directions link.
url_segments = src["url"].str.split("/", expand=True)
src["address_full"] = url_segments[5]

In [12]:
# The address segment separates street, city, state and ZIP with "+".
address_columns = ["street", "city", "state_name", "zip"]
src[address_columns] = src["address_full"].str.split("+", expand=True)

#### Just the columns we need

In [13]:
# Keep the parsed fields plus the URL (still needed for geocoding) in an
# independent copy of the scraped frame.
keep_columns = ["store_number", "street", "city", "state_name", "zip", "url"]
df = src[keep_columns].copy()

---

## Geocode

#### Create a list of URLs with addresses for geocoding

In [14]:
# Plain Python list of the directions URLs, in row order.
url_list = df["url"].tolist()

#### Iterate through the URLs and get coordinates using the [Google Geocoding API](https://developers.google.com/maps/documentation/geocoding/overview). 
> You need a Google API key in your local environment, stored in the `GOOGLE_GEOCODER` variable. *See `api_key` above.*

In [15]:
# Geocode every store address with the Google Geocoding API.
#
# `latitudes` and `longitudes` end up with exactly one entry per URL, in the
# same order as `url_list`; a failed lookup is recorded as None so the lists
# stay aligned with the DataFrame rows.
latitudes = []
longitudes = []

if not api_key:
    raise ValueError("Google Geocoder API key not found in environment variables.")

# The endpoint is identical for every lookup, so define it once.
base_url = "https://maps.googleapis.com/maps/api/geocode/json"

for url in tqdm(url_list):
    # The address is whatever follows "/maps/search/" in the directions link.
    address = url.split("/maps/search/")[1]

    # Hand requests a plain-text address ("+" separators turned into spaces).
    # requests URL-encodes `params` itself, so pre-encoding spaces as "+"
    # gets encoded a second time and reaches the API as literal plus signs.
    params = {"address": address.replace("+", " "), "key": api_key}

    try:
        response = requests.get(base_url, params=params, timeout=30).json()
    except (requests.RequestException, ValueError) as error:
        # A network failure or non-JSON reply for one store should not throw
        # away the coordinates already collected; record a gap and move on.
        print(f"Failed to process URL: {url}")
        print(f"Request error: {error}")
        latitudes.append(None)
        longitudes.append(None)
        continue

    if response["status"] == "OK":
        # Use the first (best) match returned by the geocoder.
        location = response["results"][0]["geometry"]["location"]
        latitudes.append(location["lat"])
        longitudes.append(location["lng"])
    else:
        print(f"Failed to process URL: {url}")
        print(f"Status: {response['status']}")
        if "error_message" in response:
            print(f"Error message: {response['error_message']}")
        else:
            print("No error message provided.")

        latitudes.append(None)
        longitudes.append(None)

  0%|          | 0/50 [00:00<?, ?it/s]

#### Update the DataFrame with the latitude and longitude lists

In [16]:
# Attach the geocoded coordinates; the lists are in the same order as the rows.
df["latitude"], df["longitude"] = latitudes, longitudes

#### We no longer need the URL column

In [17]:
# The URL was only needed as geocoder input.
df = df.drop(columns="url")

#### The result

In [18]:
# Preview the first five rows.
df.head(n=5)

Unnamed: 0,store_number,street,city,state_name,zip,latitude,longitude
0,#57,2328 Lindsay Lane South,Athens,Alabama,35613,34.728536,-86.932527
1,#58,2500 Buc-ee’s Blvd,Auburn,Alabama,36832,32.552099,-85.526638
2,#43,6900 Buc-ee’s Blvd.,Leeds,Alabama,35094,33.544442,-86.58659
3,#42,20403 County Rd. 68,Robertsdale,Alabama,36567,30.633299,-87.674553
4,#60,5201 Nugget Road,Berthoud,CO,80513,40.333635,-104.983357


#### How many locations?

In [19]:
# Row count equals the number of locations.
df.shape[0]

50

#### Make sure our brand name gets in the dataframe

In [20]:
# Stamp every row with the brand's display name.
df.loc[:, "brand"] = place_formal

---

## Geography

#### Make it a geodataframe

In [21]:
# Work on an independent copy so the tabular `df` stays free of geometry.
df_geo = df.copy(deep=True)

In [22]:
# Build point geometries from the geocoded coordinates (x = longitude,
# y = latitude). The values are latitude/longitude in degrees (WGS84), so
# declare EPSG:4326 explicitly; without a CRS the GeoDataFrame cannot be
# reprojected and the exported file carries no coordinate-system information.
gdf = gpd.GeoDataFrame(
    df_geo,
    geometry=gpd.points_from_xy(df_geo.longitude, df_geo.latitude),
    crs="EPSG:4326",
)

---

## Maps

#### US states background

In [23]:
# Light-gray US state shapes (from the vega_datasets us_10m TopoJSON) that
# serve as the base layer of both maps.
us_states = alt.topo_feature(data.us_10m.url, feature="states")
background = (
    alt.Chart(us_states)
    .mark_geoshape(fill="#e9e9e9", stroke="white")
    .project("albersUsa")
    .properties(width=800, height=500, title=f"{place_formal} locations")
)

#### Location points map

In [24]:
# One small dot per store, positioned by its geocoded coordinates.
store_dots = alt.Chart(gdf).mark_circle(size=20, color=color)
points = store_dots.encode(
    latitude="latitude:Q",
    longitude="longitude:Q",
)

# Layer the dots over the state shapes; the final expression renders the
# chart with the view border removed.
point_map = background + points
point_map.configure_view(stroke=None)

#### Location proportional symbols map

In [25]:
# Collapse the stores to one row per `state_name`: the mean coordinate places
# the symbol and the row count sizes it.
per_state = alt.Chart(gdf).transform_aggregate(
    latitude="mean(latitude)",
    longitude="mean(longitude)",
    count="count()",
    groupby=["state_name"],
)

symbols = (
    per_state.mark_circle()
    .encode(
        latitude="latitude:Q",
        longitude="longitude:Q",
        color=alt.value(color),
        size=alt.Size("count:Q", title="Count by state"),
        tooltip=["state_name:N", "count:Q"],
    )
    .properties(
        title=f"Number of {place_formal} in US, by average lon/lat of locations"
    )
)

# Layer the symbols over the state shapes; the final expression renders the
# chart with the view border removed.
symbol_map = background + symbols
symbol_map.configure_view(stroke=None)

---

## Exports

#### JSON

In [26]:
# Make sure the output folder exists: to_json() does not create missing
# directories and would otherwise fail when data/processed is absent.
os.makedirs("data/processed", exist_ok=True)

# Write the locations as a pretty-printed JSON array, one object per store.
df.to_json(
    f"data/processed/{place.lower().replace(' ', '_')}_locations.json",
    indent=4,
    orient="records",
)

#### CSV

In [27]:
# Write the locations table as CSV, without the index column.
csv_path = f"data/processed/{place.lower().replace(' ', '_')}_locations.csv"
df.to_csv(csv_path, index=False)

#### GeoJSON

In [28]:
# Write the point geometries and their attributes as GeoJSON.
geojson_path = f"data/processed/{place.lower().replace(' ', '_')}_locations.geojson"
gdf.to_file(geojson_path, driver="GeoJSON")

