# Get Wawa locations

#### Load Python tools and Jupyter config

In [1]:
import us
import json
import requests
import pandas as pd
import jupyter_black
import altair as alt
import geopandas as gpd
from bs4 import BeautifulSoup
from vega_datasets import data
from tqdm.notebook import tqdm, trange

In [2]:
# Load the jupyter_black extension for this notebook session
jupyter_black.load()

# Raise pandas' display limits so wide frames and long cell values are shown in full
pd.set_option("display.max_columns", 100)
pd.set_option("display.max_rows", 1000)
pd.set_option("display.max_colwidth", None)

In [3]:
# Brand identifiers: `place` feeds the export file names, `place_formal` the
# chart titles and the `brand` column
place, place_formal = "wawa", "Wawa"

# Hex color applied to the map markers
color = "#ce0e2d"

# Run date as YYYY-MM-DD, stamped onto every row as the fetch date
today = f"{pd.Timestamp.today():%Y-%m-%d}"

---

## Scrape

#### Import ZIP Codes with more than 5,000 residents, sorted by population, and zero-pad them to five digits

In [4]:
# Load the ZIP Code demographics reference file, keep ZIP Codes with more than
# 5,000 residents (most populous first), and left-pad the codes with zeros to
# five characters after casting them to strings.
zips_selected = (
    pd.read_json("../../_reference/data/zip_code_demographics_esri.json")
    .loc[lambda frame: frame["population"] > 5000]
    .sort_values(by="population", ascending=False, ignore_index=True)
    .assign(zipcode=lambda frame: frame["zipcode"].astype(str).str.zfill(5))
)

#### Headers for request

In [5]:
# HTTP headers sent with every store-locator request: ask for JSON and
# present a desktop Chrome user agent
headers = {}
headers["authority"] = "www.wawa.com"
headers["accept"] = "application/json"
headers["user-agent"] = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36"

#### Function to send GraphQL query with given latitude and longitude to [store locator](https://www.wawa.com/locations/store-locator)

In [6]:
def send_query(latitude, longitude, timeout=30):
    """Ask the Wawa store locator for the locations near a point.

    Posts a ``FindNearLocations`` GraphQL query to the site's ``/api/bff``
    endpoint using the module-level ``headers``.

    Parameters
    ----------
    latitude, longitude
        Coordinates of the search point, sent as the ``$latitude`` and
        ``$longitude`` GraphQL variables.
    timeout : float or tuple, optional
        Seconds to wait for the server, forwarded to ``requests.post``.
        Without a timeout a stalled connection would block the scrape
        indefinitely. Defaults to 30.

    Returns
    -------
    requests.Response
        The unprocessed response. The caller checks ``status_code`` and
        decodes the JSON body.

    Raises
    ------
    requests.exceptions.RequestException
        If the request cannot be completed (connection error, timeout, ...).
    """
    json_data = {
        "query": """
        query FindNearLocations($latitude: Latitude!, $longitude: Longitude!) {
            findNearLocations(latitude: $latitude, longitude: $longitude) {
                results {
                    name
                    storeNumber
                    coordinates {
                        latitude
                        longitude
                    }
                    address {
                        address
                        city
                        state
                        zip
                    }
                }
            }
        }
        """,
        "variables": {
            "latitude": latitude,
            "longitude": longitude,
        },
    }

    response = requests.post(
        "https://www.wawa.com/api/bff",
        headers=headers,
        json=json_data,
        timeout=timeout,
    )

    return response


# Accumulates one dictionary per store returned by any query; the same store
# can appear more than once when nearby search points overlap.
results_list = []

# Draw the sample once so the rows iterated and the progress-bar total come
# from the same frame; cap the size so a short ZIP list cannot make
# `sample` raise.
zips_sample = zips_selected.sample(min(500, len(zips_selected)))

# Query the store locator at each sampled ZIP Code's coordinates
for _, row in tqdm(zips_sample.iterrows(), total=len(zips_sample)):
    try:
        response = send_query(row["latitude"], row["longitude"])
    except requests.RequestException as error:
        # A network failure at one point should not abort the whole scrape
        print(f"Error: Request failed ({error})")
        continue

    if response.status_code != 200:
        # Skip this point: there is no fresh payload to parse, and falling
        # through would reuse `results` from an earlier iteration.
        print(f"Error: Status code {response.status_code}")
        continue

    results = response.json()["data"]["findNearLocations"]["results"]

    for r in results:
        # Flatten the nested GraphQL record into one row
        results_list.append(
            {
                "store_number": r["storeNumber"],
                "latitude": r["coordinates"]["latitude"],
                "longitude": r["coordinates"]["longitude"],
                "street": r["address"]["address"],
                "city": r["address"]["city"],
                "state": r["address"]["state"],
                "zip": r["address"]["zip"],
            }
        )

  0%|          | 0/500 [00:00<?, ?it/s]

----

## Structure

#### Read list of dictionaries into a dataframe

In [7]:
# One row per scraped record, with exact duplicate rows removed and the
# index renumbered from zero
src_df = pd.DataFrame(data=results_list).drop_duplicates(ignore_index=True)

#### Title-case the street and city values

In [8]:
# Title-case the free-text address fields
for column in ("street", "city"):
    src_df[column] = src_df[column].str.title()

#### Just the columns we need

In [9]:
# Independent copy of the source frame restricted to the output columns, in
# export order
df = src_df.loc[
    :,
    [
        "store_number",
        "street",
        "city",
        "state",
        "zip",
        "latitude",
        "longitude",
    ],
].copy()

#### Create a mapping of state abbreviations to full state names using the us library

In [10]:
# Abbreviation -> full name lookup. The District of Columbia is added
# explicitly because current releases of `us` leave it out of `STATES`
# (NOTE(review): confirm against the installed `us` version); without it any
# DC location would end up with a missing `state_name`.
state_mapping = {
    state.abbr: state.name for state in [*us.states.STATES, us.states.DC]
}

#### New column of full state names based on abbreviations

In [11]:
# Look up each abbreviation in the mapping; abbreviations with no entry
# become NaN
df["state_name"] = df.loc[:, "state"].map(state_mapping)

#### Make sure our brand name gets in the dataframe

In [12]:
# Constant column carrying the formal brand name on every row
df = df.assign(brand=place_formal)

#### Add fetch date

In [13]:
# Constant column recording the date this scrape was run
df = df.assign(updated=today)

#### The result:

In [14]:
# Preview the first five rows of the structured frame
df.head(n=5)

Unnamed: 0,store_number,street,city,state,zip,latitude,longitude,state_name,brand,updated
0,8344,966 Route 17 N,Ramsey,NJ,07446,41.065986,-74.130652,New Jersey,Wawa,2024-03-10
1,8326,150 Essex Street,Lodi,NJ,07644-2701,40.89193,-74.073249,New Jersey,Wawa,2024-03-10
2,8338,505 S River Street,Hackensack,NJ,07601-6621,40.8627,-74.0363,New Jersey,Wawa,2024-03-10
3,8335,7 Jewell St,Garfield,NJ,07026-3715,40.881211,-74.119048,New Jersey,Wawa,2024-03-10
4,8353,7408 Tonnelle Ave,North Bergen,NJ,07047,40.803289,-74.019006,New Jersey,Wawa,2024-03-10


#### How many locations?

In [15]:
# Row count, i.e. the number of distinct location records collected
df.shape[0]

860

---

## Geography

#### Make it a geodataframe

In [16]:
# Work on an independent copy so the tabular frame used for the JSON/CSV
# exports never gains a geometry column
df_geo = df.copy(deep=True)

In [17]:
# Build point geometries from the longitude (x) and latitude (y) columns.
# The coordinates are plain longitude/latitude degrees, so declare them as
# WGS84 (EPSG:4326); without a CRS the frame cannot be reprojected and the
# exported file carries no coordinate-system information.
gdf = gpd.GeoDataFrame(
    df_geo,
    geometry=gpd.points_from_xy(df_geo.longitude, df_geo.latitude),
    crs="EPSG:4326",
)

---

## Maps

#### US states background

In [18]:
# State outlines from the vega_datasets US TopoJSON file
states_topo = alt.topo_feature(data.us_10m.url, feature="states")

# Light-gray basemap in the Albers USA projection, shared by both maps below
background = (
    alt.Chart(states_topo)
    .mark_geoshape(fill="#e9e9e9", stroke="white")
    .project("albersUsa")
    .properties(width=800, height=500, title=f"{place_formal} locations")
)

#### Location points map

In [19]:
# One small brand-colored dot per location
points = (
    alt.Chart(gdf)
    .mark_circle(size=5, color=color)
    .encode(
        longitude=alt.Longitude("longitude:Q"),
        latitude=alt.Latitude("latitude:Q"),
    )
)

# Layer the dots over the basemap and drop the frame around the view
point_map = alt.layer(background, points)
point_map.configure_view(stroke=None)

#### Location proportional symbols map

In [20]:
# Collapse the locations to one record per state: the mean coordinate of that
# state's locations plus how many there are
per_state = alt.Chart(gdf).transform_aggregate(
    latitude="mean(latitude)",
    longitude="mean(longitude)",
    count="count()",
    groupby=["state"],
)

# One circle per state, placed at the mean coordinate and sized by the count
symbols = (
    per_state.mark_circle()
    .encode(
        longitude=alt.Longitude("longitude:Q"),
        latitude=alt.Latitude("latitude:Q"),
        size=alt.Size("count:Q", title="Count by state"),
        color=alt.value(color),
        tooltip=["state:N", "count:Q"],
    )
    .properties(
        title=f"Number of {place_formal} in US, by average lon/lat of locations"
    )
)

# Layer the circles over the basemap and drop the frame around the view
symbol_map = alt.layer(background, symbols)
symbol_map.configure_view(stroke=None)

---

## Exports

#### JSON

In [21]:
# Write the table as a pretty-printed JSON array with one object per location;
# the file name is the brand slug, lower-cased with spaces replaced by underscores
df.to_json(
    path_or_buf=f"data/processed/{place.lower().replace(' ', '_')}_locations.json",
    orient="records",
    indent=4,
)

#### CSV

In [22]:
# Write the same table as CSV, without the row index
df.to_csv(
    path_or_buf=f"data/processed/{place.lower().replace(' ', '_')}_locations.csv",
    index=False,
)

#### GeoJSON

In [23]:
# Write the geodataframe, point geometry included, as GeoJSON
gdf.to_file(
    filename=f"data/processed/{place.lower().replace(' ', '_')}_locations.geojson",
    driver="GeoJSON",
)