# Get Culver's locations

#### Load Python tools and Jupyter config

In [83]:
import us
import json
import requests
import pandas as pd
import jupyter_black
import altair as alt
import geopandas as gpd
from bs4 import BeautifulSoup
from vega_datasets import data
from tqdm.notebook import tqdm, trange

In [84]:
# Enable the jupyter_black extension for this notebook session.
jupyter_black.load()

# Raise pandas display limits so wide frames, long frames and long
# strings (such as URLs) are shown without truncation.
pd.set_option("display.max_columns", 100)
pd.set_option("display.max_rows", 1000)
pd.set_option("display.max_colwidth", None)

In [85]:
# Brand identifiers: `place` is the slug used in export file names,
# `place_formal` is the display name used in chart titles and the data.
place, place_formal = "culvers", "Culver's"

# Brand color applied to the map marks.
color = "#005696"

# Date of this run as a YYYY-MM-DD string.
today = pd.Timestamp.today().date().isoformat()

---

## Scrape

#### Headers for request

In [86]:
# Request headers sent with every API call: the site authority plus a
# desktop-browser user agent string.
headers = {
    "authority": "www.culvers.com",
    "user-agent": (
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/122.0.0.0 Safari/537.36"
    ),
}

#### Limit our search. Which states [have a Culver's](https://www.culvers.com/stories/quizzes/how-well-do-you-know-your-culvers-locations)? 

In [87]:
# Full names of the states to search, grouped by region for readability.
# The names are matched against the `state_name` column of the ZIP table.
states = (
    # Midwest
    [
        "Illinois",
        "Indiana",
        "Iowa",
        "Kansas",
        "Michigan",
        "Minnesota",
        "Missouri",
        "Nebraska",
        "North Dakota",
        "Ohio",
        "South Dakota",
        "Wisconsin",
    ]
    # Mountain West
    + ["Colorado", "Idaho", "Utah", "Wyoming"]
    # Southwest
    + ["Arizona", "Texas"]
    # Southeast
    + [
        "Alabama",
        "Florida",
        "Georgia",
        "Kentucky",
        "North Carolina",
        "South Carolina",
        "Tennessee",
    ]
)

#### Import the country's most populous ZIP Codes (population above 5,000) and ensure they have five digits

In [88]:
# Load the ZIP Code demographics reference file, keep ZIP Codes with a
# population above 5,000 and rank them from most to least populous.
zips = pd.read_json("../../_reference/data/zip_code_demographics_esri.json")
zips = zips[zips["population"] > 5000]
zips = zips.sort_values("population", ascending=False).reset_index(drop=True)

# Store ZIP Codes as text, left-padded with zeros to five characters.
zips["zipcode"] = zips["zipcode"].astype(str).str.zfill(5)

In [89]:
# Keep only ZIP Codes in the states on the search list; copy so later
# edits do not touch the full `zips` table.
zips_selected = zips[zips["state_name"].isin(states)].copy()

#### Build the search list: the 400 most populous ZIP Codes plus a random sample of the rest

In [90]:
# Search set, part 1: the first 400 ZIP Codes of the selected table
# (the table is ordered by population, largest first).
top = zips_selected["zipcode"].head(400).to_list()

# Search set, part 2: a random sample of the ZIP Codes not already in `top`.
# No seed is set, so the sample differs from run to run.
remaining = zips_selected.loc[~zips_selected["zipcode"].isin(top), "zipcode"]

# Cap the sample size: Series.sample raises ValueError when asked for more
# rows than exist (sampling without replacement).
bottom = remaining.sample(min(400, len(remaining))).to_list()

zips_list = top + bottom

#### Loop through the ZIP Codes and collect each returned location as a dictionary in a list

In [91]:
# Query the locations API once per ZIP Code and flatten every returned
# geofence into one record (dict) appended to `responses_list`.
# Neighboring ZIP Codes can return the same restaurant, so the list may
# contain duplicates.
responses_list = []

# ZIP Codes whose request failed or whose payload was not in the expected
# shape; kept so they can be inspected or retried instead of aborting the
# whole run on the first bad response.
failed_zips = []

for z in tqdm(zips_list):

    params = {
        "location": z,
        "limit": "100",
    }

    try:
        response = requests.get(
            "https://www.culvers.com/api/restaurants/getLocations",
            params=params,
            headers=headers,
            timeout=30,  # never hang indefinitely on a stalled connection
        )
        response.raise_for_status()
        geofences = response.json()["data"]["geofences"]
    except (requests.RequestException, ValueError, KeyError, TypeError):
        # Network/HTTP error, non-JSON body, or missing "data"/"geofences".
        failed_zips.append(z)
        continue

    for g in geofences:
        metadata = g["metadata"]
        # The first coordinate is read as longitude, the second as latitude.
        coordinates = g["geometryCenter"]["coordinates"]

        # Fields go straight into the record so no notebook-level names
        # such as `id` or `zip` shadow the Python builtins.
        responses_list.append(
            {
                "id": g["externalId"],
                "latitude": coordinates[1],
                "longitude": coordinates[0],
                "street": metadata["street"],
                "city": metadata["city"],
                "state": metadata["state"],
                "zip": metadata["postalCode"],
                "url": "https://www.culvers.com/restaurants/" + metadata["slug"],
            }
        )

  0%|          | 0/800 [00:00<?, ?it/s]

#### Build a dataframe from the collected records and drop duplicates

In [95]:
# One row per collected record, then drop rows that are exact duplicates
# across every column.
df = pd.DataFrame(responses_list)
df = df.drop_duplicates()

In [96]:
# Preview the first rows of the de-duplicated results.
df.head()

Unnamed: 0,id,latitude,longitude,street,city,state,zip,url
0,881,29.695101,-95.848999,6677 Flewellen Way,Fulshear,TX,77441,https://www.culvers.com/restaurants/fulshear-tx-flewellen-way
2,64,33.169716,-96.672142,4200 W Eldorado Pkwy,McKinney,TX,75070,https://www.culvers.com/restaurants/mckinney
3,554,33.069481,-96.878349,5100 State Hwy 121,Lewisville,TX,75056,https://www.culvers.com/restaurants/the-colony-tx-hwy121
4,224,32.900322,-96.467117,2475 Ridge Rd,Rockwall,TX,75087,https://www.culvers.com/restaurants/rockwall
5,319,33.01329,-97.072624,2709 Flower Mound Rd,Flower Mound,TX,75022,https://www.culvers.com/restaurants/flower-mound


In [97]:
# Row count after de-duplication.
len(df)

794

#### Create a mapping of state abbreviations to full state names using the us library

In [98]:
# Lookup from state abbreviation to full state name, built from every
# entry in us.states.STATES.
state_mapping = {entry.abbr: entry.name for entry in us.states.STATES}

#### New column of full state names based on abbreviations

In [99]:
# Translate each abbreviation in `state` to its full name; abbreviations
# missing from the mapping become NaN.
df = df.assign(state_name=df["state"].map(state_mapping))

#### Make sure our brand name gets in the dataframe

In [100]:
# Tag every row with the formal brand name.
df = df.assign(brand=place_formal)

#### Add fetch date

In [102]:
# Stamp every row with the fetch date, formatted MM/DD/YYYY.
df = df.assign(updated=f"{pd.Timestamp.today():%m/%d/%Y}")

---

## Geography

#### Make it a geodataframe

In [103]:
# Work on an independent (deep) copy so building the geometry column
# leaves `df` untouched for the tabular exports.
df_geo = df.copy(deep=True)

In [104]:
# Turn the table into a GeoDataFrame with one point per location, built
# from the longitude (x) and latitude (y) columns.
# The CRS is declared explicitly so downstream tools do not have to guess it.
# NOTE(review): assumes the API returns WGS84 lon/lat degrees; confirm.
gdf = gpd.GeoDataFrame(
    df_geo,
    geometry=gpd.points_from_xy(df_geo["longitude"], df_geo["latitude"]),
    crs="EPSG:4326",
)

---

## Maps

#### US states background

In [105]:
# Base layer: US state outlines from the vega_datasets 10m TopoJSON,
# drawn in light gray with white borders on an Albers USA projection.
state_shapes = alt.topo_feature(data.us_10m.url, feature="states")

background = (
    alt.Chart(state_shapes)
    .mark_geoshape(fill="#e9e9e9", stroke="white")
    .project("albersUsa")
    .properties(width=800, height=500, title=f"{place_formal} locations")
)

#### Location points map

In [106]:
# One small brand-colored dot per location, placed by longitude/latitude.
points = (
    alt.Chart(gdf)
    .mark_circle(size=5, color=color)
    .encode(
        longitude=alt.Longitude("longitude:Q"),
        latitude=alt.Latitude("latitude:Q"),
    )
)

# Layer the dots over the state outlines; the final expression renders the
# map without the default view border.
point_map = alt.layer(background, points)
point_map.configure_view(stroke=None)

#### Location proportional symbols map

In [107]:
# One circle per state, positioned at the mean longitude/latitude of that
# state's locations and sized by how many locations it has.
symbols = (
    alt.Chart(gdf)
    .mark_circle()
    .transform_aggregate(
        latitude="mean(latitude)",
        longitude="mean(longitude)",
        count="count()",
        groupby=["state"],
    )
    .properties(
        title=f"Number of {place_formal} in US, by average lon/lat of locations"
    )
    .encode(
        longitude=alt.Longitude("longitude:Q"),
        latitude=alt.Latitude("latitude:Q"),
        size=alt.Size("count:Q", title="Count by state"),
        color=alt.value(color),
        tooltip=["state:N", "count:Q"],
    )
)

# Layer the symbols over the state outlines; the final expression renders
# the map without the default view border.
symbol_map = alt.layer(background, symbols)
symbol_map.configure_view(stroke=None)

---

## Exports

#### JSON

In [108]:
# Write the table as a JSON array of records (one object per location).
# The file name is built from the lowercased brand slug, spaces -> underscores.
export_stem = place.lower().replace(" ", "_")
df.to_json(
    f"data/processed/{export_stem}_locations.json",
    orient="records",
    indent=4,
)

#### CSV

In [109]:
# Write the table as CSV without the dataframe index.
# The file name is built from the lowercased brand slug, spaces -> underscores.
csv_path = "data/processed/{}_locations.csv".format(place.lower().replace(" ", "_"))
df.to_csv(csv_path, index=False)

#### GeoJSON

In [110]:
# Write the point GeoDataFrame as GeoJSON.
# The file name is built from the lowercased brand slug, spaces -> underscores.
geojson_path = "data/processed/" + place.lower().replace(" ", "_") + "_locations.geojson"
gdf.to_file(geojson_path, driver="GeoJSON")