# Get Little Caesars locations

#### Load Python tools and Jupyter config

In [1]:
import us
import json
import requests
import pandas as pd
import jupyter_black
import altair as alt
import geopandas as gpd
from bs4 import BeautifulSoup
from vega_datasets import data as vega_data
from tqdm.notebook import tqdm, trange

In [2]:
# Turn on Black formatting for the notebook's cells
jupyter_black.load()

# Raise pandas' display limits so wide and long frames are shown in full
pd.set_option("display.max_columns", 100)
pd.set_option("display.max_rows", 1000)
pd.set_option("display.max_colwidth", None)

# Remove Altair's row cap; kept as the last expression so its return value
# is still what the cell displays
alt.data_transformers.disable_max_rows()

DataTransformerRegistry.enable('default')

In [3]:
# Slug used when building output file names
place = "little-caesars"

# Display name used in chart titles and in the "brand" column
place_formal = "Little Caesars"

# Marker color for the maps
color = "#ff6000"

# Run date as a YYYY-MM-DD string
today = pd.Timestamp.today().date().isoformat()

---

## Scrape

#### Get a dataframe of ZIP Codes

In [4]:
# Load the ZIP Code reference table, then order it from most to least populous
zips_reference = pd.read_json("../../_reference/data/zips_reference_pop_gen.json")
zips_all = zips_reference.sort_values(by="population", ascending=False)

#### Get the most populous ZIPs and a sample of the rest

In [5]:
# First 1,000 rows of zips_all
zips_top = zips_all.head(1000)

# Everything after those rows, from which 1,000 are drawn at random
zips_rest = zips_all.tail(len(zips_all) - 1000)
zips_sample = zips_rest.sample(1000)

# Sample first, then the top group, renumbered from zero
zips_combined = pd.concat([zips_sample, zips_top])
zips = zips_combined.reset_index(drop=True)

#### Headers for request

In [6]:
# Desktop Chrome user-agent string sent with every request
_CHROME_USER_AGENT = (
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
    "AppleWebKit/537.36 (KHTML, like Gecko) "
    "Chrome/120.0.0.0 Safari/537.36"
)

headers = {"user-agent": _CHROME_USER_AGENT}

In [7]:
# Seconds to wait on any single request. requests applies no timeout by
# default, so without this one stalled connection would block the whole scrape.
REQUEST_TIMEOUT = 30


def _store_record(store_info):
    """Flatten one ``storeInfo`` object into a flat dict (one dataframe row).

    Fields missing from the payload become empty strings. ``address`` is
    treated as empty when it is absent or null.
    """
    # `.get("address", {})` would still return None for an explicit null
    address = store_info.get("address") or {}
    return {
        "locationNumber": store_info.get("locationNumber", ""),
        "store_id": store_info.get("franchiseStoreId", ""),
        "address1": address.get("address1", ""),
        "address2": address.get("address2", ""),
        "city": address.get("city", ""),
        "state": address.get("state", ""),
        "latitude": address.get("latitude", ""),
        "longitude": address.get("longitude", ""),
        "store_type": store_info.get("storeType", ""),
        "store_name": store_info.get("storeName", ""),
        "phone_number": store_info.get("phoneNumber", ""),
    }


store_list = []

# The endpoint is addressed by a numeric store ID, so walk IDs 1..19999 and
# keep every one whose response carries a "storeInfo" object.
# A single Session reuses connections instead of reconnecting per request.
with requests.Session() as session:
    for store in tqdm(range(1, 20000)):
        try:
            response = session.get(
                f"https://api.cloud.littlecaesars.com/bff/api/v6/store/location/{store}",
                headers=headers,
                timeout=REQUEST_TIMEOUT,
            )
            payload = response.json()
        except requests.RequestException as e:
            print(f"Request failed for store {store}: {e}")
            continue
        except ValueError as e:
            # Body was not valid JSON; older requests versions raise a plain
            # ValueError subclass here rather than a RequestException
            print(f"Invalid JSON for store {store}: {e}. Skipping...")
            continue

        # HTTP status is deliberately not checked: an ID is skipped only when
        # the body has no usable "storeInfo" object
        store_info = payload.get("storeInfo") if isinstance(payload, dict) else None
        if not isinstance(store_info, dict):
            continue

        store_list.append(_store_record(store_info))

  0%|          | 0/19999 [00:00<?, ?it/s]

In [8]:
# One row per scraped store, one column per key of the collected dicts
df = pd.DataFrame(data=store_list)

In [9]:
# Number of stores collected
len(df.index)

5154

#### Create a mapping of state abbreviations to full state names using the us library

In [10]:
# Abbreviation -> full-name lookup built from the us library's state list
state_mapping = dict((st.abbr, st.name) for st in us.states.STATES)

#### New column of full state names based on abbreviations

In [None]:
# Abbreviations with no entry in state_mapping come back as NaN
full_state_names = df["state"].map(state_mapping)
df["state_name"] = full_state_names

In [40]:
# State abbreviations (the mapping's keys), in insertion order
us_states = [*state_mapping]

#### Make sure our brand name gets in the dataframe

In [12]:
# Tag every row with the chain's display name held in place_formal
df["brand"] = place_formal

#### Add fetch date

In [13]:
# Stamp every row with the run-date string held in today
df["updated"] = today

---

## Geography

#### Make it a geodataframe

In [None]:
# Work on an independent copy so df itself is not touched by the geo steps
df_geo = df.copy(deep=True)

In [None]:
# Build one point per store from its longitude (x) and latitude (y)
store_points = gpd.points_from_xy(df_geo["longitude"], df_geo["latitude"])

# Attach the points as the geometry column, then declare the CRS as 4326
gdf = gpd.GeoDataFrame(df_geo, geometry=store_points)
gdf = gdf.set_crs("4326")

In [41]:
# Keep only the rows whose state abbreviation appears in us_states
in_us_states = gdf["state"].isin(us_states)
us_gdf = gdf[in_us_states]

---

## Maps

#### US states background

In [69]:
# State shapes from the vega_datasets US topojson
us_states_topo = alt.topo_feature(vega_data.us_10m.url, feature="states")
background_title = f"{place_formal} locations"

# Light grey states with white borders, drawn in the albersUsa projection
background = alt.Chart(us_states_topo).mark_geoshape(fill="#e9e9e9", stroke="white")
background = background.properties(width=800, height=500, title=background_title)
background = background.project("albersUsa")

#### US location points map

In [70]:
# One small dot per row of us_gdf, placed at its own coordinates
store_dots = alt.Chart(us_gdf).mark_circle(size=5, color=color)
points = store_dots.encode(
    longitude="longitude:Q",
    latitude="latitude:Q",
)

# Draw the dots on top of the state background; the last expression renders
# the layered chart without a view border
point_map = alt.layer(background, points)
point_map.configure_view(stroke=None)

#### Location proportional symbols map

In [47]:
# Collapse the rows to one per state: mean coordinates plus a location count
per_state = alt.Chart(us_gdf).transform_aggregate(
    latitude="mean(latitude)",
    longitude="mean(longitude)",
    count="count()",
    groupby=["state"],
)

# One circle per state, sized by its count and placed at the mean coordinates
symbols = per_state.mark_circle().encode(
    longitude="longitude:Q",
    latitude="latitude:Q",
    size=alt.Size("count:Q", title="Count by state"),
    color=alt.value(color),
    tooltip=["state:N", "count:Q"],
)

symbol_title = f"Number of {place_formal} in US, by average lon/lat of locations"
symbol_map = alt.layer(background, symbols).properties(title=symbol_title)

# Last expression renders the layered chart without a view border
symbol_map.configure_view(stroke=None)

---

## Exports

#### JSON

In [44]:
# Write every row of df as a list of JSON records, pretty-printed
json_path = f"data/processed/{place.lower().replace(' ', '_')}_locations.json"
df.to_json(json_path, orient="records", indent=4)

#### CSV

In [45]:
# Write every row of df as CSV, leaving out the index column
csv_path = f"data/processed/{place.lower().replace(' ', '_')}_locations.csv"
df.to_csv(csv_path, index=False)

#### GeoJSON

In [46]:
# Write the full geodataframe (gdf, not the state-filtered us_gdf) as GeoJSON
geojson_path = f"data/processed/{place.lower().replace(' ', '_')}_locations.geojson"
gdf.to_file(geojson_path, driver="GeoJSON")