# Get Sonic locations

#### Load Python tools and Jupyter config

In [1]:
import us 
import json
import black
import requests
import pandas as pd
import jupyter_black
import altair as alt
import geopandas as gpd
from bs4 import BeautifulSoup
from vega_datasets import data
from tqdm.notebook import tqdm, trange

In [None]:
# Enable black auto-formatting for the cells of this notebook.
jupyter_black.load()

# Relax pandas' display limits so wide frames and long cell values
# are shown in full rather than truncated.
pd.set_option("display.max_columns", 100)
pd.set_option("display.max_rows", 1000)
pd.set_option("display.max_colwidth", None)

---

## Scrape data

#### Define variables we'll need

In [None]:
# Brand identifiers: the formal name appears in chart titles, the
# lowercase slug is used to build the export filenames.
place_formal = "Sonic"
place = "sonic"

# Colour applied to the map marks.
color = "#e40046"

# Search-origin coordinates, stored as strings.
# NOTE(review): no visible cell reads these two names; the request URL
# hard-codes its own latitude/longitude values.
longitude = "-94.676392"
latitude = "39.106667"

#### Headers

In [4]:
# Request headers sent with the page requests: a desktop Chrome
# user-agent string, split across lines for readability only.
headers = {
    "user-agent": (
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/120.0.0.0 Safari/537.36"
    ),
}

#### Set the base url with a wide radius

In [None]:
# Location-search endpoint with its query string: a fixed origin point,
# a radius of 5000 and at most 100 results per response.
url = (
    "https://api-idp.sonicdrivein.com/snc/web-exp-api/v1/location"
    "?latitude=39.106667&longitude=-94.67639264"
    "&radius=5000&limit=100"
)

#### Get the total number of pages at 100 results per page

In [None]:
# Fetch the first response only to read how many pages the API reports.
# Send the same headers as the page loop, bound the wait with a timeout
# (requests never times out by default), and fail loudly on an HTTP error
# instead of surfacing it later as a confusing KeyError / JSON decode error.
first_response = requests.get(url, headers=headers, timeout=30)
first_response.raise_for_status()
pages = first_response.json()["metadata"]["totalPages"]

#### Loop over the pages, read responses as dataframes and stuff them into a list

In [None]:
# One dataframe per API page, collected for concatenation afterwards.
dfs = []

# NOTE(review): page numbers are requested from 0 up to pages - 1 —
# confirm the API's page index is zero-based.
for page in tqdm(range(pages)):
    # A timeout keeps one stalled request from hanging the whole scrape.
    response = requests.get(f"{url}&page={page}", headers=headers, timeout=30)
    # Stop on an HTTP error rather than failing on a missing "locations" key.
    response.raise_for_status()
    response_df = pd.DataFrame(response.json()["locations"])
    dfs.append(response_df)

  0%|          | 0/36 [00:00<?, ?it/s]

#### Concatenate the list into one large dataframe

In [None]:
# Columns kept from the raw API records; everything else is discarded.
wanted_columns = [
    "id",
    "displayName",
    "timezone",
    "contactDetails",
    "details",
    "isClosed",
    "url",
]

# Stack every page into one frame, keep the wanted columns and renumber
# the rows so the index is a clean 0..n-1 range.
src = pd.concat(dfs)[wanted_columns].reset_index(drop=True)

#### Deal with nested address and coordinates columns

In [None]:
# Target names for the flattened contactDetails fields, matched to the
# normalized columns by position. Fields that are not needed all share the
# throwaway name "drop".
# NOTE(review): assumes json_normalize yields exactly nine columns in this
# order — confirm against a live API response.
contact_columns = [
    "phone",
    "street",
    "drop",
    "drop",
    "zip",
    "state",
    "drop",
    "city",
    "drop",
]

# Expand the nested contactDetails dicts into one column per field.
contact_details = pd.json_normalize(src["contactDetails"])
src[contact_columns] = contact_details

In [11]:
# Expand the nested details dicts and store the two resulting columns
# as latitude and longitude.
# NOTE(review): assumes details normalizes to exactly two columns,
# latitude first — confirm against a live API response.
coordinates = pd.json_normalize(src["details"])
src[["latitude", "longitude"]] = coordinates

#### Clean up the location name

In [None]:
# Keep the part of displayName before the first " (".
# The pattern is a raw string: "\(" in a normal string literal is an
# invalid escape sequence and triggers a warning on modern Python.
src["drop_name"] = src["displayName"].str.split(r" \(", expand=True)[0]

# Keep the part before the first ", " and title-case it.
src["name"] = src["drop_name"].str.split(", ", expand=True)[0].str.title()

#### Create a mapping of state abbreviations to full state names using the us library

In [13]:
# Lookup table: state abbreviation -> state name, built from every entry
# in us.states.STATES.
state_mapping = dict(
    (entry.abbr, entry.name) for entry in us.states.STATES
)

#### New column of full state names based on abbreviations

In [14]:
# Translate each abbreviation through state_mapping; abbreviations with
# no entry in the mapping come back as NaN.
full_state_names = src["state"].map(state_mapping)
src["state_name"] = full_state_names

#### Slim the dataframe to columns we need

In [None]:
# Final column set, in output order.
output_columns = [
    "id",
    "name",
    "timezone",
    "street",
    "city",
    "state",
    "zip",
    "phone",
    "url",
    "latitude",
    "longitude",
    "state_name",
]

# Keep one row per location id and detach the result from src.
df = src[output_columns].drop_duplicates(subset="id").copy()

#### How many locations?

In [None]:
# Number of rows (one per unique location id) in the slimmed frame.
df.shape[0]

3529

---

## Geography

#### Make it a geodataframe

In [17]:
# Work on an independent deep copy so the geometry step leaves df untouched.
df_geo = df.copy(deep=True)

In [18]:
# Build point geometries from the longitude (x) and latitude (y) columns.
# The CRS is declared explicitly as WGS84 lon/lat (EPSG:4326); without it
# the GeoDataFrame and its file exports carry no coordinate reference system.
gdf = gpd.GeoDataFrame(
    df_geo,
    geometry=gpd.points_from_xy(df_geo.longitude, df_geo.latitude),
    crs="EPSG:4326",
)

---

## Maps

#### US states background

In [20]:
# State outlines taken from the vega_datasets US 10m TopoJSON file.
us_states = alt.topo_feature(data.us_10m.url, feature="states")

# Light grey base map in the Albers USA projection; the location layers
# are drawn on top of it.
background = (
    alt.Chart(us_states)
    .mark_geoshape(fill="#e9e9e9", stroke="white")
    .properties(width=800, height=500, title=f"{place_formal} locations")
    .project("albersUsa")
)

#### Location points map

In [27]:
# One small circle per location, placed by its longitude/latitude columns.
point_marks = alt.Chart(gdf).mark_circle(size=5, color=color)
points = point_marks.encode(
    longitude="longitude:Q",
    latitude="latitude:Q",
)

# Layer the points over the state background; the final expression
# renders the map with the view border removed.
point_map = background + points
point_map.configure_view(stroke=None)

#### Location proportional symbols map

In [28]:
# Collapse locations to one row per state: the mean latitude/longitude of
# that state's locations plus how many locations it has.
per_state = alt.Chart(gdf).transform_aggregate(
    latitude="mean(latitude)",
    longitude="mean(longitude)",
    count="count()",
    groupby=["state"],
)

# One circle per state, sized by its location count.
symbols = (
    per_state.mark_circle()
    .encode(
        longitude="longitude:Q",
        latitude="latitude:Q",
        size=alt.Size("count:Q", title="Count by state"),
        color=alt.value(color),
        tooltip=["state:N", "count:Q"],
    )
    .properties(
        title=f"Number of {place_formal} in US, by average lon/lat of locations"
    )
)

# Layer the symbols over the state background; the final expression
# renders the map with the view border removed.
symbol_map = background + symbols
symbol_map.configure_view(stroke=None)

---

## Exports

#### JSON

In [24]:
# Write the locations as a JSON array of records, indented by 4 spaces.
# NOTE(review): the data/processed/ directory is assumed to exist already.
json_path = f"data/processed/{place.lower().replace(' ', '_')}_locations.json"
df.to_json(json_path, indent=4, orient="records")

#### CSV

In [25]:
# Write the locations as CSV without the dataframe index column.
# NOTE(review): the data/processed/ directory is assumed to exist already.
csv_path = f"data/processed/{place.lower().replace(' ', '_')}_locations.csv"
df.to_csv(csv_path, index=False)

#### GeoJSON

In [26]:
# Write the geodataframe, including its point geometry, as GeoJSON.
# NOTE(review): the data/processed/ directory is assumed to exist already.
geojson_path = f"data/processed/{place.lower().replace(' ', '_')}_locations.geojson"
gdf.to_file(geojson_path, driver="GeoJSON")