# Del Taco locations

#### Load Python tools and Jupyter config

In [16]:
import us as usa
import re
import json
import black
import random
import requests
import numpy as np
import pandas as pd
import jupyter_black
import altair as alt
from time import sleep
import geopandas as gpd
from random import randint
from bs4 import BeautifulSoup
from vega_datasets import data
from tqdm.notebook import tqdm, trange

In [2]:
jupyter_black.load()
pd.options.display.max_columns = 100
pd.options.display.max_rows = 1000
pd.options.display.max_colwidth = None

In [3]:
place = "del-taco"
place_formal = "Del Taco"
color = "#727624"

---

## Scrape

#### Get links to state pages

In [4]:
us_response = requests.get("https://locations.deltaco.com/us")
us_soup = BeautifulSoup(us_response.text, "html.parser")

In [5]:
us_list = []

for us in us_soup.find_all("a", class_="state"):
    us_list.append(f"https://locations.deltaco.com{us['href']}")

#### Loop over US directory, get state links

In [6]:
state_locations = []

for state in tqdm(us_list):
    state_response = requests.get(state)
    state_soup = BeautifulSoup(state_response.text, "html.parser")
    divs = state_soup.find_all("div", "city-name")
    for d in divs:
        location = d.find("a")["href"]
        state_locations.append(f"https://locations.deltaco.com{location}")

  0%|          | 0/17 [00:00<?, ?it/s]

#### Loop over state directories, get city links

In [7]:
loc_links = []

for loc in tqdm(state_locations):
    loc_response = requests.get(loc)
    loc_soup = BeautifulSoup(loc_response.text, "html.parser")
    try:
        loc_links.append(
            f"https://locations.deltaco.com{loc_soup.find('a', class_='name')['href']}"
        )
    except:
        continue

  0%|          | 0/329 [00:00<?, ?it/s]

In [8]:
len(loc_links)

328

#### Loop over city directories, get location jsons

In [9]:
jsons = []

for l in tqdm(loc_links):
    l_response = requests.get(l)
    l_soup = BeautifulSoup(l_response.text, "html.parser")
    script_tag = l_soup.find("script", type="application/ld+json")
    jsons.append(
        json.loads(
            script_tag.string.strip()
            .replace("\r\n", "")
            .replace("    ", "")
            .replace("} } }", "} }")
        )
    )

  0%|          | 0/328 [00:00<?, ?it/s]

In [10]:
src = pd.DataFrame(jsons)

In [11]:
src[
    [
        "@type",
        "addressCountry",
        "addressLocality",
        "addressRegion",
        "postalCode",
        "streetAddress",
    ]
] = pd.json_normalize(src["address"])

In [12]:
src[["@type", "latitude", "longitude"]] = pd.json_normalize(src["geo"])

#### Clean up in a dataframe with just the columns we need

In [13]:
df = (
    src[
        [
            "streetAddress",
            "addressLocality",
            "addressRegion",
            "postalCode",
            "telephone",
            "latitude",
            "longitude",
            "url",
        ]
    ]
    .rename(
        columns={
            "addressLocality": "city",
            "streetAddress": "street",
            "addressRegion": "state",
            "postalCode": "zip",
        }
    )
    .copy()
)

#### Create a mapping of state abbreviations to full state names using the us library

In [17]:
state_mapping = {state.abbr: state.name for state in usa.states.STATES}

#### New column of full state names based on abbreviations

In [18]:
df["state_name"] = df["state"].map(state_mapping)

---

## Geography

#### Make it a geodataframe

In [19]:
df_geo = df.copy()

In [20]:
gdf = gpd.GeoDataFrame(
    df_geo, geometry=gpd.points_from_xy(df_geo.longitude, df_geo.latitude)
)

In [21]:
locations_gdf = gdf.set_crs("EPSG:4326").copy()

---

## Maps

#### US states background

In [22]:
background = (
    alt.Chart(alt.topo_feature(data.us_10m.url, feature="states"))
    .mark_geoshape(fill="#e9e9e9", stroke="white")
    .properties(width=800, height=500, title=f"{place_formal} locations")
    .project("albersUsa")
)

#### Location points map

In [23]:
points = (
    alt.Chart(gdf)
    .mark_circle(size=10, color=color)
    .encode(
        longitude="longitude:Q",
        latitude="latitude:Q",
    )
)

point_map = background + points
point_map.configure_view(stroke=None)

#### Location proportional symbols map

In [24]:
symbols = (
    alt.Chart(gdf)
    .transform_aggregate(
        latitude="mean(latitude)",
        longitude="mean(longitude)",
        count="count()",
        groupby=["state_name"],
    )
    .mark_circle()
    .encode(
        longitude="longitude:Q",
        latitude="latitude:Q",
        size=alt.Size("count:Q", title="Count by state"),
        color=alt.value(color),
        tooltip=["state_name:N", "count:Q"],
    )
    .properties(
        title=f"Number of {place_formal} in US, by average lon/lat of locations"
    )
)

symbol_map = background + symbols
symbol_map.configure_view(stroke=None)

---

## Exports

#### JSON

In [25]:
df.to_json(
    f"data/processed/{place.lower().replace(' ', '_')}_locations.json",
    indent=4,
    orient="records",
)

#### CSV

In [26]:
df.to_csv(
    f"data/processed/{place.lower().replace(' ', '_')}_locations.csv", index=False
)

#### GeoJSON

In [27]:
locations_gdf.to_file(
    f"data/processed/{place.lower().replace(' ', '_')}_locations.geojson",
    driver="GeoJSON",
)