# Get Red Lobster locations

#### Load Python tools and Jupyter config

In [1]:
import us
import os
import re
import json
import requests
import pandas as pd
import jupyter_black
import altair as alt
import geopandas as gpd
from bs4 import BeautifulSoup
from vega_datasets import data as vega_data
from tqdm.notebook import tqdm, trange

In [2]:
# Load the jupyter_black extension for this notebook session.
jupyter_black.load()

# Raise the pandas display limits so wide/long frames are not truncated.
for option_name, option_value in {
    "display.max_columns": 100,
    "display.max_rows": 1000,
    "display.max_colwidth": None,
}.items():
    pd.set_option(option_name, option_value)

# Turn off Altair's max-rows check on chart data.
alt.data_transformers.disable_max_rows()

DataTransformerRegistry.enable('default')

In [3]:
# Brand identifiers: `place` is used to build export file names and
# `place_formal` is the display name used in chart titles and the brand column.
place, place_formal = "red-lobster", "Red Lobster"

# Brand colour used for the map marks.
color = "#E40F3A"

# Fetch date as YYYY-MM-DD.
today = pd.Timestamp.today().date().isoformat()

---

## Fetch

#### Headers for requests

In [4]:
# Request headers carrying a desktop-Chrome user agent string.
headers = {
    "user-agent": (
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/120.0.0.0 Safari/537.36"
    ),
}

In [5]:
# URL of the locations directory page on redlobster.com.
# NOTE(review): this name is rebound to a DataFrame after the directory page is
# parsed further down, so it only holds the URL until then — confirm the reuse
# of the name is intended.
locations_directory = "https://www.redlobster.com/seafood-restaurants/locations/"

In [6]:
# Step 1: Fetch the directory page.
# Send the shared `headers` (the API request uses them too), bound the wait so
# a stalled connection cannot hang the notebook, and fail on an HTTP error
# instead of trying to parse an error page.
url = "https://www.redlobster.com/seafood-restaurants/locations/"
response = requests.get(url, headers=headers, timeout=30)
response.raise_for_status()
html_content = response.text

# Step 2: Find the inline <script> that assigns window.__PRELOADED_PROPS__.
soup = BeautifulSoup(html_content, "html.parser")
script_tag = soup.find(
    "script", string=lambda x: x and "window.__PRELOADED_PROPS__" in x
)
if script_tag is None:
    # Without this guard a page-layout change surfaces as an opaque
    # AttributeError on `.string` below.
    raise ValueError(f"No window.__PRELOADED_PROPS__ script found at {url}")

# Step 3: Cut out the text between the PROPS assignment and the CONTEXT
# assignment, then trim anything after the final closing brace so that only the
# JSON object remains.
json_text = (
    script_tag.string.split("window.__PRELOADED_PROPS__ = ")[1]
    .split("window.__PRELOADED_CONTEXT__")[0]
    .strip()
)
json_text = json_text.rsplit("}", 1)[0] + "}"

# Load the JSON
data = json.loads(json_text)

# Step 4: Flatten the state -> cities nesting into one row per city.
locations = [
    {"state": state["name"], "city": city["name"], "url": city["url"]}
    for state in data["states"]
    for city in state["cities"]
]

locations_directory = pd.DataFrame(locations)

#### Pull from the API, not the HTML directory

In [7]:
# Ask the locations API for stores around a fixed latitude/longitude.
# NOTE(review): the large radius and limit are presumably meant to return every
# store in a single call — verify against the API.
api_response = requests.get(
    "https://www.redlobster.com/api/location/getlocations/",
    params={
        "latitude": "33.9863445",
        "longitude": "-118.4231333",
        "radius": "20000",
        "limit": "800",
    },
    headers=headers,
    timeout=30,  # do not let a stalled connection hang the notebook
)
# Surface HTTP errors here rather than as a JSON decoding failure.
api_response.raise_for_status()
all_locations = api_response.json()["locations"]

In [8]:
# Output column name -> key to read from each record's "location" object.
field_sources = {
    "location_id": "rlid",
    "location_no": "restaurantNumber",
    "street": "address1",
    "city": "city",
    "state": "state",
    "zip": "zip",
    "longitude": "longitude",
    "latitude": "latitude",
    "phone": "phone",
    "closed": "isTemporarilyClosed",
}

# One flat dict per API record, with keys in the order listed above.
locations_data_list = [
    {
        column: entry["location"][source_key]
        for column, source_key in field_sources.items()
    }
    for entry in all_locations
]

In [9]:
# Build the frame, then keep only the first row seen for each 'location_id'.
raw_locations = pd.DataFrame(locations_data_list)
src_df = raw_locations.drop_duplicates(subset=["location_id"], keep="first")

In [10]:
# Tally of the `closed` flag (taken from the API's isTemporarilyClosed field).
src_df["closed"].value_counts()

closed
False    574
True      97
Name: count, dtype: int64

#### One that's closed

In [11]:
# Show every row whose city is exactly "Kingston".
src_df[src_df["city"] == "Kingston"]

Unnamed: 0,location_id,location_no,street,city,state,zip,longitude,latitude,phone,closed
591,2018135,8135,410 Bath Road,Kingston,ON,K7M4X6,-76.557992,44.236901,(613) 545-1058,False
648,10692,692,1 Miron Lane,Kingston,NY,12401,-73.989785,41.960942,(845) 336-5021,True


In [12]:
# Number of rows left after de-duplication.
src_df.shape[0]

671

#### Create a mapping of state abbreviations to full state names using the us library

In [13]:
# Abbreviation -> full name for the values the "state" column can hold.
# `us.states.STATES` lists only the states, so rows for DC, US territories or
# Canadian provinces would otherwise end up with a missing state name.
state_mapping = {
    state.abbr: state.name
    for state in [*us.states.STATES_AND_TERRITORIES, us.states.DC]
}

# The `us` library has no Canadian data, so the provinces that this notebook
# flags as Canadian are spelled out by hand.
state_mapping.update(
    {
        "AB": "Alberta",
        "MB": "Manitoba",
        "ON": "Ontario",
        "SK": "Saskatchewan",
    }
)

#### New column of full state names based on abbreviations

In [14]:
# Look up each abbreviation in state_mapping; unmapped values become NaN.
src_df = src_df.assign(state_name=src_df["state"].map(state_mapping))

#### Make sure our brand name gets in the dataframe

In [15]:
# Same brand label on every row.
src_df = src_df.assign(brand=place_formal)

#### Add fetch date

In [16]:
# Stamp every row with the date string computed when the notebook started.
src_df = src_df.assign(updated=today)

In [17]:
# Province abbreviations used to flag Canadian locations.
canada = [
    "MB",  # Manitoba
    "ON",  # Ontario
    "SK",  # Saskatchewan
    "AB",  # Alberta
]

In [18]:
# True where the row's state abbreviation is one of the listed provinces.
src_df = src_df.assign(is_canada=src_df["state"].isin(canada))

#### If you wanted to remove Canada or closed stores

In [19]:
# us_df = src_df.query("is_canada == False and closed == False")
# canada_df = src_df.query("is_canada == True and closed == False")
# df = src_df.query("is_canada == False")

In [20]:
# Work on an independent copy so the source frame stays untouched.
df = src_df.copy(deep=True)

---

## Geography

#### Make it a geodataframe

In [21]:
# Separate copy that will be turned into the GeoDataFrame.
df_geo = df.copy(deep=True)

In [22]:
# Build one point per row from the longitude/latitude columns, attach it as
# the geometry column, then declare the coordinate reference system.
point_geometry = gpd.points_from_xy(x=df_geo["longitude"], y=df_geo["latitude"])
gdf = gpd.GeoDataFrame(df_geo, geometry=point_geometry).set_crs("4326")

---

## Maps

#### US states background

In [23]:
# State outlines from the vega us_10m TopoJSON, drawn as a light grey basemap.
us_states = alt.topo_feature(vega_data.us_10m.url, feature="states")

background = (
    alt.Chart(us_states)
    .mark_geoshape(fill="#e9e9e9", stroke="white")
    .project(type="albersUsa")
    .properties(title=f"{place_formal} locations", width=800, height=500)
)

#### Location points map

In [24]:
# One small brand-coloured circle per location, positioned by its coordinates.
store_marks = alt.Chart(gdf).mark_circle(size=10, color=color)
points = store_marks.encode(
    longitude=alt.Longitude("longitude:Q"),
    latitude=alt.Latitude("latitude:Q"),
)

# Draw the points over the basemap; the last expression renders the chart
# without a border around the view.
point_map = alt.layer(background, points)
point_map.configure_view(stroke=None)

#### Location proportional symbols map

In [25]:
# Collapse the locations to one record per state: the mean coordinates of its
# locations and how many locations it has.
state_totals = alt.Chart(gdf).transform_aggregate(
    latitude="mean(latitude)",
    longitude="mean(longitude)",
    count="count()",
    groupby=["state"],
)

# One circle per state, sized by its location count.
symbols = (
    state_totals.mark_circle()
    .encode(
        longitude=alt.Longitude("longitude:Q"),
        latitude=alt.Latitude("latitude:Q"),
        size=alt.Size("count:Q", title="Count by state"),
        color=alt.value(color),
        tooltip=[alt.Tooltip("state:N"), alt.Tooltip("count:Q")],
    )
    .properties(
        title=f"Number of {place_formal} in US, by average lon/lat of locations"
    )
)

# Draw the symbols over the basemap; the last expression renders the chart
# without a border around the view.
symbol_map = alt.layer(background, symbols)
symbol_map.configure_view(stroke=None)

---

## Exports

#### JSON

In [26]:
# Create the output folder first: the write fails if the directory is missing.
os.makedirs("data/processed", exist_ok=True)

# Write the table as a pretty-printed JSON array with one object per location.
df.to_json(
    f"data/processed/{place.lower().replace(' ', '_')}_locations.json",
    indent=4,
    orient="records",
)

#### CSV

In [27]:
# Create the output folder first: the write fails if the directory is missing.
os.makedirs("data/processed", exist_ok=True)

# Write the table as CSV without the DataFrame index column.
df.to_csv(
    f"data/processed/{place.lower().replace(' ', '_')}_locations.csv", index=False
)

#### GeoJSON

In [28]:
# Create the output folder first: the write fails if the directory is missing.
os.makedirs("data/processed", exist_ok=True)

# Write the GeoDataFrame (attributes plus point geometry) as GeoJSON.
gdf.to_file(
    f"data/processed/{place.lower().replace(' ', '_')}_locations.geojson",
    driver="GeoJSON",
)