# Get Olive Garden locations

#### Load Python tools and Jupyter config

In [1]:
import us
import json
import requests
import pandas as pd
import jupyter_black
import altair as alt
import geopandas as gpd
from bs4 import BeautifulSoup
from vega_datasets import data as vega_data
from tqdm.notebook import tqdm, trange

In [2]:
# Auto-format code cells with Black as they are run.
jupyter_black.load()

# Widen pandas' display limits so wide/long frames are shown without truncation.
pd.set_option("display.max_columns", 100)
pd.set_option("display.max_rows", 1000)
pd.set_option("display.max_colwidth", None)

# Lift Altair's row cap so the full dataset can be embedded in charts.
alt.data_transformers.disable_max_rows()

DataTransformerRegistry.enable('default')

In [3]:
# Brand identifiers: `place` is the slug used in export filenames,
# `place_formal` is the display name used in chart titles and the brand column.
place, place_formal = "olive-garden", "Olive Garden"

# Marker color for the map layers.
color = "#707113"

# Date of this run as YYYY-MM-DD; stamped on every row as the fetch date.
today = pd.Timestamp.today().date().isoformat()

---

## Scrape

#### Get a dataframe of ZIP Codes

In [4]:
# Load the ZIP reference table, then order it from most to least populous
# so the head of the frame holds the largest ZIP Codes.
zips_all = pd.read_json("../../_reference/data/zips_reference_pop_gen.json")
zips_all = zips_all.sort_values(by="population", ascending=False)

#### Get the most populous ZIPs and a sample of the rest

In [5]:
# zips_all is sorted by population (descending), so the head is the 1,000
# most populous ZIP Codes.
zips_top = zips_all.head(1000)

# Everything after the top 1,000 is the pool for the random sample.
zips_rest = zips_all.iloc[1000:]

# Fixed seed: without it every run searches a different set of points, so the
# scrape (and the resulting location count) is not reproducible. The sample
# size is capped so a short reference table cannot make .sample() raise.
zips_sample = zips_rest.sample(n=min(1000, len(zips_rest)), random_state=42)

zips = pd.concat([zips_sample, zips_top]).reset_index(drop=True)

In [6]:
# Number of search points that will be sent to the locator API.
zips.shape[0]

2000

#### Headers for request

In [7]:
# Headers sent with every locator request; the user-agent string identifies
# the client as Chrome on an Android phone.
headers = dict(
    [
        ("accept-language", "en-US,en;q=0.9,es;q=0.8"),
        ("sec-fetch-mode", "cors"),
        (
            "user-agent",
            "Mozilla/5.0 (Linux; Android 13; Pixel 8) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Mobile Safari/537.36",
        ),
        ("x-request-id", "REQ_1711845432588"),
        ("x-source-channel", "WEB"),
    ]
)

In [8]:
# Query the restaurant locator once per ZIP latitude/longitude and collect one
# dict per restaurant returned. Nearby search points return the same
# restaurants repeatedly, so this list contains duplicates.
extracted_info = []

# Seconds to wait on a single request. requests has no default timeout, so
# without this one stalled connection would hang the whole loop.
request_timeout = 30

for index, row in tqdm(zips.iterrows(), total=zips.shape[0]):
    latitude = row["latitude"]
    longitude = row["longitude"]

    params = {
        "locale": "en_US",
        "latitude": latitude,
        "longitude": longitude,
        "resultsPerPage": "10",
    }

    try:
        response = requests.get(
            "https://www.olivegarden.com/api/restaurants",
            params=params,
            headers=headers,
            timeout=request_timeout,
        )
        response.raise_for_status()  # Raise for 4xx/5xx so they are reported below

        restaurants = response.json().get("restaurants") or []
        for restaurant in restaurants:
            # `or {}` / `or []` also cover keys that are present but null.
            # A plain .get(key, default) returns None in that case, and the
            # resulting AttributeError would discard every remaining
            # restaurant in this response.
            contact = restaurant.get("contactDetail") or {}
            phone_details = contact.get("phoneDetail") or []
            phone_number = (
                phone_details[0].get("phoneNumber", "N/A") if phone_details else "N/A"
            )

            address = contact.get("address") or {}
            coordinates = address.get("coordinates") or {}
            state_name = address.get("stateName")

            restaurant_info = {
                "location_name": restaurant.get("restaurantName", "N/A"),
                "location_number": restaurant.get("restaurantNumber", "N/A"),
                "street": address.get("street1", "N/A"),
                "city": address.get("city", "N/A"),
                # Title-cased so it can be matched against full state names.
                "state_name": state_name.title() if state_name else "N/A",
                "zip": address.get("zipCode", "N/A"),
                "phone": phone_number,
                "latitude": coordinates.get("latitude", "N/A"),
                "longitude": coordinates.get("longitude", "N/A"),
                "open_date": restaurant.get("restaurantOpenDate", "N/A"),
            }

            extracted_info.append(restaurant_info)
    except requests.exceptions.HTTPError as err:
        print(f"HTTP Error for latitude: {latitude}, longitude: {longitude}: {err}")
    except Exception as e:
        # Best-effort scrape: timeouts, connection errors and malformed
        # payloads are reported and the loop moves on to the next point.
        print(f"Error for latitude: {latitude}, longitude: {longitude}: {e}")

  0%|          | 0/2000 [00:00<?, ?it/s]

In [9]:
# Build the frame from the collected records, then keep the first row seen
# for each restaurant number (the same restaurant is returned by many nearby
# search points).
df = pd.DataFrame(extracted_info)
df = df.drop_duplicates(subset=["location_number"])

#### How many locations

In [10]:
# Count of unique restaurants after de-duplication.
df.shape[0]

911

#### The result

In [11]:
# Preview the first five rows.
df.head(5)

Unnamed: 0,location_name,location_number,street,city,state_name,zip,phone,latitude,longitude,open_date
0,Orange - Connecticut,1388,439 Boston Post Rd.,Orange,Connecticut,64773507,2037958600,41.259112,-73.011775,1993-04-19
1,Danbury,1463,36 Backus Ave.,Danbury,Connecticut,68107329,2037432144,41.374711,-73.486486,1995-11-06
2,North Haven,1776,310 Universal Drive North,North Haven,Connecticut,64733163,2032341327,41.35504,-72.872338,2008-12-08
3,Centereach Mall,1507,257 Centereach Mall,Centereach,New York,117202738,6315854027,40.859118,-73.082171,1996-09-09
4,Bay Shore,4475,1715 Sunrise Highway,Bay Shore,New York,117066007,6316655213,40.737139,-73.244328,2014-10-06


#### Create a mapping of full state names to state abbreviations using the us library

In [12]:
# Full name -> postal abbreviation. us.states.STATES on its own leaves out the
# territories and, depending on the library version/configuration, Washington
# DC, so restaurants there would end up with no abbreviation. Keys are
# title-cased to match the title-cased state_name column (which turns
# "District of Columbia" into "District Of Columbia"); the 50 state names are
# unchanged by .title().
state_mapping = {
    state.name.title(): state.abbr
    for state in [*us.states.STATES_AND_TERRITORIES, us.states.DC]
}

#### New column of state abbreviations based on full state names

In [13]:
# Look up each row's full state name in the mapping; names with no entry
# become NaN.
df = df.assign(state=df["state_name"].map(state_mapping))

#### Make sure our brand name gets in the dataframe

In [14]:
# Same brand label on every row.
df = df.assign(brand=place_formal)

#### Add fetch date

In [15]:
# Stamp every row with the date of this run.
df = df.assign(updated=today)

---

## Geography

#### Make it a geodataframe

In [16]:
# Work on an independent copy so the geometry column is not added to df.
df_geo = df.copy(deep=True)

In [17]:
# Build point geometries from the longitude (x) and latitude (y) columns,
# then tag the frame with its coordinate reference system.
gdf = gpd.GeoDataFrame(
    df_geo,
    geometry=gpd.points_from_xy(x=df_geo["longitude"], y=df_geo["latitude"]),
)
gdf = gdf.set_crs("4326")

---

## Maps

#### US states background

In [18]:
# Base layer: light-grey US state shapes with white borders, drawn in the
# Albers USA projection. The chart title comes from the brand name.
background = (
    alt.Chart(alt.topo_feature(vega_data.us_10m.url, feature="states"))
    .mark_geoshape(stroke="white", fill="#e9e9e9")
    .project(type="albersUsa")
    .properties(title=f"{place_formal} locations", width=800, height=500)
)

#### Location points map

In [19]:
# One small dot per restaurant, positioned by its coordinates.
points = alt.Chart(gdf).mark_circle(color=color, size=5)
points = points.encode(
    latitude="latitude:Q",
    longitude="longitude:Q",
)

# Draw the dots over the state shapes and drop the outer view border.
point_map = alt.layer(background, points)
point_map.configure_view(stroke=None)

#### Location proportional symbols map

In [20]:
# Collapse the locations to one record per state: the mean coordinates of
# that state's restaurants and how many there are.
symbols = alt.Chart(gdf).transform_aggregate(
    groupby=["state"],
    latitude="mean(latitude)",
    longitude="mean(longitude)",
    count="count()",
)

# One circle per state, sized by its restaurant count.
symbols = symbols.mark_circle().encode(
    latitude="latitude:Q",
    longitude="longitude:Q",
    color=alt.value(color),
    size=alt.Size("count:Q", title="Count by state"),
    tooltip=["state:N", "count:Q"],
)
symbols = symbols.properties(
    title=f"Number of {place_formal} in US, by average lon/lat of locations"
)

# Draw the circles over the state shapes and drop the outer view border.
symbol_map = alt.layer(background, symbols)
symbol_map.configure_view(stroke=None)

---

## Exports

#### JSON

In [21]:
# Write the locations as a pretty-printed JSON array, one object per row.
df.to_json(
    path_or_buf=f"data/processed/{place.lower().replace(' ', '_')}_locations.json",
    orient="records",
    indent=4,
)

#### CSV

In [22]:
# Write the locations as CSV without the dataframe index.
df.to_csv(
    path_or_buf=f"data/processed/{place.lower().replace(' ', '_')}_locations.csv",
    index=False,
)

#### GeoJSON

In [23]:
# Write the geodataframe (attributes plus point geometry) as GeoJSON.
gdf.to_file(
    filename=f"data/processed/{place.lower().replace(' ', '_')}_locations.geojson",
    driver="GeoJSON",
)