# Get Chili's locations

#### Load Python tools and Jupyter config

In [1]:
%load_ext lab_black

In [2]:
import pandas as pd
import os
import requests
import json

os.environ["USE_PYGEOS"] = "0"
import geopandas as gpd
from bs4 import BeautifulSoup
from tqdm.notebook import tqdm, trange
import us

In [3]:
pd.options.display.max_columns = 100
pd.options.display.max_rows = 1000
pd.options.display.max_colwidth = None

---

## Scrape

#### Headers for requests

In [4]:
headers = {
    "Connection": "keep-alive",
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.121 Safari/537.36",
}

#### Make a list of state URLs

In [5]:
states = [
    "Alabama",
    "Alaska",
    "Arizona",
    "Arkansas",
    "California",
    "Colorado",
    "Connecticut",
    "Delaware",
    "Florida",
    "Georgia",
    "Hawaii",
    "Idaho",
    "Illinois",
    "Indiana",
    "Iowa",
    "Kansas",
    "Kentucky",
    "Louisiana",
    "Maine",
    "Maryland",
    "Massachusetts",
    "Michigan",
    "Minnesota",
    "Mississippi",
    "Missouri",
    "Montana",
    "Nebraska",
    "Nevada",
    "New Hampshire",
    "New Jersey",
    "New Mexico",
    "New York",
    "North Carolina",
    "North Dakota",
    "Ohio",
    "Oklahoma",
    "Oregon",
    "Pennsylvania",
    "Rhode Island",
    "South Carolina",
    "South Dakota",
    "Tennessee",
    "Texas",
    "Utah",
    "Vermont",
    "Virginia",
    "Washington",
    "West Virginia",
    "Wisconsin",
    "Wyoming",
]

In [6]:
state_urls = []
for s in tqdm(states):
    state_urls.append(f"https://chilis.com/locations/us/{s.lower().replace(' ', '-')}")

  0%|          | 0/50 [00:00<?, ?it/s]

#### Loop through state urls and collect cities with restaurants and their respective URLs

In [7]:
state_responses = []
cities = []

for sr in tqdm(state_urls):
    state_response = requests.get(sr, headers=headers)
    state_soup = BeautifulSoup(state_response.text, "html.parser")
    state_data = state_soup.findAll("a", class_="top-locations-link")
    for sd in state_data:
        cities_dict = {
            "city_url": f'https://chilis.com{sd["href"]}',
            "city_name": sd["href"]
            .replace("locations/us/", "")
            .replace(sr.replace("https://chilis.com/locations/us/", ""), "")
            .title()
            .replace("//", ""),
            "state_name": sr.replace("https://chilis.com/locations/us/", "").title(),
        }
        cities.append(cities_dict)

  0%|          | 0/50 [00:00<?, ?it/s]

#### Each city page has a list of cities in that area, with links to location detail pages

In [8]:
city_details = []

for s in tqdm(cities):
    city_response = requests.get(s["city_url"], headers=headers)
    city_soup = BeautifulSoup(city_response.text, "html.parser")
    city_data = city_soup.findAll("a", class_="details-btn")
    for cd in city_data:
        city_data_dict = {
            "city_detail_url": cd["href"],
        }
        city_details.append(city_data_dict)

  0%|          | 0/1024 [00:00<?, ?it/s]

#### Make a dataframe from the detail page URLs

In [9]:
city_details_df = pd.DataFrame(city_details).drop_duplicates().reset_index(drop=True)

In [10]:
city_details_df["city_detail_url"] = (
    "https://chilis.com/" + city_details_df["city_detail_url"]
)

#### Scrape detail pages, grabbing inline json to populate location info

In [11]:
city_details_urls = city_details_df["city_detail_url"].to_list()

In [12]:
det_data_list = []

for det in tqdm(city_details_urls):
    det_response = requests.get(det, headers=headers)
    det_soup = BeautifulSoup(det_response.text, "html.parser")
    data = json.loads(det_soup.findAll("script", type="application/ld+json")[1].text)
    det_data_dict = {
        "brand": data["brand"],
        "location_name": data["name"],
        "phone": data["telephone"],
        "street": data["address"]["streetAddress"],
        "city": data["address"]["addressLocality"],
        "state": data["address"]["addressRegion"],
        "zip": data["address"]["postalCode"],
        "latitude": data["geo"]["latitude"],
        "longitude": data["geo"]["longitude"],
        "url": data["url"],
    }
    det_data_list.append(det_data_dict)

  0%|          | 0/1233 [00:00<?, ?it/s]

ConnectionError: HTTPSConnectionPool(host='chilis.com', port=443): Max retries exceeded with url: //locations/us/tennessee/chattanooga/chattanooga/ (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x12062d490>: Failed to establish a new connection: [Errno 8] nodename nor servname provided, or not known'))

#### Dataframe of all locations

In [None]:
df = pd.DataFrame(det_data_list)

In [None]:
df["state_name"] = (
    df["url"].str.split("/", expand=True)[5].str.title().str.replace("-", " ")
)

In [None]:
df.head()

In [None]:
df.tail()

#### Fix [two](https://www.chilis.com/locations/us/new-jersey/montclair/montclair-university/) [locations](https://www.chilis.com/locations/us/florida/miami/florida-intl-university/) with bad lon/lat on the page

In [None]:
df.loc[df["location_name"] == "Florida Intl University", "latitude"] = 25.7562861
df.loc[df["location_name"] == "Florida Intl University", "longitude"] = -80.3723177
df.loc[df["location_name"] == "Montclair University", "latitude"] = 40.859758
df.loc[df["location_name"] == "Montclair University", "longitude"] = -74.199745

---

## Analysis

#### Which states have the most locations?

In [None]:
state_counts = (
    df.groupby(["state_name", "state"])["location_name"]
    .count()
    .reset_index(name="count")
    .sort_values("count", ascending=False)
)

In [None]:
state_counts.head(8)

#### What about if we normalize for population?

In [None]:
state_fips_codes = [
    {"state": "Alabama", "fips_code": "01"},
    {"state": "Alaska", "fips_code": "02"},
    {"state": "Arizona", "fips_code": "04"},
    {"state": "Arkansas", "fips_code": "05"},
    {"state": "California", "fips_code": "06"},
    {"state": "Colorado", "fips_code": "08"},
    {"state": "Connecticut", "fips_code": "09"},
    {"state": "Delaware", "fips_code": "10"},
    {"state": "Florida", "fips_code": "12"},
    {"state": "Georgia", "fips_code": "13"},
    {"state": "Hawaii", "fips_code": "15"},
    {"state": "Idaho", "fips_code": "16"},
    {"state": "Illinois", "fips_code": "17"},
    {"state": "Indiana", "fips_code": "18"},
    {"state": "Iowa", "fips_code": "19"},
    {"state": "Kansas", "fips_code": "20"},
    {"state": "Kentucky", "fips_code": "21"},
    {"state": "Louisiana", "fips_code": "22"},
    {"state": "Maine", "fips_code": "23"},
    {"state": "Maryland", "fips_code": "24"},
    {"state": "Massachusetts", "fips_code": "25"},
    {"state": "Michigan", "fips_code": "26"},
    {"state": "Minnesota", "fips_code": "27"},
    {"state": "Mississippi", "fips_code": "28"},
    {"state": "Missouri", "fips_code": "29"},
    {"state": "Montana", "fips_code": "30"},
    {"state": "Nebraska", "fips_code": "31"},
    {"state": "Nevada", "fips_code": "32"},
    {"state": "New Hampshire", "fips_code": "33"},
    {"state": "New Jersey", "fips_code": "34"},
    {"state": "New Mexico", "fips_code": "35"},
    {"state": "New York", "fips_code": "36"},
    {"state": "North Carolina", "fips_code": "37"},
    {"state": "North Dakota", "fips_code": "38"},
    {"state": "Ohio", "fips_code": "39"},
    {"state": "Oklahoma", "fips_code": "40"},
    {"state": "Oregon", "fips_code": "41"},
    {"state": "Pennsylvania", "fips_code": "42"},
    {"state": "Rhode Island", "fips_code": "44"},
    {"state": "South Carolina", "fips_code": "45"},
    {"state": "South Dakota", "fips_code": "46"},
    {"state": "Tennessee", "fips_code": "47"},
    {"state": "Texas", "fips_code": "48"},
    {"state": "Utah", "fips_code": "49"},
    {"state": "Vermont", "fips_code": "50"},
    {"state": "Virginia", "fips_code": "51"},
    {"state": "Washington", "fips_code": "53"},
    {"state": "West Virginia", "fips_code": "54"},
    {"state": "Wisconsin", "fips_code": "55"},
    {"state": "Wyoming", "fips_code": "56"},
]

#### Get population totals

In [None]:
base_url = "https://api.census.gov/data/2021/pep/population"
data_list = []

for state_info in state_fips_codes:
    # Construct the API URL for the current state
    api_url = f"{base_url}?get=NAME,POP_2021&for=state:{state_info['fips_code']}"

    response = requests.get(api_url)

    if response.status_code == 200:
        # Parse the JSON response
        result = response.json()

        data_list.append(result[1])

data_list.insert(0, ["state_name", "state_pop", "state_fips"])

In [None]:
pop_df = pd.DataFrame(data_list[1:], columns=data_list[0])

In [None]:
state_counts_pop = pd.merge(state_counts, pop_df, on="state_name")

In [None]:
state_counts_pop["rate_million"] = (
    (state_counts_pop["count"] / state_counts_pop["state_pop"].astype(int)) * 1000000
).round(2)

In [None]:
state_counts_pop.sort_values("rate_million", ascending=False).head()

In [None]:
state_counts_pop.sort_values("rate_million", ascending=False).tail()

---

## Geography

#### Geodataframe from lon/lat

In [None]:
gdf = gpd.GeoDataFrame(
    df, geometry=gpd.points_from_xy(df.longitude, df.latitude), crs="EPSG:4326"
)

#### Plot it

In [None]:
gdf.plot()

In [None]:
df = df.drop(["geometry"], axis=1)

---

## Exports

In [None]:
gdf.to_file("data/processed/chilis.geojson", driver="GeoJSON")

In [None]:
df.to_json("data/processed/chilis.json", orient="records", indent=4)

In [None]:
df.to_csv("data/processed/chilis.csv", index=False)