# Get Pei Wei locations

#### Load Python tools and Jupyter config

In [1]:
# Load the lab_black IPython extension (presumably Black auto-formatting for notebook cells — confirm).
%load_ext lab_black

In [2]:
import json
import requests
import pandas as pd
import geopandas as gpd
import altair as alt
from bs4 import BeautifulSoup
from tqdm.notebook import tqdm, trange
from vega_datasets import data

In [3]:
# Lift pandas' display limits so wide/long frames render in full in the notebook.
pd.set_option("display.max_rows", 1000)
pd.set_option("display.max_columns", 1000)
pd.set_option("display.max_colwidth", None)  # None = never truncate cell text

## Read data

#### All the locations

In [4]:
# `place` is the slug used in export file names; `place_formal` is the
# display name used in chart titles.
place, place_formal = "pei-wei", "Pei Wei"

#### Headers

In [5]:
# Request headers sent with the page fetch: a desktop Chrome user-agent string.
# The value is split across adjacent literals only for readability.
headers = {
    "user-agent": (
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/120.0.0.0 Safari/537.36"
    ),
}

In [6]:
# Fetch the store-locator page.
# - timeout: requests waits indefinitely by default, which would hang the
#   notebook if the server stalls.
# - raise_for_status: fail loudly on 4xx/5xx instead of handing an error page
#   to the parser in the next cells.
response = requests.get(
    "https://www.peiwei.com/locations/", headers=headers, timeout=30
)
response.raise_for_status()

In [7]:
# Parse the fetched HTML with Python's built-in "html.parser" backend.
soup = BeautifulSoup(markup=response.text, features="html.parser")

In [8]:
# The page embeds its store list in an inline <script> as
# `const locations=<JSON>`. Find that script by its content rather than by a
# fixed position in the document, so added/removed scripts or whitespace
# changes on the page do not break the extraction.
locations_marker = "const locations="
locations_script = next(
    (
        tag.string
        for tag in soup.find_all("script")
        # tag.string is None for empty or src-only <script> tags
        if tag.string and locations_marker in tag.string
    ),
    None,
)
if locations_script is None:
    raise ValueError(
        f"Could not find an inline script containing {locations_marker!r}"
    )

# Keep only the text after the assignment. raw_decode parses the first JSON
# value and ignores whatever trails it; it does not skip leading whitespace,
# hence the lstrip().
locations_payload = locations_script.split(locations_marker, 1)[1].lstrip()
json_data, _ = json.JSONDecoder().raw_decode(locations_payload)

In [9]:
# Fields to keep from each location record, in output column order.
location_columns = [
    "id",
    "address",
    "latitude",
    "longitude",
    "title",
    "link",
    "phone",
]

# Build the frame from the parsed records and keep only those columns.
src = pd.DataFrame(json_data)[location_columns]

#### Clean up

In [10]:
# Drop the country suffix from addresses. regex=False makes this a literal
# match regardless of the pandas version's default.
src["address"] = src["address"].str.replace(", USA", "", regex=False)

# Trim surrounding whitespace, then turn the absolute URL into a site-relative
# path. Note: strip() must be called without an argument here — strip("")
# strips an empty character set and therefore removes nothing.
src["link"] = (
    src["link"].str.strip().str.replace("https://www.peiwei.com/", "", regex=False)
)

In [11]:
# The last two characters of each title are taken as the state abbreviation
# (e.g. "Allen, TX" -> "TX").
src["state"] = src["title"].str.slice(start=-2)

In [12]:
# Work on an independent (deep) copy so later edits do not touch `src`.
df = src.copy(deep=True)

In [13]:
# Preview the first five rows.
df.head(n=5)

Unnamed: 0,id,address,latitude,longitude,title,link,phone,state
0,1464,"4801 Beltline Rd. Addison, TX 75254",32.9546977,-96.8294263,"Addison, Dallas TX",texas/60-addison/,(972) 764-0844,TX
1,1459,"1008 W McDermott Dr. Suite 100 Allen, TX 75013",33.1013975,-96.6874959,"Allen, TX",texas/44-allen/,(469) 675-2266,TX
2,1362,"355 E Altamonte Drive #100 Altamonte Springs, FL 32701",28.6636289,-81.3806041,"Altamonte Springs, FL",florida/228-altamonte-springs/,(407)-834-0752,FL
3,1325,"4340 E Indian School Rd. Bldg. A Suite 23 Phoenix, AZ 85018",33.4954758,-111.9881878,"Arcadia, Phoenix, AZ",arizona/2-arcadia/,(602) 956-2300,AZ
4,2548,"1290 S Normal Ave, Tempe, AZ",33.417635,-111.934427,"Arizona State University, AZ",arizona/30012-arizona-state-university/,,AZ


In [14]:
# Number of location rows.
len(df.index)

121

---

## Geography

#### Make it a geodataframe

In [15]:
# Separate (deep) copy to carry the geometry column, leaving `df` as-is for
# the CSV/JSON exports.
df_geo = df.copy(deep=True)

In [16]:
# Build one point geometry per row (x = longitude, y = latitude) and attach
# it as the frame's geometry column.
location_points = gpd.points_from_xy(x=df_geo["longitude"], y=df_geo["latitude"])
gdf = gpd.GeoDataFrame(df_geo, geometry=location_points)

In [17]:
# Declare the coordinates as EPSG:4326; set_crs returns a new frame here
# (not in-place), which is then copied for export.
gdf_with_crs = gdf.set_crs(crs="EPSG:4326")
locations_gdf = gdf_with_crs.copy()

---

## Maps

#### US states background

In [18]:
# Base layer: US state outlines from the vega_datasets 10m TopoJSON, drawn in
# light grey with white borders using the Albers USA projection.
us_states = alt.topo_feature(data.us_10m.url, feature="states")
map_title = f"{place_formal} locations"

background = alt.Chart(us_states).mark_geoshape(fill="#e9e9e9", stroke="white")
background = background.properties(width=800, height=500, title=map_title)
background = background.project("albersUsa")

#### Location points map

In [19]:
# One small red dot per location, positioned by its longitude/latitude fields.
point_marks = alt.Chart(gdf).mark_circle(size=10, color="red")
points = point_marks.encode(longitude="longitude:Q", latitude="latitude:Q")

# Overlay the dots on the state outlines; the final expression renders the
# chart without the view border.
point_map = alt.layer(background, points)
point_map.configure_view(stroke=None)

#### Location proportional symbols map

In [20]:
# Collapse locations to one row per state: the mean latitude/longitude of its
# locations plus how many there are.
per_state = alt.Chart(gdf).transform_aggregate(
    latitude="mean(latitude)",
    longitude="mean(longitude)",
    count="count()",
    groupby=["state"],
)

# One red circle per state, sized by the location count, with a tooltip.
symbols_title = f"Number of {place_formal} in US, by average lon/lat of locations"
symbols = (
    per_state.mark_circle()
    .encode(
        longitude="longitude:Q",
        latitude="latitude:Q",
        size=alt.Size("count:Q", title="Count by state"),
        color=alt.value("red"),
        tooltip=["state:N", "count:Q"],
    )
    .properties(title=symbols_title)
)

# Overlay on the state outlines; the final expression renders the chart
# without the view border.
symbol_map = alt.layer(background, symbols)
symbol_map.configure_view(stroke=None)

---

## Exports

#### CSV

In [21]:
# Write the tabular locations as CSV, without the index column.
csv_path = f"data/processed/{place}_locations.csv"
df.to_csv(csv_path, index=False)

#### JSON

In [22]:
# Write the same rows as a JSON array of records, indented by 4 spaces.
json_path = f"data/processed/{place}_locations.json"
df.to_json(json_path, orient="records", indent=4)

#### GeoJSON

In [23]:
# Write the CRS-tagged geodataframe as GeoJSON.
geojson_path = f"data/processed/{place}_locations.geojson"
locations_gdf.to_file(geojson_path, driver="GeoJSON")