# Get XyXy locations

#### Load Python tools and Jupyter config

In [1]:
import us 
import json
import black
import requests
import pandas as pd
import jupyter_black
import altair as alt
import geopandas as gpd
from bs4 import BeautifulSoup
from vega_datasets import data
from tqdm.notebook import tqdm, trange

In [2]:
# Enable black auto-formatting for the cells in this notebook.
jupyter_black.load()

# Let pandas print wide/long frames and full cell contents without truncation.
pd.set_option("display.max_columns", 100)
pd.set_option("display.max_rows", 1000)
pd.set_option("display.max_colwidth", None)

# Turn off Altair's max-rows check so charts can be built from large frames.
alt.data_transformers.disable_max_rows()

In [3]:
# Template parameters: fill these in for the brand being scraped.
# `place` is slugified for export file names; `place_formal` is used in chart
# titles and in the `brand` column.
place, place_formal = "XYXYXYXY", "XYXYXYXY"

# Hex color applied to the map marks.
color = "#______"

# Reference coordinates, kept as strings.
# NOTE(review): presumably a default map center — confirm where these are used.
latitude, longitude = "39.106667", "-94.676392"

## Scrape

#### Headers for the request

In [4]:
# Browser-like headers sent with every API request.
# NOTE(review): the x-forter-token value looks like it was captured from a
# browser session — confirm whether it expires and needs refreshing.
headers = dict(
    [
        (
            "sec-ch-ua",
            '"Chromium";v="122", "Not(A:Brand";v="24", "Google Chrome";v="122"',
        ),
        (
            "x-forter-token",
            "5ad4acbee61a4c2cb93c7b44f4bc1ca6_1709437039163__UDF43-m4_13ck_tt",
        ),
        (
            "User-Agent",
            "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
            "AppleWebKit/537.36 (KHTML, like Gecko) "
            "Chrome/122.0.0.0 Safari/537.36",
        ),
    ]
)

#### Import the country's largest ZIP Codes and ensure they have five digits

In [5]:
# Load the ZIP Code demographics reference table.
zips = pd.read_json("../_reference/data/zip_code_demographics_esri.json")

# Keep ZIP Codes with more than 5,000 residents, most populous first.
zips = zips[zips["population"] > 5000]
zips = zips.sort_values("population", ascending=False).reset_index(drop=True)

# Left-pad to five characters so ZIP Codes with leading zeros are intact.
zips["zipcode"] = zips["zipcode"].astype(str).str.zfill(5)

#### Select the 500 most populous and 100 least populous of those ZIP Codes as search centers

In [6]:
# `zips` is sorted by population (descending): the first 500 rows are the most
# populous ZIP Codes and the last 100 the least populous ones that remain.
top = zips.iloc[:500]
bottom = zips.iloc[-100:]

# Stack both slices into the list of search centers.
zipcodes_df = pd.concat(objs=[top, bottom])

In [7]:
# Preview the first five search centers.
zipcodes_df.head(n=5)

Unnamed: 0,zipcode,name,state_name,state,population,population_sqmi,households,avg_hh_size,med_hh_income,avg_hh_income,per_cap_income,diversity_index,area_meters,latitude,longitude
0,77494,Katy,Texas,TX,163194.0,4061.9,49704.0,3.28,134912.0,172134.0,52497.0,74.9,138683779.5,29.744751,-95.826242
1,77449,Katy,Texas,TX,134540.0,4989.9,38237.0,3.52,78096.0,95889.0,27292.0,84.7,92847306.5,29.836113,-95.737685
2,75070,Mckinney,Texas,TX,123055.0,4960.6,42898.0,2.87,108330.0,135167.0,47167.0,61.7,91997310.5,33.172003,-96.69777
3,11368,Corona,New York,NY,121409.0,58176.7,30724.0,3.92,53498.0,73033.0,18735.0,92.8,9379884.5,40.749593,-73.855624
4,77084,Houston,Texas,TX,121112.0,3000.4,39290.0,3.08,76385.0,101532.0,32945.0,85.1,139323192.0,29.826236,-95.648321


#### All the locations

In [None]:
# Query the restaurant-locator GraphQL endpoint once per ZIP Code centroid and
# collect each response as a DataFrame in `response_list`.
response_list = []

# Loop-invariant request pieces, built once.
graphql_url = "https://use1-prod-plk-gateway.rbictg.com/graphql"
extensions = '{"persistedQuery":{"version":1,"sha256Hash":"05c231a96351360e01af3a4159a83ff551d5d37e9743b1b78cb2885682e7cc60"}}'

for _, row in tqdm(zipcodes_df.iterrows(), total=zipcodes_df.shape[0]):
    # Read the coordinates into the payload directly so the notebook-level
    # `latitude` / `longitude` parameters are not overwritten by the loop.
    variables = {
        "input": {
            "filter": "NEARBY",
            "coordinates": {
                "userLat": float(row["latitude"]),
                "userLng": float(row["longitude"]),
                "searchRadius": 528000,
            },
            "first": 1000,
            "status": "OPEN",
        }
    }

    params = {
        "operationName": "GetRestaurantsV2",
        # Serialize with json.dumps (compact separators) instead of
        # hand-escaping braces in an f-string; the output is the same JSON.
        "variables": json.dumps(variables, separators=(",", ":")),
        "extensions": extensions,
    }

    # A timeout keeps one stalled connection from hanging the whole run, and
    # raise_for_status() reports HTTP errors directly instead of as a confusing
    # JSON/KeyError further down.
    http_response = requests.get(
        graphql_url,
        params=params,
        headers=headers,
        timeout=30,
    )
    http_response.raise_for_status()

    response = http_response.json()["data"]["restaurants"]["nodes"]

    src = pd.DataFrame(response)

    response_list.append(src)

  0%|          | 0/600 [00:00<?, ?it/s]

In [None]:
# Combine every per-ZIP response into one frame. Nearby search centers return
# overlapping result sets, so the same restaurant appears in several responses;
# keep a single row per store.
# NOTE(review): assumes `number` uniquely identifies a restaurant — confirm.
src_df = (
    pd.concat(response_list)
    .drop_duplicates(subset="number")
    .reset_index(drop=True)
)

In [None]:
# Flatten the nested `physicalAddress` dicts into top-level columns.
address_columns = [
    "address1",
    "address2",
    "city",
    "country",
    "postalCode",
    "stateProvince",
    "stateProvinceShort",
    "__typename",
]

# Normalize the addresses of the combined frame (`src_df`), not `src`, which
# only holds the last response fetched by the scrape loop. Rows without an
# address dict become empty records, which yield NaN in every address column.
address = pd.json_normalize(
    [
        entry if isinstance(entry, dict) else {}
        for entry in src_df["physicalAddress"]
    ]
)

# Row i of `address` describes row i of `src_df`; share the index so the
# assignment below lines up whatever index `src_df` carries.
address.index = src_df.index

# Select by name (missing keys become NaN) rather than relying on the order in
# which the API happens to return the address fields.
src_df[address_columns] = address.reindex(columns=address_columns)

In [None]:
# Keep only the fields needed downstream and give the address/contact columns
# shorter names. `.copy()` makes `df` independent of `src_df`.
df = (
    src_df.loc[
        :,
        [
            "number",
            "email",
            "address1",
            "city",
            "postalCode",
            "stateProvince",
            "phoneNumber",
            "latitude",
            "longitude",
        ],
    ]
    .rename(
        columns=dict(
            address1="street",
            postalCode="zip",
            phoneNumber="phone",
            stateProvince="state",
        )
    )
    .copy()
)

#### Create a mapping of state abbreviations to full state names using the us library

In [None]:
# Abbreviation -> full name lookup. `us.states.STATES` leaves out the
# territories and, depending on the installed `us` version, the District of
# Columbia, so locations there would end up with a missing state name. Include
# both explicitly; a repeated abbreviation simply overwrites itself.
state_mapping = {
    state.abbr: state.name
    for state in (*us.states.STATES_AND_TERRITORIES, us.states.DC)
}

#### New column of full state names based on abbreviations

In [None]:
# Look each `state` value up in the abbreviation mapping; values with no entry
# in the mapping come back as NaN.
df = df.assign(state_name=df["state"].map(state_mapping))

#### Make sure our brand name gets in the dataframe

In [None]:
# Tag every row with the formal brand name set in the parameters cell.
df["brand"] = place_formal

---

## Geography

#### Make it a geodataframe

In [None]:
# Work on an independent copy so the geometry column is not added to `df`.
df_geo = df.copy(deep=True)

In [None]:
# Build point geometries from the longitude/latitude columns. Declare the CRS
# (EPSG:4326, i.e. lon/lat degrees) explicitly: without it the GeoDataFrame has
# no CRS, so it cannot be reprojected and the coordinate system is left implicit
# for anything that consumes it.
gdf = gpd.GeoDataFrame(
    df_geo,
    geometry=gpd.points_from_xy(df_geo["longitude"], df_geo["latitude"]),
    crs="EPSG:4326",
)

---

## Maps

#### US states background

In [None]:
# Base layer: US state outlines in light grey, drawn with the Albers USA
# projection and titled with the brand name.
background = (
    alt.Chart(alt.topo_feature(data.us_10m.url, feature="states"))
    .project(type="albersUsa")
    .properties(title=f"{place_formal} locations", width=800, height=500)
    .mark_geoshape(stroke="white", fill="#e9e9e9")
)

#### Location points map

In [None]:
# One small dot per location, positioned by its longitude/latitude.
points = (
    alt.Chart(gdf)
    .mark_circle(color=color, size=5)
    .encode(
        latitude=alt.Latitude("latitude:Q"),
        longitude=alt.Longitude("longitude:Q"),
    )
)

# Draw the dots on top of the state outlines and drop the view border.
point_map = alt.layer(background, points)
point_map.configure_view(stroke=None)

#### Location proportional symbols map

In [None]:
# One circle per state, placed at the mean longitude/latitude of that state's
# locations and sized by how many locations it has.
symbols = (
    alt.Chart(gdf)
    .properties(
        title=f"Number of {place_formal} in US, by average lon/lat of locations"
    )
    .transform_aggregate(
        latitude="mean(latitude)",
        longitude="mean(longitude)",
        count="count()",
        groupby=["state"],
    )
    .mark_circle()
    .encode(
        latitude=alt.Latitude("latitude:Q"),
        longitude=alt.Longitude("longitude:Q"),
        color=alt.value(color),
        size=alt.Size("count:Q", title="Count by state"),
        tooltip=[alt.Tooltip("state:N"), alt.Tooltip("count:Q")],
    )
)

# Overlay the symbols on the state outlines and drop the view border.
symbol_map = alt.layer(background, symbols)
symbol_map.configure_view(stroke=None)

---

## Exports

#### JSON

In [None]:
# Write the cleaned table as a JSON array of records; the file name is the
# lower-cased `place` with spaces replaced by underscores.
json_path = f"data/processed/{place.lower().replace(' ', '_')}_locations.json"
df.to_json(path_or_buf=json_path, orient="records", indent=4)

#### CSV

In [None]:
# Write the same table as CSV, without the index column.
csv_path = f"data/processed/{place.lower().replace(' ', '_')}_locations.csv"
df.to_csv(path_or_buf=csv_path, index=False)

#### GeoJSON

In [None]:
# Write the point layer as GeoJSON.
geojson_path = f"data/processed/{place.lower().replace(' ', '_')}_locations.geojson"
gdf.to_file(filename=geojson_path, driver="GeoJSON")