# Get Costco locations

#### Load Python tools and Jupyter config

In [13]:
import us 
import re
import json
import black
import random
import requests
import numpy as np
import pandas as pd
import jupyter_black
import altair as alt
from time import sleep
import geopandas as gpd
from random import randint
from bs4 import BeautifulSoup
from vega_datasets import data
from tqdm.notebook import tqdm, trange

In [14]:
# Turn on Black formatting for the notebook's code cells.
jupyter_black.load()

# Raise pandas' display limits: up to 100 columns and 1000 rows are shown,
# and cell contents are never truncated by width.
pd.set_option("display.max_columns", 100)
pd.set_option("display.max_rows", 1000)
pd.set_option("display.max_colwidth", None)

In [15]:
# Brand identifiers: `place` is the slug used in export file names,
# `place_formal` is the display name used in chart titles.
place, place_formal = "costco", "Costco"

# Marker colour for the maps.
# NOTE(review): "#______" looks like an unfilled template placeholder — confirm
# and replace with a real hex colour.
color = "#______"

# Reference coordinates, kept as strings. Not referenced by the request
# parameters in the visible notebook.
latitude, longitude = "39.106667", "-94.676392"

## Scrape

#### Headers for requests

In [None]:
# Browser identity sent with the request (desktop Chrome on macOS).
_USER_AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36"

# HTTP headers sent with the warehouse lookup request; "x-requested-with"
# marks the call as an XMLHttpRequest.
headers = {
    "authority": "www.costco.com",
    "accept": "*/*",
    "user-agent": _USER_AGENT,
    "x-requested-with": "XMLHttpRequest",
}

In [33]:
# Query parameters for the warehouse lookup endpoint: a search origin
# (latitude/longitude), a country filter and the number of warehouses to return.
# NOTE(review): the sample output is ordered by distance from this origin, so
# the endpoint presumably returns the nearest warehouses — confirm.
params = {
    "langId": "-1",
    "numOfWarehouses": "10",
    "latitude": "33.97574997",
    "longitude": "-118.25048828",
    "countryCode": "US",
}

# The timeout keeps the cell from hanging indefinitely if the server never
# answers; raise_for_status() surfaces HTTP errors (blocked request, 5xx, ...)
# directly instead of as a confusing JSON decode failure.
raw_response = requests.get(
    "https://www.costco.com/AjaxWarehouseBrowseLookupView",
    params=params,
    # cookies=cookies,
    headers=headers,
    timeout=30,
)
raw_response.raise_for_status()

# The first element of the returned JSON array is skipped; the remaining
# elements are kept as the location records.
# NOTE(review): confirm what element 0 of the payload holds.
response = raw_response.json()[1:]

In [34]:
# Convert the list of dictionaries into a DataFrame.
# The parsed payload from the request cell is bound to `response`; the
# previously used name `response_data` was never defined (NameError).
df = pd.DataFrame(response)

In [35]:
# Show the scraped locations as the cell output
df

Unnamed: 0,stlocID,displayName,identifier,phone,fax,address1,city,state,country,zipCode,manager,openDate,tiresDepartmentPhone,distance,latitude,longitude,parentGeoNodeID,active,languageID,hasGasDepartment,hasTiresDepartment,hasFoodDepartment,hasHearingDepartment,hasPharmacyDepartment,hasOpticalDepartment,hasBusinessDepartment,hasPhotoCenterDepartment,locationName,isShipToWarehouse,isWarehousePickup,enableShipToHome
0,769,769,769,(310) 242-2777,,3560 W CENTURY BLVD,INGLEWOOD,CA,US,90303-1201,JOHANNA DIAZ,"Jul 8, 1985",(310) 242-2782,5.290323,33.943,-118.334,10175,1,-1,True,True,True,True,True,True,False,False,Inglewood,True,True,False
1,569,569,569,(323) 767-2640,,6333 TELEGRAPH RD,COMMERCE,CA,US,90040-2513,MIGUEL GALLARDO,"Jul 23, 2009",(323) 767-2640,6.407811,33.995,-118.141,10175,1,-1,True,False,True,False,False,False,True,False,Commerce Business Center,True,False,False
2,564,564,564,(310) 220-8826,,12530 PRAIRIE AVE,HAWTHORNE,CA,US,90250-4638,DEWAYNE DAY (MGR),"Mar 20, 2009",(310) 220-8826,6.590284,33.919,-118.343,10175,1,-1,True,False,False,False,False,False,True,False,Hawthorne Business Center,True,False,False
3,671,671,671,(310) 727-0403,,14501 HINDRY AVENUE,HAWTHORNE,CA,US,90250-6748,JAMES STUDEBAKER,"Dec 5, 2000",(310) 727-0418,8.794066,33.899,-118.373,10175,1,-1,True,True,True,True,True,True,False,False,Hawthorne,True,True,False
4,410,410,410,(562) 929-0826,,12324 HOXIE AVE,NORWALK,CA,US,90650-2211,JENN PELL,"Apr 17, 1983",(562) 929-9837,9.283427,33.92,-118.103,10175,1,-1,True,True,True,True,True,True,False,False,Norwalk CA,True,True,False
5,1318,1318,1318,(323) 890-1904,,2000 MARKET PLACE DR,MONTEREY PARK,CA,US,91755-7402,DAMOND CHAPMAN,"Nov 6, 1993",(323) 890-0129,9.390881,34.037,-118.104,10175,1,-1,True,True,True,True,True,True,False,False,Monterey Park,True,True,False
6,428,428,428,(626) 289-7164,,2207 W COMMONWEALTH AVE,ALHAMBRA,CA,US,91803-1302,ANTHONY STOUT,"Aug 24, 1987",(626) 281-8679,9.774295,34.089,-118.148,10175,1,-1,True,True,True,True,True,True,False,False,Alhambra,True,True,False
7,130,130,130,(323) 644-5201,,2901 LOS FELIZ BLVD,LOS ANGELES,CA,US,90039-1502,SCOTT KIRBY,"Nov 15, 1996",(323) 644-5212,10.540749,34.128,-118.264,10175,1,-1,False,True,True,True,True,True,False,False,Los Feliz,True,True,False
8,1050,1050,1050,(562) 295-1508,,340 LAKEWOOD CENTER MALL,LAKEWOOD,CA,US,90712-2409,DAVID DIAZ,"Feb 26, 2009",(562) 295-1508,10.847896,33.85,-118.137,10175,1,-1,True,True,True,True,True,True,False,False,Lakewood,True,True,False
9,479,479,479,(310) 754-2003,,13463 WASHINGTON BLVD,MARINA DEL REY,CA,US,90292-5658,CARL BARRIO,"Jun 24, 1999",(310) 754-2020,11.256795,33.993,-118.446,10175,1,-1,True,True,True,True,True,True,False,False,Culver City,True,True,False


---

## Process

#### State codes

In [14]:
# `state` already holds plain abbreviations (e.g. "CA") rather than nested
# JSON objects, so pd.json_normalize() does not apply here. Copy the column
# and strip a literal "US-" prefix in case any value arrives in that form.
df["state_abbr"] = df["state"].str.replace("US-", "", regex=False)

#### Create a mapping of state abbreviations to full state names using the us library

In [15]:
# The library is imported as `us`; the name `usa` is undefined and raised a
# NameError. Maps each state abbreviation to its full state name.
state_mapping = {state.abbr: state.name for state in us.states.STATES}

#### New column of full state names based on abbreviations

In [16]:
# Look up each abbreviation in state_mapping; abbreviations that are not
# keys of the mapping come back as NaN.
df["state_name"] = df["state_abbr"].map(state_mapping)

---

## Geography

#### Make it a geodataframe

In [10]:
# Copy df so the geometry work below operates on a separate frame
df_geo = df.copy()

In [11]:
# Build one point per row from the coordinate columns (x = longitude,
# y = latitude), then attach the points as the frame's geometry.
# No CRS is assigned at this step.
location_points = gpd.points_from_xy(df_geo["longitude"], df_geo["latitude"])
gdf = gpd.GeoDataFrame(df_geo, geometry=location_points)

In [12]:
# Label the points as EPSG:4326 (WGS 84 longitude/latitude). set_crs only
# declares the CRS, it does not reproject, and by default it returns a new
# frame, so `gdf` itself is left without a CRS.
locations_gdf = gdf.set_crs("EPSG:4326").copy()

---

## Maps

#### US states background

In [13]:
# State outlines from the vega_datasets US TopoJSON, drawn as a light grey
# base layer in the Albers USA projection and titled with the brand name.
state_shapes = alt.topo_feature(data.us_10m.url, feature="states")
state_layer = alt.Chart(state_shapes).mark_geoshape(fill="#e9e9e9", stroke="white")
background = state_layer.properties(
    width=800, height=500, title=f"{place_formal} locations"
).project("albersUsa")

#### Location points map

In [15]:
# One small circle per location row, positioned by its longitude/latitude columns.
location_dots = alt.Chart(gdf).mark_circle(size=10, color=color)
points = location_dots.encode(longitude="longitude:Q", latitude="latitude:Q")

# Layer the dots over the state outlines; the last expression renders the
# map with the border around the view removed.
point_map = alt.layer(background, points)
point_map.configure_view(stroke=None)

#### Location proportional symbols map

In [16]:
# Collapse the locations to one row per state: the mean latitude/longitude
# of that state's locations plus how many locations it has.
state_summary = alt.Chart(gdf).transform_aggregate(
    latitude="mean(latitude)",
    longitude="mean(longitude)",
    count="count()",
    groupby=["state"],
)

# Circle position comes from the averaged coordinates; circle size scales
# with the per-state count. Colour is the fixed marker colour.
symbol_encoding = dict(
    longitude="longitude:Q",
    latitude="latitude:Q",
    size=alt.Size("count:Q", title="Count by state"),
    color=alt.value(color),
    tooltip=["state:N", "count:Q"],
)
symbol_title = f"Number of {place_formal} in US, by average lon/lat of locations"
symbols = (
    state_summary.mark_circle()
    .encode(**symbol_encoding)
    .properties(title=symbol_title)
)

# Layer the symbols over the state outlines; the last expression renders the
# map with the border around the view removed.
symbol_map = alt.layer(background, symbols)
symbol_map.configure_view(stroke=None)

---

## Exports

#### JSON

In [17]:
# Write the locations as a JSON array of row objects, pretty-printed.
# The file name is the place slug (lower-cased, spaces replaced by underscores).
json_path = f"data/processed/{place.lower().replace(' ', '_')}_locations.json"
df.to_json(json_path, orient="records", indent=4)

#### CSV

In [18]:
# Write the locations as CSV without the index column.
# The file name is the place slug (lower-cased, spaces replaced by underscores).
csv_path = f"data/processed/{place.lower().replace(' ', '_')}_locations.csv"
df.to_csv(csv_path, index=False)

#### GeoJSON

In [19]:
# Write the CRS-tagged point layer as GeoJSON.
# The file name is the place slug (lower-cased, spaces replaced by underscores).
geojson_path = f"data/processed/{place.lower().replace(' ', '_')}_locations.geojson"
locations_gdf.to_file(geojson_path, driver="GeoJSON")