# Get Kohl's locations

#### Load Python tools and Jupyter config

In [25]:
import us 
import json
import requests
import pandas as pd
import jupyter_black
import altair as alt
import geopandas as gpd
from bs4 import BeautifulSoup
from vega_datasets import data
from tqdm.notebook import tqdm, trange

In [26]:
jupyter_black.load()
pd.options.display.max_columns = 100
pd.options.display.max_rows = 1000
pd.options.display.max_colwidth = None

In [27]:
place = "kohls"
place_formal = "Kohl's"
color = "#bd0f1b"
latitude = "39.106667"
longitude = "-94.676392"

## Scrape

#### Headers for our request

In [56]:
headers = {
    "authority": "maps.kohlslocal.com",
    "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36",
}

#### Import the county's largest ZIP Codes and ensure they have five digits

In [57]:
zips = (
    pd.read_json("../../_reference/data/zip_code_demographics_esri.json")
    .query("population > 25000")
    .sort_values("population", ascending=False)
    .reset_index(drop=True)
)
zips["zipcode"] = zips["zipcode"].astype(str).str.zfill(5)

In [58]:
zips.head()

Unnamed: 0,zipcode,name,state_name,state,population,population_sqmi,households,avg_hh_size,med_hh_income,avg_hh_income,per_cap_income,diversity_index,area_meters,latitude,longitude
0,77494,Katy,Texas,TX,163194.0,4061.9,49704.0,3.28,134912.0,172134.0,52497.0,74.9,138683779.5,29.744751,-95.826242
1,77449,Katy,Texas,TX,134540.0,4989.9,38237.0,3.52,78096.0,95889.0,27292.0,84.7,92847306.5,29.836113,-95.737685
2,75070,Mckinney,Texas,TX,123055.0,4960.6,42898.0,2.87,108330.0,135167.0,47167.0,61.7,91997310.5,33.172003,-96.69777
3,11368,Corona,New York,NY,121409.0,58176.7,30724.0,3.92,53498.0,73033.0,18735.0,92.8,9379884.5,40.749593,-73.855624
4,77084,Houston,Texas,TX,121112.0,3000.4,39290.0,3.08,76385.0,101532.0,32945.0,85.1,139323192.0,29.826236,-95.648321


In [None]:
len(zips)

#### Loop through sample of ZIP Codes to request stores within a 100 miles

In [96]:
response_list = []

sample = zips["zipcode"].sample(3).to_list()

for s in tqdm(sample):
    params = {
        "template": "search",
        "level": "search",
        "search": f"{s}",
        "strict": 0,
    }
    
    response = requests.get(
        "https://maps.kohlslocal.com/api/getAsyncLocations",
        params=params,
        headers=headers,
    )
    
    datas = response.json()  # Extract JSON data from the response
    for r in datas:
        location = json.loads(
            r["info"]
            .strip()
            .replace("    ", "")
            .replace('<div class="tlsmap_popup">', "")
            .replace("</div>", "")
        )
        response_list.append(location)

  0%|          | 0/3 [00:00<?, ?it/s]

TypeError: byte indices must be integers or slices, not str

In [93]:
response.json()

{'lat': None,
 'lng': None,
 'originLat': '42.6875214',
 'originLng': '-71.1626756',
 'markers': [{'lat': '42.6839105',
   'lng': '-71.1353823',
   'tooltip': '',
   'info': '<div class="tlsmap_popup">{    "fid":"535",    "lid":"249414",    "address_1": "350 Winthrop Ave",    "address_2": "",    "city": "North Andover",    "region": "MA",    "post_code": "01845",    "location_name": "Kohl\'s North Andover",    "url": "/stores/ma/northandover-535.shtml",    "url_native": "https://stores.kohlslocal.com/ma/northandover/535.html",    "country":"US",    "lat": "42.6839105",    "lng": "-71.1353823"}</div>',
   'iconURL': '//assets.kohlslocal.com/images/mapIconSm.gif',
   'clickable': True,
   'locationId': 249414,
   'specialties': [{'name': 'Kids Room Shop',
     'order': '35',
     'group': 'Store Badges',
     'spid': '4602',
     'value': 1}]},
  {'lat': '42.7676375',
   'lng': '-71.2218975',
   'tooltip': '',
   'info': '<div class="tlsmap_popup">{    "fid":"538",    "lid":"249417",    

In [89]:
response

[{'lat': '44.4835377',
  'lng': '-88.064175',
  'tooltip': '',
  'info': '<div class="tlsmap_popup">{    "fid":"99",    "lid":"249181",    "address_1": "500 Bay Park Sq",    "address_2": "",    "city": "Ashwaubenon",    "region": "WI",    "post_code": "54304",    "location_name": "Kohl\'s Green Bay West",    "url": "/stores/wi/ashwaubenon-99.shtml",    "url_native": "https://stores.kohlslocal.com/wi/ashwaubenon/99.html",    "country":"US",    "lat": "44.4835377",    "lng": "-88.064175"}</div>',
  'iconURL': '//assets.kohlslocal.com/images/mapIconSm.gif',
  'clickable': True,
  'locationId': 249181,
  'specialties': [{'name': 'Kids Room Shop',
    'order': '35',
    'group': 'Store Badges',
    'spid': '4602',
    'value': 1}]},
 {'lat': '44.485195',
  'lng': '-87.9667112',
  'tooltip': '',
  'info': '<div class="tlsmap_popup">{    "fid":"111",    "lid":"249191",    "address_1": "2300 E Mason St",    "address_2": "",    "city": "Green Bay",    "region": "WI",    "post_code": "54302",   

In [87]:
location

{'fid': '685',
 'lid': '248737',
 'address_1': '5850 W Arizona Pavilions Dr',
 'address_2': '',
 'city': 'Tucson',
 'region': 'AZ',
 'post_code': '85743',
 'location_name': "Kohl's Marana",
 'url': '/stores/az/tucson-685.shtml',
 'url_native': 'https://stores.kohlslocal.com/az/tucson/685.html',
 'country': 'US',
 'lat': '32.3546425',
 'lng': '-111.0900053'}

In [82]:
src = pd.DataFrame(response_list).drop_duplicates(subset="fid")

In [83]:
src

Unnamed: 0,fid,lid,address_1,address_2,city,region,post_code,location_name,url,url_native,country,lat,lng
0,612,248694,185 Shenstone Blvd,,Garner,NC,27529,Kohl's Garner,/stores/nc/garner-612.shtml,https://stores.kohlslocal.com/nc/garner/612.html,US,35.6934547,-78.5800483
1,1307,249033,302 Hinton Oaks Blvd,,Knightdale,NC,27545,Kohl's Knightdale,/stores/nc/knightdale-1307.shtml,https://stores.kohlslocal.com/nc/knightdale/1307.html,US,35.7995452,-78.5123032
2,1313,249036,640 Lakestone Commons Ave,,Fuquay Varina,NC,27526,Kohl's Fuquay,/stores/nc/fuquayvarina-1313.shtml,https://stores.kohlslocal.com/nc/fuquayvarina/1313.html,US,35.5936352,-78.7637373
3,465,248623,2350 Walnut St,,Cary,NC,27518,Kohl's Cary,/stores/nc/cary-465.shtml,https://stores.kohlslocal.com/nc/cary/465.html,US,35.7541232,-78.7456724
4,466,248624,9700 Falls of Neuse Road,,Raleigh,NC,27615,Kohl's Falls Pointe,/stores/nc/raleigh-466.shtml,https://stores.kohlslocal.com/nc/raleigh/466.html,US,35.9046494,-78.6009464
5,721,248763,1301 Beaver Creek Commons Dr,,Apex,NC,27502,Kohl's Apex,/stores/nc/apex-721.shtml,https://stores.kohlslocal.com/nc/apex/721.html,US,35.7469967,-78.8815724
6,1322,249042,12620 Capital Blvd,,Wake Forest,NC,27587,Kohl's Wake Forest,/stores/nc/wakeforest-1322.shtml,https://stores.kohlslocal.com/nc/wakeforest/1322.html,US,35.9867994,-78.5304132
7,706,248751,7822 Alexander Promenade Pl,,Raleigh,NC,27617,Kohl's Brier Creek,/stores/nc/raleigh-706.shtml,https://stores.kohlslocal.com/nc/raleigh/706.html,US,35.9135403,-78.7795619
8,183,248498,255 Glensford Dr,,Fayetteville,NC,28314,Kohl's Fayetteville,/stores/nc/fayetteville-183.shtml,https://stores.kohlslocal.com/nc/fayetteville/183.html,US,35.0642815,-78.9572836
9,720,248762,5241 McFarland Rd,,Durham,NC,27707,Kohl's Durham,/stores/nc/durham-720.shtml,https://stores.kohlslocal.com/nc/durham/720.html,US,35.949026,-78.9912119


#### Create a mapping of state abbreviations to full state names using the us library

In [15]:
state_mapping = {state.abbr: state.name for state in us.states.STATES}

#### New column of full state names based on abbreviations

In [None]:
df["state_name"] = df["state"].map(state_mapping)

#### Make sure our brand name gets in the dataframe

In [None]:
df['brand'] = place_formal

---

## Geography

#### Make it a geodataframe

In [10]:
df_geo = df.copy()

In [11]:
gdf = gpd.GeoDataFrame(
    df_geo, geometry=gpd.points_from_xy(df_geo.longitude, df_geo.latitude)
)

---

## Maps

#### US states background

In [13]:
background = (
    alt.Chart(alt.topo_feature(data.us_10m.url, feature="states"))
    .mark_geoshape(fill="#e9e9e9", stroke="white")
    .properties(width=800, height=500, title=f"{place_formal} locations")
    .project("albersUsa")
)

#### Location points map

In [15]:
points = (
    alt.Chart(gdf)
    .mark_circle(size=5, color=color)
    .encode(
        longitude="longitude:Q",
        latitude="latitude:Q",
    )
)

point_map = background + points
point_map.configure_view(stroke=None)

#### Location proportional symbols map

In [16]:
symbols = (
    alt.Chart(gdf)
    .transform_aggregate(
        latitude="mean(latitude)",
        longitude="mean(longitude)",
        count="count()",
        groupby=["state"],
    )
    .mark_circle()
    .encode(
        longitude="longitude:Q",
        latitude="latitude:Q",
        size=alt.Size("count:Q", title="Count by state"),
        color=alt.value(color),
        tooltip=["state:N", "count:Q"],
    )
    .properties(
        title=f"Number of {place_formal} in US, by average lon/lat of locations"
    )
)

symbol_map = background + symbols
symbol_map.configure_view(stroke=None)

---

## Exports

#### JSON

In [17]:
df.to_json(
    f"data/processed/{place.lower().replace(' ', '_')}_locations.json",
    indent=4,
    orient="records",
)

#### CSV

In [18]:
df.to_csv(
    f"data/processed/{place.lower().replace(' ', '_')}_locations.csv", index=False
)

#### GeoJSON

In [19]:
gdf.to_file(
    f"data/processed/{place.lower().replace(' ', '_')}_locations.geojson",
    driver="GeoJSON",
)