# Get Abercrombie & Fitch locations

#### Load Python tools and Jupyter config

In [1]:
%load_ext lab_black

In [2]:
import json
import requests
import pandas as pd
import geopandas as gpd
from bs4 import BeautifulSoup
from tqdm.notebook import tqdm, trange
import re
import numpy as np

In [3]:
pd.options.display.max_rows = 1000
pd.options.display.max_columns = 1000
pd.options.display.max_colwidth = None

## Read data

#### Snag a list of ZIP Codes

In [7]:
zips = gpd.read_file("../_reference/data/zips_centroids.geojson")

In [9]:
zips["lon"] = zips.centroid.map(lambda p: p.x)
zips["lat"] = zips.centroid.map(lambda p: p.y)


  zips["lon"] = zips.centroid.map(lambda p: p.x)


AttributeError: 'NoneType' object has no attribute 'x'

In [None]:
zips.columns = zips.columns.str.lower()

In [None]:
zips_slim = (
    (
        zips[["zip", "lon", "lat", "po_name", "state", "pop2012"]]
        .drop_duplicates()
        .copy()
    )
    .sort_values("pop2012", ascending=False)
    .reset_index(drop=True)
)

In [None]:
zips_slim

#### Loop through the list to set a search radius in each state (takes ~20 mins)

In [None]:
headers = {
    "authority": "www.abercrombie.com",
    "accept": "application/json, text/javascript, */*; q=0.01",
    "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
}

In [None]:
data_list = []

for index, row in zips_slim.head(500).iterrows():
    lat_value = round(row["lat"], 5)
    long_value = round(row["lon"], 5)

    params = {
        "country": "US",
        "latitude": f"{lat_value}",
        "longitude": f"{long_value}",
        "radius": "100",
        "radiusUOM": "SMI",
    }

    response = requests.get(
        "https://www.abercrombie.com/api/ecomm/a-us/storelocator/search",
        params=params,
        headers=headers,
    )

    responses = response.json()["physicalStores"]
    for r in responses:
        responses_dict = {
            "storeNumber": r["storeNumber"],
            "name": r["name"],
            "address": r["addressLine"][0],
            "city": r["city"],
            "state": r["stateOrProvinceName"],
            "zip": r["postalCode"],
            "phone": r["telephone"],
            "latitude": r["latitude"],
            "longitude": r["longitude"],
            "etc": r["physicalStoreAttribute"],
        }
        data_list.append(responses_dict)

In [None]:
src = pd.DataFrame(data_list)

#### Explode the nested list in the `etc` attribute column

In [None]:
data = src["etc"].apply(lambda x: [x])
flat_df = pd.json_normalize(data.explode(), sep="_")
result_df = pd.concat([src, flat_df], axis=1).drop(columns="etc")

#### Extract a couple values from it

In [None]:
result_df["brand"] = pd.json_normalize(result_df[7])["value"]
result_df["open_date"] = pd.to_datetime(pd.json_normalize(result_df[10])["value"])

#### Clean dataframe

In [None]:
df = result_df[
    [
        "storeNumber",
        "name",
        "address",
        "city",
        "state",
        "zip",
        "phone",
        "latitude",
        "longitude",
        "brand",
        "open_date",
    ]
].copy()

In [None]:
df.head()

#### There are many dupes

In [None]:
df = df.drop_duplicates(subset="storeNumber")

In [None]:
len(df)

#### Sometimes there are differently branded stores at same location (KID vs. ACF), [like these two](https://www.abercrombie.com/shop/StoreLocator?storeId=10051&catalogId=10901&langId=-1), not dupes

In [None]:
df[df["address"] == "6170 W. Grand Avenue"]

---

## Aggregate

#### Counts by city

In [None]:
df_grouped = (
    df.groupby(["city", "state"])
    .agg({"storeNumber": "count"})
    .reset_index()
    .rename(columns={"storeNumber": "count"})
    .sort_values("count", ascending=False)
).reset_index(drop=True)

In [None]:
df_grouped.head(20)

---

## Geography

#### Make a geodataframe from lon/lat

In [None]:
df_geo = df.copy()

In [None]:
gdf = gpd.GeoDataFrame(
    df_geo, geometry=gpd.points_from_xy(df_geo.longitude, df_geo.latitude)
).set_crs("epsg:4326")

---

## Maps

#### US states background

In [None]:
background = (
    alt.Chart(alt.topo_feature(data.us_10m.url, feature="states"))
    .mark_geoshape(fill="#e9e9e9", stroke="white")
    .properties(width=800, height=500, title=f"{place} locations")
    .project("albersUsa")
)

#### Location points map

In [None]:
points = (
    alt.Chart(df_geo)
    .mark_circle(size=10, color="red")
    .encode(
        longitude="longitude:Q",
        latitude="latitude:Q",
    )
)

point_map = background + points
point_map.configure_view(stroke=None)

#### Location proportional symbols map

In [None]:
symbols = (
    alt.Chart(gdf)
    .transform_aggregate(
        latitude="mean(latitude)",
        longitude="mean(longitude)",
        count="count()",
        groupby=["state_abbr"],
    )
    .mark_circle()
    .encode(
        longitude="longitude:Q",
        latitude="latitude:Q",
        size=alt.Size("count:Q", title="Count by state"),
        color=alt.value("red"),
        tooltip=["state:N", "count:Q"],
    )
    .properties(title=f"Number of {place} in US, by average lon/lat of locations")
)

symbol_map = background + symbols
symbol_map.configure_view(stroke=None)

---

## Exports

#### CSV

In [None]:
df.to_csv("data/processed/abercrombie_fitch_locations.csv", index=False)

#### JSON

In [None]:
df.to_json(
    "data/processed/abercrombie_fitch_locations.json", indent=4, orient="records"
)

#### GeoJSON

In [None]:
gdf.to_file("data/processed/abercrombie_fitch_locations.geojson", driver="GeoJSON")