# Get Aldi locations

#### Load Python tools and Jupyter config

In [1]:
%load_ext lab_black

In [2]:
import json
import requests
import pandas as pd
import altair as alt
import geopandas as gpd
from vega_datasets import data
from tqdm.notebook import tqdm, trange

## Read data

#### All the locations

In [3]:
# Retailer name: interpolated into the chart titles and, lowercased, into the
# export filenames further down.
place = "Aldi"

In [4]:
# Request headers sent with every store-locator call: ask for JSON and present
# a desktop-browser referer / user agent.
headers = {
    "authority": "stores.aldi.us",
    "accept": "application/json",
    "referer": "https://stores.aldi.us/stores?l=en",
    "user-agent": (
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/121.0.0.0 Safari/537.36"
    ),
}

#### Loop over results to grab features

In [5]:
# Page through the store locator and flatten each store's profile into a record.
PAGE_SIZE = 50
# Exclusive upper bound on the offsets requested; it must be at least the number
# of stores the locator returns, or the later pages are never fetched.
MAX_OFFSET = 2450
REQUEST_TIMEOUT = 30  # seconds; keeps a stalled connection from hanging the cell


def _store_record(entity):
    """Flatten one locator entity into the flat dict used for a dataframe row.

    Parameters
    ----------
    entity : dict
        One item of the response's ``["response"]["entities"]`` list.

    Returns
    -------
    dict
        Store fields keyed by output column name. ``opened`` and ``tags`` are
        ``None`` when the profile has no ``c_openingDate`` /
        ``c_locatorFilterUS`` entry.

    Raises
    ------
    KeyError
        If any of the other profile fields is missing.
    """
    profile = entity["profile"]
    address = profile["address"]
    return {
        "store_id": profile["meta"]["id"],
        "store_no": profile["c_storeNum"],
        "longitude": profile["c_longitude"],
        "latitude": profile["c_latitude"],
        "address": address["line1"],
        "city": address["city"],
        "state": address["region"],
        "zipcode": address["postalCode"],
        # Optional fields are looked up per store, so a missing value is recorded
        # as None and each record reflects only its own profile.
        "opened": profile.get("c_openingDate"),
        "timezone": profile["timezone"],
        "tags": profile.get("c_locatorFilterUS"),
        "url": profile["websiteUrl"],
    }


store_list = []

for offset in tqdm(range(0, MAX_OFFSET, PAGE_SIZE)):
    params = {
        "q": "39.099724,-94.578331",
        "r": "2000",
        "qp": "Kansas City, MO",
        "l": "en",
        "per": f"{PAGE_SIZE}",
        "offset": f"{offset}",
    }

    response = requests.get(
        "https://stores.aldi.us/stores",
        params=params,
        headers=headers,
        timeout=REQUEST_TIMEOUT,
    )
    # Surface HTTP errors here instead of as a JSON decode / KeyError below.
    response.raise_for_status()

    store_list.extend(
        _store_record(entity) for entity in response.json()["response"]["entities"]
    )

  0%|          | 0/49 [00:00<?, ?it/s]

#### How many locations did we get?

In [6]:
# Report how many store records the paging loop collected.
n_stores = len(store_list)
print(f"Successfully downloaded data for {n_stores} stores!")

Successfully downloaded data for 2357 stores!


#### Get into a dataframe

In [7]:
# One row per collected store record; the columns are the record dict keys.
df = pd.DataFrame(store_list)

#### The result

In [8]:
# Preview the first rows to sanity-check the parsed fields.
df.head()

Unnamed: 0,store_id,store_no,longitude,latitude,address,city,state,zipcode,opened,timezone,tags,url
0,U1167,31,-94.56309735774994,39.10418492640982,721 Paseo Blvd.,Kansas City,MO,64106,7/02/2007,America/Chicago,"[Parking Lot, Delivery]",https://stores.aldi.us/mo/kansas-city/721-pase...
1,U1177,92,-94.55421388149269,39.05629658551841,3830 Prospect Ave.,Kansas City,MO,64128,3/17/2021,America/Chicago,"[Parking Lot, Delivery]",https://stores.aldi.us/mo/kansas-city/3830-pro...
2,U0967,91,-94.5078248,39.1054214,6300 Independence Ave,Kansas City,MO,64125,11/15/2004,America/Chicago,"[Parking Lot, Beer, Wine, Delivery]",https://stores.aldi.us/mo/kansas-city/6300-ind...
3,U2829,59,-94.63905441100415,39.04054501421429,4801 Roe Blvd,Roeland Park,KS,66205,7/29/1998,America/Chicago,"[Parking Lot, Beer, Curbside, Delivery]",https://stores.aldi.us/ks/roeland-park/4801-ro...
4,U4276,108,-94.57422495978916,39.18416926246301,4851 N. Oak Trafficway,Kansas City,MO,64118,11/15/2023,America/Chicago,"[Parking Lot, Beer, Wine, Curbside, Delivery]",https://stores.aldi.us/mo/kansas-city/4851-n-o...


---

## Geography

#### Make a geodataframe from lon/lat

In [9]:
# Separate copy of the tabular data for the geographic steps; `df` itself is
# what the CSV and JSON exports write.
df_geo = df.copy()

In [10]:
# Turn each store's lon/lat pair into a point geometry, then declare the
# coordinates as EPSG:4326.
store_points = gpd.points_from_xy(df_geo["longitude"], df_geo["latitude"])
gdf = gpd.GeoDataFrame(df_geo, geometry=store_points).set_crs("epsg:4326")

---

## Maps

#### US states background

In [11]:
# Light-grey US state outlines (Albers USA projection) used as the base layer
# underneath both maps.
us_states = alt.topo_feature(data.us_10m.url, feature="states")

background = (
    alt.Chart(us_states)
    .mark_geoshape(stroke="white", fill="#e9e9e9")
    .project("albersUsa")
    .properties(title=f"{place} locations", width=800, height=500)
)

#### Location points map

In [12]:
# One small red dot per store, positioned from its lon/lat columns.
points = alt.Chart(gdf).mark_circle(color="red", size=10).encode(
    latitude="latitude:Q",
    longitude="longitude:Q",
)

# Overlay the dots on the state outlines; the borderless chart is the cell output.
point_map = background + points
point_map.configure_view(stroke=None)

#### Location proportional symbols map

In [13]:
# One bubble per state: placed at the mean lon/lat of that state's stores and
# sized by how many stores the state has.
bubble_size = alt.Size("count:Q", title="Count by state")
symbols_title = f"Number of {place} in US, by average lon/lat of locations"

symbols = (
    alt.Chart(gdf)
    .transform_aggregate(
        groupby=["state"],
        latitude="mean(latitude)",
        longitude="mean(longitude)",
        count="count()",
    )
    .mark_circle()
    .encode(
        latitude="latitude:Q",
        longitude="longitude:Q",
        color=alt.value("red"),
        size=bubble_size,
        tooltip=["state:N", "count:Q"],
    )
    .properties(title=symbols_title)
)

# Overlay the bubbles on the state outlines; the borderless chart is the cell output.
symbol_map = background + symbols
symbol_map.configure_view(stroke=None)

---

## Exports

#### CSV

In [14]:
# Write the flat store table as CSV, without the positional index column.
csv_path = f"data/processed/{place.lower()}_locations.csv"
df.to_csv(csv_path, index=False)

#### JSON

In [15]:
# Write the same table as a pretty-printed JSON array with one object per store.
json_path = f"data/processed/{place.lower()}_locations.json"
df.to_json(json_path, orient="records", indent=4)

#### GeoJSON

In [16]:
# Export the point layer as GeoJSON with every column except `tags`.
# NOTE(review): presumably `tags` is dropped because its list values do not
# serialize through the GeoJSON driver — confirm.
col = "tags"
keep_columns = gdf.columns != col
geojson_path = f"data/processed/{place.lower()}_locations.geojson"
gdf.loc[:, keep_columns].to_file(geojson_path, driver="GeoJSON")