# Get AutoZone locations

#### Load Python tools and Jupyter config

In [1]:
import us
import json
import requests
import pandas as pd
import jupyter_black
import altair as alt
import geopandas as gpd
from bs4 import BeautifulSoup
from vega_datasets import data
from tqdm.notebook import tqdm, trange

In [2]:
# Turn on Black auto-formatting for the cells of this notebook.
jupyter_black.load()

# Raise pandas' display limits so wide tables and long strings are not truncated.
pd.set_option("display.max_columns", 100)
pd.set_option("display.max_rows", 1000)
pd.set_option("display.max_colwidth", None)

# Remove Altair's row cap so the full location table can be charted.
alt.data_transformers.disable_max_rows()

DataTransformerRegistry.enable('default')

In [3]:
# Brand settings reused by the charts, the "brand" column and the export paths.
place_formal = "AutoZone"  # display name used in chart titles and the brand column
place = "autozone"  # slug used to build the output file names
color = "#f26100"  # marker color for the maps

---

## Read data

#### Import the country's largest ZIP Codes and ensure they have five digits

In [4]:
# Load the ZIP Code demographics reference file, keep ZIP Codes with more than
# 5,000 residents (most populous first), and left-pad the codes to five
# characters — presumably leading zeros are lost when the codes parse as numbers.
zips = (
    pd.read_json("../../_reference/data/zip_code_demographics_esri.json")
    .loc[lambda frame: frame["population"] > 5000]
    .sort_values("population", ascending=False)
    .reset_index(drop=True)
    .assign(zipcode=lambda frame: frame["zipcode"].astype(str).str.zfill(5))
)

#### Get a list of ZIP Codes

In [5]:
# zips_list = zips["zipcode"].sample(600).to_list()

In [6]:
def _most_populous(state_rows, n=20):
    """Return the ``n`` rows with the largest ``population`` in one state's group."""
    return state_rows.nlargest(n, "population")


# Keep each state's 20 most populous ZIP Codes and flatten the per-state
# results into one frame with a fresh index. With include_groups=False the
# grouping column ("state") is not passed to the helper.
top_zipcodes_by_state = (
    zips.groupby("state")
    .apply(_most_populous, include_groups=False)
    .reset_index(drop=True)
)

In [7]:
# Plain Python list of the selected ZIP Codes, used to drive the search loop.
top_zips = top_zipcodes_by_state["zipcode"].tolist()

# Show how many searches will be made.
len(top_zips)

1020

#### Headers for requests

In [8]:
# Browser-like user agent string sent with every request.
_USER_AGENT = (
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
    "AppleWebKit/537.36 (KHTML, like Gecko) "
    "Chrome/122.0.0.0 Safari/537.36"
)

# Request headers passed to the location API calls.
headers = {
    "authority": "liveapi.yext.com",
    "accept": "application/json, text/plain, */*",
    "user-agent": _USER_AGENT,
}

#### Loop through zips list to retrieve features within a radius

In [9]:
# Geosearch endpoint and the query settings shared by every call.
# NOTE(review): the API key is committed in plain text — consider reading it
# from an environment variable before sharing this notebook.
geosearch_url = "https://liveapi.yext.com/v2/accounts/me/locations/geosearch"
base_params = {
    "api_key": "a427dc0cb3e4f080da0ebe74621b8020",
    "v": "20180731",
    "radius": "100",
    "filters": '[{"countryCode":{"includes":["US","PR","VI"]}}]',
}
# Seconds to wait on each request; without a timeout a stalled call never returns.
request_timeout = 30

# Output column -> key of the matching field in each API location record.
field_map = {
    "store_id": "id",
    "street": "address",
    "city": "city",
    "state": "state",
    "zip": "zip",
    "phone": "phone",
    "timezone": "timezone",
    "url": "websiteUrl",
    "latitude": "displayLat",
    "longitude": "displayLng",
}


def _iter_locations_near(session, zipcode):
    """Yield one flat record per location the geosearch returns around ``zipcode``.

    Pages through the results with ``offset`` until the reported ``count`` is
    reached. Stops early when the payload lacks ``response``/``locations`` or
    when a page comes back empty, so a short or malformed reply cannot make
    the loop spin forever.

    Args:
        session: ``requests.Session`` used to issue the calls.
        zipcode: ZIP Code sent as the ``location`` search parameter.

    Yields:
        dict: one record keyed by the columns of ``field_map``; fields missing
        from the API record are ``None``.

    Raises:
        requests.RequestException: on connection errors or timeouts.
        ValueError: if a reply body is not valid JSON.
    """
    offset = 0
    total_results = float("inf")  # unknown until the first page arrives

    while offset < total_results:
        response = session.get(
            geosearch_url,
            params={**base_params, "location": zipcode, "offset": offset},
            headers=headers,
            timeout=request_timeout,
        )
        body = response.json()

        payload = body.get("response") if isinstance(body, dict) else None
        if not isinstance(payload, dict) or "locations" not in payload:
            return

        locations_data = payload["locations"]
        if not locations_data:
            # An empty page would leave `offset` unchanged and loop forever.
            return

        total_results = payload.get("count", 0)
        for location in locations_data:
            yield {column: location.get(key) for column, key in field_map.items()}

        offset += len(locations_data)


data_list = []
failed_zips = []  # ZIP Codes whose search failed, kept for inspection or retry

# One session for all calls so the HTTPS connection is reused between requests.
with requests.Session() as session:
    for z in tqdm(top_zips):
        try:
            for record in _iter_locations_near(session, z):
                data_list.append(record)
        except (requests.RequestException, ValueError) as err:
            # Keep what was collected so far and move on to the next ZIP Code.
            # Only the error type is printed: the message can contain the full
            # request URL, including the API key.
            failed_zips.append(z)
            print(f"Skipped ZIP {z}: {type(err).__name__}")

if failed_zips:
    print(f"{len(failed_zips)} ZIP Code searches failed; see `failed_zips`.")

# Build the DataFrame after collecting all data. Naming the columns keeps them
# present even when nothing was collected; overlapping search radii return the
# same store repeatedly, so identical rows are dropped.
df = pd.DataFrame(data_list, columns=list(field_map)).drop_duplicates()

  0%|          | 0/1020 [00:00<?, ?it/s]

#### The result:

In [None]:
# Preview the first five scraped locations.
df.head(5)

NameError: name 'df' is not defined

#### How many? 

In [None]:
# Number of rows left after de-duplication.
df.shape[0]

#### Create a mapping of state abbreviations to full state names using the us library

In [11]:
# Abbreviation -> full name lookup. The location search also asks for Puerto
# Rico and the U.S. Virgin Islands, so territories are included, and DC is
# added explicitly because `us.states.STATES` may not contain it. Duplicate
# entries are harmless: they map to the same name.
state_mapping = {
    state.abbr: state.name
    for state in [*us.states.STATES_AND_TERRITORIES, us.states.DC]
}

#### New column of full state names based on abbreviations

In [12]:
# Translate each row's state abbreviation into its full name; abbreviations
# missing from the mapping come out as NaN.
full_state_names = df["state"].map(state_mapping)
df["state_name"] = full_state_names

#### Make sure our brand name gets in the dataframe

In [13]:
# Tag every row with the brand's display name (the same value for all rows).
df["brand"] = place_formal

---

## Geography

#### Make it a geodataframe

In [14]:
# Work on an independent (deep) copy so the geometry column added next does
# not end up on `df`.
df_geo = df.copy(deep=True)

In [15]:
# Build point geometries from the longitude/latitude columns and declare the
# coordinate reference system so `gdf.crs` is not left undefined.
# NOTE(review): assumes the API coordinates are WGS84 degrees (EPSG:4326) — confirm.
gdf = gpd.GeoDataFrame(
    df_geo,
    geometry=gpd.points_from_xy(df_geo.longitude, df_geo.latitude),
    crs="EPSG:4326",
)

---

## Maps

#### US states background

In [16]:
# State outlines taken from the vega_datasets US 10m topology.
state_shapes = alt.topo_feature(data.us_10m.url, feature="states")

# Light-grey basemap that the location layers are drawn on top of.
background = (
    alt.Chart(state_shapes)
    .mark_geoshape(fill="#e9e9e9", stroke="white")
    .project("albersUsa")
    .properties(width=800, height=500, title=f"{place_formal} locations")
)

#### Location points map

In [17]:
# One small brand-colored dot per store, positioned by its coordinates.
store_dots = alt.Chart(gdf).mark_circle(size=5, color=color)
points = store_dots.encode(
    longitude="longitude:Q",
    latitude="latitude:Q",
)

# Overlay the dots on the state basemap and display without the view border.
point_map = alt.layer(background, points)
point_map.configure_view(stroke=None)

#### Location proportional symbols map

In [18]:
# Collapse the stores to one row per state: mean position plus store count.
per_state = alt.Chart(gdf).transform_aggregate(
    latitude="mean(latitude)",
    longitude="mean(longitude)",
    count="count()",
    groupby=["state"],
)

# Draw each state as a circle whose size reflects its store count.
symbols = (
    per_state.mark_circle()
    .encode(
        longitude="longitude:Q",
        latitude="latitude:Q",
        size=alt.Size("count:Q", title="Count by state"),
        color=alt.value(color),
        tooltip=["state:N", "count:Q"],
    )
    .properties(
        title=f"Number of {place_formal} in US, by average lon/lat of locations"
    )
)

# Overlay the circles on the state basemap and display without the view border.
symbol_map = alt.layer(background, symbols)
symbol_map.configure_view(stroke=None)

---

## Exports

#### JSON

In [19]:
# Write the locations as a pretty-printed JSON array, one object per store.
json_path = f"data/processed/{place.lower().replace(' ', '_')}_locations.json"
df.to_json(json_path, orient="records", indent=4)

#### CSV

In [20]:
# Write the locations as CSV, leaving out the DataFrame index.
csv_path = f"data/processed/{place.lower().replace(' ', '_')}_locations.csv"
df.to_csv(csv_path, index=False)

#### GeoJSON

In [21]:
# Write the point layer as GeoJSON.
geojson_path = f"data/processed/{place.lower().replace(' ', '_')}_locations.geojson"
gdf.to_file(geojson_path, driver="GeoJSON")