# Get CVS locations

#### Load Python tools and Jupyter config

In [1]:
import us 
import json
import requests
import pandas as pd
import jupyter_black
import altair as alt
import geopandas as gpd
from vega_datasets import data
from tqdm.notebook import tqdm, trange

In [2]:
# Notebook-wide setup: enable the jupyter_black formatter, raise the pandas
# display limits, and remove Altair's row cap for charted data.
jupyter_black.load()
pd.set_option("display.max_columns", 100)
pd.set_option("display.max_rows", 1000)
alt.data_transformers.disable_max_rows()

DataTransformerRegistry.enable('default')

In [3]:
# Brand-specific settings reused by the chart and export cells below.
place, place_formal = "cvs", "CVS"  # file-name slug and display name
color = "#cc0000"  # marker colour for the maps

# Reference coordinates, stored as strings.
# NOTE(review): no cell visible here reads these — presumably a map anchor
# point; confirm before removing.
latitude, longitude = "39.106667", "-94.676392"

## Scrape

#### Headers for the request

In [4]:
# Request headers sent with every store-locator call.
# NOTE(review): the API key is committed in plain text — presumably the public
# key used by the CVS web client; confirm it is safe to publish.
_user_agent = (
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
    "AppleWebKit/537.36 (KHTML, like Gecko) "
    "Chrome/121.0.0.0 Safari/537.36"
)

headers = {
    "authority": "www.cvs.com",
    "user-agent": _user_agent,
    "x-api-key": "k6DnPo1puMOQmAhSCiRGYvzMYOSFu903",
}

#### Import the country's largest ZIP Codes and ensure they have five digits

In [5]:
# Load the ZIP Code demographics, keep rows with a population above 5,000,
# and rank them from most to least populous with a fresh 0..n-1 index.
zips = (
    pd.read_json("../../_reference/data/zip_code_demographics_esri.json")
    .loc[lambda frame: frame["population"] > 5000]
    .sort_values(by="population", ascending=False)
    .reset_index(drop=True)
)

# Cast to text and left-pad with zeros so every ZIP Code is five characters.
zip_text = zips["zipcode"].astype(str)
zips["zipcode"] = zip_text.str.zfill(5)

#### Get a list of ZIP Codes

In [6]:
# Sample both ends of the population ranking: the 2,000 most populous ZIP
# Codes plus the 2,000 least populous of those that passed the filter.
top = zips["zipcode"].head(2000).to_list()
bottom = zips["zipcode"].tail(2000).to_list()

# With fewer than 4,000 rows the two slices overlap; dict.fromkeys drops the
# repeats (keeping first-seen order) so no ZIP Code is requested twice.
zips_list = list(dict.fromkeys(top + bottom))

In [7]:
# Query the store locator once per ZIP Code and collect the "address" and
# "storeInfo" columns from every usable response.
dfs = []

# Everything except the search text is the same for every request.
base_params = {
    "searchBy": "USER-TEXT",
    "searchRadiusInMiles": "1000",
    "maxItemsInResult": "1000",
    "resultsPerPage": "1000",
}

# A shared session reuses the underlying connection across the many calls.
with requests.Session() as session:
    for zip_code in tqdm(zips_list):
        params = {**base_params, "searchText": f"{zip_code}"}

        try:
            response = session.get(
                "https://www.cvs.com/api/locator/v2/stores/search",
                params=params,
                headers=headers,
                # Without a timeout one stalled connection hangs the loop.
                timeout=30,
            )
        except requests.exceptions.RequestException as error:
            # Network failure or timeout: skip this ZIP Code and keep the
            # results gathered so far instead of aborting the whole run.
            print(f"Request failed for zip code {zip_code}: {error}")
            continue

        try:
            store_list = response.json()["storeList"]
            stores = pd.DataFrame(store_list)[["address", "storeInfo"]]
        except KeyError:
            # Raised when the payload has no "storeList" key or the expected
            # columns are absent from the resulting frame.
            print(f"No storeList found for zip code {zip_code}")
        except (ValueError, TypeError):
            # Body was not valid JSON, or not the expected JSON object.
            print(f"Unusable response for zip code {zip_code}")
        else:
            dfs.append(stores)

# Concatenate all dataframes in the list
final_df = pd.concat(dfs, ignore_index=True)

  0%|          | 0/4000 [00:00<?, ?it/s]

No storeList found for zip code 87557
No storeList found for zip code 23924
No storeList found for zip code 48893
No storeList found for zip code 76579
No storeList found for zip code 74120
No storeList found for zip code 31301


In [8]:
# Stack the per-ZIP result frames into one table with a fresh 0..n-1 index.
src_df = pd.concat(dfs, ignore_index=True)

#### Deal with nested columns

In [10]:
# Expand each store's "address" value into flat columns on src_df.
# NOTE(review): the new names are paired with the normalized columns by
# position, so this presumably relies on the payload's key order — confirm
# against a live response.
address_columns = [
    "street",
    "intersection",
    "city",
    "state",
    "zip",
    "country",
    "areaCode",
]
src_df[address_columns] = pd.json_normalize(src_df["address"])

In [11]:
# Expand each store's "storeInfo" value into flat columns on src_df.
# NOTE(review): as with the address cell, names are matched to the normalized
# columns by position — presumably mirroring the payload's key order; confirm.
store_info_columns = [
    "storeId",
    "storeType",
    "latitude",
    "longitude",
    "faxNumber",
    "phoneNumbers",
    "identifier",
    "distance",
]
store_info = pd.json_normalize(src_df["storeInfo"])
src_df[store_info_columns] = store_info

In [12]:
def _retail_phone(phone_numbers):
    """Return the first "retail" number in one store's phone list, or None.

    Anything that is not a list (for example a missing value) yields None.
    """
    if not isinstance(phone_numbers, list):
        return None
    for entry in phone_numbers:
        if isinstance(entry, dict) and entry.get("retail") is not None:
            return entry["retail"]
    return None


# Produce exactly one value per row. Flattening every entry of every list
# yields a different number of values than rows whenever a store has zero or
# several phone entries, which breaks (or misaligns) the column assignment.
src_df["phone"] = [_retail_phone(numbers) for numbers in src_df["phoneNumbers"]]

In [13]:
# Normalize the street and city text to title case.
for text_column in ("street", "city"):
    src_df[text_column] = src_df[text_column].str.title()

In [14]:
# Keep only the fields used downstream, rename the store id to snake_case,
# and drop exact duplicate rows (presumably the same store returned by more
# than one ZIP Code search) before renumbering the index.
output_columns = [
    "storeId",
    "street",
    "city",
    "state",
    "zip",
    "latitude",
    "longitude",
    "phone",
]
selected = src_df[output_columns].rename(columns={"storeId": "store_id"})
df = selected.drop_duplicates().reset_index(drop=True).copy()

In [15]:
# Number of rows left after de-duplication.
df.shape[0]

9333

In [16]:
# Preview the first five rows.
df.head(5)

Unnamed: 0,store_id,street,city,state,zip,latitude,longitude,phone
0,2249,10522 Spring Green Blvd.,Katy,TX,77494,29.7142,-95.8134,2813923352.0
1,10177,2720 Fm 1463,Katy,TX,77494,29.752,-95.852,2817699255.0
2,17027,23710 Westheimer Pkwy,Katy,TX,77494,29.7364,-95.7759,
3,6747,602 W. Grand Pkwy South,Katy,TX,77494,29.7716,-95.7784,2813923808.0
4,10544,5902 Fm 1463 Rd.,Katy,TX,77494,29.7066,-95.8465,2812327014.0


#### Create a mapping of state abbreviations to full state names using the us library

In [17]:
# Map postal abbreviations to full names. us.states.STATES leaves out the
# territories (and, by default in recent releases of `us`, the District of
# Columbia), so stores there would end up without a full name; include them
# explicitly.
state_mapping = {
    state.abbr: state.name
    for state in [*us.states.STATES_AND_TERRITORIES, us.states.DC]
}

#### New column of full state names based on abbreviations

In [18]:
# Look up each store's full state name; abbreviations absent from the
# mapping come back as missing values.
full_state_names = df["state"].map(state_mapping)
df["state_name"] = full_state_names

#### Make sure our brand name gets in the dataframe

In [19]:
# Tag every row with the brand's display name.
df = df.assign(brand=place_formal)

---

## Geography

#### Make it a geodataframe

In [20]:
# Separate copy of the table for the geospatial steps below.
df_geo = df.copy(deep=True)

In [21]:
# Build point geometries from the longitude (x) and latitude (y) columns.
store_points = gpd.points_from_xy(df_geo.longitude, df_geo.latitude)

# Declare the coordinate reference system so exports and any later
# reprojection know these are longitude/latitude degrees.
# NOTE(review): assumes the API returns WGS 84 coordinates — confirm.
gdf = gpd.GeoDataFrame(df_geo, geometry=store_points, crs="EPSG:4326")

---

## Maps

#### US states background

In [22]:
# Light-grey US state outlines used as the base layer of both maps.
us_states = alt.topo_feature(data.us_10m.url, feature="states")
background = (
    alt.Chart(us_states)
    .mark_geoshape(fill="#e9e9e9", stroke="white")
    .project("albersUsa")
    .properties(width=800, height=500, title=f"{place_formal} locations")
)

#### Location points map

In [23]:
# One small dot per store, positioned by its longitude/latitude.
store_dots = alt.Chart(gdf).mark_circle(size=5, color=color)
points = store_dots.encode(longitude="longitude:Q", latitude="latitude:Q")

# Draw the dots over the state outlines and hide the view border.
point_map = alt.layer(background, points)
point_map.configure_view(stroke=None)

#### Location proportional symbols map

In [24]:
# Aggregate per state: mean coordinate of its stores plus the store count.
state_totals = alt.Chart(gdf).transform_aggregate(
    latitude="mean(latitude)",
    longitude="mean(longitude)",
    count="count()",
    groupby=["state"],
)

# One circle per state, placed at the mean coordinate and sized by the count.
symbols = (
    state_totals.mark_circle()
    .encode(
        longitude="longitude:Q",
        latitude="latitude:Q",
        size=alt.Size("count:Q", title="Count by state"),
        color=alt.value(color),
        tooltip=["state:N", "count:Q"],
    )
    .properties(
        title=f"Number of {place_formal} in US, by average lon/lat of locations"
    )
)

# Draw the circles over the state outlines and hide the view border.
symbol_map = alt.layer(background, symbols)
symbol_map.configure_view(stroke=None)

---

## Exports

#### JSON

In [25]:
# Write the table as an indented JSON array with one object per store.
json_path = f"data/processed/{place.lower().replace(' ', '_')}_locations.json"
df.to_json(json_path, orient="records", indent=4)

#### CSV

In [26]:
# Write the table as CSV without the index column.
csv_path = f"data/processed/{place.lower().replace(' ', '_')}_locations.csv"
df.to_csv(csv_path, index=False)

#### GeoJSON

In [27]:
# Write the point layer as GeoJSON.
geojson_path = f"data/processed/{place.lower().replace(' ', '_')}_locations.geojson"
gdf.to_file(geojson_path, driver="GeoJSON")