# Get 7-Eleven locations

#### Load Python tools and Jupyter config

In [1]:
import json
import os
import time

import altair as alt
import geopandas as gpd
import jupyter_black
import pandas as pd
import requests
import us
from bs4 import BeautifulSoup
from tqdm.notebook import tqdm, trange
from vega_datasets import data

In [2]:
# Format code cells with Black as they are executed.
jupyter_black.load()

# Widen pandas' display limits so scraped records can be inspected in full.
pd.set_option("display.max_columns", 100)
pd.set_option("display.max_rows", 1000)
pd.set_option("display.max_colwidth", None)

# Remove Altair's row cap so the full set of store locations can be charted.
alt.data_transformers.disable_max_rows()

DataTransformerRegistry.enable('default')

In [3]:
# Brand identifiers: `place` is the slug used in output file names,
# `place_formal` is the display name used in chart titles and the `brand` column.
place = "7-eleven"
place_formal = "7-Eleven"

# Mark color used for the store symbols on the maps.
color = "#14734f"

# Fetch date (YYYY-MM-DD), stamped on every row and on the dated GeoJSON export.
today = pd.Timestamp.today().date().isoformat()

---

## Scrape

#### Get a dataframe of ZIP Codes

In [4]:
# ZIP Code reference table, ordered from most to least populous so the
# biggest ZIPs sit at the top of the frame.
zips_reference = pd.read_json("../../_reference/data/zips_reference_pop_gen.json")
zips_all = zips_reference.sort_values(by="population", ascending=False)

#### Get the most populous ZIPs and a sample of the rest

In [5]:
# Search centers: the most populous ZIPs plus a random sample of the remainder.
TOP_ZIP_COUNT = 750
SAMPLE_ZIP_COUNT = 750
SAMPLE_SEED = 42  # fixed seed so a re-run queries the same set of ZIPs

zips_top = zips_all.head(TOP_ZIP_COUNT)
zips_rest = zips_all.iloc[TOP_ZIP_COUNT:]

# Cap the sample at the rows actually available; asking for more than exist
# makes DataFrame.sample raise.
zips_sample = zips_rest.sample(
    n=min(SAMPLE_ZIP_COUNT, len(zips_rest)), random_state=SAMPLE_SEED
)

zips = pd.concat([zips_sample, zips_top]).reset_index(drop=True)

#### Headers for request

In [6]:
# The bearer token is read from the environment rather than committed in the
# notebook. The token that used to be embedded here carried a 24-hour `exp`
# claim, so a hardcoded value is both a leaked credential and stale on any
# later run. Export SEVEN_ELEVEN_API_TOKEN before starting Jupyter.
api_token = os.environ.get("SEVEN_ELEVEN_API_TOKEN", "").strip()
if not api_token:
    raise RuntimeError(
        "Set the SEVEN_ELEVEN_API_TOKEN environment variable to a current "
        "7-Eleven API bearer token before running the scrape."
    )

headers = {
    "Authorization": f"Bearer {api_token}",
    "X-SEI-TRIP-ID": "MmU4ZDYzNDVjMmVkZDY5YzgxNzc1MzU5MGM5YjBjN2Q=",
}

In [7]:
# Keys copied from each store record; a key the API omits is stored as None.
STORE_FIELDS = [
    "id",
    "name",
    "address",
    "city",
    "state",
    "zip",
    "lat",
    "lon",
    "phone",
]
STORES_URL = "https://api.7-eleven.com/v4/stores"
REQUEST_TIMEOUT = 30  # seconds; without one a stalled connection blocks the run
MAX_ATTEMPTS = 3  # tries per page before the error is surfaced to the caller


def fetch_page(session, params):
    """Return the decoded JSON body for one page of store results.

    Transient failures (5xx responses, connection errors, timeouts) are retried
    with a short exponential backoff, up to MAX_ATTEMPTS tries in total.

    Raises:
        requests.HTTPError: for a 4xx response, or a 5xx on the final attempt.
        requests.ConnectionError / requests.Timeout: if the final attempt fails.
    """
    for attempt in range(1, MAX_ATTEMPTS + 1):
        try:
            response = session.get(
                STORES_URL, params=params, headers=headers, timeout=REQUEST_TIMEOUT
            )
        except (requests.ConnectionError, requests.Timeout):
            if attempt == MAX_ATTEMPTS:
                raise
        else:
            # Client errors are not retried; server errors are, until the last try.
            if response.status_code < 500 or attempt == MAX_ATTEMPTS:
                response.raise_for_status()
                return response.json()
        time.sleep(2**attempt)


locations_list = []

# One session for the whole scrape so connections to the API host are reused.
with requests.Session() as session:
    for _, row in tqdm(zips.iterrows(), total=zips.shape[0]):
        latitude = row["latitude"]
        longitude = row["longitude"]

        # Fresh params per ZIP so a previous ZIP's pagination offset never leaks in.
        # NOTE(review): the unit of "radius" is not shown here — confirm with the API.
        params = {
            "lat": latitude,
            "lon": longitude,
            "radius": "100",
            "limit": "500",
        }

        try:
            while True:
                json_data = fetch_page(session, params)

                # `or []` also covers an explicit null under "results".
                results = json_data.get("results") or []

                # Keep only the keys we want, tolerating records that lack some.
                locations_list.extend(
                    {key: location.get(key) for key in STORE_FIELDS}
                    for location in results
                )

                # Follow the "next" link, if any, by lifting its offset into params.
                next_page = json_data.get("next")
                if not next_page or "offset=" not in next_page:
                    break

                next_offset = next_page.split("offset=")[-1].split("&")[0]
                # A "next" link that does not advance would otherwise loop forever.
                if next_offset == params.get("offset"):
                    break
                params["offset"] = next_offset

        # Best-effort scrape: report the failed ZIP and move on to the next one.
        except requests.HTTPError as e:
            print(f"HTTP Error for {latitude}, {longitude}: {e}")
        except Exception as e:
            print(f"Unexpected error for {latitude}, {longitude}: {e}")

  0%|          | 0/1500 [00:00<?, ?it/s]

HTTP Error for 34.1046679609, -117.6616909362: 503 Server Error: Service Unavailable for url: https://api.7-eleven.com/v4/stores?lat=34.1046679609&lon=-117.6616909362&radius=100&limit=500&offset=500


#### Convert list to a dataframe

In [8]:
# Overlapping search areas return the same store more than once, so collapse
# fully identical records after building the frame.
src = pd.DataFrame(data=locations_list).drop_duplicates(keep="first")

#### Clean up dataframe and only include 7-Eleven stores (not its regional chains, like Stripes)

In [9]:
# Keep only stores whose name marks them as 7-Eleven. `na=False` treats a
# missing name as "no match"; without it the mask contains NaN and boolean
# indexing raises.
is_seven_eleven = src["name"].str.contains("7-11|7-Eleven", na=False)

df = (
    src[is_seven_eleven]
    .rename(columns={"address": "street", "lat": "latitude", "lon": "longitude"})
    .copy()
)

In [10]:
# Number of stores left after the brand filter.
df.shape[0]

8568

#### Create a mapping of state abbreviations to full state names using the us library

In [11]:
# Abbreviation -> full name lookup. Current releases of `us` leave the District
# of Columbia and the territories out of `STATES`, which would give stores there
# a missing state_name, so they are added explicitly. Duplicate abbreviations
# (if a release already includes DC) simply collapse in the dict.
jurisdictions = [*us.states.STATES_AND_TERRITORIES, us.states.DC]
state_mapping = {
    jurisdiction.abbr: jurisdiction.name for jurisdiction in jurisdictions
}

#### Add a column of full state names based on the abbreviations

In [12]:
# Translate each abbreviation to its full name; abbreviations that are not
# keys of the mapping come out as missing values.
df = df.assign(state_name=df["state"].map(state_mapping))

#### Add the brand name to the dataframe

In [13]:
# Every row gets the same display name for the brand.
df = df.assign(brand=place_formal)

#### Add fetch date

In [14]:
# Stamp each row with the date this scrape ran.
df = df.assign(updated=today)

---

## Geography

#### Make it a geodataframe

In [15]:
# Work on an independent copy so the tabular frame used for the JSON/CSV
# exports stays free of the geometry column.
df_geo = df.copy(deep=True)

In [24]:
# Build point geometries from the coordinate columns (x = longitude,
# y = latitude) and tag them as WGS 84 lon/lat.
store_points = gpd.points_from_xy(x=df_geo["longitude"], y=df_geo["latitude"])
gdf = gpd.GeoDataFrame(df_geo, geometry=store_points, crs="EPSG:4326")

---

## Maps

#### US states background

In [25]:
# State outlines from the vega_datasets US TopoJSON, drawn as a light grey
# basemap in the Albers USA projection.
us_states = alt.topo_feature(data.us_10m.url, feature="states")

background = (
    alt.Chart(us_states)
    .mark_geoshape(stroke="white", fill="#e9e9e9")
    .project(type="albersUsa")
    .properties(title=f"{place_formal} locations", width=800, height=500)
)

#### Location points map

In [26]:
# One small dot per store, positioned by its coordinate columns.
store_position = {"longitude": "longitude:Q", "latitude": "latitude:Q"}
points = alt.Chart(gdf).mark_circle(color=color, size=5).encode(**store_position)

# Layer the dots over the state basemap and drop the view border.
point_map = alt.layer(background, points)
point_map.configure_view(stroke=None)

#### Location proportional symbols map

In [27]:
# One circle per state, placed at the mean coordinates of that state's stores
# and sized by how many stores it has.
per_state = alt.Chart(gdf).transform_aggregate(
    groupby=["state"],
    count="count()",
    longitude="mean(longitude)",
    latitude="mean(latitude)",
)

symbols = (
    per_state.mark_circle()
    .encode(
        latitude="latitude:Q",
        longitude="longitude:Q",
        color=alt.value(color),
        size=alt.Size("count:Q", title="Count by state"),
        tooltip=["state:N", "count:Q"],
    )
    .properties(
        title=f"Number of {place_formal} in US, by average lon/lat of locations"
    )
)

# Layer the symbols over the state basemap and drop the view border.
symbol_map = alt.layer(background, symbols)
symbol_map.configure_view(stroke=None)

---

## Exports

#### JSON

In [28]:
# Write the tabular data as a pretty-printed JSON array of records.
json_path = f"data/processed/{place.lower().replace(' ', '_')}_locations.json"
df.to_json(json_path, orient="records", indent=4)

#### CSV

In [29]:
# Write the tabular data as CSV, leaving out the pandas index.
csv_path = f"data/processed/{place.lower().replace(' ', '_')}_locations.csv"
df.to_csv(csv_path, index=False)

#### GeoJSON

In [30]:
# Write the "latest" GeoJSON; this file is overwritten on every run.
geojson_path = f"data/processed/{place.lower().replace(' ', '_')}_locations.geojson"
gdf.to_file(geojson_path, driver="GeoJSON")

In [31]:
# Write a date-stamped GeoJSON snapshot alongside the "latest" file.
dated_geojson_path = (
    f"data/processed/{place.lower().replace(' ', '_')}_locations_{today}.geojson"
)
gdf.to_file(dated_geojson_path, driver="GeoJSON")