# Get Bass Pro Shops locations

#### Load Python tools and Jupyter config

In [1]:
import us 
import json
import requests
import pandas as pd
import jupyter_black
import altair as alt
import geopandas as gpd
from bs4 import BeautifulSoup
from vega_datasets import data
from tqdm.notebook import tqdm, trange

In [2]:
# Turn on Black formatting for notebook cells.
jupyter_black.load()

# Relax pandas' display limits so wide/long frames and long strings
# (such as store URLs) are shown without truncation.
pd.set_option("display.max_columns", 100)
pd.set_option("display.max_rows", 1000)
pd.set_option("display.max_colwidth", None)

In [3]:
# Brand colors used when drawing the maps.
bass_color = "#1F8500"
cabelas_color = "#0058a0"

# `place` is the slug used in export file names; `place_formal` is the
# human-readable label used in chart titles.
place = "bass-pro-shops"
place_formal = "Bass Pro Shops/Cabela's"

## Scrape

#### Headers for requests

In [4]:
# Request headers carrying a desktop Chrome user-agent string
# (presumably so the site serves its regular HTML -- not verified here).
headers = {
    "user-agent": (
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/120.0.0.0 Safari/537.36"
    ),
}

#### Get inline script with location content and read it as json

In [5]:
# Fetch the store-locator landing page and parse its HTML.
# A timeout keeps the cell from hanging forever on a stalled connection,
# and raise_for_status() stops us from parsing an HTTP error page as if it
# were the real listing.
response = requests.get("https://stores.basspro.com", headers=headers, timeout=30)
response.raise_for_status()
soup = BeautifulSoup(response.text, "html.parser")

In [6]:
# The locations are embedded as JSON inside an inline
# <script class="js-map-config"> tag. Fail with a clear message if the tag is
# missing or empty instead of an opaque AttributeError/TypeError.
map_config_tag = soup.find("script", class_="js-map-config")
if map_config_tag is None or not map_config_tag.string:
    raise ValueError(
        "Could not find a non-empty <script class='js-map-config'> tag in the page"
    )

json_data = json.loads(map_config_tag.string)

#### Read the JSON into a dataframe, keeping just the stores in the US

In [7]:
# Keep only the fields needed from each entry in the embedded "locs" list.
all_locs = pd.DataFrame(json_data["locs"])[
    [
        "id",
        "altTagText",
        "latitude",
        "longitude",
        "website",
    ]
]

# Keep rows whose URL contains ".com" (the notebook's stand-in for "US stores").
# regex=False makes the dot literal -- as a regex, ".com" would also match
# e.g. "/commerce" -- and na=False drops rows with a missing URL rather than
# failing on them.
has_dot_com = all_locs["website"].str.contains(".com", regex=False, na=False)

# Rename to friendlier column names and reset to a clean 0..n-1 index, which
# the later index-based merge relies on.
src = (
    all_locs[has_dot_com]
    .rename(columns={"altTagText": "street", "id": "store_id"})
    .reset_index(drop=True)
)

#### Hmm. The addresses are *fine*, but I'd rather not parse them

In [8]:
# Inspect a single row (position 10) to see what the listing data looks like.
src.iloc[10]

store_id                                                               3011546
street                             Location at 1000 Bass Pro Dr NW Altoona, IA
latitude                                                             41.659134
longitude                                                           -93.514686
website      https://stores.basspro.com/us/ia/altoona/1000-bass-pro-dr-nw.html
Name: 10, dtype: object

#### Get better features from each location page

In [9]:
# One dict of cleaned fields per store page, in the same order as `src`
# (the later merge pairs rows by position, so the order must be preserved).
locations_list = []

# Reuse a single session so the per-store requests share connections and send
# the same browser-like headers as the landing-page request.
with requests.Session() as session:
    session.headers.update(headers)

    for page in tqdm(src["website"].to_list()):
        # Time out rather than hang, and stop on HTTP errors instead of
        # scraping an error page.
        page_response = session.get(page, timeout=30)
        page_response.raise_for_status()
        page_soup = BeautifulSoup(page_response.text, "html.parser")

        # The city element's text may carry a comma, which is stripped out.
        city = (
            page_soup.find("span", class_="c-address-city")
            .text.strip()
            .replace(",", "")
        )
        # Named postal_code locally so the builtin `zip` is not shadowed;
        # the output key is still "zip".
        postal_code = page_soup.find(
            "span", class_="c-address-postal-code"
        ).text.strip()

        locations_list.append(
            {
                # Coordinates are kept as the strings found in the meta tags.
                "latitude": page_soup.find("meta", itemprop="latitude")["content"],
                "longitude": page_soup.find("meta", itemprop="longitude")["content"],
                "street": page_soup.find(
                    "span", class_="c-address-street-1"
                ).text.strip(),
                "city": city,
                "state": page_soup.find("abbr", class_="c-address-state").text.strip(),
                "zip": postal_code,
                "phone": page_soup.find("a", class_="c-phone-number-link").text.strip(),
            }
        )

  0%|          | 0/155 [00:00<?, ?it/s]

#### Read list of dictionaries as a dataframe

In [10]:
# Build a dataframe with one row per scraped store page; the dict keys
# (latitude, longitude, street, city, state, zip, phone) become the columns.
locations_src = pd.DataFrame(locations_list)

#### The result? 

In [11]:
# Preview the first rows of the scraped data.
locations_src.head()

Unnamed: 0,latitude,longitude,street,city,state,zip,phone
0,37.1793826,-93.2971804,1 Bass Pro Dr,Springfield,MO,65807,(417) 887-7334
1,36.6414441,-93.2135224,1 Bass Pro Dr,Branson,MO,65616,(417) 243-5200
2,42.0847195,-71.2727308,1 Bass Pro Dr,Foxborough,MA,2035,(508) 216-2000
3,34.658703,-92.4092138,1 Bass Pro Dr,Little Rock,AR,72210,(501) 954-4500
4,41.1775335,-73.1781721,1 Bass Pro Dr,Bridgeport,CT,6608,(203) 362-4200


#### How many locations?

In [12]:
# Number of rows, i.e. one per store page scraped.
len(locations_src)

155

In [13]:
# Same row position that was inspected in `src`, now with separate address parts.
locations_src.iloc[10]

latitude              41.6591344
longitude            -93.5146862
street       1000 Bass Pro Dr NW
city                     Altoona
state                         IA
zip                        50009
phone             (515) 957-5500
Name: 10, dtype: object

#### Merge store IDs and websites with the clean addresses on the shared index

In [14]:
# Final column order for the combined table.
output_columns = [
    "store_id",
    "street",
    "city",
    "state",
    "zip",
    "phone",
    "latitude",
    "longitude",
    "website",
]

# Pair each scraped row with its store ID and URL from `src`. Both frames are
# joined on their index, so this relies on them sharing the same row order.
merged = locations_src.merge(
    src[["store_id", "website"]], left_index=True, right_index=True
)

# Take an independent copy so later column assignments act on `df` itself.
df = merged[output_columns].copy()

#### Add brands using strings in URLs

In [15]:
# Classify each store by a substring of its URL. The Cabela's assignment runs
# second, so a URL containing both substrings ends up labelled Cabela's.
url_has_bass = df["website"].str.contains("bass")
url_has_cabelas = df["website"].str.contains("cabelas")
df.loc[url_has_bass, "brand"] = "Bass Pro Shops"
df.loc[url_has_cabelas, "brand"] = "Cabela's"

# Attach the matching brand color to every labelled row.
df.loc[df["brand"].eq("Bass Pro Shops"), "color"] = bass_color
df.loc[df["brand"].eq("Cabela's"), "color"] = cabelas_color

#### How many of each brand?

In [16]:
# Tally the stores under each brand label.
df.brand.value_counts()

brand
Bass Pro Shops    90
Cabela's          65
Name: count, dtype: int64

#### Create a mapping of state abbreviations to full state names using the us library

In [17]:
# Lookup table from postal abbreviation to full name, built from the entries
# in us.states.STATES.
state_mapping = {
    us_state.abbr: us_state.name
    for us_state in us.states.STATES
}

#### New column of full state names based on abbreviations

In [18]:
# Translate each abbreviation through state_mapping; abbreviations absent from
# the mapping come back as missing values (pandas Series.map behaviour).
df["state_name"] = df["state"].map(state_mapping)

---

## Geography

#### Make it a geodataframe

In [19]:
# Build the geographic version from a separate copy of df.
df_geo = df.copy()

In [20]:
# Turn each row into a point geometry (x = longitude, y = latitude).
# The coordinates are decimal-degree lon/lat, so declare the CRS as EPSG:4326;
# without it the GeoDataFrame has no CRS and cannot be reprojected later.
gdf = gpd.GeoDataFrame(
    df_geo,
    geometry=gpd.points_from_xy(df_geo.longitude, df_geo.latitude),
    crs="EPSG:4326",
)

---

## Maps

#### US states background

In [21]:
# State outlines taken from the "states" feature of the vega_datasets
# us_10m TopoJSON file.
us_states = alt.topo_feature(data.us_10m.url, feature="states")

# Light grey base layer with white state borders; the point and symbol layers
# are drawn on top of it.
background = (
    alt.Chart(us_states)
    .mark_geoshape(fill="#e9e9e9", stroke="white")
    .project("albersUsa")
    .properties(width=800, height=500, title=f"{place_formal} locations")
)

#### Location points map

In [22]:
# Pin each brand to its own color so the map uses the palette defined at the
# top of the notebook instead of Altair's default category colors.
brand_scale = alt.Scale(
    domain=["Bass Pro Shops", "Cabela's"],
    range=[bass_color, cabelas_color],
)

# One circle per store, positioned by its coordinates and colored by brand.
points = (
    alt.Chart(gdf)
    .mark_circle(size=60)
    .encode(
        longitude="longitude:Q",
        latitude="latitude:Q",
        color=alt.Color(
            "brand:N",
            scale=brand_scale,
            legend=alt.Legend(title="Brand"),
        ),
    )
)

# Overlay the points on the state background. The legend placement is set once
# here, at the top-level chart configuration.
point_map = background + points
point_map = point_map.configure_view(stroke=None).configure_legend(
    orient="top", direction="horizontal", titleAnchor="start"
)

In [23]:
# Render the layered point map as the cell output.
point_map

#### Location proportional symbols map

In [24]:
# Collapse the stores to one record per state: the mean of the store
# coordinates gives the symbol position, and the row count gives its size.
per_state = alt.Chart(gdf).transform_aggregate(
    latitude="mean(latitude)",
    longitude="mean(longitude)",
    count="count()",
    groupby=["state"],
)

# Proportional circles, all drawn in the Bass Pro color, with a tooltip
# showing the state and its store count.
symbols = (
    per_state.mark_circle()
    .encode(
        longitude="longitude:Q",
        latitude="latitude:Q",
        size=alt.Size("count:Q", title="Count by state"),
        color=alt.value(bass_color),
        tooltip=["state:N", "count:Q"],
    )
    .properties(
        title=f"Number of {place_formal} in US, by average lon/lat of locations"
    )
)

# Layer the symbols over the state background and display the result.
symbol_map = background + symbols
symbol_map.configure_view(stroke=None)

---

## Exports

#### JSON

In [25]:
# Write the table as an indented JSON array with one object per store.
json_path = f"data/processed/{place.lower().replace(' ', '_')}_locations.json"
df.to_json(json_path, orient="records", indent=4)

#### CSV

In [26]:
# Write the table as CSV, leaving out the dataframe index.
csv_path = f"data/processed/{place.lower().replace(' ', '_')}_locations.csv"
df.to_csv(csv_path, index=False)

#### GeoJSON

In [27]:
# Write the point geometries and their attributes as GeoJSON.
geojson_path = f"data/processed/{place.lower().replace(' ', '_')}_locations.geojson"
gdf.to_file(geojson_path, driver="GeoJSON")