# Get Pinkberry locations

#### Load Python tools and Jupyter config

In [1]:
import re
import us
import json
import black
import requests
import pandas as pd
import jupyter_black
import altair as alt
import geopandas as gpd
from bs4 import BeautifulSoup
from vega_datasets import data
from tqdm.notebook import tqdm, trange

In [2]:
# Enable automatic Black formatting of notebook cells.
jupyter_black.load()

# Widen pandas' display limits so wide/long frames are not truncated.
pd.set_option("display.max_columns", 100)
pd.set_option("display.max_rows", 1000)
pd.set_option("display.max_colwidth", None)

In [3]:
# Brand identifiers: `place` feeds the export file names, `place_formal` the
# chart titles.
place, place_formal = "pinkberry", "Pinkberry"

# Brand colour used for the map marks.
color = "#E42545"

# NOTE(review): these coordinates are not referenced in the visible cells;
# presumably a default map centre -- confirm before relying on them.
latitude, longitude = "39.106667", "-94.676392"

## Read data

#### Extract store locations from the inline JSON [on this map](https://locator.kahalamgmt.com/locator/index.php?brand=24&mode=map)

In [4]:
# Accumulator for the parsed store records; it is filled by the parsing loop
# in the next cell.
data_list = []

# Fetch the locator's map page. The large `pagesize` presumably makes the
# server return every store in a single response -- TODO confirm.
# A timeout keeps the notebook from hanging forever on a stalled connection,
# and raise_for_status() stops here on an HTTP error instead of silently
# parsing an error page into zero stores.
response = requests.get(
    "https://locator.kahalamgmt.com/locator/index.php?brand=24&mode=map&latitude=34.00125000&longitude=-118.43066000&q=90066&pagesize=1000",
    timeout=30,
)
response.raise_for_status()

soup = BeautifulSoup(response.text, "html.parser")

# `find_all` is the supported spelling of the deprecated `findAll` alias.
# NOTE(review): the [1:72] slice hard-codes which <script> tags are inspected;
# if the page gains stores or changes layout, records beyond index 71 would be
# dropped silently -- verify against the live page.
stores_list = soup.find_all("script", type="text/javascript")[1:72]

In [5]:
# Start from an empty list so that re-running this cell on its own does not
# append a second copy of every store.
data_list = []

for s in stores_list:
    # A tag's `.string` is None when it has no single text child (e.g. an
    # empty or external script); there is nothing to parse, so skip it.
    # This replaces a bare `except:` that also hid unrelated errors.
    if s.string is None:
        continue

    script_content = s.string.replace(
        "// generated by including pagesize parameter", ""
    )

    # Each store is emitted as `Locator.stores['<id>'] = {...};` -- capture
    # the object literal between the assignment and the closing semicolon.
    match = re.search(r"Locator\.stores\['\d+'\] = ({.*?});", script_content)
    if match:
        json_content = match.group(1)

        # Load JSON content into a dictionary
        store_data = json.loads(json_content)
        data_list.append(store_data)
    else:
        print(f"No JSON content found in script: {script_content}")

In [6]:
# One row per parsed store record, with every column name lower-cased.
src = pd.DataFrame(data_list).pipe(
    lambda frame: frame.set_axis(frame.columns.str.lower(), axis="columns")
)

In [7]:
# Keep only the identifying, location and contact columns; the copy detaches
# the result from `src`.
df = src.loc[
    :,
    [
        "storeid",
        "latitude",
        "longitude",
        "address",
        "city",
        "state",
        "statename",
        "zip",
        "phone",
        "locationtype",
    ],
].copy()

In [8]:
# Preview the first five rows of the trimmed frame.
df.head(n=5)

Unnamed: 0,storeid,latitude,longitude,address,city,state,statename,zip,phone,locationtype
0,15047,33.979537,-118.438612,"4708 1/2 Admiralty Way, #A-3",Marina del Rey,CA,California,90292,(310) 827-3900,Street Front
1,15180,33.985441,-118.395128,6000 Sepulveda Blvd,Culver City,CA,California,90230,(310) 390-2333,Mall
2,15037,34.033976,-118.492749,"1612 Montana Ave,",Santa Monica,CA,California,90403,(310) 264-4791,Street Front
3,15329,33.941589,-118.40853,"1 World Way, LAX,",Los Angeles,CA,California,90045,,Airport
4,15020,34.063521,-118.398896,"240 S Beverly Dr, #A",Beverly Hills,CA,California,90212,(310) 734-7543,Street Front


In [9]:
# Number of store rows in the trimmed frame.
df.shape[0]

70

---

## Geography

#### Make it a geodataframe

In [10]:
# Work on an independent (deep) copy so `df` stays free of a geometry column.
df_geo = df.copy(deep=True)

In [11]:
# Build point geometries from the longitude (x) and latitude (y) columns.
# Declaring the CRS explicitly means `gdf.crs` is no longer None, so the
# GeoJSON export and any later spatial operation know the coordinate system.
# NOTE(review): EPSG:4326 (WGS84 degrees) is assumed from the look of the
# values in the preview above -- confirm with the data source.
gdf = gpd.GeoDataFrame(
    df_geo,
    geometry=gpd.points_from_xy(df_geo.longitude, df_geo.latitude),
    crs="EPSG:4326",
)

---

## Maps

#### US states background

In [12]:
# Light-grey US states basemap in the Albers USA projection; it also carries
# the chart size and the default title.
background = (
    alt.Chart(data=alt.topo_feature(data.us_10m.url, feature="states"))
    .project(type="albersUsa")
    .properties(title=f"{place_formal} locations", height=500, width=800)
    .mark_geoshape(stroke="white", fill="#e9e9e9")
)

#### Location points map

In [13]:
# One small brand-coloured dot per store, placed by its longitude/latitude
# columns.
points = (
    alt.Chart(data=gdf)
    .encode(
        latitude=alt.Latitude("latitude:Q"),
        longitude=alt.Longitude("longitude:Q"),
    )
    .mark_circle(color=color, size=10)
)

# Layer the dots over the states basemap; the trailing expression renders the
# layered chart without a view border.
point_map = alt.layer(background, points)
point_map.configure_view(stroke=None)

#### Location proportional symbols map

In [14]:
# One circle per state: sized by the number of stores in that state and
# positioned at the mean longitude/latitude of those stores.
symbols = (
    alt.Chart(data=gdf)
    .mark_circle()
    .transform_aggregate(
        groupby=["state"],
        count="count()",
        latitude="mean(latitude)",
        longitude="mean(longitude)",
    )
    .properties(
        title=f"Number of {place_formal} in US, by average lon/lat of locations"
    )
    .encode(
        latitude=alt.Latitude("latitude:Q"),
        longitude=alt.Longitude("longitude:Q"),
        color=alt.value(color),
        size=alt.Size("count:Q", title="Count by state"),
        tooltip=["state:N", "count:Q"],
    )
)

# Layer the symbols over the states basemap; the trailing expression renders
# the layered chart without a view border.
symbol_map = alt.layer(background, symbols)
symbol_map.configure_view(stroke=None)

---

## Exports

#### JSON

In [15]:
# Write the store table as an indented JSON array with one object per row.
df.to_json(
    path_or_buf=f"data/processed/{place.lower().replace(' ', '_')}_locations.json",
    orient="records",
    indent=4,
)

#### CSV

In [16]:
# Write the store table as CSV, leaving out the row index.
df.to_csv(
    path_or_buf=f"data/processed/{place.lower().replace(' ', '_')}_locations.csv",
    index=False,
)

#### GeoJSON

In [17]:
# Write the geodataframe (attributes plus point geometry) as GeoJSON.
gdf.to_file(
    filename=f"data/processed/{place.lower().replace(' ', '_')}_locations.geojson",
    driver="GeoJSON",
)