# Get Menchie's locations

#### Load Python tools and Jupyter config

In [3]:
%load_ext lab_black

In [165]:
import pandas as pd
import os
import requests
import json

os.environ["USE_PYGEOS"] = "0"
import geopandas as gpd
from bs4 import BeautifulSoup
from tqdm.notebook import tqdm, trange
import usaddress

In [5]:
pd.options.display.max_columns = 100
pd.options.display.max_rows = 1020
pd.options.display.max_colwidth = None

---

## Scrape

#### Get details about each location from its directory

In [167]:
url = "https://www.menchies.com/all-locations#intl"

In [168]:
response = requests.get(url)
soup = BeautifulSoup(response.text, "html.parser")
location_list = soup.findAll("div", class_="loc-info")

In [169]:
loc_list = []

for loc in location_list:
    # Extracting directions element
    directions = loc.find("p", class_="loc-directions")

    if directions:
        # Check if coords element is not None before using it
        coords_element = directions.find("a")
        coords = (
            coords_element["href"].replace("https://maps.google.com/?daddr=", "")
            if coords_element
            else None
        )

        # Extracting phone element
        phone_element = loc.find("p", class_="loc-phone")
        phone = phone_element.get_text() if phone_element else None

        # Creating loc_dict with checks for coords and phone
        loc_dict = {
            "location": loc.find("a").text,
            "url": "https://www.menchies.com/" + loc.find("a")["href"],
            "address": loc.find("div", class_="loc-address")
            .get_text(separator=", ")
            .strip(),
            "coords": coords if coords is not None else "",
            "phone": phone if phone is not None else "",
        }

        # Append loc_dict to loc_list
        loc_list.append(loc_dict)
    else:
        print(f"Directions not found for a location.")

Directions not found for a location.
Directions not found for a location.
Directions not found for a location.
Directions not found for a location.
Directions not found for a location.
Directions not found for a location.
Directions not found for a location.
Directions not found for a location.
Directions not found for a location.
Directions not found for a location.
Directions not found for a location.
Directions not found for a location.
Directions not found for a location.
Directions not found for a location.


In [191]:
df = pd.DataFrame(loc_list)

In [192]:
len(df)

324

In [193]:
df["street"] = df["address"].str.split(", ", expand=True)[0]
df["unit"] = df["address"].str.split(", ", expand=True)[1]
df["city"] = df["address"].str.split(", ", expand=True)[2]
df["state"] = df["address"].str.split(", ", expand=True)[3]

In [194]:
df[["latitude", "longitude"]] = df["coords"].str.split(", ", expand=True)

In [196]:
def parse_address(address):
    try:
        parsed_address = usaddress.tag(address)[0]
        return {
            "number": parsed_address.get("AddressNumber", ""),
            "street": parsed_address.get("StreetName", ""),
            "unit": parsed_address.get("OccupancyIdentifier", ""),
            "city": parsed_address.get("PlaceName", ""),
            "state": parsed_address.get("StateName", ""),
            "zip": parsed_address.get("ZipCode", ""),
        }
    except Exception as e:
        print(f"Error parsing address: {address}")
        return ""


# Apply the parse_address function to the 'address' column
df["parsed_address"] = df["address"].apply(parse_address)

Error parsing address: 8544 US-42 #100, Suite 100, Florence, KY 41042
Error parsing address: 2004 50th Ave., Unit 117, Red Deer, AB AB T4R 3A2
Error parsing address: 525 Ninth St. East , Cornwall , ON K6H 0A3
Error parsing address: 158 Guelph Street, Unit 1, Georgetown, ON L7G 4A6
Error parsing address: 518 St. Clair Ave. West, (St. Clair just west of Bathurst St.), Toronto, ON M6C 1A2
Error parsing address: 20 Broadleaf Avenue, (Just North of Brock St. N and Taunton Rd.), Whitby, ON L1R 0B5


In [197]:
parsed_address_df = df["parsed_address"].apply(pd.Series)

# Explicitly specify the dtype for each column
column_dtype_mapping = {
    "number": "object",
    "street": "str",
    "unit": "str",
    "city": "str",
    "state": "str",
    "zip": "str",
}

parsed_address_df = parsed_address_df.astype(column_dtype_mapping)

# Replace None values with a default value, e.g., an empty string
parsed_address_df = parsed_address_df.fillna("")

# Concatenate the original DataFrame with the parsed_address_df
df = pd.concat([df, parsed_address_df], axis=1)

# Drop the original "parsed_address" column if needed
df = df.drop(columns=["parsed_address"])

# Display the updated DataFrame
df

Unnamed: 0,location,url,address,coords,phone,street,unit,city,state,latitude,longitude,number,street.1,unit.1,city.1,state.1,zip,0
0,Hillside Plaza,https://www.menchies.com//locations/frozen-yogurt-hillside-plaza-ak,"9000 Lake Otis Parkway, Unit 4, Anchorage, AK 99507","61.139147, -149.834163",(907) 929-9977,9000 Lake Otis Parkway,Unit 4,Anchorage,AK 99507,61.139147,-149.834163,9000,Lake Otis,4,Anchorage,AK,99507,
1,Gilbert Gateway Towne Center,https://www.menchies.com//locations/frozen-yogurt-gilbert-gateway-towne-center-az,"5022 S. Power Road, Ste. 108 , Gilbert, AZ 85212","33.324948, -111.688498",(480) 783-2441,5022 S. Power Road,Ste. 108,Gilbert,AZ 85212,33.324948,-111.688498,5022,Power,108,Gilbert,AZ,85212,
2,San Tan Village,https://www.menchies.com//locations/frozen-yogurt-san-tan-village-az,"3131 S. Market Street, Suite 111, Gilbert, AZ 85295","33.297007, -111.748993",(480) 993-3336,3131 S. Market Street,Suite 111,Gilbert,AZ 85295,33.297007,-111.748993,3131,Market,111,Gilbert,AZ,85295,
3,Stockton Hill,https://www.menchies.com//locations/frozen-yogurt-stockton-hill-az,"3535 Stockton Hills Rd., Kingman, AZ 86409","35.226530, -114.036959",(928) 263-6646,3535 Stockton Hills Rd.,Kingman,AZ 86409,,35.226530,-114.036959,3535,Stockton Hills,,Kingman,AZ,86409,
4,The QC District,https://www.menchies.com//locations/frozen-yogurt-the-qc-district--az,"21295 S. Ellsworth Loop Road, Suite 104, Queen Creek, AZ 85142","33.255009, -111.636543",(480) 784-3943,21295 S. Ellsworth Loop Road,Suite 104,Queen Creek,AZ 85142,33.255009,-111.636543,21295,Ellsworth Loop,104,Queen Creek,AZ,85142,
5,Scottsdale Towne Center,https://www.menchies.com//locations/frozen-yogurt-scottsdale-towne-center-az,"15678 N. Frank Lloyd Wright Blvd., Suite 120, Scottsdale, AZ 85260","33.62879, -111.887037",(480) 451-2891,15678 N. Frank Lloyd Wright Blvd.,Suite 120,Scottsdale,AZ 85260,33.62879,-111.887037,15678,Frank Lloyd Wright,120,Scottsdale,AZ,85260,
6,The Collective,https://www.menchies.com//locations/frozen-yogurt-the-collective-az,"1128 E. Baseline Road, Tempe, AZ 85283","33.379803, -111.922992",(480)307-6037,1128 E. Baseline Road,Tempe,AZ 85283,,33.379803,-111.922992,1128,Baseline,,Tempe,AZ,85283,
7,Alcoa Exchange Center,https://www.menchies.com//locations/frozen-yogurt-alcoa-exchange-center-ak,"7301 Alcoa Road, Suite 5 , Bryant, AR 72002","34.603513, -92.533662",(501) 794-6070,7301 Alcoa Road,Suite 5,Bryant,AR 72002,34.603513,-92.533662,7301,Alcoa,5,Bryant,AR,72002,
8,Arroyo Grande Center,https://www.menchies.com//locations/frozen-yogurt-arroyo-grande-center-ca,"1229 East Grand Ave., Ste. 103, Arroyo Grande, CA 93420","35.120187, -120.601813",(805) 270-4920,1229 East Grand Ave.,Ste. 103,Arroyo Grande,CA 93420,35.120187,-120.601813,1229,Grand,103,Arroyo Grande,CA,93420,
9,River Lakes Village - Bakersfield,https://www.menchies.com//locations/frozen-yogurt-river-lakes-village-ca,"4560-B Coffee Road, Bakersfield, CA 93308","35.401173, -119.090949",(661)615-3014,4560-B Coffee Road,Bakersfield,CA 93308,,35.401173,-119.090949,4560-B,Coffee,,Bakersfield,CA,93308,


---

## Exports

In [None]:
gdf.to_file("data/processed/ulta.geojson", driver="GeoJSON")

In [None]:
df.to_json("data/processed/ulta.json", orient="records", indent=4)

In [None]:
df.to_csv("data/processed/ulta.csv", index=False)