# Setup

Import the following packages. To install all of them, run `pip install -r requirements.txt`.

In [1]:
# Import data processing libraries
import pandas as pd
import csv
import re
import requests
from bs4 import BeautifulSoup

# Scraping for Locations by State

The goal of this section is to build a dictionary that maps each state to its number of Jersey Mike's stores, as listed in the *Find Locations* section of the Jersey Mike's website.

Base website: https://www.jerseymikes.com/locations/all

In [2]:
# Download the "all locations" page and parse it with Beautiful Soup
url = 'https://www.jerseymikes.com/locations/all'
# A timeout keeps the cell from hanging forever if the server stops responding
response = requests.get(url, timeout=30)
# Fail fast on an HTTP error status instead of parsing an error page
response.raise_for_status()
document_html = response.text
soup = BeautifulSoup(document_html, 'html.parser')

In [3]:
# Build `states_count`: state abbreviation -> number of stores shown for it
states_abriev = ['AK', 'AL', 'AR', 'AZ', 'CA', 'CO', 'CT', 'DC', 'DE', 'FL', 'GA',
                 'HI', 'IA', 'ID', 'IL', 'IN', 'KS', 'KY', 'LA', 'MA', 'MD', 'ME',
                 'MI', 'MN', 'MO', 'MS', 'MT', 'NC', 'ND', 'NE', 'NH', 'NJ', 'NM',
                 'NV', 'NY', 'OH', 'OK', 'OR', 'PA', 'RI', 'SC', 'SD', 'TN', 'TX',
                 'UT', 'VA', 'VT', 'WA', 'WI', 'WV', 'WY']

# Collect every matching list item; `find` would stop at the first one only
states_content = soup.find_all('li', class_='pure-u-1-3 pure-u-md-1-5 pure-u-lg-1-6')

# Get store count: drop all whitespace, then pull the digits out of each "(N)"
states_text = ''.join(item.get_text() for item in states_content)
states_text = re.sub(r'\s+', '', states_text)
store_counts = re.findall(r'\((\d+)\)', states_text)

# zip() silently truncates, which would leave states missing or paired with
# another state's count, so stop here if the page does not yield one count
# per abbreviation.
if len(store_counts) != len(states_abriev):
    raise ValueError(
        f'Found {len(store_counts)} store counts on the page but expected '
        f'{len(states_abriev)} (one per entry in states_abriev)'
    )

# NOTE(review): assumes the page lists states in the same order as
# `states_abriev` — TODO confirm against the live page.
states_count = {state: int(count) for state, count in zip(states_abriev, store_counts)}

# Scraping for Addresses by State

The goal of this section is to obtain the addresses of all stores in every state where Jersey Mike's has locations. Note that the Jersey Mike's website shows only 12 addresses per page and puts the rest on the following pages. To compute the number of pages to scrape for each state, we divide the state's `states_count` value by 12 and round up (for example, 30 stores take 3 pages).

Base website: https://www.jerseymikes.com/locations/{state_abriev}

In [11]:
# Scrape the address elements for every state, walking through each results page
locations = []
base_url = 'https://www.jerseymikes.com/locations/'
page_url = '?page='
stores_per_page = 12  # the site shows at most 12 addresses on one page

for state in states_abriev:
    # Ceiling division: a partly filled last page still has to be fetched.
    # (The remainder `count % 12` is not a page count.)
    pages = -(-states_count[state] // stores_per_page)

    # NOTE(review): assumes page numbering starts at 1 — TODO confirm on the site
    for page in range(1, pages + 1):
        # Build the URL from scratch each time; appending to the previous URL
        # would produce '?page=0', '?page=01', '?page=012', ...
        state_url = base_url + state + page_url + str(page)

        # Timeout so one stalled request cannot hang the whole scrape
        page_response = requests.get(state_url, timeout=30)
        page_response.raise_for_status()

        # Use a separate name so the `soup` of the states page is not overwritten
        page_soup = BeautifulSoup(page_response.text, 'html.parser')
        locations += page_soup.find_all('p', itemprop='address')

KeyboardInterrupt: 

In [6]:
# Display the address elements accumulated in `locations` by the scraping loop
locations

[]