# Import libraries

In [2]:
# uncomment to install libraries
# ! pip install numpy pandas matplotlib seaborn requests selenium bs4
# ! pip install jupyter_contrib_nbextensions && jupyter contrib nbextension install 

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import requests
import selenium
from bs4 import BeautifulSoup
import time

# Test web scraping escape room reviews

## World of Escapes map page URL

In [81]:
# Site root; it also serves as the prefix for every relative href scraped below.
home_page_url = "https://worldofescapes.com"

# The map page lives directly under the site root.
map_page_url = home_page_url + "/map"

## Map page response

In [8]:
# Fetch the map page. The timeout keeps this cell from hanging indefinitely on
# a stalled connection, and raise_for_status() surfaces HTTP errors here rather
# than letting an error page be parsed as if it were the map.
map_page_response = requests.get(map_page_url, timeout=30)
map_page_response.raise_for_status()

## Map page soup

In [9]:
# Parse the map page HTML with Python's built-in parser.
map_page_soup = BeautifulSoup(
    markup=map_page_response.text,
    features="html.parser",
)

## Map states and city href and URLs

In [82]:
# Two parallel mappings keyed by lower-cased state name:
#   state_city_href -> list of city hrefs exactly as they appear on the map page
#   state_city_urls -> the same hrefs prefixed with home_page_url
state_city_href = {}
state_city_urls = {}

# Each state block on the map page is a div carrying this exact class string.
state_div_attrs = {"class": "col-lg-3 col-md-4 col-sm-6 col-xs-6 state-item"}

for state_div in map_page_soup.find_all("div", state_div_attrs):
    # The state's name is the text of the block's <h3> heading.
    state_name = state_div.find("h3").get_text().strip().lower()

    # Every city is an <li class="city-item"> wrapping a link.
    city_hrefs = [
        city_li.a["href"]
        for city_li in state_div.find_all("li", {"class": "city-item"})
    ]

    state_city_href[state_name] = city_hrefs
    state_city_urls[state_name] = [home_page_url + href for href in city_hrefs]

# Display the full URLs (each one embeds its href).
state_city_urls

{'alabama': ['https://worldofescapes.com/auburn',
  'https://worldofescapes.com/birm',
  'https://worldofescapes.com/daleville',
  'https://worldofescapes.com/dothan',
  'https://worldofescapes.com/florence-al',
  'https://worldofescapes.com/gadsden',
  'https://worldofescapes.com/henagar',
  'https://worldofescapes.com/huntsville',
  'https://worldofescapes.com/mobile',
  'https://worldofescapes.com/montgomery',
  'https://worldofescapes.com/gulf-shores',
  'https://worldofescapes.com/oxford-al',
  'https://worldofescapes.com/tuscaloosa'],
 'alaska': ['https://worldofescapes.com/anchorage',
  'https://worldofescapes.com/fairbanks',
  'https://worldofescapes.com/juneau',
  'https://worldofescapes.com/skagway'],
 'arizona': ['https://worldofescapes.com/cottonwood',
  'https://worldofescapes.com/flagstaff',
  'https://worldofescapes.com/lake-havasu-city',
  'https://worldofescapes.com/phoenix',
  'https://worldofescapes.com/prescott',
  'https://worldofescapes.com/tucson',
  'https://wor

## Map states and city page responses and soups

In [98]:
# Containers mapping each state to its city page responses and parsed soups.
# Within a state, both lists follow the order of state_city_urls[state].
state_city_responses = {}
state_city_soups = {}

# just alabama and michigan for now
for state_key in ("alabama", "michigan"):
    city_urls = state_city_urls[state_key]

    # add state key to dictionary containers
    state_city_responses[state_key] = []
    state_city_soups[state_key] = []

    for url in city_urls:
        # Fetch the city page; the timeout prevents one stalled request from
        # hanging the whole crawl.
        response_j = requests.get(url, timeout=30)

        # make soup
        soup_j = BeautifulSoup(response_j.text, "html.parser")

        # append response and soup to state
        state_city_responses[state_key].append(response_j)
        state_city_soups[state_key].append(soup_j)

        # Pause for a random whole number of seconds in [1, 4] (high is
        # exclusive). Draw a plain int rather than a 1-element array: passing
        # an array to time.sleep relies on implicit array-to-scalar conversion,
        # and it also printed as "[4]" instead of "4".
        sleep_time_j = int(np.random.randint(low=1, high=5))

        # Report before sleeping so the message describes what is about to happen.
        print("Made soup for {} in {}. Sleeping for {} seconds...".format(url, state_key, sleep_time_j))
        time.sleep(sleep_time_j)

Made soup for https://worldofescapes.com/auburn in alabama. Sleeping for [4] seconds...
Made soup for https://worldofescapes.com/birm in alabama. Sleeping for [2] seconds...
Made soup for https://worldofescapes.com/daleville in alabama. Sleeping for [2] seconds...
Made soup for https://worldofescapes.com/dothan in alabama. Sleeping for [4] seconds...
Made soup for https://worldofescapes.com/florence-al in alabama. Sleeping for [4] seconds...
Made soup for https://worldofescapes.com/gadsden in alabama. Sleeping for [4] seconds...
Made soup for https://worldofescapes.com/henagar in alabama. Sleeping for [3] seconds...
Made soup for https://worldofescapes.com/huntsville in alabama. Sleeping for [3] seconds...
Made soup for https://worldofescapes.com/mobile in alabama. Sleeping for [2] seconds...
Made soup for https://worldofescapes.com/montgomery in alabama. Sleeping for [4] seconds...
Made soup for https://worldofescapes.com/gulf-shores in alabama. Sleeping for [1] seconds...
Made soup f

## Escape room href and URLs

In [103]:
# Quick look at one city page (first alabama entry): list the href of every
# escape room tile inside its "other-quests" section.
first_city_quests = state_city_soups["alabama"][0].find("div", {"data-content": "other-quests"})
room_link_attrs = {"class": "item-hover quest_tile_hover_link"}

for room_link in first_city_quests.find_all("a", room_link_attrs):
    print(room_link["href"])

/auburn/quests/auburn-escape-zones-the-cabin
/auburn/quests/auburn-escape-zones-imprisoned
/auburn/quests/auburn-escape-zones-the-puzzler-vs-superheroes
/auburn/quests/auburn-escape-zones-vault
/auburn/quests/auburn-escape-zones-black-beards-brig


In [123]:
# Probe a single michigan city page (index 3) for its "other-quests" section.
michigan_city_soup = state_city_soups["michigan"][3]
michigan_city_soup.find("div", attrs={"data-content": "other-quests"})

In [124]:
# Nested container of escape room hrefs, keyed by state and then by city href,
# e.g. {"alabama": {"/auburn": [room href, ...], ...}, ...}
state_city_room_urls = {}

# just alabama and michigan for now
selected_state_soups = {state: state_city_soups[state] for state in ("alabama", "michigan")}

for state_key, city_soup in selected_state_soups.items():
    rooms_by_city = {}
    state_city_room_urls[state_key] = rooms_by_city

    for j, soup in enumerate(city_soup):
        # The j-th soup is paired with the j-th href recorded for this state.
        city_href = state_city_href[state_key][j]
        room_hrefs = []
        rooms_by_city[city_href] = room_hrefs

        # Pages without an "other-quests" section keep an empty room list.
        quests_div = soup.find("div", {"data-content": "other-quests"})
        if quests_div is None:
            print("None")
            continue

        # Each escape room tile is an <a> tag carrying the room's href.
        for a in quests_div.find_all("a", {"class": "item-hover quest_tile_hover_link"}):
            room_hrefs.append(a["href"])

            # print loop summary
            print("Added href {} for {} in {}.".format(a["href"], city_href, state_key))

Added href /auburn/quests/auburn-escape-zones-the-cabin for /auburn in alabama.
Added href /auburn/quests/auburn-escape-zones-imprisoned for /auburn in alabama.
Added href /auburn/quests/auburn-escape-zones-the-puzzler-vs-superheroes for /auburn in alabama.
Added href /auburn/quests/auburn-escape-zones-vault for /auburn in alabama.
Added href /auburn/quests/auburn-escape-zones-black-beards-brig for /auburn in alabama.
Added href /birm/quests/Locked-In-THE-LABORATORY for /birm in alabama.
Added href /birm/quests/locked-in-birm-the-classroom for /birm in alabama.
Added href /birm/quests/Locked-In-THE-WAREHOUSE for /birm in alabama.
Added href /birm/quests/nightmare-at-3008-3008-house for /birm in alabama.
Added href /birm/quests/nightmare-at-3008-camp-mary-lee for /birm in alabama.
Added href /birm/quests/nightmare-at-3008-the-underground for /birm in alabama.
Added href /birm/quests/train-escape-sabotage for /birm in alabama.
Added href /birm/quests/train-escape-classified for /birm in 

In [125]:
state_city_room_urls

{'alabama': {'/auburn': ['/auburn/quests/auburn-escape-zones-the-cabin',
   '/auburn/quests/auburn-escape-zones-imprisoned',
   '/auburn/quests/auburn-escape-zones-the-puzzler-vs-superheroes',
   '/auburn/quests/auburn-escape-zones-vault',
   '/auburn/quests/auburn-escape-zones-black-beards-brig'],
  '/birm': ['/birm/quests/Locked-In-THE-LABORATORY',
   '/birm/quests/locked-in-birm-the-classroom',
   '/birm/quests/Locked-In-THE-WAREHOUSE',
   '/birm/quests/nightmare-at-3008-3008-house',
   '/birm/quests/nightmare-at-3008-camp-mary-lee',
   '/birm/quests/nightmare-at-3008-the-underground',
   '/birm/quests/train-escape-sabotage',
   '/birm/quests/train-escape-classified',
   '/birm/quests/Locked-In-THE-MUSEUM'],
  '/daleville': ['/daleville/quests/brain-busters-escape-rooms-the-grove',
   '/daleville/quests/brain-busters-escape-rooms-booty-battle',
   '/daleville/quests/brain-busters-escape-rooms-out-of-order',
   '/daleville/quests/brain-busters-escape-rooms-wizard-quest',
   '/dalevil