# Import libraries

In [180]:
# uncomment to install libraries
# ! pip install numpy pandas matplotlib seaborn requests selenium bs4
# ! pip install jupyter_contrib_nbextensions && jupyter contrib nbextension install 

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import requests
import selenium
from bs4 import BeautifulSoup
import time

# Test web scraping escape room reviews

## World of Escapes map page URL

In [181]:
# site root (also the home page); scraped hrefs are appended to this
home_page_url = "https://worldofescapes.com"

# map page, addressed relative to the site root
map_page_url = home_page_url + "/map"

## Map page response

In [182]:
# Fetch the map page. The timeout keeps a stalled connection from hanging the
# kernel, and raise_for_status() stops the notebook here on an HTTP error
# instead of letting an error page be parsed as if it were the map.
map_page_response = requests.get(map_page_url, timeout=30)
map_page_response.raise_for_status()

## Map page soup

In [183]:
# parse the map page's HTML text into a searchable soup
map_page_soup = BeautifulSoup(markup=map_page_response.text, features="html.parser")

## Map states and city href and URLs

In [184]:
# dictionary containers mapping each state name to its city hrefs and full urls
state_city_href = {}
state_city_urls = {}

# class attribute carried by each state's <div> on the map page
state_item_class = "col-lg-3 col-md-4 col-sm-6 col-xs-6 state-item"

# loop through states and collect the href of every city listed under them
for state_item_i in map_page_soup.find_all("div", {"class": state_item_class}):
    # state str (stripped and lower-cased heading text)
    state_i = state_item_i.find("h3").get_text().strip().lower()

    # hrefs of this state's cities; list items without a usable link are
    # skipped so that a single malformed <li> cannot abort the whole loop
    city_href_i = [
        city_item_j.a["href"]
        for city_item_j in state_item_i.find_all("li", {"class": "city-item"})
        if city_item_j.a is not None and city_item_j.a.has_attr("href")
    ]

    # hrefs and urls are parallel lists (same order, same length); later cells
    # rely on that when they look up a city's href by position
    state_city_href[state_i] = city_href_i
    state_city_urls[state_i] = [home_page_url + href for href in city_href_i]

# See URLs (which incorporate href)
state_city_urls

{'alabama': ['https://worldofescapes.com/auburn',
  'https://worldofescapes.com/birm',
  'https://worldofescapes.com/daleville',
  'https://worldofescapes.com/dothan',
  'https://worldofescapes.com/florence-al',
  'https://worldofescapes.com/gadsden',
  'https://worldofescapes.com/henagar',
  'https://worldofescapes.com/huntsville',
  'https://worldofescapes.com/mobile',
  'https://worldofescapes.com/montgomery',
  'https://worldofescapes.com/gulf-shores',
  'https://worldofescapes.com/oxford-al',
  'https://worldofescapes.com/tuscaloosa'],
 'alaska': ['https://worldofescapes.com/anchorage',
  'https://worldofescapes.com/fairbanks',
  'https://worldofescapes.com/juneau',
  'https://worldofescapes.com/skagway'],
 'arizona': ['https://worldofescapes.com/cottonwood',
  'https://worldofescapes.com/flagstaff',
  'https://worldofescapes.com/lake-havasu-city',
  'https://worldofescapes.com/phoenix',
  'https://worldofescapes.com/prescott',
  'https://worldofescapes.com/tucson',
  'https://wor

## Map states and city page responses and soups

In [185]:
# containers for each state's city page responses and soups
state_city_responses = {}
state_city_soups = {}

# loop through responses and soups
# just alabama and alaska for now
for state_key in ("alabama", "alaska"):
    # add state key to dictionary containers
    state_city_responses[state_key] = []
    state_city_soups[state_key] = []

    for url in state_city_urls[state_key]:
        # get response; the timeout stops a stalled connection from hanging the loop
        response_j = requests.get(url, timeout=30)

        # make soup
        soup_j = BeautifulSoup(response_j.text, "html.parser")

        # append response and soup to state
        state_city_responses[state_key].append(response_j)
        state_city_soups[state_key].append(soup_j)

        # random whole number of seconds from 1 to 4; drawn as a plain int
        # (not a 1-element array) so time.sleep() receives a real scalar and
        # the summary prints "4" rather than "[4]"
        sleep_time_j = int(np.random.randint(low = 1, high = 5))

        # print loop summary before pausing, so the message is accurate when shown
        print("Made soup for {} in {}. Sleeping for {} seconds...".format(url, state_key, sleep_time_j))

        # be polite to the server between requests
        time.sleep(sleep_time_j)

Made soup for https://worldofescapes.com/auburn in alabama. Sleeping for [4] seconds...
Made soup for https://worldofescapes.com/birm in alabama. Sleeping for [2] seconds...
Made soup for https://worldofescapes.com/daleville in alabama. Sleeping for [1] seconds...
Made soup for https://worldofescapes.com/dothan in alabama. Sleeping for [4] seconds...
Made soup for https://worldofescapes.com/florence-al in alabama. Sleeping for [1] seconds...
Made soup for https://worldofescapes.com/gadsden in alabama. Sleeping for [4] seconds...
Made soup for https://worldofescapes.com/henagar in alabama. Sleeping for [4] seconds...
Made soup for https://worldofescapes.com/huntsville in alabama. Sleeping for [4] seconds...
Made soup for https://worldofescapes.com/mobile in alabama. Sleeping for [4] seconds...
Made soup for https://worldofescapes.com/montgomery in alabama. Sleeping for [1] seconds...
Made soup for https://worldofescapes.com/gulf-shores in alabama. Sleeping for [2] seconds...
Made soup f

## Escape room href and URLs

In [187]:
# containers for escape room hrefs and urls, keyed by state and then by city href
state_city_room_href = {}
state_city_room_urls = {}

# walk every state and each of its city soups
# just alabama and alaska for now (state_city_soups only contains soups from alabama and alaska from previous code chunk)
for state_key, city_soup in state_city_soups.items():
    # per-state dictionaries, shared with the two containers above
    room_href_by_city = state_city_room_href[state_key] = {}
    room_urls_by_city = state_city_room_urls[state_key] = {}

    for j, soup in enumerate(city_soup):
        # the j-th soup belongs to the j-th city href of the same state
        city_href_j = state_city_href[state_key][j]

        # nested dictionaries, e.g., {"alabama": {"/auburn": [NEW escape room href go here, ...]}}
        room_href_by_city[city_href_j] = []
        room_urls_by_city[city_href_j] = []

        # block of the city page that holds the escape room tiles
        other_quests = soup.find("div", {"data-content": "other-quests"})
        if other_quests is None:
            print("None")
            continue

        # loop through a tags with escape room href
        for a in other_quests.find_all("a", {"class": "item-hover quest_tile_hover_link"}):
            room_url = home_page_url + a["href"]

            # add href and url
            room_href_by_city[city_href_j].append(a["href"])
            room_urls_by_city[city_href_j].append(room_url)

            # print loop summary
            print("Added {} for {}, {}.".format(room_url, city_href_j, state_key))

Added https://worldofescapes.com/auburn/quests/auburn-escape-zones-imprisoned for /auburn in alabama.
Added https://worldofescapes.com/auburn/quests/auburn-escape-zones-the-cabin for /auburn in alabama.
Added https://worldofescapes.com/auburn/quests/auburn-escape-zones-vault for /auburn in alabama.
Added https://worldofescapes.com/auburn/quests/auburn-escape-zones-the-puzzler-vs-superheroes for /auburn in alabama.
Added https://worldofescapes.com/auburn/quests/auburn-escape-zones-black-beards-brig for /auburn in alabama.
Added https://worldofescapes.com/birm/quests/Locked-In-THE-LABORATORY for /birm in alabama.
Added https://worldofescapes.com/birm/quests/locked-in-birm-the-classroom for /birm in alabama.
Added https://worldofescapes.com/birm/quests/Locked-In-THE-WAREHOUSE for /birm in alabama.
Added https://worldofescapes.com/birm/quests/nightmare-at-3008-3008-house for /birm in alabama.
Added https://worldofescapes.com/birm/quests/nightmare-at-3008-camp-mary-lee for /birm in alabama.

## Escape room page responses and soups

In [188]:
# containers for escape room page responses and soups
state_city_room_responses = {}
state_city_room_soups = {}

# loop through city href nested within states
# just alabama and alaska for now (state_city_soups only contains soups from alabama and alaska from previous code chunk)
for state_key, city_href in state_city_room_urls.items():
    # add state key to dictionary containers
    state_city_room_responses[state_key] = {}
    state_city_room_soups[state_key] = {}

    # loop through urls nested within city href
    for href, room_url in city_href.items():
        # page response and soup containers for city hrefs nested in states
        # nested dictionaries, e.g., {"alabama": {"/auburn": [NEW escape room page response or soup go here, ...]}}
        state_city_room_responses[state_key][href] = []
        state_city_room_soups[state_key][href] = []

        # request each room page and make a soup from it
        for url in room_url:
            # room page response; the timeout stops a stalled connection from hanging the loop
            room_response_r = requests.get(url, timeout=30)

            # soup
            room_soup_r = BeautifulSoup(room_response_r.text, "html.parser")

            # add page response and soup to each container
            state_city_room_responses[state_key][href].append(room_response_r)
            state_city_room_soups[state_key][href].append(room_soup_r)

            # random whole number of seconds from 1 to 4, as a plain int so
            # time.sleep() receives a real scalar instead of a 1-element array
            sleep_time_r = int(np.random.randint(low = 1, high = 5))

            # print loop summary with THIS iteration's sleep time (the original
            # printed sleep_time_j, a stale value left over from an earlier cell)
            print("Made soup for {0} in {1}, {2}. Sleeping for {3} seconds...".format(url, href, state_key, sleep_time_r))

            # be polite to the server between requests
            time.sleep(sleep_time_r)
        

Made soup for https://worldofescapes.com/auburn/quests/auburn-escape-zones-imprisoned in /auburn, alabama. Sleeping for [1] seconds...
Made soup for https://worldofescapes.com/auburn/quests/auburn-escape-zones-the-cabin in /auburn, alabama. Sleeping for [1] seconds...
Made soup for https://worldofescapes.com/auburn/quests/auburn-escape-zones-vault in /auburn, alabama. Sleeping for [1] seconds...
Made soup for https://worldofescapes.com/auburn/quests/auburn-escape-zones-the-puzzler-vs-superheroes in /auburn, alabama. Sleeping for [1] seconds...
Made soup for https://worldofescapes.com/auburn/quests/auburn-escape-zones-black-beards-brig in /auburn, alabama. Sleeping for [1] seconds...
Made soup for https://worldofescapes.com/birm/quests/Locked-In-THE-LABORATORY in /birm, alabama. Sleeping for [1] seconds...
Made soup for https://worldofescapes.com/birm/quests/locked-in-birm-the-classroom in /birm, alabama. Sleeping for [1] seconds...
Made soup for https://worldofescapes.com/birm/quests/L

Made soup for https://worldofescapes.com/montgomery/quests/breakout-montgomery-operation-casino in /montgomery, alabama. Sleeping for [1] seconds...
Made soup for https://worldofescapes.com/montgomery/quests/breakout-games-montgomery-mystery-mansion in /montgomery, alabama. Sleeping for [1] seconds...
Made soup for https://worldofescapes.com/gulf-shores/quests/get-a-clue-crime-scene in /gulf-shores, alabama. Sleeping for [1] seconds...
Made soup for https://worldofescapes.com/gulf-shores/quests/Gulf-Coast-Escape-Room-THE-DOLL-ROOM in /gulf-shores, alabama. Sleeping for [1] seconds...
Made soup for https://worldofescapes.com/gulf-shores/quests/get-a-clue-mardi-gras in /gulf-shores, alabama. Sleeping for [1] seconds...
Made soup for https://worldofescapes.com/gulf-shores/quests/xit-escape-room-insane-asylum in /gulf-shores, alabama. Sleeping for [1] seconds...
Made soup for https://worldofescapes.com/gulf-shores/quests/Gulf-Coast-Escape-Room-ZOMBIE-LAB in /gulf-shores, alabama. Sleeping 

### title

In [227]:
# <title> text of the first room soup stored under alabama -> /auburn
(
    state_city_room_soups["alabama"]["/auburn"][0]
    .title
    .get_text()
)

'Escape room "Imprisoned" by Auburn Escape Zones in Auburn'

### description

In [229]:
# text of the <div class="description"> block on the same room page
(
    state_city_room_soups["alabama"]["/auburn"][0]
    .find("div", class_="description")
    .get_text()
)

"Description:Your friend is being framed for a crime he didn't commit! He's held in a corrupt prison, and the warden has bribed the other inmates to hide the evidence needed to exonerate your friend. Determined to make things right, you've managed to sneak into the warden's office. With just an hour left before the death row, can you find the necessary proof to clear your friend's name?"

### address

In [None]:
# whitespace-trimmed text of the <div data-content="address"> block
(
    state_city_room_soups["alabama"]["/auburn"][0]
    .find("div", attrs={"data-content": "address"})
    .get_text()
    .strip()
)

### tags

In [None]:
# tags

In [221]:
# print every tag of the room: div.tags > ul.tags-2 > li
room_soup_0 = state_city_room_soups["alabama"]["/auburn"][0]
tag_list = room_soup_0.find("div", class_="tags").find("ul", class_="tags-2")
for tag_item in tag_list.find_all("li"):
    print(tag_item.get_text())

Prison Break
Public Ticketing
Up to eight players


In [217]:
# raw <li> elements of the room's parameter list (ul.params-ul)
(
    state_city_room_soups["alabama"]["/auburn"][0]
    .find("ul", class_="params-ul")
    .find_all("li")
)

[<li class="cell" data-content="participants-count"><span class="th">Number of players</span><span class="td">2 – 8</span></li>,
 <li class="cell" data-content="time"><span class="th">Time limit</span><span class="td">60 min.</span></li>,
 <li class="cell" data-content="difficulty"><span class="th">Difficulty level (1-4)</span><span class="td"><span data-original-title="Average" data-toggle="tooltip"><i class="fa fa-key"></i><i class="fa fa-key"></i></span></span></li>,
 <li class="cell"><span class="th">Fear level</span><span class="td scary"><span class="in-words">Not scary</span></span></li>,
 <li class="cell"><span class="th">Age requirement</span><span class="td" data-original-title="Children younger than 13 must be accompanied by an adult." data-toggle="tooltip">10+<sup>*</sup></span></li>,
 <li class="cell"><span class="th">Success rate:</span><span class="td" data-original-title="Success rate:" data-toggle="tooltip"> 62 %</span></li>]

### room attributes

In [222]:
# print the value cell (span.td) of every entry in the room's parameter list
params_list = state_city_room_soups["alabama"]["/auburn"][0].find("ul", class_="params-ul")
for param_item in params_list.find_all("li"):
    value_span = param_item.find("span", class_="td")
    print(value_span.get_text())

2 – 8
60 min.

Not scary
10+*
 62 %


### reviews

In [248]:
# print the text of every review paragraph (p.content) inside ul.masonry-list
reviews_list = state_city_room_soups["alabama"]["/auburn"][0].find("ul", class_="masonry-list")
for review_p in reviews_list.find_all("p", class_="content"):
    print(review_p.get_text())

This was so fun! We took our kids and we did the "Imprisoned" room. It was a surprise for all 5 of our kids, ranging from 10-15. We really had to work as a team and managed to escape with just a minute to spare! There were smiles the rest of the day and continual praise for the experience!The staff could not be more amazing! I already want to go back!
My husband and I just did the imprisoned challenge, and we will definitely be repeat customers. The staff was cheerful and helpful, and the clues they gave were great as well. We were floored by the details that went into our room, and we had a blast trying to keep track of all the clues. The puzzles were the right amount of challenging, and they throw in some fantastic twists that made it even better. We hadn't even left the parking lot before we started planning our next trip back. I can't recommend it enough!
We had a group of seven, which included five 13-year-olds, solving the "Imprisoned" room, and it was so great! The staff was ver

## Escape room/game reviews

In [243]:
# number of review paragraphs found on the room page
len(
    state_city_room_soups["alabama"]["/auburn"][0]
    .find("ul", class_="masonry-list")
    .find_all("p", class_="content")
)

5

In [246]:
# container for reviews nested within room nested within city nested within state
state_city_room_reviews = {}

# loop through city href nested within states
# just alabama and alaska for now (state_city_soups only contains soups from alabama and alaska from previous code chunk)
for state_key, city_room_soups in state_city_room_soups.items():
    # add state key to dictionary container
    state_city_room_reviews[state_key] = {}

    # loop through the list of room soups stored under each city href
    for href, room_soups in city_room_soups.items():
        # room container for city hrefs nested in states
        state_city_room_reviews[state_key][href] = {}

        # the k-th soup belongs to the k-th room href of the same city
        for k, room in enumerate(room_soups):
            room_href_k = state_city_room_href[state_key][href][k]

            # review list for this room
            # e.g. {"alabama": {"/auburn": {"/auburn/quests/auburn-escape-zones-imprisoned": [review_1, ...]}}}
            room_reviews_k = state_city_room_reviews[state_key][href][room_href_k] = []

            # review html (looked up once, then reused)
            review_list = room.find("ul", {"class": "masonry-list"})
            if review_list is not None:
                review_content = review_list.find_all("p", {"class": "content"})

                # loop through review content
                for r, review in enumerate(review_content, start = 1):
                    room_reviews_k.append(review.get_text())

                    # print loop summary (no stray sleep_time_j argument: nothing
                    # sleeps in this cell, and the name may not exist on a fresh kernel)
                    print("Scraped {0} out of {1} reviews of {2} in {3}, {4}.".format(r, len(review_content), room_href_k, href, state_key))
            else:
                # NOTE(review): the string "None" is stored as a placeholder review;
                # anything that counts or analyses reviews must filter it out
                room_reviews_k.append("None")
                print("No reviews for {0} in {1}, {2}.".format(room_href_k, href, state_key))

Scraped 1 out of 5 reviews of /auburn/quests/auburn-escape-zones-imprisoned in /auburn, alabama.
Scraped 2 out of 5 reviews of /auburn/quests/auburn-escape-zones-imprisoned in /auburn, alabama.
Scraped 3 out of 5 reviews of /auburn/quests/auburn-escape-zones-imprisoned in /auburn, alabama.
Scraped 4 out of 5 reviews of /auburn/quests/auburn-escape-zones-imprisoned in /auburn, alabama.
Scraped 5 out of 5 reviews of /auburn/quests/auburn-escape-zones-imprisoned in /auburn, alabama.
Scraped 1 out of 5 reviews of /auburn/quests/auburn-escape-zones-the-cabin in /auburn, alabama.
Scraped 2 out of 5 reviews of /auburn/quests/auburn-escape-zones-the-cabin in /auburn, alabama.
Scraped 3 out of 5 reviews of /auburn/quests/auburn-escape-zones-the-cabin in /auburn, alabama.
Scraped 4 out of 5 reviews of /auburn/quests/auburn-escape-zones-the-cabin in /auburn, alabama.
Scraped 5 out of 5 reviews of /auburn/quests/auburn-escape-zones-the-cabin in /auburn, alabama.
Scraped 1 out of 3 reviews of /aubu

Scraped 1 out of 1 reviews of /henagar/quests/escape-room-adventures-zombie-escape in /henagar, alabama.
No reviews for /henagar/quests/escape-room-adventures-dash-for-the-cash in /henagar, alabama.
Scraped 1 out of 5 reviews of /huntsville/quests/huntsville-escape-rooms-the-hexadron in /huntsville, alabama.
Scraped 2 out of 5 reviews of /huntsville/quests/huntsville-escape-rooms-the-hexadron in /huntsville, alabama.
Scraped 3 out of 5 reviews of /huntsville/quests/huntsville-escape-rooms-the-hexadron in /huntsville, alabama.
Scraped 4 out of 5 reviews of /huntsville/quests/huntsville-escape-rooms-the-hexadron in /huntsville, alabama.
Scraped 5 out of 5 reviews of /huntsville/quests/huntsville-escape-rooms-the-hexadron in /huntsville, alabama.
Scraped 1 out of 5 reviews of /huntsville/quests/huntsville-escape-rooms-the-queen-pin in /huntsville, alabama.
Scraped 2 out of 5 reviews of /huntsville/quests/huntsville-escape-rooms-the-queen-pin in /huntsville, alabama.
Scraped 3 out of 5 rev

Scraped 1 out of 3 reviews of /gulf-shores/quests/escape-house-waterville-the-room-of-the-great-mysto in /gulf-shores, alabama.
Scraped 2 out of 3 reviews of /gulf-shores/quests/escape-house-waterville-the-room-of-the-great-mysto in /gulf-shores, alabama.
Scraped 3 out of 3 reviews of /gulf-shores/quests/escape-house-waterville-the-room-of-the-great-mysto in /gulf-shores, alabama.
Scraped 1 out of 2 reviews of /gulf-shores/quests/xit-escape-room-csi-gulf-shores in /gulf-shores, alabama.
Scraped 2 out of 2 reviews of /gulf-shores/quests/xit-escape-room-csi-gulf-shores in /gulf-shores, alabama.
Scraped 1 out of 2 reviews of /gulf-shores/quests/gulf-coast-escape-room-bates-motel in /gulf-shores, alabama.
Scraped 2 out of 2 reviews of /gulf-shores/quests/gulf-coast-escape-room-bates-motel in /gulf-shores, alabama.
Scraped 1 out of 1 reviews of /gulf-shores/quests/xit-escape-room-a-tale-of-two-forts in /gulf-shores, alabama.
Scraped 1 out of 1 reviews of /gulf-shores/quests/get-a-clue-paran

In [242]:
# display the nested reviews dictionary: state -> city href -> room href -> list of review texts
state_city_room_reviews

{'alabama': {'/auburn': {'/auburn/quests/auburn-escape-zones-imprisoned': ['This was so fun! We took our kids and we did the "Imprisoned" room. It was a surprise for all 5 of our kids, ranging from 10-15. We really had to work as a team and managed to escape with just a minute to spare! There were smiles the rest of the day and continual praise for the experience!The staff could not be more amazing! I already want to go back!',
    "My husband and I just did the imprisoned challenge, and we will definitely be repeat customers. The staff was cheerful and helpful, and the clues they gave were great as well. We were floored by the details that went into our room, and we had a blast trying to keep track of all the clues. The puzzles were the right amount of challenging, and they throw in some fantastic twists that made it even better. We hadn't even left the parking lot before we started planning our next trip back. I can't recommend it enough!",
    'We had a group of seven, which include