# Import libraries

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import requests
from bs4 import BeautifulSoup
import time

# Test webscraping haunted house reviews

## Haunted World website URL

In [2]:
homepage_url = "https://www.hauntworld.com/"

## Home response

In [3]:
homepage_response = requests.get(homepage_url)

## Home soup

In [4]:
homepage_soup = BeautifulSoup(homepage_response.text, "html.parser")

### State href

In [5]:
state_href = []
for a in homepage_soup.find("div", class_ = "col-md-12 col-sm-12 col-xs-12 x-min-padding text-center").find_all("a"):
    state_href.append(a["href"])

# See them
state_href

['alabama-haunted-houses',
 'alaska-haunted-houses',
 'arizona-haunted-houses',
 'arkansas-haunted-houses',
 'california-haunted-houses',
 'colorado-haunted-houses',
 'connecticut-haunted-houses',
 'delaware-haunted-houses',
 'district-of-columbia-haunted-houses',
 'florida-haunted-houses',
 'georgia-haunted-houses',
 'hawaii-haunted-houses',
 'idaho-haunted-houses',
 'illinois-haunted-houses',
 'indiana-haunted-houses',
 'iowa-haunted-houses',
 'kansas-haunted-houses',
 'kentucky-haunted-houses',
 'louisiana-haunted-houses',
 'maine-haunted-houses',
 'maryland-haunted-houses',
 'massachusetts-haunted-houses',
 'michigan-haunted-houses',
 'minnesota-haunted-houses',
 'mississippi-haunted-houses',
 'missouri-haunted-houses',
 'montana-haunted-houses',
 'nebraska-haunted-houses',
 'nevada-haunted-houses',
 'new-hampshire-haunted-houses',
 'new-jersey-haunted-houses',
 'new-mexico-haunted-houses',
 'new-york-haunted-houses',
 'north-carolina-haunted-houses',
 'north-dakota-haunted-houses'

## Haunted World State URLs

In [6]:
state_page_urls = [homepage_url + href for href in state_href]

# See them
state_page_urls

['https://www.hauntworld.com/alabama-haunted-houses',
 'https://www.hauntworld.com/alaska-haunted-houses',
 'https://www.hauntworld.com/arizona-haunted-houses',
 'https://www.hauntworld.com/arkansas-haunted-houses',
 'https://www.hauntworld.com/california-haunted-houses',
 'https://www.hauntworld.com/colorado-haunted-houses',
 'https://www.hauntworld.com/connecticut-haunted-houses',
 'https://www.hauntworld.com/delaware-haunted-houses',
 'https://www.hauntworld.com/district-of-columbia-haunted-houses',
 'https://www.hauntworld.com/florida-haunted-houses',
 'https://www.hauntworld.com/georgia-haunted-houses',
 'https://www.hauntworld.com/hawaii-haunted-houses',
 'https://www.hauntworld.com/idaho-haunted-houses',
 'https://www.hauntworld.com/illinois-haunted-houses',
 'https://www.hauntworld.com/indiana-haunted-houses',
 'https://www.hauntworld.com/iowa-haunted-houses',
 'https://www.hauntworld.com/kansas-haunted-houses',
 'https://www.hauntworld.com/kentucky-haunted-houses',
 'https://w

## State page responses and soups

In [7]:
# containers for responses and soups
state_page_responses = []
state_page_soups = []

# loop through urls to make soup
for i, url_i in enumerate(state_page_urls):
    # get url response
    response_i = requests.get(url_i)
    
    # make soup
    soup_i = BeautifulSoup(response_i.text, "html.parser")
    
    # add response and soup to their containers
    state_page_responses.append(response_i)
    state_page_soups.append(soup_i)
    
    # sleep for 1-5 seconds
    sleep_time_i = np.random.randint(low = 1, high = 5, size = 1)
    time.sleep(sleep_time_i)
    
    # Print loop message
    print("Made soup {}. Sleeping for {} seconds...".format(i, sleep_time_i))

Made soup 0. Sleeping for [3] seconds...
Made soup 1. Sleeping for [4] seconds...
Made soup 2. Sleeping for [4] seconds...
Made soup 3. Sleeping for [3] seconds...
Made soup 4. Sleeping for [4] seconds...
Made soup 5. Sleeping for [1] seconds...
Made soup 6. Sleeping for [1] seconds...
Made soup 7. Sleeping for [2] seconds...
Made soup 8. Sleeping for [3] seconds...
Made soup 9. Sleeping for [3] seconds...
Made soup 10. Sleeping for [1] seconds...
Made soup 11. Sleeping for [2] seconds...
Made soup 12. Sleeping for [1] seconds...
Made soup 13. Sleeping for [4] seconds...
Made soup 14. Sleeping for [3] seconds...
Made soup 15. Sleeping for [2] seconds...
Made soup 16. Sleeping for [2] seconds...
Made soup 17. Sleeping for [3] seconds...
Made soup 18. Sleeping for [3] seconds...
Made soup 19. Sleeping for [1] seconds...
Made soup 20. Sleeping for [4] seconds...
Made soup 21. Sleeping for [3] seconds...
Made soup 22. Sleeping for [4] seconds...
Made soup 23. Sleeping for [1] seconds...
Ma

## Haunt page href and titles

In [57]:
# containers for haunt href and titles
haunt_href = []
haunt_titles = []

for soup_i in state_page_soups[0:5]:
    try:
        for div in soup_i.find("div", {"class": "row mt-4"}).find_all("div", {"class": "panel-heading text-bold responsive-text-center"}):
            # href for making url to haunt specific page
            href_i = (div
                      .find("div", {"class": "col-md-8 p-0"})
                      .find("a")["href"])

            # haunt title
            haunt_title_i = (div
                             .find("div", {"class": "col-md-8 p-0"})
                             .find("h4", {"class": "p-0 m-0"})
                             .find("u")
                             .get_text())

            # append href and title to their containers
            haunt_href.append(href_i)
            haunt_titles.append(haunt_title_i)

            # sleep for 1-5 seconds
            sleep_time_i = np.random.randint(low = 1, high = 5, size = 1)
            time.sleep(sleep_time_i)
            
            # print titles
            print(haunt_title_i + " (" + homepage_url + href_i + ")")
    except:
        # append href and title to their containers
        haunt_href.append("None")
        haunt_titles.append("None")
        
        # print titles
        print("None")

Hollis Haunted Chicken House in Heflin, Alabama (https://www.hauntworld.com//haunted-house-in-heflin-Alabama-hollis-haunted-chicken)
NETHERWORLD Haunted House in Stone Mountain, Georgia (https://www.hauntworld.com//haunted-house-in-norcross-Georgia-netherworld-haunted-house)
None
Mount Mayhem Haunted House in PHOENIX, Arizona (https://www.hauntworld.com//haunted-house-in-phoenix-arizona-mount-mayhem-haunted-house)
The Haunted Hotel of Arkansas in Little Rock, Arkansas (https://www.hauntworld.com//haunted-house-in-little-rock-Arkansas-the-haunted-hotel)
The Darkness in Saint Louis, Missouri (https://www.hauntworld.com//haunted-house-in-saint-louis-Missouri-the-darkness)
Creepyworld Screampark in Saint Louis/Fenton, Missouri (https://www.hauntworld.com//haunted-house-in-saint-louis-fenton-missouri-creepyworld-screampark)
Lemp Haunted House in St. Louis, Missouri (https://www.hauntworld.com//haunted-house-in-st-louis-Missouri-lemp-brewery-haunted)
Winchester Mystery House - Unhinged in Sa

## Haunt review pages

In [120]:
haunt_page_urls = [homepage_url + href.strip("/") for href in haunt_href]

# See them
haunt_page_urls

['https://www.hauntworld.com/haunted-house-in-heflin-Alabama-hollis-haunted-chicken',
 'https://www.hauntworld.com/haunted-house-in-norcross-Georgia-netherworld-haunted-house',
 'https://www.hauntworld.com/None',
 'https://www.hauntworld.com/haunted-house-in-phoenix-arizona-mount-mayhem-haunted-house',
 'https://www.hauntworld.com/haunted-house-in-little-rock-Arkansas-the-haunted-hotel',
 'https://www.hauntworld.com/haunted-house-in-saint-louis-Missouri-the-darkness',
 'https://www.hauntworld.com/haunted-house-in-saint-louis-fenton-missouri-creepyworld-screampark',
 'https://www.hauntworld.com/haunted-house-in-st-louis-Missouri-lemp-brewery-haunted',
 'https://www.hauntworld.com/winchestermysteryhouse']

## Haunt addresses

In [59]:
# containers for haunt addresses
haunt_addresses = []

for soup_i in state_page_soups[0:5]:
    try:
        for child in soup_i.select("div.font-14:nth-child(1)"):
            # haunt address
            haunt_addresse_i = child.get_text()
            
            # add to address container
            haunt_addresses.append(haunt_addresse_i)
            
            # print address
            print(haunt_addresse_i)
    except:
        haunt_addresses.append("None")
        print("None")

Address: 7522 AL-1, Heflin, Alabama 36264, US.
Address: 2076 West Park Place Blvd., Stone Mountain, Georgia 30087, USA.
Address: 1740 E Purdue Ave, PHOENIX, Arizona 85020, USA.
Address: 4601 So University, Little Rock, Arkansas 72204, USA.
Address: 1525 South 8th Street, Saint Louis, Missouri 63104, USA.
Address: 1400 S Old Highway 141, Saint Louis/Fenton, Missouri 63026, USA.
Address: 3500 Lemp Avenue , St. Louis, Missouri 63118, united states.
Address: 525 S. Winchester Blvd, San Jose, California 95128, United States.


## Haunt review page attributes

In [77]:
# containers for haunt web reviews, youtube videos, photos, and profile views
haunt_web_reviews = []
haunt_youtube_videos = []
haunt_photos = []
haunt_profile_views = []

for soup_i in state_page_soups[0:5]:
    try:
        for ul in soup_i.find_all("ul", {"class": "list-unstyled skullul"}):
            # get all "skull attributes" (containers above name skull attributes)
            skull_attributes = [u.get_text() for u in ul.find_all("u")]

            # extract attributes in order they appear on page
            haunt_web_reviews_i = skull_attributes[0]
            haunt_youtube_videos_i = skull_attributes[1]
            haunt_photos_i = skull_attributes[2]
            haunt_profile_views_i = skull_attributes[3]
            
            # add to attributes to their containers
            haunt_web_reviews.append(haunt_web_reviews_i)
            haunt_youtube_videos.append(haunt_youtube_videos_i)
            haunt_photos.append(haunt_photos_i)
            haunt_profile_views.append(haunt_profile_views_i)
            
            # print address
            print(haunt_web_reviews_i)
            print(haunt_youtube_videos_i)
            print(haunt_photos_i)
            print(haunt_profile_views_i)
    except:
        haunt_web_reviews.append("None")
        haunt_youtube_videos.append("None")
        haunt_photos.append("None")
        haunt_profile_views.append("None")
        print("None\n" * 4)

10 Web reviews
1 Youtube Videos
9 Photos
62679 Profile Views
12 Web reviews
28 Youtube Videos
37 Photos
226086 Profile Views
5 Web reviews
3 Youtube Videos
36 Photos
26967 Profile Views
17 Web reviews
0 Youtube Videos
0 Photos
34772 Profile Views
9 Web reviews
51 Youtube Videos
230 Photos
270510 Profile Views
4 Web reviews
25 Youtube Videos
202 Photos
236402 Profile Views
5 Web reviews
22 Youtube Videos
80 Photos
129797 Profile Views
0 Web reviews
1 Youtube Videos
2 Photos
1463 Profile Views


## Haunt page responses and soups

In [121]:
# containers for responses and soups
haunt_page_responses = []
haunt_page_soups = []

# loop through urls to make soup
for i, url_i in enumerate(haunt_page_urls):
    # get url response
    response_i = requests.get(url_i)
    
    # make soup
    soup_i = BeautifulSoup(response_i.text, "html.parser")
    
    # add response and soup to their containers
    haunt_page_responses.append(response_i)
    haunt_page_soups.append(soup_i)
    
    # sleep for 1-5 seconds
    sleep_time_i = np.random.randint(low = 1, high = 5, size = 1)
    time.sleep(sleep_time_i)
    
    # Print loop message
    print("Made soup {}. Sleeping for {} seconds...".format(i, sleep_time_i))

Made soup 0. Sleeping for [4] seconds...
Made soup 1. Sleeping for [4] seconds...
Made soup 2. Sleeping for [1] seconds...
Made soup 3. Sleeping for [2] seconds...
Made soup 4. Sleeping for [1] seconds...
Made soup 5. Sleeping for [2] seconds...
Made soup 6. Sleeping for [1] seconds...
Made soup 7. Sleeping for [2] seconds...
Made soup 8. Sleeping for [1] seconds...


## Haunt page review titles

In [122]:
# container for review titles
haunt_review_titles = []

for i, soup_i in enumerate(haunt_page_soups[0:5]):
    try:
        for j, div in enumerate(soup_i.find_all("div", {"class": "text-primary text-semibold col-md-6"})):
            # review title
            haunt_review_title_j = div.get_text()
            
            # add review title to its container
            haunt_review_titles.append(haunt_review_title_j)
            
            # sleep for 1-5 seconds
            sleep_time_i = np.random.randint(low = 1, high = 5, size = 1)
            time.sleep(sleep_time_i)
            
            # Print loop message
            print("Scraped review title {1} from page {2}. Sleeping for {3} seconds...".format(i, j, sleep_time_i))
    except:
        haunt_review_titles.append("None")
        print("None")

## Haunt page review dates

## Haunt page reviews

In [132]:
1 + 1

2

## Haunt page review skull scores