In [2]:
# Our jupyter/datascience-notebook Docker container comes with 
# BeautifulSoup4 and requests, both popular libraries!

import threading
from concurrent.futures import ThreadPoolExecutor

import pandas as pd
import requests
from bs4 import BeautifulSoup

## I chose a website that sells board games.

In [3]:
# Entry point for the scrape: the store's "all" collection page.
START_URL = 'https://boardgames.com/collections/all'

## Get the names of the board games
#### Normal scraping by defining:

In [4]:
%%time
def get_titles1(soup):
    """Return the game titles found on a parsed collection page.

    Args:
        soup: parsed page (for example a BeautifulSoup object) that
            provides ``find_all``.

    Returns:
        list of str: the ``.text`` of every ``<div>`` matched by the class
        value ``"h4 grid-view-item__title"``, in the order ``find_all``
        returns them.
    """
    # Each product title on the collection page lives in a div with this class value.
    title_divs = soup.find_all("div", {"class": "h4 grid-view-item__title"})
    return [title_div.text for title_div in title_divs]

def names(url, timeout=30):
    """Fetch a collection page and return the game titles listed on it.

    Args:
        url (str): address of the collection page to download.
        timeout (float): seconds to wait for the server before giving up.

    Returns:
        list of str: the titles extracted by ``get_titles1``.

    Raises:
        requests.HTTPError: if the server answers with a 4xx/5xx status.
        requests.Timeout: if no response arrives within ``timeout`` seconds.
    """
    # requests waits indefinitely unless a timeout is passed explicitly.
    r = requests.get(url, timeout=timeout)
    # Fail loudly on an error page instead of parsing it into an empty list.
    r.raise_for_status()
    soup = BeautifulSoup(r.text, "html.parser")
    return get_titles1(soup)

In [5]:
# Scrape the titles shown on the collection page.
game_names = names(START_URL)

In [6]:
# Show the scraped titles.
game_names

['#Alternative Facts',
 '13 Minutes: The Cuban Missile Crisis',
 'Abracada...What?',
 'Among The Stars: Expanding the Alliance',
 'Angry Birds Chutes & Ladders Editon',
 'Animal Act',
 'Apples to Apples',
 'Arkham Horror',
 'Awesome Kingdom: Tower of Hateskull',
 'Bachelorette Drink or Dare Dice',
 'Betrayal at House on the Hill',
 'Better Me',
 'Blood Rage',
 'Boss Monster',
 'Boss Monster 2: The Next Level',
 'Buffy The Vampire Slayer: The Board Game',
 'Candy Land',
 'Cartoon Network Crossover Crisis Deck-Building Game',
 "Cash 'N Guns",
 'Cat Tower',
 'Censored Game',
 'Chronology',
 'Chutes and Ladders',
 'Clank! A Deck Building Adventure',
 'Clubs Card Game',
 'Codenames Pictures',
 'Codenames: Duet',
 'Colt Express',
 'Council of Verona',
 'Council of Verona: Corruption Expansion',
 'Coup',
 'Dancing Eggs',
 'Djubi Game',
 'Dominion',
 "Don't Be A Loser",
 "Don't Be A Loser Adult Expansion",
 'Dr. Beaker',
 'Dr. Eureka Speed Logic Game',
 'Dr. Microbe',
 'Drunk Quest',
 'Escape:

### I got the titles!! :) 

# Next, I visit the separate page of each product

In [7]:
# Download and parse the collection page; `soup` is used below to collect the product links.
# The timeout bounds the wait, and raise_for_status stops an error page from being parsed.
response = requests.get(START_URL, timeout=30)
response.raise_for_status()
soup = BeautifulSoup(response.text, 'html.parser')

In [8]:
##Create the list of URLS

# Anchor tags that sit under six nested <div> levels on the collection page.
test_url = soup.select('div > div > div > div > div > div > a')

# Prefix every href with the site root and keep only the first 50 addresses.
list_test_url = ["https://boardgames.com" + anchor['href'] for anchor in test_url][:50]

In [9]:
# Show the product-page addresses that will be scraped.
list_test_url

['https://boardgames.com/products/alternative-facts-game',
 'https://boardgames.com/products/13-minutes-game',
 'https://boardgames.com/products/abracada-what-game',
 'https://boardgames.com/products/among-stars-expanding-alliance-game',
 'https://boardgames.com/products/angry-birds-chutes-ladders-editon-game',
 'https://boardgames.com/products/animal-act-game',
 'https://boardgames.com/products/apples-to-apples-game',
 'https://boardgames.com/products/arkham-horror-game',
 'https://boardgames.com/products/awesome-kingdom-tower-of-hateskull-game',
 'https://boardgames.com/products/bachelorette-drink-or-dare-game',
 'https://boardgames.com/products/betrayal-of-house-on-the-hill-game',
 'https://boardgames.com/products/better-me-game',
 'https://boardgames.com/products/blood-rage-game',
 'https://boardgames.com/products/boss-monster-game',
 'https://boardgames.com/products/boss-monster-2-the-next-level-game',
 'https://boardgames.com/products/buffy-the-vampire-slayer-the-board-game',
 'h

In [10]:
##Create a soup item for one sample product page; the exploratory cells below probe `soup2` to find selectors

individual_url = 'https://boardgames.com/products/alternative-facts-game'
# Bound the wait and stop on an HTTP error status instead of parsing an error page.
response2 = requests.get(individual_url, timeout=30)
response2.raise_for_status()
soup2 = BeautifulSoup(response2.text, 'html.parser')

# Getting names again

In [18]:
# Probe on the sample page: the product name is the text of the <h1> with class "product-single__title".
name = soup2.find_all("h1",{"class":"product-single__title"})
name[0].text

'#Alternative Facts'

In [12]:
%%time

##Create a function for getting the product names

list_names = []

def get_names(url):
    """Download one product page and record its name.

    The name is the text of the first ``<h1>`` with class
    ``product-single__title``. It is appended to the module-level
    ``list_names`` and also returned, so a caller can collect results from
    the return value instead of the shared list.

    Args:
        url (str): address of the product page.

    Returns:
        str: the product name.

    Raises:
        requests.RequestException: on timeout, connection failure or an
            HTTP error status.
        IndexError: if the page has no matching ``<h1>``.
    """
    # Without a timeout a stalled connection would block the calling thread indefinitely.
    response0 = requests.get(url, timeout=30)
    # Do not scrape an error page.
    response0.raise_for_status()
    soup0 = BeautifulSoup(response0.text, 'html.parser')

    name = soup0.find_all("h1",{"class":"product-single__title"})[0].text
    list_names.append(name)
    return name

Wall time: 0 ns


In [13]:
%%time

# Start from an empty list so re-running this cell does not duplicate entries.
list_names = []

# get_names appends to list_names as each request finishes, so the list
# ends up in completion order rather than list_test_url order.
with ThreadPoolExecutor(50) as pool:
    # Executor.map only re-raises a worker's exception when its result is
    # read, so drain the iterator with list() to make failed pages visible.
    results0 = list(pool.map(get_names, list_test_url))

print(list_names)

['13 Minutes: The Cuban Missile Crisis', '#Alternative Facts', 'Arkham Horror', 'Animal Act', 'Abracada...What?', 'Better Me', 'Apples to Apples', 'Boss Monster', 'Awesome Kingdom: Tower of Hateskull', 'Bachelorette Drink or Dare Dice', 'Among The Stars: Expanding the Alliance', 'Candy Land', 'Cat Tower', 'Angry Birds Chutes & Ladders Editon', 'Betrayal at House on the Hill', "Don't Be A Loser", 'Council of Verona', 'Codenames: Duet', 'Colt Express', 'Blood Rage', 'Censored Game', 'Chronology', 'Boss Monster 2: The Next Level', 'Cartoon Network Crossover Crisis Deck-Building Game', 'Buffy The Vampire Slayer: The Board Game', 'Clubs Card Game', 'Djubi Game', 'Dr. Eureka Speed Logic Game', 'Clank! A Deck Building Adventure', "Cash 'N Guns", 'Council of Verona: Corruption Expansion', 'Codenames Pictures', 'Chutes and Ladders', 'Dr. Microbe', 'Dr. Beaker', 'Dancing Eggs', "Don't Be A Loser Adult Expansion", 'Coup', "Flick 'Em Up", 'Escape: Zombie City', 'Flying Kiwis', 'Dominion', 'Flip Tr

# Getting original Prices

In [14]:
##Find out how to get the original price for a single URL (Will transfer to a function)

# Probe on the sample page: the original price is the text of the element with id "ComparePrice-product-template".
original_price = soup2.select('#ComparePrice-product-template')
original_price[0].text

'$10.00'

In [15]:
%%time

##Create a function for getting the original prices

list_original_prices = []

def get_original_prices(url):
    """Download one product page and record its original price.

    The price is the text of the first element matching
    ``#ComparePrice-product-template``; an empty text is recorded as
    ``"No sale"``. The value is appended to the module-level
    ``list_original_prices`` and also returned.

    Args:
        url (str): address of the product page.

    Returns:
        str: the price text, or ``"No sale"`` when the element is empty.

    Raises:
        requests.RequestException: on timeout, connection failure or an
            HTTP error status.
        IndexError: if the page has no matching element.
    """
    # Without a timeout a stalled connection would block the calling thread indefinitely.
    newresponse = requests.get(url, timeout=30)
    # Do not scrape an error page.
    newresponse.raise_for_status()
    newsoup = BeautifulSoup(newresponse.text, 'html.parser')

    original_price = newsoup.select('#ComparePrice-product-template')[0].text
    # An empty element is stored under an explicit label instead of ''.
    if original_price == '':
        original_price = "No sale"
    list_original_prices.append(original_price)
    return original_price
    

Wall time: 0 ns


In [16]:
%%time

# Start from an empty list so re-running this cell does not duplicate entries.
list_original_prices = []

##Call the function to get the original prices on every URL with a for loop (will speed up later with parallelization)
# Serial baseline: one request per iteration, in list_test_url order.
for url in list_test_url:
    get_original_prices(url)
    
print(list_original_prices)

['$10.00', '$10.99', '$34.99', '$29.95', '$19.99', '$19.95', '$26.99', '$59.95', '$29.99', 'No sale', '$49.99', '$80.00', '$79.99', '$24.99', '$24.99', '$39.99', '$10.99', '$39.99', '$39.99', '$19.99', '$25.00', '$19.99', '$10.99', '$59.99', '$14.99', '$19.99', '$19.99', '$39.99', '$19.99', '$12.99', '$14.99', '$20.00', '$25.99', '$44.99', '$29.99', '$19.99', '$19.99', '$19.99', '$21.99', '$34.95', '$59.95', '$69.95', 'No sale', '$19.99', '$26.99', '$19.99', '$30.00', '$85.00', '$24.99', '$19.99']
Wall time: 37.4 s


In [17]:
%%time

# Start from an empty list so re-running this cell does not duplicate entries.
list_original_prices = []

# get_original_prices appends as each request finishes, so the list ends up
# in completion order rather than list_test_url order.
with ThreadPoolExecutor(50) as pool:
    # Executor.map only re-raises a worker's exception when its result is
    # read, so drain the iterator with list() to make failed pages visible.
    results1 = list(pool.map(get_original_prices, list_test_url))

print(list_original_prices)

['$10.00', 'No sale', '$80.00', '$34.99', '$29.99', '$49.99', '$59.95', '$26.99', '$19.95', '$20.00', '$19.99', '$10.99', '$19.99', '$19.99', '$24.99', '$29.95', '$24.99', '$79.99', '$10.99', '$39.99', '$59.95', '$34.95', '$25.00', '$25.99', '$10.99', '$39.99', '$14.99', '$44.99', '$29.99', '$19.99', '$19.99', '$39.99', '$19.99', '$12.99', '$14.99', '$59.99', '$19.99', '$85.00', '$69.95', '$24.99', '$26.99', '$21.99', '$39.99', '$19.99', '$19.99', 'No sale', '$19.99', '$30.00', '$19.99', '$19.99']
Wall time: 7.03 s


# Getting current price

In [19]:
##Find out how to get the current price for a single URL (Will transfer to a function)

# Probe on the sample page: the current price is the text of the element with id "ProductPrice-product-template".
current_price = soup2.select('#ProductPrice-product-template')
current_price[0].text

'$9.00'

In [20]:
%%time

##Create a function for getting the current prices

list_current_prices = []

def get_current_prices(url):
    """Download one product page and record its current price.

    The price is the text of the first element matching
    ``#ProductPrice-product-template``. It is appended to the module-level
    ``list_current_prices`` and also returned.

    Args:
        url (str): address of the product page.

    Returns:
        str: the price text.

    Raises:
        requests.RequestException: on timeout, connection failure or an
            HTTP error status.
        IndexError: if the page has no matching element.
    """
    # Without a timeout a stalled connection would block the calling thread indefinitely.
    response3 = requests.get(url, timeout=30)
    # Do not scrape an error page.
    response3.raise_for_status()
    soup3 = BeautifulSoup(response3.text, 'html.parser')

    current_price = soup3.select('#ProductPrice-product-template')[0].text
    list_current_prices.append(current_price)
    return current_price

Wall time: 0 ns


In [21]:
%%time

# Start from an empty list so re-running this cell does not duplicate entries.
list_current_prices = []

##Call the function to get the current prices on every URL with a for loop (will speed up later with parallelization)
# Serial baseline: one request per iteration, in list_test_url order.
for url in list_test_url:
    get_current_prices(url)
    
print(list_current_prices)

['$9.00', '$9.89', '$31.49', '$27.99', '$17.99', '$17.95', '$24.29', '$53.96', '$26.99', '$6.00', '$42.99', '$54.95', '$72.95', '$22.49', '$22.49', '$35.96', '$9.89', '$35.99', '$37.99', '$17.99', '$23.50', '$17.96', '$9.89', '$51.99', '$11.99', '$17.99', '$17.99', '$34.99', '$17.99', '$9.99', '$13.49', '$18.00', '$19.99', '$40.49', '$21.99', '$14.99', '$17.99', '$17.99', '$19.99', '$28.74', '$53.95', '$62.99', '$14.99', '$17.99', '$24.29', '$15.99', '$27.00', '$76.50', '$22.49', '$17.99']
Wall time: 29.6 s


In [22]:
%%time

# Start from an empty list so re-running this cell does not duplicate entries.
list_current_prices = []

# get_current_prices appends as each request finishes, so the list ends up
# in completion order rather than list_test_url order.
with ThreadPoolExecutor(50) as pool:
    # Executor.map only re-raises a worker's exception when its result is
    # read, so drain the iterator with list() to make failed pages visible.
    results3 = list(pool.map(get_current_prices, list_test_url))

print(list_current_prices)

['$9.89', '$17.95', '$27.99', '$9.00', '$26.99', '$31.49', '$24.29', '$17.99', '$72.95', '$53.96', '$23.50', '$17.99', '$42.99', '$54.95', '$37.99', '$6.00', '$35.96', '$22.49', '$17.96', '$9.89', '$22.49', '$18.00', '$9.89', '$35.99', '$40.49', '$17.99', '$9.99', '$19.99', '$21.99', '$17.99', '$17.99', '$17.99', '$51.99', '$11.99', '$13.49', '$14.99', '$34.99', '$28.74', '$19.99', '$53.95', '$62.99', '$17.99', '$27.00', '$22.49', '$17.99', '$15.99', '$17.99', '$14.99', '$24.29', '$76.50']
Wall time: 6.59 s


# Getting age

In [23]:
# Probe on the sample page: take the first <li> of the ul.prodFeature list inside the product section.
# On this page that item holds the age rating.
age = soup2.select('#ProductSection-product-template > div > div > div > ul.prodFeature > li')
age[0].text


' Ages 14+'

In [24]:
%%time

##Create a function for getting the player age

list_age = []

def get_age(url):
    """Download one product page and record its age rating.

    The value is the text of the first ``<li>`` of the ``ul.prodFeature``
    list inside ``#ProductSection-product-template``. It is appended to the
    module-level ``list_age`` and also returned.

    Args:
        url (str): address of the product page.

    Returns:
        str: the text of that list item, unstripped.

    Raises:
        requests.RequestException: on timeout, connection failure or an
            HTTP error status.
        IndexError: if the page has no matching ``<li>``.
    """
    # Without a timeout a stalled connection would block the calling thread indefinitely.
    response5 = requests.get(url, timeout=30)
    # Do not scrape an error page.
    response5.raise_for_status()
    soup5 = BeautifulSoup(response5.text, 'html.parser')

    age = soup5.select('#ProductSection-product-template > div > div > div > ul.prodFeature > li')[0].text
    list_age.append(age)
    return age

Wall time: 0 ns


In [25]:
%%time

# Start from an empty list so re-running this cell does not duplicate entries.
list_age = []

##Call the function to get the player age
# Serial baseline: one request per iteration, in list_test_url order.
for url in list_test_url:
    get_age(url)
    
print(list_age)

[' Ages 14+', ' Ages 10+', ' Ages 8+', ' Ages 12+', ' Ages 3+', ' Ages 4+', ' Ages 12+', ' Ages 12+', ' Ages 10+', ' Ages 21+', ' Ages 12+', ' Ages 12+', ' Ages 14+', ' Ages 13+', ' Ages 13+', ' 10 & up', ' Ages 3+', ' Ages 12+', ' Ages 10+', ' Ages 6+', ' Ages 18+', ' Ages 12+', ' Ages 3+', ' 12 & up', ' Ages 8+', ' Ages 14+', ' 11 & up', ' Ages 10+', ' Ages 13+', ' Ages 13+', ' Ages 9+', ' Ages 5+', ' Ages 8+', ' Ages 13+', ' Ages 13+', ' Ages 13+', ' Ages 8+', ' Ages 8+', ' Ages 8+', ' Ages 21+', ' 10 & up', ' Ages 7+', ' Ages 8+', ' Ages 5+', ' Ages 10+', ' Ages 13+', ' Ages 10+', ' 15 & up', ' Ages 13+', ' Ages 6+']
Wall time: 28.4 s


In [26]:
%%time

# Start from an empty list so re-running this cell does not duplicate entries.
list_age = []

# get_age appends as each request finishes, so the list ends up in
# completion order rather than list_test_url order.
with ThreadPoolExecutor(50) as pool:
    # Executor.map only re-raises a worker's exception when its result is
    # read, so drain the iterator with list() to make failed pages visible.
    results4 = list(pool.map(get_age, list_test_url))

print(list_age)

[' Ages 8+', ' Ages 14+', ' Ages 12+', ' Ages 10+', ' Ages 12+', ' Ages 12+', ' Ages 12+', ' Ages 10+', ' Ages 3+', ' Ages 12+', ' Ages 21+', ' Ages 4+', ' Ages 12+', ' Ages 13+', ' Ages 3+', ' Ages 14+', ' Ages 3+', ' Ages 10+', ' Ages 8+', ' 10 & up', ' Ages 13+', ' Ages 8+', ' 11 & up', ' Ages 14+', ' Ages 8+', ' Ages 5+', ' Ages 9+', ' Ages 8+', ' 12 & up', ' Ages 8+', ' Ages 6+', ' Ages 12+', ' Ages 18+', ' Ages 13+', ' Ages 10+', ' Ages 13+', ' Ages 13+', ' Ages 13+', ' Ages 21+', ' Ages 8+', ' Ages 10+', ' Ages 13+', ' Ages 7+', ' 10 & up', ' Ages 13+', ' Ages 10+', ' Ages 5+', ' 15 & up', ' Ages 13+', ' Ages 6+']
Wall time: 6.45 s


# Getting Description

In [27]:
# Probe on the sample page: the description is the text of the first <div class="inner">, with newlines removed.
description = soup2.find_all("div",{"class":"inner"})
description[0].text.replace("\n", "")


"What's more fun than making up click-baiting headlines? Getting the credit for it! Grab your friends, work together (or not), and prove your genius wordsmithing by making up some Alternative Facts!"

In [28]:
%%time

##Create a function for getting the description

list_description = []

def get_description(url):
    """Download one product page and record its description.

    The description is the text of the first ``<div>`` with class
    ``inner``, with newline characters removed. It is appended to the
    module-level ``list_description`` and also returned.

    Args:
        url (str): address of the product page.

    Returns:
        str: the description text without newlines.

    Raises:
        requests.RequestException: on timeout, connection failure or an
            HTTP error status.
        IndexError: if the page has no matching ``<div>``.
    """
    # Without a timeout a stalled connection would block the calling thread indefinitely.
    response6 = requests.get(url, timeout=30)
    # Do not scrape an error page.
    response6.raise_for_status()
    soup6 = BeautifulSoup(response6.text, 'html.parser')

    description = soup6.find_all("div",{"class":"inner"})[0].text.replace("\n", "")
    list_description.append(description)
    return description

Wall time: 0 ns


In [29]:
%%time

# Start from an empty list so re-running this cell does not duplicate entries.
list_description = []

##Call the function to get the description
# Serial baseline: one request per iteration, in list_test_url order.
for url in list_test_url:
    get_description(url)
    
print(list_description)

["What's more fun than making up click-baiting headlines? Getting the credit for it! Grab your friends, work together (or not), and prove your genius wordsmithing by making up some Alternative Facts!", '13 Minutes: The Cuban Missile Crisis is a card game where you play as either Kennedy or Khrushchev to exit the Cuban Missile Crisis as the ultimate superpower. Play strategy cards that will influence battlegrounds, and avoid triggering a global nuclear war!', 'Abracada...what? is an exciting deduction game where players battle each other as powerful wizards. The catch is that only your opponents see and know the spells you can cast, and if you cast the same spell too often you will lose the game! Great for families!', 'The second expansion for Among The Stars! Assume the role of an alien race tasked to build the best and most efficient space station for the Alliance! Includes additional Power Reactor cards, Energy tokens, player markers, and more. For up to 6 players.', "The classic Chu

In [30]:
%%time

# Start from an empty list so re-running this cell does not duplicate entries.
list_description = []

# get_description appends as each request finishes, so the list ends up in
# completion order rather than list_test_url order.
with ThreadPoolExecutor(50) as pool:
    # Executor.map only re-raises a worker's exception when its result is
    # read, so drain the iterator with list() to make failed pages visible.
    results5 = list(pool.map(get_description, list_test_url))

print(list_description)

["What's more fun than making up click-baiting headlines? Getting the credit for it! Grab your friends, work together (or not), and prove your genius wordsmithing by making up some Alternative Facts!", '13 Minutes: The Cuban Missile Crisis is a card game where you play as either Kennedy or Khrushchev to exit the Cuban Missile Crisis as the ultimate superpower. Play strategy cards that will influence battlegrounds, and avoid triggering a global nuclear war!', 'The second expansion for Among The Stars! Assume the role of an alien race tasked to build the best and most efficient space station for the Alliance! Includes additional Power Reactor cards, Energy tokens, player markers, and more. For up to 6 players.', "The classic Chutes and Ladders game with an Angry Birds theme! Race to King Pig's Castle by going up the ladder, but land on the wrong spot and you'll go down the chute! The first player to reach the Castle wins. Great for children ages 3+.", 'Apples to Apples is THE easy to lea

# Getting Players

In [31]:
# Probe on the sample page: the player count is the text of the <li> whose title attribute is "Number of Players".
players = soup2.find_all("li",{"title":"Number of Players"})
players[0].text

' 2-6 players'

In [32]:
%%time

##Create a function for getting number of players

list_players = []

def get_players(url):
    """Download one product page and record its number of players.

    The value is the text of the first ``<li>`` whose ``title`` attribute
    is ``"Number of Players"``. It is appended to the module-level
    ``list_players`` and also returned.

    Args:
        url (str): address of the product page.

    Returns:
        str: the text of that list item, unstripped.

    Raises:
        requests.RequestException: on timeout, connection failure or an
            HTTP error status.
        IndexError: if the page has no matching ``<li>``.
    """
    # Without a timeout a stalled connection would block the calling thread indefinitely.
    response7 = requests.get(url, timeout=30)
    # Do not scrape an error page.
    response7.raise_for_status()
    soup7 = BeautifulSoup(response7.text, 'html.parser')

    players = soup7.find_all("li",{"title":"Number of Players"})[0].text
    list_players.append(players)
    return players

Wall time: 0 ns


In [33]:
%%time

# Start from an empty list so re-running this cell does not duplicate entries.
list_players = []

##Call the function to get players
# Serial baseline: one request per iteration, in list_test_url order.
for url in list_test_url:
    get_players(url)
    
print(list_players)

[' 2-6 players', ' 2 players', ' 2-5 players', ' 2-4 players', ' 2-6 players', ' 2-12 players', ' 4-10 players', ' 1-8 players', ' 2-4 players', ' 2+ players', ' 3-6 players', ' 2-8 players', ' 2-4 players', ' 2-4 players', ' 2-4 players', ' 1 to 6', ' 2-4 players', ' 2-5 players', ' 4-8 players', ' 2-6 players', ' 4+ players', ' 2-8 players', ' 2-4 players', ' 1 to 4', ' 2-6 players', ' 2-8 players', ' 2 to 4', ' 2-6 players', ' 2-5 players', ' 2-5 players', ' 2-6 players', ' 4+ players', ' 2 players', ' 2-4 players', ' 4-10 players', ' 4-10 players', ' 2-4 players', ' 1-4 players', ' 2-4 players', ' 3-6 players', ' 2 to 4', ' 2-10 players', ' 1+ players', ' 2-4 players', ' 2-5 players', ' 1 players', ' 1-5 players', ' 1 to 4', ' 2-5 players', ' 2-4 players']
Wall time: 30.2 s


In [34]:
%%time

# Start from an empty list so re-running this cell does not duplicate entries.
list_players = []

# get_players appends as each request finishes, so the list ends up in
# completion order rather than list_test_url order.
with ThreadPoolExecutor(50) as pool:
    # Executor.map only re-raises a worker's exception when its result is
    # read, so drain the iterator with list() to make failed pages visible.
    results6 = list(pool.map(get_players, list_test_url))

print(list_players)

[' 2 players', ' 2-5 players', ' 2-4 players', ' 1 to 6', ' 4-10 players', ' 1-8 players', ' 2+ players', ' 2-4 players', ' 2-12 players', ' 2-4 players', ' 2-6 players', ' 2-6 players', ' 2-8 players', ' 4+ players', ' 3-6 players', ' 2-4 players', ' 2 to 4', ' 2-4 players', ' 2-4 players', ' 2-4 players', ' 2-6 players', ' 2-5 players', ' 2-8 players', ' 1 to 4', ' 4-8 players', ' 4-10 players', ' 2-6 players', ' 2-5 players', ' 2-8 players', ' 2-4 players', ' 2-10 players', ' 2 to 4', ' 1+ players', ' 2-4 players', ' 2-6 players', ' 2-6 players', ' 2 players', ' 4+ players', ' 1-4 players', ' 1 to 4', ' 4-10 players', ' 1-5 players', ' 2-4 players', ' 2-4 players', ' 2-5 players', ' 2-5 players', ' 3-6 players', ' 1 players', ' 2-5 players', ' 2-4 players']
Wall time: 7.29 s


# Getting time to play

In [35]:
# Probe on the sample page: the play time is the text of the <li> whose title attribute is "Play Time".
play_time = soup2.find_all("li",{"title":"Play Time"})
play_time[0].text

' 15-30 min'

In [36]:
%%time

##Create a function for getting time to play

list_play_time = []

def get_play_time(url):
    """Download one product page and record its play time.

    The value is the text of the first ``<li>`` whose ``title`` attribute
    is ``"Play Time"``. It is appended to the module-level
    ``list_play_time`` and also returned.

    Args:
        url (str): address of the product page.

    Returns:
        str: the text of that list item, unstripped.

    Raises:
        requests.RequestException: on timeout, connection failure or an
            HTTP error status.
        IndexError: if the page has no matching ``<li>``.
    """
    # Without a timeout a stalled connection would block the calling thread indefinitely.
    response8 = requests.get(url, timeout=30)
    # Do not scrape an error page.
    response8.raise_for_status()
    soup8 = BeautifulSoup(response8.text, 'html.parser')

    play_time = soup8.find_all("li",{"title":"Play Time"})[0].text
    list_play_time.append(play_time)
    return play_time

Wall time: 0 ns


In [37]:
%%time

# Start from an empty list so re-running this cell does not duplicate entries.
list_play_time = []

##Call the function to get play time
# Serial baseline: one request per iteration, in list_test_url order.
for url in list_test_url:
    get_play_time(url)
    
print(list_play_time)

[' 15-30 min', ' 13 min.', ' 20-30 min.', ' 30 min.', ' 30 min.', ' 15 min.', ' 30 min.', ' 2-6 hours', ' 30-45 min.', ' 10 min.', ' 1 hour', ' 30-180 min.', ' 1-1.5 hours', ' 20 min.', ' 20-30 min.', ' 40-60 minutes', ' 30 min.', ' 30-45 min.', ' 30 min.', ' 15-20 min.', ' 30 min.', ' 30 min.', ' 15 min.', ' 30-60 minutes', ' 30 min.', ' 15-30 min.', ' 15-30 minutes', ' 30-40 min.', ' 20 min.', ' 20 min.', ' 15 min.', ' 30 min.', ' --', ' 30 min.', ' 30 min.', ' 30 min.', ' 15 min.', ' 15 min.', ' 15 min.', ' 20 min.', ' 15 minutes', ' 30-45 min.', ' 5-15 min.', ' 10 min.', ' 45 min.', ' 25 min.', ' 10 min.', ' 30-120 minutes', ' 60 min.', ' 15 min.']
Wall time: 28.7 s


In [38]:
%%time

# Start from an empty list so re-running this cell does not duplicate entries.
list_play_time = []

# get_play_time appends as each request finishes, so the list ends up in
# completion order rather than list_test_url order.
with ThreadPoolExecutor(50) as pool:
    # Executor.map only re-raises a worker's exception when its result is
    # read, so drain the iterator with list() to make failed pages visible.
    results7 = list(pool.map(get_play_time, list_test_url))

print(list_play_time)

[' 30 min.', ' 15-30 min', ' 13 min.', ' 30 min.', ' 10 min.', ' 2-6 hours', ' 40-60 minutes', ' 20-30 min.', ' 30-45 min.', ' 15 min.', ' 30 min.', ' 15-20 min.', ' 1 hour', ' 1-1.5 hours', ' 30 min.', ' 20-30 min.', ' 30-40 min.', ' 20 min.', ' 30 min.', ' 15-30 minutes', ' 30 min.', ' 30-180 min.', ' 20 min.', ' 30-45 min.', ' 30-60 minutes', ' 15-30 min.', ' 30 min.', ' 15 min.', ' 30 min.', ' 30 min.', ' 30 min.', ' 20 min.', ' 5-15 min.', ' 15 min.', ' --', ' 15 min.', ' 15 minutes', ' 30 min.', ' 15 min.', ' 10 min.', ' 25 min.', ' 30 min.', ' 15 min.', ' 30-45 min.', ' 15 min.', ' 45 min.', ' 60 min.', ' 30-120 minutes', ' 10 min.', ' 20 min.']
Wall time: 11.3 s


# I need to combine all in one function in order to maintain a correct order of entries

In [50]:
%%time
# One list per output column; index i of every list must describe the same game.
list_names = []
list_original_prices = []
list_current_prices = []
list_age = []
list_players = []
list_play_time = []
list_description = []

# grab_all runs on several threads at once. Holding this lock while writing
# one game's seven fields keeps another thread from appending in between,
# which would shift the columns against each other.
_grab_all_lock = threading.Lock()


def grab_all(url):
    """Download one product page and record all of its fields as one row.

    Every field is extracted before anything is stored, so a page that is
    missing an element raises without leaving a partial row behind. The
    seven values are then appended to the module-level ``list_*`` lists
    under a lock, and the same values are returned.

    Args:
        url (str): address of the product page.

    Returns:
        tuple of str: ``(name, original_price, current_price, age, players,
        play_time, description)``. ``original_price`` is ``"No sale"`` when
        the compare-price element is empty.

    Raises:
        requests.RequestException: on timeout, connection failure or an
            HTTP error status.
        IndexError: if any of the expected elements is absent from the page.
    """
    # Without a timeout a stalled connection would block its worker thread indefinitely.
    newresponse1 = requests.get(url, timeout=30)
    # Do not scrape an error page.
    newresponse1.raise_for_status()
    newsoup1 = BeautifulSoup(newresponse1.text, 'html.parser')

    name = newsoup1.find_all("h1",{"class":"product-single__title"})[0].text

    original_price = newsoup1.select('#ComparePrice-product-template')[0].text
    # An empty element is stored under an explicit label instead of ''.
    if original_price == '':
        original_price = "No sale"

    current_price = newsoup1.select('#ProductPrice-product-template')[0].text
    age = newsoup1.select('#ProductSection-product-template > div > div > div > ul.prodFeature > li')[0].text
    players = newsoup1.find_all("li",{"title":"Number of Players"})[0].text
    play_time = newsoup1.find_all("li",{"title":"Play Time"})[0].text
    description = newsoup1.find_all("div",{"class":"inner"})[0].text.replace("\n", "")

    # Write the whole row atomically with respect to the other workers.
    with _grab_all_lock:
        list_names.append(name)
        list_original_prices.append(original_price)
        list_current_prices.append(current_price)
        list_age.append(age)
        list_players.append(players)
        list_play_time.append(play_time)
        list_description.append(description)

    return (name, original_price, current_price, age, players, play_time, description)


Wall time: 0 ns


In [51]:
# Start from empty lists so re-running this cell does not duplicate rows.
list_names = []
list_original_prices= []
list_current_prices = []
list_age = []
list_players = []
list_play_time = []
list_description = []
with ThreadPoolExecutor(3) as pool:
    # Executor.map only re-raises a worker's exception when its result is
    # read, so drain the iterator with list() to make failed pages visible.
    results_all = list(pool.map(grab_all, list_test_url))


In [52]:
%%time
# Print each scraped column on its own line, in table-column order.
for scraped_column in (
    list_names,
    list_original_prices,
    list_current_prices,
    list_age,
    list_players,
    list_play_time,
    list_description,
):
    print(scraped_column)


['#Alternative Facts', '13 Minutes: The Cuban Missile Crisis', 'Abracada...What?', 'Angry Birds Chutes & Ladders Editon', 'Animal Act', 'Among The Stars: Expanding the Alliance', 'Apples to Apples', 'Awesome Kingdom: Tower of Hateskull', 'Arkham Horror', 'Better Me', 'Bachelorette Drink or Dare Dice', 'Betrayal at House on the Hill', 'Boss Monster', 'Blood Rage', 'Boss Monster 2: The Next Level', 'Buffy The Vampire Slayer: The Board Game', "Cash 'N Guns", 'Cartoon Network Crossover Crisis Deck-Building Game', 'Candy Land', 'Cat Tower', 'Censored Game', 'Chutes and Ladders', 'Chronology', 'Codenames Pictures', 'Clubs Card Game', 'Clank! A Deck Building Adventure', 'Council of Verona', 'Codenames: Duet', 'Colt Express', 'Coup', 'Dancing Eggs', 'Council of Verona: Corruption Expansion', 'Djubi Game', 'Dominion', "Don't Be A Loser", "Don't Be A Loser Adult Expansion", 'Dr. Beaker', 'Dr. Eureka Speed Logic Game', 'Dr. Microbe', 'Drunk Quest', 'Escape: Zombie City', "Flick 'Em Up", 'Flip Tri

In [53]:
# Wrap every scraped list in a single-column DataFrame, rebinding the same
# names, so the next cell can place them side by side with pd.concat.
(
    list_names,
    list_original_prices,
    list_current_prices,
    list_age,
    list_players,
    list_play_time,
    list_description,
) = [
    pd.DataFrame(scraped_column)
    for scraped_column in (
        list_names,
        list_original_prices,
        list_current_prices,
        list_age,
        list_players,
        list_play_time,
        list_description,
    )
]


In [54]:
# Column labels, in the same order as the frames passed to pd.concat below.
game_columns = ["Game Name",
                "Original Price",
                "Current Price",
                "Age of Players",
                "Number of Players",
                "Play Time",
                "Description"]
game_frames = [list_names, list_original_prices, list_current_prices,
               list_age, list_players, list_play_time, list_description]

# pd.concat(axis=1) pads shorter inputs with NaN, which would quietly pair
# fields from different games; refuse to build the table in that case.
frame_lengths = [len(frame) for frame in game_frames]
if len(set(frame_lengths)) > 1:
    raise ValueError("Scraped columns have different lengths: %s" % frame_lengths)

Games_DF = pd.concat(game_frames, axis = 1, ignore_index=True)
Games_DF.columns = game_columns
Games_DF

Unnamed: 0,Game Name,Original Price,Current Price,Age of Players,Number of Players,Play Time,Description
0,#Alternative Facts,$10.00,$9.00,Ages 14+,2-6 players,15-30 min,What's more fun than making up click-baiting h...
1,13 Minutes: The Cuban Missile Crisis,$10.99,$9.89,Ages 10+,2 players,13 min.,13 Minutes: The Cuban Missile Crisis is a card...
2,Abracada...What?,$34.99,$31.49,Ages 8+,2-5 players,20-30 min.,Abracada...what? is an exciting deduction game...
3,Angry Birds Chutes & Ladders Editon,$19.99,$17.99,Ages 3+,2-6 players,30 min.,The classic Chutes and Ladders game with an An...
4,Animal Act,$19.95,$17.95,Ages 4+,2-12 players,15 min.,Animal Act is a game that encourages kids to f...
5,Among The Stars: Expanding the Alliance,$29.95,$27.99,Ages 12+,2-4 players,30 min.,The second expansion for Among The Stars! Assu...
6,Apples to Apples,$26.99,$24.29,Ages 12+,4-10 players,30 min.,"Apples to Apples is THE easy to learn, laugh o..."
7,Awesome Kingdom: Tower of Hateskull,$29.99,$26.99,Ages 10+,2-4 players,30-45 min.,Compete to be the most Awesome adventurer in t...
8,Arkham Horror,$59.95,$53.96,Ages 12+,1-8 players,2-6 hours,It is the 1920's and unnatural things are happ...
9,Better Me,$80.00,$54.95,Ages 12+,2-8 players,30-180 min.,"If you are looking for a meaningful game, Bett..."


In [44]:
# Save the table as CSV in the working directory.
# NOTE(review): the file name has no ".csv" extension and the row index is written as the first column; confirm both are intended.
Games_DF.to_csv("Board_Games")

# Enjoy :)

## Parallelisation made the scraping considerably faster.