In [76]:
from bs4 import BeautifulSoup
import numpy as np
import pandas as pd
import requests

from credentials import get_credentials

# JogoNaMesa

In [77]:
def get_prices():
	'''
	Scrapes www.jogonamesa.pt for board game prices. Takes NO parameters.

	Returns a list of boardgame names, and pandas DataFrame containing those games' prices.
	'''

	session = requests.session()
	login_url = 'https://jogonamesa.pt/P/user_login.cgi'
	login = session.get(login_url, headers={'User-Agent': 'Mozilla/5.0'})
	login = session.post(
						 login_url,
						 data=get_credentials()
						)

	wishlist_url = 'https://jogonamesa.pt/P/user_wishlist.cgi'
	wishlist = session.get(wishlist_url)
	wishlist_html = wishlist.text
	wishlist_soup = BeautifulSoup(wishlist_html, features='html.parser')

	pages = wishlist_soup.find_all('a', class_='paginacao')
	n_pages = int(len(pages) / 2)
	wishlist_urls = [wishlist_url + '?accao=8&num={}'.format(str(page_number)) for page_number in range(1, n_pages + 1)]

	games = {}
	for url in wishlist_urls:

		page = session.get(url)
		page_html = page.content.decode('utf-8','ignore') #The decode() function here circumvents incorrectly decoded utf8 characters (mostly accented vowels)
		page_soup = BeautifulSoup(page_html, features='html.parser')
		name_blocks = page_soup.find_all('div', class_='wishlist_caracteristicas')
		price_blocks = page_soup.find_all('div', class_='wishlist_opcoes')

		for i, (name_block, price_block) in enumerate(zip(name_blocks, price_blocks)):
			name = name_block.a.string
			price_tags = price_block.find_all('a', 'botao')
			try:
				prices = []
				if len(price_tags) != 0:

					for tag in price_tags:
						price = tag.contents[1].split('€')[1]
						availability = tag.find_next('span').contents[0].contents[0].string
						if 'Sem prev' not in availability:
							prices.append(price)

					if len(prices) == 0:
						raise AttributeError

					games[name] = min(prices)

				else:
					raise AttributeError

			except AttributeError:
				games[name] = np.nan

	price_table = pd.DataFrame.from_dict(games, orient='index').reset_index()

	price_table.columns = ['name', 'JogoNaMesa']
	price_table['name'] = price_table['name'].astype('str')
	price_table['JogoNaMesa'] = price_table['JogoNaMesa'].astype('float')
	price_table.sort_values(by=['name'], inplace=True)
	price_table.reset_index(inplace=True)
	price_table.drop(columns=['index'], inplace=True)

	games_list = list(games.keys())

	return games_list, price_table

In [78]:
test = get_prices()

51st State: Ultimate Edition
7 Wonders Duel: Agora
Abyss: Leviathan
Android: Netrunner
Beasty Bar 3: Born to Be Wild
Café
Canvas
Captain Sonar: Upgrade One
Dead Men Tell No Tales: The Kraken
Expeditions
Forgotten Waters
Golem
Gutenberg
Last Will
Long Shot: The Dice Game
Mansions of Madness: Second Edition – Beyond the Threshold: Expansion
Mansions of Madness: Second Edition – Recurring Nightmares: Figure and Tile Collection
Mansions of Madness: Second Edition – Sanctum of Twilight: Expansion
Mansions of Madness: Second Edition – Suppressed Memories: Figure and Tile Collection
Menara: Rituals & Ruins
Mission Control: Critical Orbit
My Father's Work
Pandemic Legacy: Season 1
Paris: La Cité de la Lumière
Rococo: Jewelry Box
Scarface 1920
Scythe: The Rise of Fenris
Tannhäuser
The Voyages of Marco Polo: Agents of Venice
Trekking Through History
Vast: The Crystal Caverns
When I Dream
World Wonders
3 Ring Circus
51st State: Master Set
7 Wonders: Cities Anniversary Pack
7 Wonders: Leaders Anni

In [219]:
test[1]

Unnamed: 0,name,JogoNaMesa
0,10 Minute Heist: The Wizard's Tower,
1,1960: The Making of the President,
2,3 Ring Circus,
3,5-Minute Dungeon,
4,5-Minute Mystery,
...,...,...
533,Yedo,
534,Zendo,
535,Zombicide: Black Plague,
536,Zooloretto,


# GamePlay

In [217]:
def get_prices(list_of_games):
    '''
    Scrapes www.gameplay.pt for boardgame prices. Takes the folllowing parameters:

    list_of_games (list): a list containing games of boardgames. This list is iterated over
                          to find the correspond prices on the website.

    Returns a pandas DataFrame containing the prices of all games present in list_of_games.
    '''

    session = requests.session()

    games = {}
    for i, game in enumerate(list_of_games):
        try:
            game_query = game
            if ' ' in game:
                game_query = game.replace(' ', '+')
                
            gameplay_url = 'http://www.gameplay.pt/en/search?search_query=' + game_query
            print('url:', gameplay_url)
            gameplay_headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.3; W…) Gecko/20100101 Firefox/65.0'.encode('utf-8')}
            gameplay_session = session.get(gameplay_url, headers=gameplay_headers)
            gameplay_text = gameplay_session.text
            gameplay_soup = BeautifulSoup(gameplay_text, features='html.parser')
            #print(gameplay_soup)
            search_results = gameplay_soup.find('a', class_="thumbnail product-thumbnail")
            print(search_results)
            print('results:', len(search_results), search_results['href'])
            href = search_results['href']

            game_page = session.get(href, headers=gameplay_headers)
            game_text = game_page.text
            game_soup = BeautifulSoup(game_text, features='html.parser')
            print(game_soup.find('div', class_='current-price').span.text[1:])

            prices = []
            print(prices)
            for result in search_results:
                name = result.a.string[1:-1]
                print(name)
                if ':' in game and ':' not in name:
                    try:
                        pre_colon = game.split(':')[0]
                        post_colon = game.split(':')[1]
                        pre_bracket = name.split('(')[0]
                        post_bracket = name.split('(')[1]
                    except IndexError:
                        raise ValueError
                    if pre_colon + post_colon == pre_bracket + post_bracket[:-1]:
                        price = result.span.string[1:-1].replace(',', '.')
                        prices.append(price)
                else:
                    if name == game:
                        price = result.span.string[1:-1].replace(',', '.')
                        prices.append(price)

            if len(prices) == 0:
                raise ValueError

            games[game] = min(prices)

        except ValueError as e:
            print(e)
            games[game] = np.nan
    print(games)
    price_table = pd.DataFrame.from_dict(games, orient='index').reset_index()
    price_table.columns = ['name', 'Gameplay']
    price_table['name'] = price_table['name'].astype('str')
    price_table['Gameplay'] = price_table['Gameplay'].astype('float')
    price_table.sort_values(by=['name'], inplace=True)
    price_table.reset_index(inplace=True)
    price_table.drop(columns=['index'], inplace=True)

    return price_table

In [218]:
test2 = get_prices(test[0][1:2])

url: http://www.gameplay.pt/en/search?search_query=7+Wonders+Duel:+Agora
<a class="thumbnail product-thumbnail" href="https://gameplay.pt/en/board-games/3592-preorder-7-wonders-duel-agora-5425016924402.html">
<img alt="7 Wonders Duel: Agora" data-full-size-image-url="https://gameplay.pt/13084-large_default/preorder-7-wonders-duel-agora.jpg" src="https://gameplay.pt/13084-home_default/preorder-7-wonders-duel-agora.jpg">
</img></a>
results: 2 https://gameplay.pt/en/board-games/3592-preorder-7-wonders-duel-agora-5425016924402.html
€21.95
[]


AttributeError: 'NavigableString' object has no attribute 'a'

In [191]:
test2

Unnamed: 0,name,Gameplay
0,51st State: Ultimate Edition,
1,7 Wonders Duel: Agora,
2,Abyss: Leviathan,
3,Android: Netrunner,
4,Beasty Bar 3: Born to Be Wild,
