In [3]:
from bs4 import BeautifulSoup
import numpy as np
import pandas as pd
import requests
from unidecode import unidecode

from credentials import get_credentials

# JogoNaMesa

In [36]:
def get_prices():
	'''
	Scrapes www.jogonamesa.pt for board game prices. Takes NO parameters.

	Returns a list of boardgame names, and pandas DataFrame containing those games' prices.
	'''

	session = requests.session()
	login_url = 'https://jogonamesa.pt/P/user_login.cgi'
	login = session.get(login_url, headers={'User-Agent': 'Mozilla/5.0'})
	login = session.post(
						 login_url,
						 data=get_credentials()
						)

	wishlist_url = 'https://jogonamesa.pt/P/user_wishlist.cgi'
	wishlist = session.get(wishlist_url)
	wishlist_html = wishlist.text
	wishlist_soup = BeautifulSoup(wishlist_html, features='html.parser')

	pages = wishlist_soup.find_all('a', class_='paginacao')
	n_pages = int(len(pages) / 2)
	wishlist_urls = [wishlist_url + '?accao=8&num={}'.format(str(page_number)) for page_number in range(1, n_pages + 1)]

	games = {}
	for url in wishlist_urls:

		page = session.get(url)
		page_html = page.content.decode('utf-8','ignore') #The decode() function here circumvents incorrectly decoded utf8 characters (mostly accented vowels)
		page_soup = BeautifulSoup(page_html, features='html.parser')
		name_blocks = page_soup.find_all('div', class_='wishlist_caracteristicas')
		price_blocks = page_soup.find_all('div', class_='wishlist_opcoes')

		for i, (name_block, price_block) in enumerate(zip(name_blocks, price_blocks)):
			name = name_block.a.string
			price_tags = price_block.find_all('a', 'botao')

			try:
				prices = []
				if len(price_tags) != 0:

					for tag in price_tags:
						price = tag.contents[1].split('€')[1]
						availability = tag.find_next('span').contents[0].string
						if 'Sem prev' not in availability:
							prices.append(price)

					if len(prices) == 0:
						raise AttributeError

					games[name] = min(prices)

				else:
					raise AttributeError

			except AttributeError:
				games[name] = np.nan

	price_table = pd.DataFrame.from_dict(games, orient='index').reset_index()

	price_table.columns = ['name', 'JogoNaMesa']
	price_table['name'] = price_table['name'].astype('str')
	price_table['JogoNaMesa'] = price_table['JogoNaMesa'].astype('float')
	price_table.sort_values(by=['name'], inplace=True)
	price_table.reset_index(inplace=True)
	price_table.drop(columns=['index'], inplace=True)

	games_list = list(games.keys())

	return games_list, price_table

In [37]:
jogonamesa = get_prices()

# GamePlay

In [50]:
def get_prices(list_of_games):
    '''
    Scrapes www.gameplay.pt for boardgame prices. Takes the following parameters:

    list_of_games (list): a list containing games of boardgames. This list is iterated over
                          to find the correspond prices on the website.

    Returns a pandas DataFrame containing the prices of all games present in list_of_games.
    '''

    session = requests.session()

    games = {}
    for i, game in enumerate(list_of_games):
        try:
            game_query = game
            if ' ' in game:
                game_query = game.replace(' ', '+')
                
            gameplay_url = 'http://www.gameplay.pt/en/search?search_query=' + game_query
            print('url:', gameplay_url)
            gameplay_headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.3; W…) Gecko/20100101 Firefox/65.0'.encode('utf-8')}
            gameplay_session = session.get(gameplay_url, headers=gameplay_headers)
            gameplay_text = gameplay_session.text
            gameplay_soup = BeautifulSoup(gameplay_text, features='html.parser')

            search_results = gameplay_soup.find_all('a', class_="thumbnail product-thumbnail")

            for result in search_results:

                game_url = result.img['data-full-size-image-url']
                print(result.img['data-full-size-image-url'])

                print('game_name:', unidecode(game.replace('Ultimate Edition', 'Master Set').lower()).replace('-', '').replace(' ', '-').replace(':', ''))
                curated_game_name = unidecode(game.replace('Ultimate Edition', 'Master Set').lower()).replace('-', '').replace(' ', '-').replace(':', '')

                print('game_url:', game_url.split('/')[-1].split('.')[0], '\n')
                curated_game_url = game_url.split('/')[-1].split('.')[0]
                
                # Fixes issues with "7 Wonders Duel: Agora" and "Aquatica" urls
                if 'preorder' in curated_game_url:
                    curated_game_url = curated_game_url.split('preorder-')[1]
                    print('\n\nExtra:', curated_game_url, '\n\n')

                if curated_game_name == curated_game_url:
                    
                    #print('\n#### Found ######\n')

                    game_page = session.get(result['href'], headers=gameplay_headers)
                    game_text = game_page.text
                    game_soup = BeautifulSoup(game_text, features='html.parser')
                    price = game_soup.find('div', class_='current-price').span.text[1:]
                    break

                else:
                    price = np.nan

            games[game] = price

        except TypeError as raised_error:
            print(raised_error)
            games[game] = np.nan

    price_table = pd.DataFrame.from_dict(games, orient='index').reset_index()
    price_table.columns = ['name', 'Gameplay']
    price_table['name'] = price_table['name'].astype('str')
    price_table['Gameplay'] = price_table['Gameplay'].astype('float')
    price_table.sort_values(by=['name'], inplace=True)
    price_table.reset_index(inplace=True)
    price_table.drop(columns=['index'], inplace=True)

    return price_table

In [51]:
gameplay = get_prices(jogonamesa[0])

url: http://www.gameplay.pt/en/search?search_query=51st+State:+Ultimate+Edition
https://gameplay.pt/19959-large_default/51st-state-master-set.jpg
game_name: 51st-state-master-set
game_url: 51st-state-master-set 


#### Found ######

url: http://www.gameplay.pt/en/search?search_query=7+Wonders+Duel:+Agora
https://gameplay.pt/13084-large_default/preorder-7-wonders-duel-agora.jpg
game_name: 7-wonders-duel-agora
game_url: preorder-7-wonders-duel-agora 



Extra: 7-wonders-duel-agora 



#### Found ######

url: http://www.gameplay.pt/en/search?search_query=Abyss:+Leviathan
https://gameplay.pt/11999-large_default/arkham-horror-the-card-game-scenario-pack-guardians-of-the-abyss.jpg
game_name: abyss-leviathan
game_url: arkham-horror-the-card-game-scenario-pack-guardians-of-the-abyss 

https://gameplay.pt/12245-large_default/abyss.jpg
game_name: abyss-leviathan
game_url: abyss 

https://gameplay.pt/12272-large_default/arkham-horror-the-card-game-c5p1-the-search-for-kadath.jpg
game_name: abyss-l

# JogarTabuleiro

In [46]:
def get_prices_jt(list_of_games):
	'''
	Scrapes www.jogartabuleiro.pt for board game prices. Takes the following parameters:

    list_of_games (list): a list containing games of boardgames. This list is iterated over
                          to find the correspond prices on the website.

    Returns a pandas DataFrame containing the prices of all games present in list_of_games.
	'''

	session = requests.session()

	games = {}
	for i, game in enumerate(list_of_games):
		print('\n', game)
		try:
			game_query = game
			if ' ' in game:
				game_query = game.replace(' ', '-')
			if ':' in game_query:
				game_query = game_query.replace(':', '')
				
			jogartabuleiro_url = 'https://jogartabuleiro.pt/produto/' + game_query
			print('url:', jogartabuleiro_url)
			jogartabuleiro_headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.3; W…) Gecko/20100101 Firefox/65.0'.encode('utf-8')}
			jogartabuleiro_session = session.get(jogartabuleiro_url, headers=jogartabuleiro_headers)
			jogartabuleiro_text = jogartabuleiro_session.text
			jogartabuleiro_soup = BeautifulSoup(jogartabuleiro_text, features='html.parser')
			#print('soup:', jogartabuleiro_soup)

			price_results = jogartabuleiro_soup.find_all('span', class_='woocommerce-Price-amount amount')
			rental_results = jogartabuleiro_soup.find_all('div', itemprop='description')
			language_results = jogartabuleiro_soup.find_all('td', class_='woocommerce-product-attributes-item__value')

			description = rental_results[0].text
			game_language = language_results[0].text.strip()

			if 'ALUGUER' in description:
				print('Rental only')
				raise IndexError
			
			if game_language != 'Inglês':
				print('Je ne parle pas français')
				raise IndexError

			price = price_results[1].text[1:]

			games[game] = price
		
		except IndexError as raised_error:
			print('Game not found:', raised_error)
			games[game] = np.nan

	price_table = pd.DataFrame.from_dict(games, orient='index').reset_index()
	price_table.columns = ['name', 'JogarTabuleiro']
	price_table['name'] = price_table['name'].astype('str')
	price_table['JogarTabuleiro'] = price_table['JogarTabuleiro'].astype('float')
	price_table.sort_values(by=['name'], inplace=True)
	price_table.reset_index(inplace=True)
	price_table.drop(columns=['index'], inplace=True)

	return price_table

In [47]:
jogartabuleiro = get_prices_jt(jogonamesa[0])


 51st State: Ultimate Edition
url: https://jogartabuleiro.pt/produto/51st-State-Ultimate-Edition
Game not found: list index out of range

 7 Wonders Duel: Agora
url: https://jogartabuleiro.pt/produto/7-Wonders-Duel-Agora
Je ne parle pas français
Game not found: 

 Abyss: Leviathan
url: https://jogartabuleiro.pt/produto/Abyss-Leviathan
Game not found: list index out of range

 Android: Netrunner
url: https://jogartabuleiro.pt/produto/Android-Netrunner
Je ne parle pas français
Game not found: 

 Beasty Bar 3: Born to Be Wild
url: https://jogartabuleiro.pt/produto/Beasty-Bar-3-Born-to-Be-Wild
Game not found: list index out of range

 Café
url: https://jogartabuleiro.pt/produto/Café
Game not found: list index out of range

 Canvas
url: https://jogartabuleiro.pt/produto/Canvas
Game not found: list index out of range

 Captain Sonar: Upgrade One
url: https://jogartabuleiro.pt/produto/Captain-Sonar-Upgrade-One
Game not found: list index out of range

 Dead Men Tell No Tales: The Kraken
url: 