In [17]:
import numpy as np
import pandas as pd
import math
from selenium import webdriver
from selenium.webdriver.edge.service import Service
from selenium.webdriver.edge.options import Options

import json
from bs4 import BeautifulSoup
import requests
import re

In [18]:
"""Finds a description for each game in the dataset. Applies it to a dataframe."""
def _extract_description(html_content):
    """Return the ``description`` field of a page's JSON-LD metadata.

    Returns None when the page has no ``<script type="application/ld+json">``
    tag, when the tag is empty, when its content is not valid JSON, or when
    the decoded JSON is not an object with a ``description`` key.
    """
    soup = BeautifulSoup(html_content, 'html.parser')
    script_element = soup.find('script', type='application/ld+json')
    # soup.find returns None for a missing tag; an empty tag has no string.
    if script_element is None or not script_element.string:
        return None
    try:
        data = json.loads(script_element.string)
    except json.JSONDecodeError:
        # One malformed page should not discard every description gathered so far.
        return None
    return data.get('description') if isinstance(data, dict) else None


def find_description(df, timeout=30):
    """Find a description for each game and add it to the dataframe.

    Every URL in ``df.bgg_url`` is downloaded with ``requests`` and the
    ``description`` field of the page's JSON-LD metadata is collected
    (None when the page does not provide one).

    Parameters
    ----------
    df : pandas.DataFrame
        Must have a ``bgg_url`` column and must not already have a
        ``description`` column.
    timeout : float, optional
        Seconds to wait for each HTTP request before giving up.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, modified in place: a ``description`` column is
        inserted at position 4 (or as the last column when ``df`` has fewer
        than four columns).

    Raises
    ------
    ValueError
        If ``df`` already has a ``description`` column.
    requests.HTTPError
        If any page responds with an HTTP error status.
    """
    # Fail before any network traffic: DataFrame.insert would reject the
    # duplicate column anyway, but only after every page had been downloaded.
    if 'description' in df.columns:
        raise ValueError("df already has a 'description' column")

    descriptions = []
    for url in df.bgg_url.values:
        # Without a timeout a single stalled connection blocks the scrape forever.
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
        descriptions.append(_extract_description(response.text))

    # Clamp the position so narrow frames do not make insert() raise.
    df.insert(min(4, len(df.columns)), 'description', descriptions)
    return df

In [19]:
# Load the board-game table; find_description / find_price read its `bgg_url` column.
df2 = pd.read_csv('boardgames.csv')  # comma is read_csv's default separator

In [14]:
"""Finds a price for each game in the dataset. Applies it to a dataframe."""
# CSS class string carried by the sale entries in a game's summary list.
_SALE_ITEM_CLASS = 'summary-item summary-sale-item ng-scope'


def _parse_price(price_block):
    """Return the first decimal number found in a sale item's ``<strong class="ng-binding">`` text.

    Commas are treated as decimal separators. Raises AttributeError when the
    tag or its text is missing and IndexError when the text holds no decimal
    number.
    """
    text = price_block.find('strong', class_='ng-binding').string.replace(',', '.')
    return float(re.findall(r"\d+\.\d+", text)[0])


def _first_sale_price(soup):
    """Return the price of the first of the first two sale items that has one, else NaN."""
    for price_block in soup.find_all('li', class_=_SALE_ITEM_CLASS, limit=2):
        try:
            return _parse_price(price_block)
        except (AttributeError, IndexError, TypeError, ValueError):
            # This item has no parsable price; fall through to the next one.
            continue
    return np.nan


def find_price(df, driver_path=r"C:\Users\20meh\EdgeWebDriver\msedgedriver.exe"):
    """Find a sale price for each game and return a copy of the dataframe with it.

    Every URL in ``df.bgg_url`` is opened in a headless Edge browser and the
    rendered page is searched for a sale price.

    Parameters
    ----------
    df : pandas.DataFrame
        Must have a ``bgg_url`` column. It is not modified.
    driver_path : str, optional
        Location of the ``msedgedriver`` executable.

    Returns
    -------
    pandas.DataFrame
        A copy of ``df`` with a float ``price`` column; NaN where neither of
        the first two sale items yields a price.
    """
    url_list = df.bgg_url.values
    # NaN marks "no price found" explicitly instead of relying on None -> NaN coercion.
    prices = np.full(len(url_list), np.nan)

    if len(url_list) > 0:
        options = Options()
        options.add_argument('--headless')
        # One browser serves every URL; starting a new one per page is far slower.
        driver = webdriver.Edge(service=Service(driver_path), options=options)
        try:
            for i, url in enumerate(url_list):
                driver.get(url)
                soup = BeautifulSoup(driver.page_source, 'html.parser')
                prices[i] = _first_sale_price(soup)
        finally:
            # Always shut the browser down, even if a page load fails.
            driver.quit()

    df_copy = df.copy()
    df_copy['price'] = prices
    return df_copy

In [15]:
# Scrape a price for every game in df2. Slow: each URL is loaded in a headless Edge browser.
df3 = find_price(df=df2)
df3

Unnamed: 0,rank,bgg_url,game_id,names,description,min_players,max_players,avg_time,min_time,max_time,...,geek_rating,num_votes,image_url,age,mechanic,owned,category,designer,weight,price
0,1,https://boardgamegeek.com/boardgame/174430/glo...,174430,Gloomhaven,Vanquish monsters with strategic cardplay. Ful...,1,4,120,60,120,...,8.61858,15376,https://cf.geekdo-images.com/original/img/lDN3...,12,"Action / Movement Programming, Co-operative Pl...",25928,"Adventure, Exploration, Fantasy, Fighting, Min...",Isaac Childres,3.7543,155.98
1,2,https://boardgamegeek.com/boardgame/161936/pan...,161936,Pandemic Legacy: Season 1,Mutating diseases are spreading around the wor...,2,4,60,60,60,...,8.50163,26063,https://cf.geekdo-images.com/original/img/P_Sw...,13,"Action Point Allowance System, Co-operative Pl...",41605,"Environmental, Medical","Rob Daviau, Matt Leacock",2.8210,60.89
2,3,https://boardgamegeek.com/boardgame/182028/thr...,182028,Through the Ages: A New Story of Civilization,Rewrite history as you build up your civilizat...,2,4,240,180,240,...,8.30183,12352,https://cf.geekdo-images.com/original/img/1d2h...,14,"Action Point Allowance System, Auction/Bidding...",15848,"Card Game, Civilization, Economic",Vlaada Chvátil,4.3678,66.18
3,4,https://boardgamegeek.com/boardgame/167791/ter...,167791,Terraforming Mars,Compete with rival CEOs to make Mars habitable...,1,5,120,120,120,...,8.19914,26004,https://cf.geekdo-images.com/original/img/o8z_...,12,"Card Drafting, Hand Management, Set Collection...",33340,"Economic, Environmental, Industry / Manufactur...",Jacob Fryxelius,3.2456,78.92
4,5,https://boardgamegeek.com/boardgame/12333/twil...,12333,Twilight Struggle,Relive the Cold War and rewrite history in an ...,2,2,180,120,180,...,8.19787,31301,https://cf.geekdo-images.com/original/img/ZPnn...,13,"Area Control / Area Influence, Campaign / Batt...",42952,"Modern Warfare, Political, Wargame","Ananda Gupta, Jason Matthews",3.5518,54.95
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4994,4995,https://boardgamegeek.com/boardgame/6569/cronberg,6569,Cronberg,,2,4,20,20,20,...,5.64032,456,https://cf.geekdo-images.com/original/img/pBmQ...,8,"Area Enclosure, Tile Placement",561,"City Building, Print & Play","Tobias Goslar, Roland Goslar",1.9074,20.00
4995,4996,https://boardgamegeek.com/boardgame/42713/arca...,42713,Arcane Legions,,2,6,120,60,120,...,5.64030,277,https://cf.geekdo-images.com/original/img/Ea2O...,10,"Action Point Allowance System, Dice Rolling, V...",825,"Ancient, Collectible Components, Dice, Fantasy...","Tyler Bielman, Jonathon Loucks, Ethan Pasterna...",2.3750,29.95
4996,4997,https://boardgamegeek.com/boardgame/836/new-en...,836,New England Railways,,3,6,120,120,120,...,5.64026,146,https://cf.geekdo-images.com/original/img/2EXT...,13,"Auction/Bidding, Pick-up and Deliver",440,"Economic, Trains","John Bohrer, Martin Wallace",2.6250,22.00
4997,4998,https://boardgamegeek.com/boardgame/2997/soldi...,2997,Soldier Kings,,2,8,240,240,240,...,5.64024,257,https://cf.geekdo-images.com/original/img/eNqF...,14,"Point to Point Movement, Variable Player Powers",659,"Age of Reason, Wargame",Robert G. Markham,2.4359,11.00


In [16]:
# Persist the priced table; index=False keeps the row index out of the CSV.
df3.to_csv(path_or_buf='boardgames_sale.csv', index=False)