In [1]:
# Imports
import pandas as pd
import requests
from xml.etree import ElementTree as ET

In [2]:
# Read csv
# Loads the local file 'initial_scrape_dataframe' (no extension) as CSV.
# Per the preview below it carries `rank`, `title` and `game id` columns;
# presumably it was written by an earlier scraping step — confirm provenance.
df = pd.read_csv('initial_scrape_dataframe')
df.head()

Unnamed: 0,rank,title,game id
0,1,Brass: Birmingham,224517
1,2,Pandemic Legacy: Season 1,161936
2,3,Ark Nova,342942
3,4,Gloomhaven,174430
4,5,Twilight Imperium: Fourth Edition,233078


In [3]:
# Pull the `game id` column out as a plain Python list to drive the API loop
game_ids = df['game id'].tolist()
game_ids

[224517,
 161936,
 342942,
 174430,
 233078,
 316554,
 167791,
 115746,
 187645,
 162886,
 397598,
 291457,
 220308,
 12333,
 182028,
 84876,
 193738,
 169786,
 246900,
 173346,
 28720,
 295770,
 167355,
 266507,
 177736,
 124361,
 341169,
 312484,
 205637,
 237182,
 192135,
 120677,
 266192,
 164928,
 373106,
 96848,
 251247,
 421006,
 324856,
 199792,
 183394,
 338960,
 366013,
 321608,
 285774,
 521,
 284378,
 175914,
 256960,
 247763,
 418059,
 3076,
 253344,
 295947,
 314040,
 102794,
 185343,
 184267,
 170216,
 31260,
 251661,
 383179,
 255984,
 161533,
 231733,
 221107,
 182874,
 205059,
 126163,
 2651,
 365717,
 244521,
 216132,
 35677,
 266810,
 276025,
 125153,
 164153,
 124742,
 414317,
 200680,
 209010,
 284083,
 390092,
 371942,
 55690,
 28143,
 230802,
 157354,
 201808,
 332772,
 159675,
 72125,
 322289,
 380607,
 240980,
 191189,
 366161,
 93,
 110327]

In [4]:
# Search for game id with API
# `url` is the endpoint prefix; the game id and the `stats=1` flag
# are appended to it to form the full request URL.
url = "https://boardgamegeek.com/xmlapi/boardgame/"
query = f"{game_ids[0]}?stats=1"

# Without a timeout `requests` waits forever on an unresponsive server,
# which would hang the kernel; 30 s is generous for a single-game lookup.
response = requests.get(url+query, timeout=30)
response

<Response [200]>

In [5]:
# Parse XML response with ElementTree
# ET.fromstring is given the raw response body (bytes) and returns the root
# Element; the next cell walks it with find()/findall().
root = ET.fromstring(response.content)

In [6]:
# Build one record per <boardgame> element found in the parsed response
game_list = []
for game in root.findall('boardgame'):
    # The game's identifier is an attribute of the <boardgame> element itself
    object_id = game.attrib['objectid']

    # Plain scalar fields: text of the first matching child element
    (year_published, min_players, max_players,
     min_playtime, max_playtime, age) = (
        game.find(tag).text
        for tag in ('yearpublished', 'minplayers', 'maxplayers',
                    'minplaytime', 'maxplaytime', 'age')
    )

    # The player-count poll summary is stored in an attribute, not in element text
    best_num_players = game.find('poll-summary').find('result').attrib['value']

    # Repeated child elements are gathered into lists of their text
    mechanics = [mechanic.text for mechanic in game.findall('boardgamemechanic')]
    categories = [category.text for category in game.findall('boardgamecategory')]

    # Rating figures are nested under statistics -> ratings
    ratings = game.find('statistics').find('ratings')
    avg_weight, num_ratings, users_rating, bgg_rating = (
        ratings.find(tag).text
        for tag in ('averageweight', 'usersrated', 'average', 'bayesaverage')
    )

    # Assemble the record; key order fixes the DataFrame column order
    game_dict = {
        "game id": object_id,
        "BGG Rating": bgg_rating,
        "Users Rating": users_rating,
        "Average Weight": avg_weight,
        "Number of Ratings": num_ratings,
        "Year Published": year_published,
        "Min Players": min_players,
        "Max Players": max_players,
        "Best Player Count": best_num_players,
        "Min Playtime": min_playtime,
        "Max Playtime": max_playtime,
        "Recommended Age": age,
        "Mechanics": mechanics,
        "Categories": categories
    }
    game_list.append(game_dict)

In [7]:
# Create pandas DataFrame from the list of per-game dictionaries:
# each dictionary becomes one row and its keys become the column names.
game_df = pd.DataFrame(game_list)
game_df

Unnamed: 0,game id,BGG Rating,Users Rating,Average Weight,Number of Ratings,Year Published,Min Players,Max Players,Best Player Count,Min Playtime,Max Playtime,Recommended Age,Mechanics,Categories
0,224517,8.40171,8.57641,3.8685,52400,2018,2,4,Best with 3–4 players,60,120,14,"[Chaining, End Game Bonuses, Hand Management, ...","[Age of Reason, Economic, Industry / Manufactu..."


In [8]:
# Function to create dictionary for each game
def create_dict(root):
    """Build a flat dictionary of game attributes from a parsed XML response.

    Parameters
    ----------
    root : xml.etree.ElementTree.Element
        Root element of the parsed response. Its direct children are searched
        for ``<boardgame>`` elements.

    Returns
    -------
    dict
        Keys, in order: ``"game id"``, ``"BGG Rating"``, ``"Users Rating"``,
        ``"Average Weight"``, ``"Number of Ratings"``, ``"Year Published"``,
        ``"Min Players"``, ``"Max Players"``, ``"Best Player Count"``,
        ``"Min Playtime"``, ``"Max Playtime"``, ``"Recommended Age"``,
        ``"Mechanics"``, ``"Categories"``.
        Scalar values are the raw element/attribute text (strings, not
        converted to numbers), or ``None`` when the element or attribute is
        absent. ``"Mechanics"`` and ``"Categories"`` are lists of strings
        (empty when no such elements exist). If ``root`` holds several
        ``<boardgame>`` elements, the last one is used.

    Raises
    ------
    ValueError
        If ``root`` has no ``<boardgame>`` child, so there is nothing to
        extract.
    """
    def _text(parent, path):
        # find() returns None for a missing element; pass None through
        # instead of failing with AttributeError on `.text`.
        if parent is None:
            return None
        node = parent.find(path)
        return None if node is None else node.text

    games = root.findall('boardgame')
    if not games:
        # Previously this fell through to `return game_dict` with the name
        # unbound and surfaced as an opaque UnboundLocalError.
        raise ValueError("response contains no <boardgame> element")
    # Keep the historical behaviour: with several games, the last one wins.
    game = games[-1]

    # The poll summary keeps its answer in the `value` attribute
    best_result = game.find('poll-summary/result')
    best_num_players = None if best_result is None else best_result.get('value')

    # Rating figures are nested under statistics -> ratings
    ratings = game.find('statistics/ratings')

    # Create dictionary (key order determines DataFrame column order)
    game_dict = {
        "game id": game.get('objectid'),
        "BGG Rating": _text(ratings, 'bayesaverage'),
        "Users Rating": _text(ratings, 'average'),
        "Average Weight": _text(ratings, 'averageweight'),
        "Number of Ratings": _text(ratings, 'usersrated'),
        "Year Published": _text(game, 'yearpublished'),
        "Min Players": _text(game, 'minplayers'),
        "Max Players": _text(game, 'maxplayers'),
        "Best Player Count": best_num_players,
        "Min Playtime": _text(game, 'minplaytime'),
        "Max Playtime": _text(game, 'maxplaytime'),
        "Recommended Age": _text(game, 'age'),
        "Mechanics": [m.text for m in game.findall('boardgamemechanic')],
        "Categories": [c.text for c in game.findall('boardgamecategory')]
    }
    return game_dict

In [9]:
# Redo list and attempt with multiple queries
games_list = []
# One Session for the whole loop so the underlying connection is reused
# instead of being re-established for every game.
with requests.Session() as session:
    # Iterate through game id list (`game_id`, not `id`, so the builtin
    # `id()` is not shadowed for the rest of the kernel session)
    for game_id in game_ids:
        # Perform query; the timeout keeps one stalled request from
        # hanging the entire scrape
        query = f"{game_id}?stats=1"
        response = session.get(url+query, timeout=30)
        # Fail loudly on a 4xx/5xx reply rather than trying to parse an
        # error page as game XML
        response.raise_for_status()
        # Parse data
        root = ET.fromstring(response.content)
        # Create dictionary and append list
        new_dict = create_dict(root)
        games_list.append(new_dict)

In [10]:
# Create dataframe from games list: one row per dictionary collected in the
# loop above, with the dictionary keys as column names.
# The bare `games_df` on the last line renders the frame as the cell output.
games_df = pd.DataFrame(games_list)
games_df

Unnamed: 0,game id,BGG Rating,Users Rating,Average Weight,Number of Ratings,Year Published,Min Players,Max Players,Best Player Count,Min Playtime,Max Playtime,Recommended Age,Mechanics,Categories
0,224517,8.40171,8.57641,3.8685,52400,2018,2,4,Best with 3–4 players,60,120,14,"[Chaining, End Game Bonuses, Hand Management, ...","[Age of Reason, Economic, Industry / Manufactu..."
1,161936,8.36123,8.51463,2.8296,55686,2015,2,4,Best with 4 players,60,60,13,"[Action Points, Cooperative Game, Hand Managem...","[Environmental, Medical]"
2,342942,8.34744,8.5367,3.7815,52904,2021,1,4,Best with 2 players,90,150,14,"[Action Queue, End Game Bonuses, Grid Coverage...","[Animals, Economic, Environmental]"
3,174430,8.32289,8.56163,3.9143,64999,2017,1,4,Best with 3 players,60,120,14,"[Action Queue, Action Retrieval, Campaign / Ba...","[Adventure, Exploration, Fantasy, Fighting, Mi..."
4,233078,8.22732,8.57652,4.3349,26210,2017,3,6,Best with 6 players,240,480,14,"[Action Drafting, Area-Impulse, Dice Rolling, ...","[Civilization, Economic, Exploration, Negotiat..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,240980,7.59353,8.36943,3.0351,8308,2022,6,21,Best with 9–12 players,30,120,15,"[Betting and Bluffing, Deduction, Hidden Roles...","[Bluffing, Deduction, Horror, Murder / Mystery..."
96,191189,7.59351,7.87755,2.8048,22538,2016,1,4,Best with 2 players,60,60,14,"[Chit-Pull System, Cooperative Game, Deck, Bag...","[Card Game, Fantasy, Fighting, Science Fiction]"
97,366161,7.5877,8.23848,2.6754,8265,2022,1,2,Best with 2 players,40,70,10,"[Dice Rolling, End Game Bonuses, Hand Manageme...","[Animals, Economic, Educational]"
98,93,7.58714,7.76475,2.9401,31223,1995,2,5,Best with 5 players,60,120,12,"[Action Drafting, Area Majority / Influence, A...","[Medieval, Renaissance]"
