In [4]:
import requests
import pandas as pd
import xml.etree.ElementTree as ET

def fetch_game_data(game_id):
    """Fetch game data for a given game ID."""
    url = f'https://boardgamegeek.com/xmlapi2/thing?id={game_id}&stats=1'
    response = requests.get(url)
    while response.status_code != 200:
        response = requests.get(url)  # Retry fetching until success
    return response.text

def parse_game_data(game_xml):
    """Parse XML data for a single game."""
    root = ET.fromstring(game_xml)
    game_info = {}
    for item in root.findall('item'):
        game_info['id'] = item.get('id')
        game_info['name'] = item.find('name').get('value')
        statistics = item.find('statistics')
        ratings = statistics.find('ratings')
        game_info['average_rating'] = ratings.find('average').get('value')
        game_info['rank'] = ratings.find('ranks/rank').get('value')
    return game_info

def main():
    game_ids = range(1, 301)  # First 500 game IDs
    games_data = []

    for game_id in game_ids:
        game_xml = fetch_game_data(game_id)
        game_data = parse_game_data(game_xml)
        games_data.append(game_data)
        print(f"Fetched and parsed game ID: {game_id}")

    df = pd.DataFrame(games_data)
    print(df.head())
    df.to_csv('bgg_games_data.csv', index=False)

if __name__ == '__main__':
    main()


Fetched and parsed game ID: 1
Fetched and parsed game ID: 2
Fetched and parsed game ID: 3
Fetched and parsed game ID: 4
Fetched and parsed game ID: 5
Fetched and parsed game ID: 6
Fetched and parsed game ID: 7
Fetched and parsed game ID: 8
Fetched and parsed game ID: 9
Fetched and parsed game ID: 10
Fetched and parsed game ID: 11
Fetched and parsed game ID: 12
Fetched and parsed game ID: 13
Fetched and parsed game ID: 14
Fetched and parsed game ID: 15
Fetched and parsed game ID: 16
Fetched and parsed game ID: 17
Fetched and parsed game ID: 18
Fetched and parsed game ID: 19
Fetched and parsed game ID: 20
Fetched and parsed game ID: 21
Fetched and parsed game ID: 22
Fetched and parsed game ID: 23
Fetched and parsed game ID: 24
Fetched and parsed game ID: 25
Fetched and parsed game ID: 26
Fetched and parsed game ID: 27
Fetched and parsed game ID: 28
Fetched and parsed game ID: 29
Fetched and parsed game ID: 30
Fetched and parsed game ID: 31
Fetched and parsed game ID: 32
Fetched and parse

In [1]:
import requests
import pandas as pd
import xml.etree.ElementTree as ET

def fetch_game_data(game_id):
    """Fetch game data for a given game ID."""
    url = f'https://boardgamegeek.com/xmlapi2/thing?id={game_id}&stats=1'
    response = requests.get(url)
    while response.status_code != 200:
        response = requests.get(url)  # Retry fetching until success
    return response.text

def parse_game_data(game_xml):
    """Parse XML data for a single game."""
    root = ET.fromstring(game_xml)
    game_info = {}
    for item in root.findall('item'):
        game_info['id'] = item.get('id')
        game_info['name'] = item.find('name').get('value')
        statistics = item.find('statistics')
        ratings = statistics.find('ratings')
        game_info['average_rating'] = ratings.find('average').get('value')
        game_info['rank'] = ratings.find('ranks/rank').get('value')
        
        # Additional fields
        game_info['min_players'] = item.find('minplayers').get('value')
        game_info['max_players'] = item.find('maxplayers').get('value')
        game_info['playing_time'] = item.find('playingtime').get('value')
        game_info['min_playtime'] = item.find('minplaytime').get('value')
        game_info['max_playtime'] = item.find('maxplaytime').get('value')
        
        # Publishers
        publishers = [link.get('value') for link in item.findall("link") if link.get('type') == 'boardgamepublisher']
        game_info['publishers'] = publishers

        # Categories
        categories = [link.get('value') for link in item.findall("link") if link.get('type') == 'boardgamecategory']
        game_info['categories'] = categories

    return game_info

def main():
    game_ids = range(1, 301)  # First 300 game IDs
    games_data = []

    for game_id in game_ids:
        game_xml = fetch_game_data(game_id)
        game_data = parse_game_data(game_xml)
        games_data.append(game_data)
        print(f"Fetched and parsed game ID: {game_id}")

    df = pd.DataFrame(games_data)
    print(df.head())
    df.to_csv('bgg_games_data.csv', index=False)

if __name__ == '__main__':
    main()


Fetched and parsed game ID: 1
Fetched and parsed game ID: 2
Fetched and parsed game ID: 3
Fetched and parsed game ID: 4
Fetched and parsed game ID: 5
Fetched and parsed game ID: 6
Fetched and parsed game ID: 7
Fetched and parsed game ID: 8
Fetched and parsed game ID: 9
Fetched and parsed game ID: 10
Fetched and parsed game ID: 11
Fetched and parsed game ID: 12
Fetched and parsed game ID: 13
Fetched and parsed game ID: 14
Fetched and parsed game ID: 15
Fetched and parsed game ID: 16
Fetched and parsed game ID: 17
Fetched and parsed game ID: 18
Fetched and parsed game ID: 19
Fetched and parsed game ID: 20
Fetched and parsed game ID: 21
Fetched and parsed game ID: 22
Fetched and parsed game ID: 23
Fetched and parsed game ID: 24
Fetched and parsed game ID: 25
Fetched and parsed game ID: 26
Fetched and parsed game ID: 27
Fetched and parsed game ID: 28
Fetched and parsed game ID: 29
Fetched and parsed game ID: 30
Fetched and parsed game ID: 31
Fetched and parsed game ID: 32
Fetched and parse

## Users

In [5]:

import time

In [6]:


def fetch_xml(url):
    """Utility function to fetch XML data from a URL."""
    response = requests.get(url)
    while response.status_code != 200:
        # Adding a brief pause to respect rate limits and avoid being blocked
        time.sleep(1)
        response = requests.get(url)
    return response.text

def extract_usernames_from_comments(game_id, max_usernames=50):
    """Extract usernames from comments of a specified game, limited by max_usernames."""
    usernames = []
    page = 1
    while len(usernames) < max_usernames:
        url = f'https://boardgamegeek.com/xmlapi2/thing?id={game_id}&ratingcomments=1&page={page}'
        xml_data = fetch_xml(url)
        root = ET.fromstring(xml_data)
        comments = root.findall('.//comment')
        if not comments:
            break  # Exit if no more comments are available
        for comment in comments:
            if len(usernames) >= max_usernames:
                break
            usernames.append(comment.get('username'))
        page += 1
    return usernames

def fetch_user_data(username):
    """Fetch detailed user data for a given username."""
    url = f'https://boardgamegeek.com/xmlapi2/user?name={username}'
    xml_data = fetch_xml(url)
    root = ET.fromstring(xml_data)
    user_info = {'username': username}
    user_info['id'] = root.find('user').get('id') if root.find('user') is not None else None
    return user_info

def save_usernames_to_txt(usernames, filename='usernames.txt'):
    """Save usernames to a text file."""
    with open(filename, 'w') as file:
        for username in usernames:
            file.write(f'{username}\n')
    print(f'Usernames saved to {filename}')

def main():
    # Example to fetch comments from game ID 13 (Catan)
    usernames = extract_usernames_from_comments(13)
    save_usernames_to_txt(usernames)  # Save usernames to a .txt file
    users_data = [fetch_user_data(username) for username in usernames[:50]]  # Fetch data for 50 usernames

    df = pd.DataFrame(users_data)
    print(df.head())
    df.to_csv('bgg_users_data.csv', index=False)

if __name__ == '__main__':
    main()


Usernames saved to usernames.txt
      username    id
0   sidehacker  None
1  Varthlokkur  None
2   dougthonus  None
3       cypar7  None
4      ssmooth  None


In [5]:
import requests
import pandas as pd
import xml.etree.ElementTree as ET
import time

def fetch_xml(url):
    """Utility function to fetch XML data from a URL."""
    response = requests.get(url)
    while response.status_code != 200:
        # Adding a brief pause to respect rate limits and avoid being blocked
        time.sleep(1)
        response = requests.get(url)
    return response.text

def extract_usernames_from_comments(game_id, max_usernames=50):
    """Extract usernames from comments of a specified game, limited by max_usernames."""
    usernames = []
    page = 1
    while len(usernames) < max_usernames:
        url = f'https://boardgamegeek.com/xmlapi2/thing?id={game_id}&ratingcomments=1&page={page}'
        xml_data = fetch_xml(url)
        root = ET.fromstring(xml_data)
        comments = root.findall('.//comment')
        if not comments:
            break  # Exit if no more comments are available
        for comment in comments:
            if len(usernames) >= max_usernames:
                break
            usernames.append(comment.get('username'))
        page += 1
    return usernames

def fetch_user_data(username):
    """Fetch detailed user data for a given username, including their game collection."""
    url = f'https://boardgamegeek.com/xmlapi2/user?name={username}&buddies=1&hot=1&top=1'
    user_xml = fetch_xml(url)
    user_root = ET.fromstring(user_xml)

    user_info = {
        'username': username,
        'id': user_root.find('user').get('id') if user_root.find('user') is not None else None,
        'games': []
    }

    # Fetching the user's game collection
    collection_url = f'https://boardgamegeek.com/xmlapi2/collection?username={username}&own=1'
    collection_xml = fetch_xml(collection_url)
    collection_root = ET.fromstring(collection_xml)
    for item in collection_root.findall('item'):
        game_name = item.find('name').text
        user_info['games'].append(game_name)

    return user_info

def save_usernames_to_txt(usernames, filename='usernames.txt'):
    """Save usernames to a text file."""
    with open(filename, 'w') as file:
        for username in usernames:
            file.write(f'{username}\n')
    print(f'Usernames saved to {filename}')

def main():
    # Example to fetch comments from game ID 13 (Catan)
    usernames = extract_usernames_from_comments(13)
    save_usernames_to_txt(usernames)  # Save usernames to a .txt file
    users_data = [fetch_user_data(username) for username in usernames[:50]]  # Fetch data for 50 usernames

    df = pd.DataFrame(users_data)
    print(df.head())
    df.to_csv('bgg_users_data.csv', index=False)

if __name__ == '__main__':
    main()


Usernames saved to usernames.txt
      username    id                                              games
0   sidehacker  None  [7 Wonders, 7 Wonders: Babel, Above and Below,...
1  Varthlokkur  None  [Acquire, Advanced Civilization, Agricola, Agr...
2   dougthonus  None  [Acquire, CATAN, Catan Card Game, CATAN: Citie...
3       cypar7  None  [Agricola, Bad Grandmas, Beer & Bread, The Big...
4      ssmooth  None  [6 Nimmt!, Abalone, Acquire, Advance to Boardw...
