In [11]:
from dataclasses import dataclass
import xml.etree.ElementTree as xe

@dataclass
class BoardGameInfo:
    '''
    Weight (complexity) statistics of a single board game.

    Fields mirror the values read by `from_item`: the `id` attribute of the item,
    the `value` of its primary `<name>`, and the `value` of `<numweights>` and
    `<averageweight>` found under `./statistics/ratings`.
    '''
    id: int
    name: str
    numweights: int
    averageweight: float

    @staticmethod
    def from_item(item: xe.Element) -> 'BoardGameInfo':
        '''
        Build a `BoardGameInfo` from one `<item>` element.

        The XML attribute values are strings; they are converted here so the
        instance really holds the `int`/`float` types declared on the fields.

        Raises:
            KeyError: if a required attribute (`id` or `value`) is missing.
            AttributeError: if a required child element is missing (`find` returned `None`).
            ValueError: if a numeric attribute cannot be parsed.
        '''
        # The return annotation is quoted because the class name is not bound yet
        # while the class body is being executed.
        primary_name = item.find("./name[@type='primary']")
        ratings = item.find('./statistics/ratings')

        return BoardGameInfo(
            id=int(item.attrib['id']),
            name=primary_name.attrib['value'],
            numweights=int(ratings.find('./numweights').attrib['value']),
            averageweight=float(ratings.find('./averageweight').attrib['value']),
        )
    

In [12]:
import aiohttp
import asyncio

# Root URL of the BoardGameGeek `xmlapi2` API; endpoint paths such as `/thing` are appended to it.
bgg_api_root = 'https://boardgamegeek.com/xmlapi2'

async def fill_dataset(output_path='dataset.txt', n_batches=2, batch_size=100):
    '''
    Download board game statistics from the BoardGameGeek API and write them to a text file.

    Thing ids `0 .. n_batches * batch_size - 1` are requested with `n_batches` concurrent
    requests of `batch_size` consecutive ids each. Every returned item is converted with
    `BoardGameInfo.from_item` and written as one `str(...)` line, in ascending batch order.

    Args:
        output_path: destination file. It is (over)written only after every batch has been
            fetched and parsed successfully, so a failed run never leaves a partial file.
        n_batches: number of concurrent API requests.
        batch_size: number of ids per request, between 1 and 100.

    Raises:
        ValueError: if `batch_size` is outside `1..100`.
        RuntimeError: if the API answers with an HTTP status other than 200.
    '''
    max_ids_per_request = 100  # same value as the `pagesize=100` query parameter below
    if not 1 <= batch_size <= max_ids_per_request:
        raise ValueError(f'batch_size must be between 1 and {max_ids_per_request}, got {batch_size}')

    async with aiohttp.ClientSession() as client:
        async def get_boardgamegeek_thing(*thing_ids):
            '''Return the decoded XML body of the `/thing` endpoint for the given ids.'''
            url = f'{bgg_api_root}/thing?id={",".join(str(x) for x in thing_ids)}&type=boardgame&pagesize=100&stats=1'
            async with client.get(url) as response:
                # Explicit check instead of `assert`, which is stripped under `python -O`.
                if response.status != 200:
                    raise RuntimeError(f'BoardGameGeek API returned HTTP {response.status} for {url}')
                return (await response.read()).decode('utf-8')

        async def get_boardgamegeek_rulebook(thing):
            # TODO: not implemented yet (placeholder).
            ...

        async def get_bg_info(*thing_ids):
            '''
            Get the list of `BoardGameInfo` for the requested thing_ids. The list can be shorter
            than `len(thing_ids)`, presumably because the request is restricted with `type=boardgame`.

            NOTE(review): games without any weight vote are NOT filtered out here; filter on
            `numweights` downstream if that is required.
            '''
            if len(thing_ids) > max_ids_per_request:
                raise ValueError(f'at most {max_ids_per_request} ids per request, got {len(thing_ids)}')

            items = xe.fromstring(await get_boardgamegeek_thing(*thing_ids))
            return [BoardGameInfo.from_item(item) for item in items]

        batches = [range(i * batch_size, (i + 1) * batch_size) for i in range(n_batches)]
        # gather() returns the results in the order of its arguments, so the file content
        # does not depend on which request happens to finish first.
        bg_infos_per_batch = await asyncio.gather(*(get_bg_info(*batch) for batch in batches))

    # Explicit UTF-8: game names contain non-ASCII characters and the platform default
    # encoding may not be able to represent them.
    with open(output_path, 'w', encoding='utf-8') as f_out:
        for bg_infos in bg_infos_per_batch:
            # One line per game; an empty batch contributes nothing (no stray blank line).
            f_out.writelines(str(bg_info) + '\n' for bg_info in bg_infos)
                
    
# Top-level `await` relies on the notebook kernel's support for awaiting in a cell;
# in a plain script this would be `asyncio.run(fill_dataset())`.
await fill_dataset()